From 2ba459685204af53b034d269d5cdb3059d4b471e Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 25 Mar 2015 13:12:32 +0100 Subject: KVM: s390: store the breaking-event address on pgm interrupts If the PER-3 facility is installed, the breaking-event address is to be stored in the low core. There is no facility bit for PER-3 in stfl(e) and Linux always uses the value at address 272 no matter if PER-3 is available or not. We can't hide its existence from the guest. All program interrupts injected via the SIE automatically store this information if the PER-3 facility is available in the hypervisor. Also the itdb contains the address automatically. As there is no switch to turn this mechanism off, let's simply make it consistent and also store the breaking event address in case of manual program interrupt injection. Reviewed-by: Jens Freimann Signed-off-by: David Hildenbrand Reviewed-by: Christian Borntraeger Signed-off-by: Christian Borntraeger Acked-by: Cornelia Huck --- arch/s390/kvm/interrupt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 2afec6006def..2361b8ed0a50 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -585,6 +585,8 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu) kvm_s390_rewind_psw(vcpu, ilc); rc |= put_guest_lc(vcpu, ilc, (u16 *) __LC_PGM_ILC); + rc |= put_guest_lc(vcpu, vcpu->arch.sie_block->gbea, + (u64 *) __LC_LAST_BREAK); rc |= put_guest_lc(vcpu, pgm_info.code, (u16 *)__LC_PGM_INT_CODE); rc |= write_guest_lc(vcpu, __LC_PGM_OLD_PSW, -- cgit v1.2.3-58-ga151 From a3ed8dae6e3db479ca275883ba7fe994170b0ae6 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Wed, 18 Mar 2015 13:54:31 +0100 Subject: KVM: s390: enable more features that need no hypervisor changes After some review about what these facilities do, the following facilities will work under KVM and can, therefore, be reported to the guest if the cpu model and the host cpu provide this bit. There are plans underway to make the whole bit thing more readable, but its not yet finished. So here are some last bit changes and we enhance the KVM mask with: 9 The sense-running-status facility is installed in the z/Architecture architectural mode. ---> handled by SIE or KVM 10 The conditional-SSKE facility is installed in the z/Architecture architectural mode. ---> handled by SIE. KVM will retry SIE 13 The IPTE-range facility is installed in the z/Architecture architectural mode. ---> handled by SIE. KVM will retry SIE 36 The enhanced-monitor facility is installed in the z/Architecture architectural mode. ---> handled by SIE 47 The CMPSC-enhancement facility is installed in the z/Architecture architectural mode. ---> handled by SIE 48 The decimal-floating-point zoned-conversion facility is installed in the z/Architecture architectural mode. ---> handled by SIE 49 The execution-hint, load-and-trap, miscellaneous- instruction-extensions and processor-assist ---> handled by SIE 51 The local-TLB-clearing facility is installed in the z/Architecture architectural mode. ---> handled by SIE 52 The interlocked-access facility 2 is installed. ---> handled by SIE 53 The load/store-on-condition facility 2 and load-and- zero-rightmost-byte facility are installed in the z/Architecture architectural mode. ---> handled by SIE 57 The message-security-assist-extension-5 facility is installed in the z/Architecture architectural mode. ---> handled by SIE 66 The reset-reference-bits-multiple facility is installed in the z/Architecture architectural mode. ---> handled by SIE. KVM will retry SIE 80 The decimal-floating-point packed-conversion facility is installed in the z/Architecture architectural mode. ---> handled by SIE Signed-off-by: Christian Borntraeger Tested-by: Michael Mueller Acked-by: Cornelia Huck --- arch/s390/kvm/kvm-s390.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 9072127bd51b..a1308859887d 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -105,8 +105,8 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { /* upper facilities limit for kvm */ unsigned long kvm_s390_fac_list_mask[] = { - 0xff82fffbf4fc2000UL, - 0x005c000000000000UL, + 0xffe6fffbfcfdfc40UL, + 0x205c800000000000UL, }; unsigned long kvm_s390_fac_list_mask_size(void) -- cgit v1.2.3-58-ga151 From 94aa033efcac47b09db22cb561e135baf37b7887 Mon Sep 17 00:00:00 2001 From: Jens Freimann Date: Mon, 16 Mar 2015 12:17:13 +0100 Subject: KVM: s390: fix get_all_floating_irqs This fixes a bug introduced with commit c05c4186bbe4 ("KVM: s390: add floating irq controller"). get_all_floating_irqs() does copy_to_user() while holding a spin lock. Let's fix this by filling a temporary buffer first and copy it to userspace after giving up the lock. Cc: # 3.18+: 69a8d4562638 KVM: s390: no need to hold... Reviewed-by: David Hildenbrand Signed-off-by: Jens Freimann Signed-off-by: Christian Borntraeger Acked-by: Cornelia Huck --- Documentation/virtual/kvm/devices/s390_flic.txt | 3 ++ arch/s390/kvm/interrupt.c | 58 ++++++++++++++----------- 2 files changed, 35 insertions(+), 26 deletions(-) diff --git a/Documentation/virtual/kvm/devices/s390_flic.txt b/Documentation/virtual/kvm/devices/s390_flic.txt index 4ceef53164b0..d1ad9d5cae46 100644 --- a/Documentation/virtual/kvm/devices/s390_flic.txt +++ b/Documentation/virtual/kvm/devices/s390_flic.txt @@ -27,6 +27,9 @@ Groups: Copies all floating interrupts into a buffer provided by userspace. When the buffer is too small it returns -ENOMEM, which is the indication for userspace to try again with a bigger buffer. + -ENOBUFS is returned when the allocation of a kernelspace buffer has + failed. + -EFAULT is returned when copying data to userspace failed. All interrupts remain pending, i.e. are not deleted from the list of currently pending interrupts. attr->addr contains the userspace address of the buffer into which all diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 2361b8ed0a50..5ebd500e6400 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -1477,61 +1478,66 @@ void kvm_s390_clear_float_irqs(struct kvm *kvm) spin_unlock(&fi->lock); } -static inline int copy_irq_to_user(struct kvm_s390_interrupt_info *inti, - u8 *addr) +static void inti_to_irq(struct kvm_s390_interrupt_info *inti, + struct kvm_s390_irq *irq) { - struct kvm_s390_irq __user *uptr = (struct kvm_s390_irq __user *) addr; - struct kvm_s390_irq irq = {0}; - - irq.type = inti->type; + irq->type = inti->type; switch (inti->type) { case KVM_S390_INT_PFAULT_INIT: case KVM_S390_INT_PFAULT_DONE: case KVM_S390_INT_VIRTIO: case KVM_S390_INT_SERVICE: - irq.u.ext = inti->ext; + irq->u.ext = inti->ext; break; case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: - irq.u.io = inti->io; + irq->u.io = inti->io; break; case KVM_S390_MCHK: - irq.u.mchk = inti->mchk; + irq->u.mchk = inti->mchk; break; - default: - return -EINVAL; } - - if (copy_to_user(uptr, &irq, sizeof(irq))) - return -EFAULT; - - return 0; } -static int get_all_floating_irqs(struct kvm *kvm, __u8 *buf, __u64 len) +static int get_all_floating_irqs(struct kvm *kvm, u8 __user *usrbuf, u64 len) { struct kvm_s390_interrupt_info *inti; struct kvm_s390_float_interrupt *fi; + struct kvm_s390_irq *buf; + int max_irqs; int ret = 0; int n = 0; + if (len > KVM_S390_FLIC_MAX_BUFFER || len == 0) + return -EINVAL; + + /* + * We are already using -ENOMEM to signal + * userspace it may retry with a bigger buffer, + * so we need to use something else for this case + */ + buf = vzalloc(len); + if (!buf) + return -ENOBUFS; + + max_irqs = len / sizeof(struct kvm_s390_irq); + fi = &kvm->arch.float_int; spin_lock(&fi->lock); - list_for_each_entry(inti, &fi->list, list) { - if (len < sizeof(struct kvm_s390_irq)) { + if (n == max_irqs) { /* signal userspace to try again */ ret = -ENOMEM; break; } - ret = copy_irq_to_user(inti, buf); - if (ret) - break; - buf += sizeof(struct kvm_s390_irq); - len -= sizeof(struct kvm_s390_irq); + inti_to_irq(inti, &buf[n]); n++; } - spin_unlock(&fi->lock); + if (!ret && n > 0) { + if (copy_to_user(usrbuf, buf, sizeof(struct kvm_s390_irq) * n)) + ret = -EFAULT; + } + vfree(buf); return ret < 0 ? ret : n; } @@ -1542,7 +1548,7 @@ static int flic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) switch (attr->group) { case KVM_DEV_FLIC_GET_ALL_IRQS: - r = get_all_floating_irqs(dev->kvm, (u8 *) attr->addr, + r = get_all_floating_irqs(dev->kvm, (u8 __user *) attr->addr, attr->attr); break; default: -- cgit v1.2.3-58-ga151 From 6d3da241416e6088f83a7ff1f37fb6bb518d9bc8 Mon Sep 17 00:00:00 2001 From: Jens Freimann Date: Wed, 3 Jul 2013 15:18:35 +0200 Subject: KVM: s390: deliver floating interrupts in order of priority This patch makes interrupt handling compliant to the z/Architecture Principles of Operation with regard to interrupt priorities. Add a bitmap for pending floating interrupts. Each bit relates to a interrupt type and its list. A turned on bit indicates that a list contains items (interrupts) which need to be delivered. When delivering interrupts on a cpu we can merge the existing bitmap for cpu-local interrupts and floating interrupts and have a single mechanism for delivery. Currently we have one list for all kinds of floating interrupts and a corresponding spin lock. This patch adds a separate list per interrupt type. An exception to this are service signal and machine check interrupts, as there can be only one pending interrupt at a time. Signed-off-by: Jens Freimann Signed-off-by: Christian Borntraeger Acked-by: Cornelia Huck --- arch/s390/include/asm/kvm_host.h | 30 +- arch/s390/kvm/interrupt.c | 832 ++++++++++++++++++++++----------------- arch/s390/kvm/kvm-s390.c | 4 +- arch/s390/kvm/kvm-s390.h | 2 +- arch/s390/kvm/priv.c | 9 +- 5 files changed, 510 insertions(+), 367 deletions(-) diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index b8d1e97fb201..d01fc588b5c3 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -344,6 +344,11 @@ enum irq_types { IRQ_PEND_COUNT }; +/* We have 2M for virtio device descriptor pages. Smallest amount of + * memory per page is 24 bytes (1 queue), so (2048*1024) / 24 = 87381 + */ +#define KVM_S390_MAX_VIRTIO_IRQS 87381 + /* * Repressible (non-floating) machine check interrupts * subclass bits in MCIC @@ -421,13 +426,32 @@ struct kvm_s390_local_interrupt { unsigned long pending_irqs; }; +#define FIRQ_LIST_IO_ISC_0 0 +#define FIRQ_LIST_IO_ISC_1 1 +#define FIRQ_LIST_IO_ISC_2 2 +#define FIRQ_LIST_IO_ISC_3 3 +#define FIRQ_LIST_IO_ISC_4 4 +#define FIRQ_LIST_IO_ISC_5 5 +#define FIRQ_LIST_IO_ISC_6 6 +#define FIRQ_LIST_IO_ISC_7 7 +#define FIRQ_LIST_PFAULT 8 +#define FIRQ_LIST_VIRTIO 9 +#define FIRQ_LIST_COUNT 10 +#define FIRQ_CNTR_IO 0 +#define FIRQ_CNTR_SERVICE 1 +#define FIRQ_CNTR_VIRTIO 2 +#define FIRQ_CNTR_PFAULT 3 +#define FIRQ_MAX_COUNT 4 + struct kvm_s390_float_interrupt { + unsigned long pending_irqs; spinlock_t lock; - struct list_head list; - atomic_t active; + struct list_head lists[FIRQ_LIST_COUNT]; + int counters[FIRQ_MAX_COUNT]; + struct kvm_s390_mchk_info mchk; + struct kvm_s390_ext_info srv_signal; int next_rr_cpu; unsigned long idle_mask[BITS_TO_LONGS(KVM_MAX_VCPUS)]; - unsigned int irq_count; }; struct kvm_hw_wp_info_arch { diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 5ebd500e6400..2872fdb4d01a 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "kvm-s390.h" #include "gaccess.h" #include "trace-s390.h" @@ -34,11 +35,6 @@ #define PFAULT_DONE 0x0680 #define VIRTIO_PARAM 0x0d00 -static int is_ioint(u64 type) -{ - return ((type & 0xfffe0000u) != 0xfffe0000u); -} - int psw_extint_disabled(struct kvm_vcpu *vcpu) { return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT); @@ -74,70 +70,25 @@ static int ckc_interrupts_enabled(struct kvm_vcpu *vcpu) return 1; } -static u64 int_word_to_isc_bits(u32 int_word) +static inline int is_ioirq(unsigned long irq_type) { - u8 isc = (int_word & 0x38000000) >> 27; + return ((irq_type >= IRQ_PEND_IO_ISC_0) && + (irq_type <= IRQ_PEND_IO_ISC_7)); +} +static uint64_t isc_to_isc_bits(int isc) +{ return (0x80 >> isc) << 24; } -static int __must_check __interrupt_is_deliverable(struct kvm_vcpu *vcpu, - struct kvm_s390_interrupt_info *inti) +static inline u8 int_word_to_isc(u32 int_word) { - switch (inti->type) { - case KVM_S390_INT_EXTERNAL_CALL: - if (psw_extint_disabled(vcpu)) - return 0; - if (vcpu->arch.sie_block->gcr[0] & 0x2000ul) - return 1; - return 0; - case KVM_S390_INT_EMERGENCY: - if (psw_extint_disabled(vcpu)) - return 0; - if (vcpu->arch.sie_block->gcr[0] & 0x4000ul) - return 1; - return 0; - case KVM_S390_INT_CLOCK_COMP: - return ckc_interrupts_enabled(vcpu); - case KVM_S390_INT_CPU_TIMER: - if (psw_extint_disabled(vcpu)) - return 0; - if (vcpu->arch.sie_block->gcr[0] & 0x400ul) - return 1; - return 0; - case KVM_S390_INT_SERVICE: - case KVM_S390_INT_PFAULT_INIT: - case KVM_S390_INT_PFAULT_DONE: - case KVM_S390_INT_VIRTIO: - if (psw_extint_disabled(vcpu)) - return 0; - if (vcpu->arch.sie_block->gcr[0] & 0x200ul) - return 1; - return 0; - case KVM_S390_PROGRAM_INT: - case KVM_S390_SIGP_STOP: - case KVM_S390_SIGP_SET_PREFIX: - case KVM_S390_RESTART: - return 1; - case KVM_S390_MCHK: - if (psw_mchk_disabled(vcpu)) - return 0; - if (vcpu->arch.sie_block->gcr[14] & inti->mchk.cr14) - return 1; - return 0; - case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: - if (psw_ioint_disabled(vcpu)) - return 0; - if (vcpu->arch.sie_block->gcr[6] & - int_word_to_isc_bits(inti->io.io_int_word)) - return 1; - return 0; - default: - printk(KERN_WARNING "illegal interrupt type %llx\n", - inti->type); - BUG(); - } - return 0; + return (int_word & 0x38000000) >> 27; +} + +static inline unsigned long pending_floating_irqs(struct kvm_vcpu *vcpu) +{ + return vcpu->kvm->arch.float_int.pending_irqs; } static inline unsigned long pending_local_irqs(struct kvm_vcpu *vcpu) @@ -145,12 +96,31 @@ static inline unsigned long pending_local_irqs(struct kvm_vcpu *vcpu) return vcpu->arch.local_int.pending_irqs; } -static unsigned long deliverable_local_irqs(struct kvm_vcpu *vcpu) +static unsigned long disable_iscs(struct kvm_vcpu *vcpu, + unsigned long active_mask) { - unsigned long active_mask = pending_local_irqs(vcpu); + int i; + + for (i = 0; i <= MAX_ISC; i++) + if (!(vcpu->arch.sie_block->gcr[6] & isc_to_isc_bits(i))) + active_mask &= ~(1UL << (IRQ_PEND_IO_ISC_0 + i)); + + return active_mask; +} + +static unsigned long deliverable_irqs(struct kvm_vcpu *vcpu) +{ + unsigned long active_mask; + + active_mask = pending_local_irqs(vcpu); + active_mask |= pending_floating_irqs(vcpu); if (psw_extint_disabled(vcpu)) active_mask &= ~IRQ_PEND_EXT_MASK; + if (psw_ioint_disabled(vcpu)) + active_mask &= ~IRQ_PEND_IO_MASK; + else + active_mask = disable_iscs(vcpu, active_mask); if (!(vcpu->arch.sie_block->gcr[0] & 0x2000ul)) __clear_bit(IRQ_PEND_EXT_EXTERNAL, &active_mask); if (!(vcpu->arch.sie_block->gcr[0] & 0x4000ul)) @@ -159,8 +129,13 @@ static unsigned long deliverable_local_irqs(struct kvm_vcpu *vcpu) __clear_bit(IRQ_PEND_EXT_CLOCK_COMP, &active_mask); if (!(vcpu->arch.sie_block->gcr[0] & 0x400ul)) __clear_bit(IRQ_PEND_EXT_CPU_TIMER, &active_mask); + if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul)) + __clear_bit(IRQ_PEND_EXT_SERVICE, &active_mask); if (psw_mchk_disabled(vcpu)) active_mask &= ~IRQ_PEND_MCHK_MASK; + if (!(vcpu->arch.sie_block->gcr[14] & + vcpu->kvm->arch.float_int.mchk.cr14)) + __clear_bit(IRQ_PEND_MCHK_REP, &active_mask); /* * STOP irqs will never be actively delivered. They are triggered via @@ -202,6 +177,16 @@ static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag) atomic_set_mask(flag, &vcpu->arch.sie_block->cpuflags); } +static void set_intercept_indicators_io(struct kvm_vcpu *vcpu) +{ + if (!(pending_floating_irqs(vcpu) & IRQ_PEND_IO_MASK)) + return; + else if (psw_ioint_disabled(vcpu)) + __set_cpuflag(vcpu, CPUSTAT_IO_INT); + else + vcpu->arch.sie_block->lctl |= LCTL_CR6; +} + static void set_intercept_indicators_ext(struct kvm_vcpu *vcpu) { if (!(pending_local_irqs(vcpu) & IRQ_PEND_EXT_MASK)) @@ -228,43 +213,15 @@ static void set_intercept_indicators_stop(struct kvm_vcpu *vcpu) __set_cpuflag(vcpu, CPUSTAT_STOP_INT); } -/* Set interception request for non-deliverable local interrupts */ -static void set_intercept_indicators_local(struct kvm_vcpu *vcpu) +/* Set interception request for non-deliverable interrupts */ +static void set_intercept_indicators(struct kvm_vcpu *vcpu) { + set_intercept_indicators_io(vcpu); set_intercept_indicators_ext(vcpu); set_intercept_indicators_mchk(vcpu); set_intercept_indicators_stop(vcpu); } -static void __set_intercept_indicator(struct kvm_vcpu *vcpu, - struct kvm_s390_interrupt_info *inti) -{ - switch (inti->type) { - case KVM_S390_INT_SERVICE: - case KVM_S390_INT_PFAULT_DONE: - case KVM_S390_INT_VIRTIO: - if (psw_extint_disabled(vcpu)) - __set_cpuflag(vcpu, CPUSTAT_EXT_INT); - else - vcpu->arch.sie_block->lctl |= LCTL_CR0; - break; - case KVM_S390_MCHK: - if (psw_mchk_disabled(vcpu)) - vcpu->arch.sie_block->ictl |= ICTL_LPSW; - else - vcpu->arch.sie_block->lctl |= LCTL_CR14; - break; - case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: - if (psw_ioint_disabled(vcpu)) - __set_cpuflag(vcpu, CPUSTAT_IO_INT); - else - vcpu->arch.sie_block->lctl |= LCTL_CR6; - break; - default: - BUG(); - } -} - static u16 get_ilc(struct kvm_vcpu *vcpu) { switch (vcpu->arch.sie_block->icptcode) { @@ -350,42 +307,72 @@ static int __must_check __deliver_pfault_init(struct kvm_vcpu *vcpu) static int __must_check __deliver_machine_check(struct kvm_vcpu *vcpu) { + struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; - struct kvm_s390_mchk_info mchk; + struct kvm_s390_mchk_info mchk = {}; unsigned long adtl_status_addr; - int rc; + int deliver = 0; + int rc = 0; + spin_lock(&fi->lock); spin_lock(&li->lock); - mchk = li->irq.mchk; + if (test_bit(IRQ_PEND_MCHK_EX, &li->pending_irqs) || + test_bit(IRQ_PEND_MCHK_REP, &li->pending_irqs)) { + /* + * If there was an exigent machine check pending, then any + * repressible machine checks that might have been pending + * are indicated along with it, so always clear bits for + * repressible and exigent interrupts + */ + mchk = li->irq.mchk; + clear_bit(IRQ_PEND_MCHK_EX, &li->pending_irqs); + clear_bit(IRQ_PEND_MCHK_REP, &li->pending_irqs); + memset(&li->irq.mchk, 0, sizeof(mchk)); + deliver = 1; + } /* - * If there was an exigent machine check pending, then any repressible - * machine checks that might have been pending are indicated along - * with it, so always clear both bits + * We indicate floating repressible conditions along with + * other pending conditions. Channel Report Pending and Channel + * Subsystem damage are the only two and and are indicated by + * bits in mcic and masked in cr14. */ - clear_bit(IRQ_PEND_MCHK_EX, &li->pending_irqs); - clear_bit(IRQ_PEND_MCHK_REP, &li->pending_irqs); - memset(&li->irq.mchk, 0, sizeof(mchk)); + if (test_and_clear_bit(IRQ_PEND_MCHK_REP, &fi->pending_irqs)) { + mchk.mcic |= fi->mchk.mcic; + mchk.cr14 |= fi->mchk.cr14; + memset(&fi->mchk, 0, sizeof(mchk)); + deliver = 1; + } spin_unlock(&li->lock); + spin_unlock(&fi->lock); - VCPU_EVENT(vcpu, 4, "interrupt: machine check mcic=%llx", - mchk.mcic); - trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_MCHK, - mchk.cr14, mchk.mcic); - - rc = kvm_s390_vcpu_store_status(vcpu, KVM_S390_STORE_STATUS_PREFIXED); - rc |= read_guest_lc(vcpu, __LC_VX_SAVE_AREA_ADDR, - &adtl_status_addr, sizeof(unsigned long)); - rc |= kvm_s390_vcpu_store_adtl_status(vcpu, adtl_status_addr); - rc |= put_guest_lc(vcpu, mchk.mcic, - (u64 __user *) __LC_MCCK_CODE); - rc |= put_guest_lc(vcpu, mchk.failing_storage_address, - (u64 __user *) __LC_MCCK_FAIL_STOR_ADDR); - rc |= write_guest_lc(vcpu, __LC_PSW_SAVE_AREA, - &mchk.fixed_logout, sizeof(mchk.fixed_logout)); - rc |= write_guest_lc(vcpu, __LC_MCK_OLD_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - rc |= read_guest_lc(vcpu, __LC_MCK_NEW_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + if (deliver) { + VCPU_EVENT(vcpu, 4, "interrupt: machine check mcic=%llx", + mchk.mcic); + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, + KVM_S390_MCHK, + mchk.cr14, mchk.mcic); + + rc = kvm_s390_vcpu_store_status(vcpu, + KVM_S390_STORE_STATUS_PREFIXED); + rc |= read_guest_lc(vcpu, __LC_VX_SAVE_AREA_ADDR, + &adtl_status_addr, + sizeof(unsigned long)); + rc |= kvm_s390_vcpu_store_adtl_status(vcpu, + adtl_status_addr); + rc |= put_guest_lc(vcpu, mchk.mcic, + (u64 __user *) __LC_MCCK_CODE); + rc |= put_guest_lc(vcpu, mchk.failing_storage_address, + (u64 __user *) __LC_MCCK_FAIL_STOR_ADDR); + rc |= write_guest_lc(vcpu, __LC_PSW_SAVE_AREA, + &mchk.fixed_logout, + sizeof(mchk.fixed_logout)); + rc |= write_guest_lc(vcpu, __LC_MCK_OLD_PSW, + &vcpu->arch.sie_block->gpsw, + sizeof(psw_t)); + rc |= read_guest_lc(vcpu, __LC_MCK_NEW_PSW, + &vcpu->arch.sie_block->gpsw, + sizeof(psw_t)); + } return rc ? -EFAULT : 0; } @@ -597,16 +584,27 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu) return rc ? -EFAULT : 0; } -static int __must_check __deliver_service(struct kvm_vcpu *vcpu, - struct kvm_s390_interrupt_info *inti) +static int __must_check __deliver_service(struct kvm_vcpu *vcpu) { - int rc; + struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; + struct kvm_s390_ext_info ext; + int rc = 0; + + spin_lock(&fi->lock); + if (!(test_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs))) { + spin_unlock(&fi->lock); + return 0; + } + ext = fi->srv_signal; + memset(&fi->srv_signal, 0, sizeof(ext)); + clear_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs); + spin_unlock(&fi->lock); VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x", - inti->ext.ext_params); + ext.ext_params); vcpu->stat.deliver_service_signal++; - trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, - inti->ext.ext_params, 0); + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_SERVICE, + ext.ext_params, 0); rc = put_guest_lc(vcpu, EXT_IRQ_SERVICE_SIG, (u16 *)__LC_EXT_INT_CODE); rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR); @@ -614,106 +612,146 @@ static int __must_check __deliver_service(struct kvm_vcpu *vcpu, &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - rc |= put_guest_lc(vcpu, inti->ext.ext_params, + rc |= put_guest_lc(vcpu, ext.ext_params, (u32 *)__LC_EXT_PARAMS); + return rc ? -EFAULT : 0; } -static int __must_check __deliver_pfault_done(struct kvm_vcpu *vcpu, - struct kvm_s390_interrupt_info *inti) +static int __must_check __deliver_pfault_done(struct kvm_vcpu *vcpu) { - int rc; + struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; + struct kvm_s390_interrupt_info *inti; + int rc = 0; - trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, - KVM_S390_INT_PFAULT_DONE, 0, - inti->ext.ext_params2); + spin_lock(&fi->lock); + inti = list_first_entry_or_null(&fi->lists[FIRQ_LIST_PFAULT], + struct kvm_s390_interrupt_info, + list); + if (inti) { + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, + KVM_S390_INT_PFAULT_DONE, 0, + inti->ext.ext_params2); + list_del(&inti->list); + fi->counters[FIRQ_CNTR_PFAULT] -= 1; + } + if (list_empty(&fi->lists[FIRQ_LIST_PFAULT])) + clear_bit(IRQ_PEND_PFAULT_DONE, &fi->pending_irqs); + spin_unlock(&fi->lock); - rc = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE, (u16 *)__LC_EXT_INT_CODE); - rc |= put_guest_lc(vcpu, PFAULT_DONE, (u16 *)__LC_EXT_CPU_ADDR); - rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - rc |= put_guest_lc(vcpu, inti->ext.ext_params2, - (u64 *)__LC_EXT_PARAMS2); + if (inti) { + rc = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE, + (u16 *)__LC_EXT_INT_CODE); + rc |= put_guest_lc(vcpu, PFAULT_DONE, + (u16 *)__LC_EXT_CPU_ADDR); + rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, + sizeof(psw_t)); + rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, + &vcpu->arch.sie_block->gpsw, + sizeof(psw_t)); + rc |= put_guest_lc(vcpu, inti->ext.ext_params2, + (u64 *)__LC_EXT_PARAMS2); + kfree(inti); + } return rc ? -EFAULT : 0; } -static int __must_check __deliver_virtio(struct kvm_vcpu *vcpu, - struct kvm_s390_interrupt_info *inti) +static int __must_check __deliver_virtio(struct kvm_vcpu *vcpu) { - int rc; + struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; + struct kvm_s390_interrupt_info *inti; + int rc = 0; - VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%llx", - inti->ext.ext_params, inti->ext.ext_params2); - vcpu->stat.deliver_virtio_interrupt++; - trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, - inti->ext.ext_params, - inti->ext.ext_params2); + spin_lock(&fi->lock); + inti = list_first_entry_or_null(&fi->lists[FIRQ_LIST_VIRTIO], + struct kvm_s390_interrupt_info, + list); + if (inti) { + VCPU_EVENT(vcpu, 4, + "interrupt: virtio parm:%x,parm64:%llx", + inti->ext.ext_params, inti->ext.ext_params2); + vcpu->stat.deliver_virtio_interrupt++; + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, + inti->type, + inti->ext.ext_params, + inti->ext.ext_params2); + list_del(&inti->list); + fi->counters[FIRQ_CNTR_VIRTIO] -= 1; + } + if (list_empty(&fi->lists[FIRQ_LIST_VIRTIO])) + clear_bit(IRQ_PEND_VIRTIO, &fi->pending_irqs); + spin_unlock(&fi->lock); - rc = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE, (u16 *)__LC_EXT_INT_CODE); - rc |= put_guest_lc(vcpu, VIRTIO_PARAM, (u16 *)__LC_EXT_CPU_ADDR); - rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - rc |= put_guest_lc(vcpu, inti->ext.ext_params, - (u32 *)__LC_EXT_PARAMS); - rc |= put_guest_lc(vcpu, inti->ext.ext_params2, - (u64 *)__LC_EXT_PARAMS2); + if (inti) { + rc = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE, + (u16 *)__LC_EXT_INT_CODE); + rc |= put_guest_lc(vcpu, VIRTIO_PARAM, + (u16 *)__LC_EXT_CPU_ADDR); + rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, + sizeof(psw_t)); + rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, + &vcpu->arch.sie_block->gpsw, + sizeof(psw_t)); + rc |= put_guest_lc(vcpu, inti->ext.ext_params, + (u32 *)__LC_EXT_PARAMS); + rc |= put_guest_lc(vcpu, inti->ext.ext_params2, + (u64 *)__LC_EXT_PARAMS2); + kfree(inti); + } return rc ? -EFAULT : 0; } static int __must_check __deliver_io(struct kvm_vcpu *vcpu, - struct kvm_s390_interrupt_info *inti) + unsigned long irq_type) { - int rc; + struct list_head *isc_list; + struct kvm_s390_float_interrupt *fi; + struct kvm_s390_interrupt_info *inti = NULL; + int rc = 0; - VCPU_EVENT(vcpu, 4, "interrupt: I/O %llx", inti->type); - vcpu->stat.deliver_io_int++; - trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, - ((__u32)inti->io.subchannel_id << 16) | - inti->io.subchannel_nr, - ((__u64)inti->io.io_int_parm << 32) | - inti->io.io_int_word); - - rc = put_guest_lc(vcpu, inti->io.subchannel_id, - (u16 *)__LC_SUBCHANNEL_ID); - rc |= put_guest_lc(vcpu, inti->io.subchannel_nr, - (u16 *)__LC_SUBCHANNEL_NR); - rc |= put_guest_lc(vcpu, inti->io.io_int_parm, - (u32 *)__LC_IO_INT_PARM); - rc |= put_guest_lc(vcpu, inti->io.io_int_word, - (u32 *)__LC_IO_INT_WORD); - rc |= write_guest_lc(vcpu, __LC_IO_OLD_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - rc |= read_guest_lc(vcpu, __LC_IO_NEW_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - return rc ? -EFAULT : 0; -} + fi = &vcpu->kvm->arch.float_int; -static int __must_check __deliver_mchk_floating(struct kvm_vcpu *vcpu, - struct kvm_s390_interrupt_info *inti) -{ - struct kvm_s390_mchk_info *mchk = &inti->mchk; - int rc; + spin_lock(&fi->lock); + isc_list = &fi->lists[irq_type - IRQ_PEND_IO_ISC_0]; + inti = list_first_entry_or_null(isc_list, + struct kvm_s390_interrupt_info, + list); + if (inti) { + VCPU_EVENT(vcpu, 4, "interrupt: I/O %llx", inti->type); + vcpu->stat.deliver_io_int++; + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, + inti->type, + ((__u32)inti->io.subchannel_id << 16) | + inti->io.subchannel_nr, + ((__u64)inti->io.io_int_parm << 32) | + inti->io.io_int_word); + list_del(&inti->list); + fi->counters[FIRQ_CNTR_IO] -= 1; + } + if (list_empty(isc_list)) + clear_bit(irq_type, &fi->pending_irqs); + spin_unlock(&fi->lock); + + if (inti) { + rc = put_guest_lc(vcpu, inti->io.subchannel_id, + (u16 *)__LC_SUBCHANNEL_ID); + rc |= put_guest_lc(vcpu, inti->io.subchannel_nr, + (u16 *)__LC_SUBCHANNEL_NR); + rc |= put_guest_lc(vcpu, inti->io.io_int_parm, + (u32 *)__LC_IO_INT_PARM); + rc |= put_guest_lc(vcpu, inti->io.io_int_word, + (u32 *)__LC_IO_INT_WORD); + rc |= write_guest_lc(vcpu, __LC_IO_OLD_PSW, + &vcpu->arch.sie_block->gpsw, + sizeof(psw_t)); + rc |= read_guest_lc(vcpu, __LC_IO_NEW_PSW, + &vcpu->arch.sie_block->gpsw, + sizeof(psw_t)); + kfree(inti); + } - VCPU_EVENT(vcpu, 4, "interrupt: machine check mcic=%llx", - mchk->mcic); - trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_MCHK, - mchk->cr14, mchk->mcic); - - rc = kvm_s390_vcpu_store_status(vcpu, KVM_S390_STORE_STATUS_PREFIXED); - rc |= put_guest_lc(vcpu, mchk->mcic, - (u64 __user *) __LC_MCCK_CODE); - rc |= put_guest_lc(vcpu, mchk->failing_storage_address, - (u64 __user *) __LC_MCCK_FAIL_STOR_ADDR); - rc |= write_guest_lc(vcpu, __LC_PSW_SAVE_AREA, - &mchk->fixed_logout, sizeof(mchk->fixed_logout)); - rc |= write_guest_lc(vcpu, __LC_MCK_OLD_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - rc |= read_guest_lc(vcpu, __LC_MCK_NEW_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); return rc ? -EFAULT : 0; } @@ -721,6 +759,7 @@ typedef int (*deliver_irq_t)(struct kvm_vcpu *vcpu); static const deliver_irq_t deliver_irq_funcs[] = { [IRQ_PEND_MCHK_EX] = __deliver_machine_check, + [IRQ_PEND_MCHK_REP] = __deliver_machine_check, [IRQ_PEND_PROG] = __deliver_prog, [IRQ_PEND_EXT_EMERGENCY] = __deliver_emergency_signal, [IRQ_PEND_EXT_EXTERNAL] = __deliver_external_call, @@ -729,36 +768,11 @@ static const deliver_irq_t deliver_irq_funcs[] = { [IRQ_PEND_RESTART] = __deliver_restart, [IRQ_PEND_SET_PREFIX] = __deliver_set_prefix, [IRQ_PEND_PFAULT_INIT] = __deliver_pfault_init, + [IRQ_PEND_EXT_SERVICE] = __deliver_service, + [IRQ_PEND_PFAULT_DONE] = __deliver_pfault_done, + [IRQ_PEND_VIRTIO] = __deliver_virtio, }; -static int __must_check __deliver_floating_interrupt(struct kvm_vcpu *vcpu, - struct kvm_s390_interrupt_info *inti) -{ - int rc; - - switch (inti->type) { - case KVM_S390_INT_SERVICE: - rc = __deliver_service(vcpu, inti); - break; - case KVM_S390_INT_PFAULT_DONE: - rc = __deliver_pfault_done(vcpu, inti); - break; - case KVM_S390_INT_VIRTIO: - rc = __deliver_virtio(vcpu, inti); - break; - case KVM_S390_MCHK: - rc = __deliver_mchk_floating(vcpu, inti); - break; - case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: - rc = __deliver_io(vcpu, inti); - break; - default: - BUG(); - } - - return rc; -} - /* Check whether an external call is pending (deliverable or not) */ int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu) { @@ -774,21 +788,9 @@ int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu) int kvm_s390_vcpu_has_irq(struct kvm_vcpu *vcpu, int exclude_stop) { - struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int; - struct kvm_s390_interrupt_info *inti; int rc; - rc = !!deliverable_local_irqs(vcpu); - - if ((!rc) && atomic_read(&fi->active)) { - spin_lock(&fi->lock); - list_for_each_entry(inti, &fi->list, list) - if (__interrupt_is_deliverable(vcpu, inti)) { - rc = 1; - break; - } - spin_unlock(&fi->lock); - } + rc = !!deliverable_irqs(vcpu); if (!rc && kvm_cpu_has_pending_timer(vcpu)) rc = 1; @@ -907,13 +909,10 @@ void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu) int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; - struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int; - struct kvm_s390_interrupt_info *n, *inti = NULL; deliver_irq_t func; - int deliver; int rc = 0; unsigned long irq_type; - unsigned long deliverable_irqs; + unsigned long irqs; __reset_intercept_indicators(vcpu); @@ -923,44 +922,27 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) set_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs); do { - deliverable_irqs = deliverable_local_irqs(vcpu); + irqs = deliverable_irqs(vcpu); /* bits are in the order of interrupt priority */ - irq_type = find_first_bit(&deliverable_irqs, IRQ_PEND_COUNT); + irq_type = find_first_bit(&irqs, IRQ_PEND_COUNT); if (irq_type == IRQ_PEND_COUNT) break; - func = deliver_irq_funcs[irq_type]; - if (!func) { - WARN_ON_ONCE(func == NULL); - clear_bit(irq_type, &li->pending_irqs); - continue; + if (is_ioirq(irq_type)) { + rc = __deliver_io(vcpu, irq_type); + } else { + func = deliver_irq_funcs[irq_type]; + if (!func) { + WARN_ON_ONCE(func == NULL); + clear_bit(irq_type, &li->pending_irqs); + continue; + } + rc = func(vcpu); } - rc = func(vcpu); - } while (!rc && irq_type != IRQ_PEND_COUNT); - - set_intercept_indicators_local(vcpu); + if (rc) + break; + } while (!rc); - if (!rc && atomic_read(&fi->active)) { - do { - deliver = 0; - spin_lock(&fi->lock); - list_for_each_entry_safe(inti, n, &fi->list, list) { - if (__interrupt_is_deliverable(vcpu, inti)) { - list_del(&inti->list); - fi->irq_count--; - deliver = 1; - break; - } - __set_intercept_indicator(vcpu, inti); - } - if (list_empty(&fi->list)) - atomic_set(&fi->active, 0); - spin_unlock(&fi->lock); - if (deliver) { - rc = __deliver_floating_interrupt(vcpu, inti); - kfree(inti); - } - } while (!rc && deliver); - } + set_intercept_indicators(vcpu); return rc; } @@ -1195,80 +1177,182 @@ static int __inject_cpu_timer(struct kvm_vcpu *vcpu) return 0; } +static struct kvm_s390_interrupt_info *get_io_int(struct kvm *kvm, + int isc, u32 schid) +{ + struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int; + struct list_head *isc_list = &fi->lists[FIRQ_LIST_IO_ISC_0 + isc]; + struct kvm_s390_interrupt_info *iter; + u16 id = (schid & 0xffff0000U) >> 16; + u16 nr = schid & 0x0000ffffU; + spin_lock(&fi->lock); + list_for_each_entry(iter, isc_list, list) { + if (schid && (id != iter->io.subchannel_id || + nr != iter->io.subchannel_nr)) + continue; + /* found an appropriate entry */ + list_del_init(&iter->list); + fi->counters[FIRQ_CNTR_IO] -= 1; + if (list_empty(isc_list)) + clear_bit(IRQ_PEND_IO_ISC_0 + isc, &fi->pending_irqs); + spin_unlock(&fi->lock); + return iter; + } + spin_unlock(&fi->lock); + return NULL; +} + +/* + * Dequeue and return an I/O interrupt matching any of the interruption + * subclasses as designated by the isc mask in cr6 and the schid (if != 0). + */ struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, - u64 cr6, u64 schid) + u64 isc_mask, u32 schid) +{ + struct kvm_s390_interrupt_info *inti = NULL; + int isc; + + for (isc = 0; isc <= MAX_ISC && !inti; isc++) { + if (isc_mask & isc_to_isc_bits(isc)) + inti = get_io_int(kvm, isc, schid); + } + return inti; +} + +#define SCCB_MASK 0xFFFFFFF8 +#define SCCB_EVENT_PENDING 0x3 + +static int __inject_service(struct kvm *kvm, + struct kvm_s390_interrupt_info *inti) +{ + struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int; + + spin_lock(&fi->lock); + fi->srv_signal.ext_params |= inti->ext.ext_params & SCCB_EVENT_PENDING; + /* + * Early versions of the QEMU s390 bios will inject several + * service interrupts after another without handling a + * condition code indicating busy. + * We will silently ignore those superfluous sccb values. + * A future version of QEMU will take care of serialization + * of servc requests + */ + if (fi->srv_signal.ext_params & SCCB_MASK) + goto out; + fi->srv_signal.ext_params |= inti->ext.ext_params & SCCB_MASK; + set_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs); +out: + spin_unlock(&fi->lock); + kfree(inti); + return 0; +} + +static int __inject_virtio(struct kvm *kvm, + struct kvm_s390_interrupt_info *inti) +{ + struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int; + + spin_lock(&fi->lock); + if (fi->counters[FIRQ_CNTR_VIRTIO] >= KVM_S390_MAX_VIRTIO_IRQS) { + spin_unlock(&fi->lock); + return -EBUSY; + } + fi->counters[FIRQ_CNTR_VIRTIO] += 1; + list_add_tail(&inti->list, &fi->lists[FIRQ_LIST_VIRTIO]); + set_bit(IRQ_PEND_VIRTIO, &fi->pending_irqs); + spin_unlock(&fi->lock); + return 0; +} + +static int __inject_pfault_done(struct kvm *kvm, + struct kvm_s390_interrupt_info *inti) +{ + struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int; + + spin_lock(&fi->lock); + if (fi->counters[FIRQ_CNTR_PFAULT] >= + (ASYNC_PF_PER_VCPU * KVM_MAX_VCPUS)) { + spin_unlock(&fi->lock); + return -EBUSY; + } + fi->counters[FIRQ_CNTR_PFAULT] += 1; + list_add_tail(&inti->list, &fi->lists[FIRQ_LIST_PFAULT]); + set_bit(IRQ_PEND_PFAULT_DONE, &fi->pending_irqs); + spin_unlock(&fi->lock); + return 0; +} + +#define CR_PENDING_SUBCLASS 28 +static int __inject_float_mchk(struct kvm *kvm, + struct kvm_s390_interrupt_info *inti) +{ + struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int; + + spin_lock(&fi->lock); + fi->mchk.cr14 |= inti->mchk.cr14 & (1UL << CR_PENDING_SUBCLASS); + fi->mchk.mcic |= inti->mchk.mcic; + set_bit(IRQ_PEND_MCHK_REP, &fi->pending_irqs); + spin_unlock(&fi->lock); + kfree(inti); + return 0; +} + +static int __inject_io(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) { struct kvm_s390_float_interrupt *fi; - struct kvm_s390_interrupt_info *inti, *iter; + struct list_head *list; + int isc; - if ((!schid && !cr6) || (schid && cr6)) - return NULL; fi = &kvm->arch.float_int; spin_lock(&fi->lock); - inti = NULL; - list_for_each_entry(iter, &fi->list, list) { - if (!is_ioint(iter->type)) - continue; - if (cr6 && - ((cr6 & int_word_to_isc_bits(iter->io.io_int_word)) == 0)) - continue; - if (schid) { - if (((schid & 0x00000000ffff0000) >> 16) != - iter->io.subchannel_id) - continue; - if ((schid & 0x000000000000ffff) != - iter->io.subchannel_nr) - continue; - } - inti = iter; - break; - } - if (inti) { - list_del_init(&inti->list); - fi->irq_count--; + if (fi->counters[FIRQ_CNTR_IO] >= KVM_S390_MAX_FLOAT_IRQS) { + spin_unlock(&fi->lock); + return -EBUSY; } - if (list_empty(&fi->list)) - atomic_set(&fi->active, 0); + fi->counters[FIRQ_CNTR_IO] += 1; + + isc = int_word_to_isc(inti->io.io_int_word); + list = &fi->lists[FIRQ_LIST_IO_ISC_0 + isc]; + list_add_tail(&inti->list, list); + set_bit(IRQ_PEND_IO_ISC_0 + isc, &fi->pending_irqs); spin_unlock(&fi->lock); - return inti; + return 0; } static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) { struct kvm_s390_local_interrupt *li; struct kvm_s390_float_interrupt *fi; - struct kvm_s390_interrupt_info *iter; struct kvm_vcpu *dst_vcpu = NULL; int sigcpu; - int rc = 0; + u64 type = READ_ONCE(inti->type); + int rc; fi = &kvm->arch.float_int; - spin_lock(&fi->lock); - if (fi->irq_count >= KVM_S390_MAX_FLOAT_IRQS) { + + switch (type) { + case KVM_S390_MCHK: + rc = __inject_float_mchk(kvm, inti); + break; + case KVM_S390_INT_VIRTIO: + rc = __inject_virtio(kvm, inti); + break; + case KVM_S390_INT_SERVICE: + rc = __inject_service(kvm, inti); + break; + case KVM_S390_INT_PFAULT_DONE: + rc = __inject_pfault_done(kvm, inti); + break; + case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: + rc = __inject_io(kvm, inti); + break; + default: rc = -EINVAL; - goto unlock_fi; } - fi->irq_count++; - if (!is_ioint(inti->type)) { - list_add_tail(&inti->list, &fi->list); - } else { - u64 isc_bits = int_word_to_isc_bits(inti->io.io_int_word); + if (rc) + return rc; - /* Keep I/O interrupts sorted in isc order. */ - list_for_each_entry(iter, &fi->list, list) { - if (!is_ioint(iter->type)) - continue; - if (int_word_to_isc_bits(iter->io.io_int_word) - <= isc_bits) - continue; - break; - } - list_add_tail(&inti->list, &iter->list); - } - atomic_set(&fi->active, 1); - if (atomic_read(&kvm->online_vcpus) == 0) - goto unlock_fi; sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS); if (sigcpu == KVM_MAX_VCPUS) { do { @@ -1280,7 +1364,7 @@ static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) dst_vcpu = kvm_get_vcpu(kvm, sigcpu); li = &dst_vcpu->arch.local_int; spin_lock(&li->lock); - switch (inti->type) { + switch (type) { case KVM_S390_MCHK: atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags); break; @@ -1293,9 +1377,8 @@ static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) } spin_unlock(&li->lock); kvm_s390_vcpu_wakeup(kvm_get_vcpu(kvm, sigcpu)); -unlock_fi: - spin_unlock(&fi->lock); - return rc; + return 0; + } int kvm_s390_inject_vm(struct kvm *kvm, @@ -1462,20 +1545,14 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) return rc; } -void kvm_s390_clear_float_irqs(struct kvm *kvm) +static inline void clear_irq_list(struct list_head *_list) { - struct kvm_s390_float_interrupt *fi; - struct kvm_s390_interrupt_info *n, *inti = NULL; + struct kvm_s390_interrupt_info *inti, *n; - fi = &kvm->arch.float_int; - spin_lock(&fi->lock); - list_for_each_entry_safe(inti, n, &fi->list, list) { + list_for_each_entry_safe(inti, n, _list, list) { list_del(&inti->list); kfree(inti); } - fi->irq_count = 0; - atomic_set(&fi->active, 0); - spin_unlock(&fi->lock); } static void inti_to_irq(struct kvm_s390_interrupt_info *inti, @@ -1486,26 +1563,37 @@ static void inti_to_irq(struct kvm_s390_interrupt_info *inti, case KVM_S390_INT_PFAULT_INIT: case KVM_S390_INT_PFAULT_DONE: case KVM_S390_INT_VIRTIO: - case KVM_S390_INT_SERVICE: irq->u.ext = inti->ext; break; case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: irq->u.io = inti->io; break; - case KVM_S390_MCHK: - irq->u.mchk = inti->mchk; - break; } } +void kvm_s390_clear_float_irqs(struct kvm *kvm) +{ + struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int; + int i; + + spin_lock(&fi->lock); + for (i = 0; i < FIRQ_LIST_COUNT; i++) + clear_irq_list(&fi->lists[i]); + for (i = 0; i < FIRQ_MAX_COUNT; i++) + fi->counters[i] = 0; + spin_unlock(&fi->lock); +}; + static int get_all_floating_irqs(struct kvm *kvm, u8 __user *usrbuf, u64 len) { struct kvm_s390_interrupt_info *inti; struct kvm_s390_float_interrupt *fi; struct kvm_s390_irq *buf; + struct kvm_s390_irq *irq; int max_irqs; int ret = 0; int n = 0; + int i; if (len > KVM_S390_FLIC_MAX_BUFFER || len == 0) return -EINVAL; @@ -1523,15 +1611,41 @@ static int get_all_floating_irqs(struct kvm *kvm, u8 __user *usrbuf, u64 len) fi = &kvm->arch.float_int; spin_lock(&fi->lock); - list_for_each_entry(inti, &fi->list, list) { + for (i = 0; i < FIRQ_LIST_COUNT; i++) { + list_for_each_entry(inti, &fi->lists[i], list) { + if (n == max_irqs) { + /* signal userspace to try again */ + ret = -ENOMEM; + goto out; + } + inti_to_irq(inti, &buf[n]); + n++; + } + } + if (test_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs)) { if (n == max_irqs) { /* signal userspace to try again */ ret = -ENOMEM; - break; + goto out; } - inti_to_irq(inti, &buf[n]); + irq = (struct kvm_s390_irq *) &buf[n]; + irq->type = KVM_S390_INT_SERVICE; + irq->u.ext = fi->srv_signal; n++; } + if (test_bit(IRQ_PEND_MCHK_REP, &fi->pending_irqs)) { + if (n == max_irqs) { + /* signal userspace to try again */ + ret = -ENOMEM; + goto out; + } + irq = (struct kvm_s390_irq *) &buf[n]; + irq->type = KVM_S390_MCHK; + irq->u.mchk = fi->mchk; + n++; +} + +out: spin_unlock(&fi->lock); if (!ret && n > 0) { if (copy_to_user(usrbuf, buf, sizeof(struct kvm_s390_irq) * n)) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index a1308859887d..dbc9ca34d9da 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include "kvm-s390.h" #include "gaccess.h" @@ -1069,7 +1070,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) goto out_err; spin_lock_init(&kvm->arch.float_int.lock); - INIT_LIST_HEAD(&kvm->arch.float_int.list); + for (i = 0; i < FIRQ_LIST_COUNT; i++) + INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]); init_waitqueue_head(&kvm->arch.ipte_wq); mutex_init(&kvm->arch.ipte_mutex); diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index c5aefef158e5..343644a59392 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -178,7 +178,7 @@ int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq); int __must_check kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code); struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, - u64 cr6, u64 schid); + u64 isc_mask, u32 schid); int kvm_s390_reinject_io_int(struct kvm *kvm, struct kvm_s390_interrupt_info *inti); int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 5e4658d20c77..d22d8ee1ff9d 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -294,10 +294,13 @@ reinject_interrupt: static int handle_tsch(struct kvm_vcpu *vcpu) { - struct kvm_s390_interrupt_info *inti; + struct kvm_s390_interrupt_info *inti = NULL; + const u64 isc_mask = 0xffUL << 24; /* all iscs set */ - inti = kvm_s390_get_io_int(vcpu->kvm, 0, - vcpu->run->s.regs.gprs[1]); + /* a valid schid has at least one bit set */ + if (vcpu->run->s.regs.gprs[1]) + inti = kvm_s390_get_io_int(vcpu->kvm, isc_mask, + vcpu->run->s.regs.gprs[1]); /* * Prepare exit to userspace. -- cgit v1.2.3-58-ga151 From b4aec92567f3146167cbc262c686ff73730aa4ca Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 1 Dec 2014 15:55:42 +0100 Subject: KVM: s390: cpu timer irq priority We now have a mechanism for delivering interrupts according to their priority. Let's inject them using our new infrastructure (instead of letting only hardware handle them), so we can be sure that the irq priorities are satisfied. For s390, the cpu timer and the clock comparator are to be checked for common code kvm_cpu_has_pending_timer(), although the cpu timer is only stepped when the guest is being executed. Reviewed-by: Christian Borntraeger Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger Acked-by: Cornelia Huck --- arch/s390/kvm/interrupt.c | 34 +++++++++++++++++++++++++++------- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 2872fdb4d01a..8a0786ccaf68 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -70,6 +70,26 @@ static int ckc_interrupts_enabled(struct kvm_vcpu *vcpu) return 1; } +static int ckc_irq_pending(struct kvm_vcpu *vcpu) +{ + if (!(vcpu->arch.sie_block->ckc < + get_tod_clock_fast() + vcpu->arch.sie_block->epoch)) + return 0; + return ckc_interrupts_enabled(vcpu); +} + +static int cpu_timer_interrupts_enabled(struct kvm_vcpu *vcpu) +{ + return !psw_extint_disabled(vcpu) && + (vcpu->arch.sie_block->gcr[0] & 0x400ul); +} + +static int cpu_timer_irq_pending(struct kvm_vcpu *vcpu) +{ + return (vcpu->arch.sie_block->cputm >> 63) && + cpu_timer_interrupts_enabled(vcpu); +} + static inline int is_ioirq(unsigned long irq_type) { return ((irq_type >= IRQ_PEND_IO_ISC_0) && @@ -809,12 +829,7 @@ int kvm_s390_vcpu_has_irq(struct kvm_vcpu *vcpu, int exclude_stop) int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) { - if (!(vcpu->arch.sie_block->ckc < - get_tod_clock_fast() + vcpu->arch.sie_block->epoch)) - return 0; - if (!ckc_interrupts_enabled(vcpu)) - return 0; - return 1; + return ckc_irq_pending(vcpu) || cpu_timer_irq_pending(vcpu); } int kvm_s390_handle_wait(struct kvm_vcpu *vcpu) @@ -918,9 +933,14 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) /* pending ckc conditions might have been invalidated */ clear_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs); - if (kvm_cpu_has_pending_timer(vcpu)) + if (ckc_irq_pending(vcpu)) set_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs); + /* pending cpu timer conditions might have been invalidated */ + clear_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs); + if (cpu_timer_irq_pending(vcpu)) + set_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs); + do { irqs = deliverable_irqs(vcpu); /* bits are in the order of interrupt priority */ -- cgit v1.2.3-58-ga151 From 47b43c52ee4b0425449d1b2b1eedca7f6b7a578a Mon Sep 17 00:00:00 2001 From: Jens Freimann Date: Tue, 11 Nov 2014 20:57:06 +0100 Subject: KVM: s390: add ioctl to inject local interrupts We have introduced struct kvm_s390_irq a while ago which allows to inject all kinds of interrupts as defined in the Principles of Operation. Add ioctl to inject interrupts with the extended struct kvm_s390_irq Signed-off-by: Jens Freimann Signed-off-by: Christian Borntraeger Acked-by: Cornelia Huck --- Documentation/virtual/kvm/api.txt | 56 +++++++++++++++++++++++++++++++++++++++ arch/s390/kvm/kvm-s390.c | 10 +++++++ include/uapi/linux/kvm.h | 3 +++ virt/kvm/kvm_main.c | 2 +- 4 files changed, 70 insertions(+), 1 deletion(-) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 0d7fc66289a0..a7c651d0dc63 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2820,6 +2820,62 @@ single frame starting at start_gfn for count frames. Note: If any architecturally invalid key value is found in the given data then the ioctl will return -EINVAL. +4.92 KVM_S390_IRQ + +Capability: KVM_CAP_S390_INJECT_IRQ +Architectures: s390 +Type: vcpu ioctl +Parameters: struct kvm_s390_irq (in) +Returns: 0 on success, -1 on error +Errors: + EINVAL: interrupt type is invalid + type is KVM_S390_SIGP_STOP and flag parameter is invalid value + type is KVM_S390_INT_EXTERNAL_CALL and code is bigger + than the maximum of VCPUs + EBUSY: type is KVM_S390_SIGP_SET_PREFIX and vcpu is not stopped + type is KVM_S390_SIGP_STOP and a stop irq is already pending + type is KVM_S390_INT_EXTERNAL_CALL and an external call interrupt + is already pending + +Allows to inject an interrupt to the guest. + +Using struct kvm_s390_irq as a parameter allows +to inject additional payload which is not +possible via KVM_S390_INTERRUPT. + +Interrupt parameters are passed via kvm_s390_irq: + +struct kvm_s390_irq { + __u64 type; + union { + struct kvm_s390_io_info io; + struct kvm_s390_ext_info ext; + struct kvm_s390_pgm_info pgm; + struct kvm_s390_emerg_info emerg; + struct kvm_s390_extcall_info extcall; + struct kvm_s390_prefix_info prefix; + struct kvm_s390_stop_info stop; + struct kvm_s390_mchk_info mchk; + char reserved[64]; + } u; +}; + +type can be one of the following: + +KVM_S390_SIGP_STOP - sigp stop; parameter in .stop +KVM_S390_PROGRAM_INT - program check; parameters in .pgm +KVM_S390_SIGP_SET_PREFIX - sigp set prefix; parameters in .prefix +KVM_S390_RESTART - restart; no parameters +KVM_S390_INT_CLOCK_COMP - clock comparator interrupt; no parameters +KVM_S390_INT_CPU_TIMER - CPU timer interrupt; no parameters +KVM_S390_INT_EMERGENCY - sigp emergency; parameters in .emerg +KVM_S390_INT_EXTERNAL_CALL - sigp external call; parameters in .extcall +KVM_S390_MCHK - machine check interrupt; parameters in .mchk + + +Note that the vcpu ioctl is asynchronous to vcpu execution. + + 5. The kvm_run structure ------------------------ diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index dbc9ca34d9da..8bc25d405edf 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -177,6 +177,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_S390_IRQCHIP: case KVM_CAP_VM_ATTRIBUTES: case KVM_CAP_MP_STATE: + case KVM_CAP_S390_INJECT_IRQ: case KVM_CAP_S390_USER_SIGP: case KVM_CAP_S390_USER_STSI: case KVM_CAP_S390_SKEYS: @@ -2391,6 +2392,15 @@ long kvm_arch_vcpu_ioctl(struct file *filp, long r; switch (ioctl) { + case KVM_S390_IRQ: { + struct kvm_s390_irq s390irq; + + r = -EFAULT; + if (copy_from_user(&s390irq, argp, sizeof(s390irq))) + break; + r = kvm_s390_inject_vcpu(vcpu, &s390irq); + break; + } case KVM_S390_INTERRUPT: { struct kvm_s390_interrupt s390int; struct kvm_s390_irq s390irq; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 1162ef7a3fa1..c0632e87a00f 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -802,6 +802,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_S390_MEM_OP 108 #define KVM_CAP_S390_USER_STSI 109 #define KVM_CAP_S390_SKEYS 110 +#define KVM_CAP_S390_INJECT_IRQ 113 #ifdef KVM_CAP_IRQ_ROUTING @@ -1182,6 +1183,8 @@ struct kvm_s390_ucas_mapping { /* Available with KVM_CAP_S390_SKEYS */ #define KVM_S390_GET_SKEYS _IOW(KVMIO, 0xb2, struct kvm_s390_skeys) #define KVM_S390_SET_SKEYS _IOW(KVMIO, 0xb3, struct kvm_s390_skeys) +/* Available with KVM_CAP_S390_INJECT_IRQ */ +#define KVM_S390_IRQ _IOW(KVMIO, 0xb4, struct kvm_s390_irq) #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) #define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index a1093700f3a4..34310a8d24b9 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2118,7 +2118,7 @@ static long kvm_vcpu_ioctl(struct file *filp, * Special cases: vcpu ioctls that are asynchronous to vcpu execution, * so vcpu_load() would break it. */ - if (ioctl == KVM_S390_INTERRUPT || ioctl == KVM_INTERRUPT) + if (ioctl == KVM_S390_INTERRUPT || ioctl == KVM_S390_IRQ || ioctl == KVM_INTERRUPT) return kvm_arch_vcpu_ioctl(filp, ioctl, arg); #endif -- cgit v1.2.3-58-ga151 From 79e87a103de1eda0cb4d726cd8581798e2d38f3e Mon Sep 17 00:00:00 2001 From: Jens Freimann Date: Thu, 19 Mar 2015 15:12:12 +0100 Subject: KVM: s390: refactor vcpu injection function Let's provide a version of kvm_s390_inject_vcpu() that does not acquire the local-interrupt lock and skips waking up the vcpu. To be used in a later patch for vcpu-local interrupt migration, where we are already holding the lock. Reviewed-by: David Hildenbrand Signed-off-by: Jens Freimann Signed-off-by: Christian Borntraeger Acked-by: Cornelia Huck --- arch/s390/kvm/interrupt.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 8a0786ccaf68..bc0988093c5b 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -1514,12 +1514,10 @@ void kvm_s390_clear_stop_irq(struct kvm_vcpu *vcpu) spin_unlock(&li->lock); } -int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) +static int do_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) { - struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; int rc; - spin_lock(&li->lock); switch (irq->type) { case KVM_S390_PROGRAM_INT: VCPU_EVENT(vcpu, 3, "inject: program check %d (from user)", @@ -1559,6 +1557,17 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) default: rc = -EINVAL; } + + return rc; +} + +int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + int rc; + + spin_lock(&li->lock); + rc = do_inject_vcpu(vcpu, irq); spin_unlock(&li->lock); if (!rc) kvm_s390_vcpu_wakeup(vcpu); -- cgit v1.2.3-58-ga151 From 816c7667ea97c61884e014cfeedaede5b67b0e58 Mon Sep 17 00:00:00 2001 From: Jens Freimann Date: Mon, 24 Nov 2014 17:13:46 +0100 Subject: KVM: s390: migrate vcpu interrupt state This patch adds support to migrate vcpu interrupts. Two new vcpu ioctls are added which get/set the complete status of pending interrupts in one go. The ioctls are marked as available with the new capability KVM_CAP_S390_IRQ_STATE. We can not use a ONEREG, as the number of pending local interrupts is not constant and depends on the number of CPUs. To retrieve the interrupt state we add an ioctl KVM_S390_GET_IRQ_STATE. Its input parameter is a pointer to a struct kvm_s390_irq_state which has a buffer and length. For all currently pending interrupts, we copy a struct kvm_s390_irq into the buffer and pass it to userspace. To store interrupt state into a buffer provided by userspace, we add an ioctl KVM_S390_SET_IRQ_STATE. It passes a struct kvm_s390_irq_state into the kernel and injects all interrupts contained in the buffer. Signed-off-by: Jens Freimann Signed-off-by: Christian Borntraeger Acked-by: Cornelia Huck --- Documentation/virtual/kvm/api.txt | 61 +++++++++++++++++ arch/s390/kvm/interrupt.c | 140 ++++++++++++++++++++++++++++++++++++++ arch/s390/kvm/kvm-s390.c | 36 ++++++++++ arch/s390/kvm/kvm-s390.h | 4 ++ include/uapi/linux/kvm.h | 11 +++ 5 files changed, 252 insertions(+) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index a7c651d0dc63..18fb7630e2ad 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2875,6 +2875,67 @@ KVM_S390_MCHK - machine check interrupt; parameters in .mchk Note that the vcpu ioctl is asynchronous to vcpu execution. +4.94 KVM_S390_GET_IRQ_STATE + +Capability: KVM_CAP_S390_IRQ_STATE +Architectures: s390 +Type: vcpu ioctl +Parameters: struct kvm_s390_irq_state (out) +Returns: >= number of bytes copied into buffer, + -EINVAL if buffer size is 0, + -ENOBUFS if buffer size is too small to fit all pending interrupts, + -EFAULT if the buffer address was invalid + +This ioctl allows userspace to retrieve the complete state of all currently +pending interrupts in a single buffer. Use cases include migration +and introspection. The parameter structure contains the address of a +userspace buffer and its length: + +struct kvm_s390_irq_state { + __u64 buf; + __u32 flags; + __u32 len; + __u32 reserved[4]; +}; + +Userspace passes in the above struct and for each pending interrupt a +struct kvm_s390_irq is copied to the provided buffer. + +If -ENOBUFS is returned the buffer provided was too small and userspace +may retry with a bigger buffer. + +4.95 KVM_S390_SET_IRQ_STATE + +Capability: KVM_CAP_S390_IRQ_STATE +Architectures: s390 +Type: vcpu ioctl +Parameters: struct kvm_s390_irq_state (in) +Returns: 0 on success, + -EFAULT if the buffer address was invalid, + -EINVAL for an invalid buffer length (see below), + -EBUSY if there were already interrupts pending, + errors occurring when actually injecting the + interrupt. See KVM_S390_IRQ. + +This ioctl allows userspace to set the complete state of all cpu-local +interrupts currently pending for the vcpu. It is intended for restoring +interrupt state after a migration. The input parameter is a userspace buffer +containing a struct kvm_s390_irq_state: + +struct kvm_s390_irq_state { + __u64 buf; + __u32 len; + __u32 pad; +}; + +The userspace memory referenced by buf contains a struct kvm_s390_irq +for each interrupt to be injected into the guest. +If one of the interrupts could not be injected for some reason the +ioctl aborts. + +len must be a multiple of sizeof(struct kvm_s390_irq). It must be > 0 +and it must not exceed (max_vcpus + 32) * sizeof(struct kvm_s390_irq), +which is the maximum number of possibly pending cpu-local interrupts. 5. The kvm_run structure ------------------------ diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index bc0988093c5b..9de47265ef73 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -2123,3 +2123,143 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm, { return -EINVAL; } + +int kvm_s390_set_irq_state(struct kvm_vcpu *vcpu, void __user *irqstate, int len) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + struct kvm_s390_irq *buf; + int r = 0; + int n; + + buf = vmalloc(len); + if (!buf) + return -ENOMEM; + + if (copy_from_user((void *) buf, irqstate, len)) { + r = -EFAULT; + goto out_free; + } + + /* + * Don't allow setting the interrupt state + * when there are already interrupts pending + */ + spin_lock(&li->lock); + if (li->pending_irqs) { + r = -EBUSY; + goto out_unlock; + } + + for (n = 0; n < len / sizeof(*buf); n++) { + r = do_inject_vcpu(vcpu, &buf[n]); + if (r) + break; + } + +out_unlock: + spin_unlock(&li->lock); +out_free: + vfree(buf); + + return r; +} + +static void store_local_irq(struct kvm_s390_local_interrupt *li, + struct kvm_s390_irq *irq, + unsigned long irq_type) +{ + switch (irq_type) { + case IRQ_PEND_MCHK_EX: + case IRQ_PEND_MCHK_REP: + irq->type = KVM_S390_MCHK; + irq->u.mchk = li->irq.mchk; + break; + case IRQ_PEND_PROG: + irq->type = KVM_S390_PROGRAM_INT; + irq->u.pgm = li->irq.pgm; + break; + case IRQ_PEND_PFAULT_INIT: + irq->type = KVM_S390_INT_PFAULT_INIT; + irq->u.ext = li->irq.ext; + break; + case IRQ_PEND_EXT_EXTERNAL: + irq->type = KVM_S390_INT_EXTERNAL_CALL; + irq->u.extcall = li->irq.extcall; + break; + case IRQ_PEND_EXT_CLOCK_COMP: + irq->type = KVM_S390_INT_CLOCK_COMP; + break; + case IRQ_PEND_EXT_CPU_TIMER: + irq->type = KVM_S390_INT_CPU_TIMER; + break; + case IRQ_PEND_SIGP_STOP: + irq->type = KVM_S390_SIGP_STOP; + irq->u.stop = li->irq.stop; + break; + case IRQ_PEND_RESTART: + irq->type = KVM_S390_RESTART; + break; + case IRQ_PEND_SET_PREFIX: + irq->type = KVM_S390_SIGP_SET_PREFIX; + irq->u.prefix = li->irq.prefix; + break; + } +} + +int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu, __u8 __user *buf, int len) +{ + uint8_t sigp_ctrl = vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl; + unsigned long sigp_emerg_pending[BITS_TO_LONGS(KVM_MAX_VCPUS)]; + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + unsigned long pending_irqs; + struct kvm_s390_irq irq; + unsigned long irq_type; + int cpuaddr; + int n = 0; + + spin_lock(&li->lock); + pending_irqs = li->pending_irqs; + memcpy(&sigp_emerg_pending, &li->sigp_emerg_pending, + sizeof(sigp_emerg_pending)); + spin_unlock(&li->lock); + + for_each_set_bit(irq_type, &pending_irqs, IRQ_PEND_COUNT) { + memset(&irq, 0, sizeof(irq)); + if (irq_type == IRQ_PEND_EXT_EMERGENCY) + continue; + if (n + sizeof(irq) > len) + return -ENOBUFS; + store_local_irq(&vcpu->arch.local_int, &irq, irq_type); + if (copy_to_user(&buf[n], &irq, sizeof(irq))) + return -EFAULT; + n += sizeof(irq); + } + + if (test_bit(IRQ_PEND_EXT_EMERGENCY, &pending_irqs)) { + for_each_set_bit(cpuaddr, sigp_emerg_pending, KVM_MAX_VCPUS) { + memset(&irq, 0, sizeof(irq)); + if (n + sizeof(irq) > len) + return -ENOBUFS; + irq.type = KVM_S390_INT_EMERGENCY; + irq.u.emerg.code = cpuaddr; + if (copy_to_user(&buf[n], &irq, sizeof(irq))) + return -EFAULT; + n += sizeof(irq); + } + } + + if ((sigp_ctrl & SIGP_CTRL_C) && + (atomic_read(&vcpu->arch.sie_block->cpuflags) & + CPUSTAT_ECALL_PEND)) { + if (n + sizeof(irq) > len) + return -ENOBUFS; + memset(&irq, 0, sizeof(irq)); + irq.type = KVM_S390_INT_EXTERNAL_CALL; + irq.u.extcall.code = sigp_ctrl & SIGP_CTRL_SCN_MASK; + if (copy_to_user(&buf[n], &irq, sizeof(irq))) + return -EFAULT; + n += sizeof(irq); + } + + return n; +} diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 8bc25d405edf..3040b14751b8 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -41,6 +41,9 @@ #include "trace-s390.h" #define MEM_OP_MAX_SIZE 65536 /* Maximum transfer size for KVM_S390_MEM_OP */ +#define LOCAL_IRQS 32 +#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \ + (KVM_MAX_VCPUS + LOCAL_IRQS)) #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU @@ -181,6 +184,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_S390_USER_SIGP: case KVM_CAP_S390_USER_STSI: case KVM_CAP_S390_SKEYS: + case KVM_CAP_S390_IRQ_STATE: r = 1; break; case KVM_CAP_S390_MEM_OP: @@ -2500,6 +2504,38 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = -EFAULT; break; } + case KVM_S390_SET_IRQ_STATE: { + struct kvm_s390_irq_state irq_state; + + r = -EFAULT; + if (copy_from_user(&irq_state, argp, sizeof(irq_state))) + break; + if (irq_state.len > VCPU_IRQS_MAX_BUF || + irq_state.len == 0 || + irq_state.len % sizeof(struct kvm_s390_irq) > 0) { + r = -EINVAL; + break; + } + r = kvm_s390_set_irq_state(vcpu, + (void __user *) irq_state.buf, + irq_state.len); + break; + } + case KVM_S390_GET_IRQ_STATE: { + struct kvm_s390_irq_state irq_state; + + r = -EFAULT; + if (copy_from_user(&irq_state, argp, sizeof(irq_state))) + break; + if (irq_state.len == 0) { + r = -EINVAL; + break; + } + r = kvm_s390_get_irq_state(vcpu, + (__u8 __user *) irq_state.buf, + irq_state.len); + break; + } default: r = -ENOTTY; } diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 343644a59392..ca108b90ae56 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -272,6 +272,10 @@ int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu); extern struct kvm_device_ops kvm_flic_ops; int kvm_s390_is_stop_irq_pending(struct kvm_vcpu *vcpu); void kvm_s390_clear_stop_irq(struct kvm_vcpu *vcpu); +int kvm_s390_set_irq_state(struct kvm_vcpu *vcpu, + void __user *buf, int len); +int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu, + __u8 __user *buf, int len); /* implemented in guestdbg.c */ void kvm_s390_backup_guest_per_regs(struct kvm_vcpu *vcpu); diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index c0632e87a00f..c045c725e521 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -558,6 +558,13 @@ struct kvm_s390_irq { } u; }; +struct kvm_s390_irq_state { + __u64 buf; + __u32 flags; + __u32 len; + __u32 reserved[4]; +}; + /* for KVM_SET_GUEST_DEBUG */ #define KVM_GUESTDBG_ENABLE 0x00000001 @@ -803,6 +810,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_S390_USER_STSI 109 #define KVM_CAP_S390_SKEYS 110 #define KVM_CAP_S390_INJECT_IRQ 113 +#define KVM_CAP_S390_IRQ_STATE 114 #ifdef KVM_CAP_IRQ_ROUTING @@ -1185,6 +1193,9 @@ struct kvm_s390_ucas_mapping { #define KVM_S390_SET_SKEYS _IOW(KVMIO, 0xb3, struct kvm_s390_skeys) /* Available with KVM_CAP_S390_INJECT_IRQ */ #define KVM_S390_IRQ _IOW(KVMIO, 0xb4, struct kvm_s390_irq) +/* Available with KVM_CAP_S390_IRQ_STATE */ +#define KVM_S390_SET_IRQ_STATE _IOW(KVMIO, 0xb5, struct kvm_s390_irq_state) +#define KVM_S390_GET_IRQ_STATE _IOW(KVMIO, 0xb6, struct kvm_s390_irq_state) #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) #define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) -- cgit v1.2.3-58-ga151