summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaolo Bonzini <pbonzini@redhat.com>2023-08-31 13:21:27 -0400
committerPaolo Bonzini <pbonzini@redhat.com>2023-08-31 13:21:27 -0400
commit69fd3876a4648499dbda4707fac646dc9c69fb0a (patch)
tree3a3c27aa8babbe8622b6c6e1bb86c77bdc27cca5
parent1814db83c049f3ab3e9a185b57a82f0ab53e58d3 (diff)
parent899e2206f46aece42d8194c350bc1de71344dbc7 (diff)
Merge tag 'kvm-s390-next-6.6-1' of https://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux into HEAD
- PV crypto passthrough enablement (Tony, Steffen, Viktor, Janosch) Allows a PV guest to use crypto cards. Card access is governed by the firmware and once a crypto queue is "bound" to a PV VM every other entity (PV or not) looses access until it is not bound anymore. Enablement is done via flags when creating the PV VM. - Guest debug fixes (Ilya)
-rw-r--r--arch/s390/include/asm/kvm_host.h5
-rw-r--r--arch/s390/include/asm/uv.h25
-rw-r--r--arch/s390/include/uapi/asm/kvm.h16
-rw-r--r--arch/s390/kernel/uv.c5
-rw-r--r--arch/s390/kvm/intercept.c38
-rw-r--r--arch/s390/kvm/interrupt.c14
-rw-r--r--arch/s390/kvm/kvm-s390.c102
-rw-r--r--arch/s390/kvm/kvm-s390.h12
-rw-r--r--arch/s390/kvm/pv.c23
-rw-r--r--arch/s390/mm/fault.c2
-rw-r--r--drivers/s390/crypto/vfio_ap_ops.c164
-rw-r--r--drivers/s390/crypto/vfio_ap_private.h6
-rw-r--r--tools/testing/selftests/kvm/Makefile1
-rw-r--r--tools/testing/selftests/kvm/s390x/debug_test.c160
14 files changed, 482 insertions, 91 deletions
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 2bbc3d54959d..427f9528a7b6 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -817,6 +817,8 @@ struct kvm_s390_cpu_model {
__u64 *fac_list;
u64 cpuid;
unsigned short ibc;
+ /* subset of available UV-features for pv-guests enabled by user space */
+ struct kvm_s390_vm_cpu_uv_feat uv_feat_guest;
};
typedef int (*crypto_hook)(struct kvm_vcpu *vcpu);
@@ -1028,6 +1030,9 @@ static inline int sie64a(struct kvm_s390_sie_block *sie_block, u64 *rsa)
extern char sie_exit;
+bool kvm_s390_pv_is_protected(struct kvm *kvm);
+bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu);
+
extern int kvm_s390_gisc_register(struct kvm *kvm, u32 gisc);
extern int kvm_s390_gisc_unregister(struct kvm *kvm, u32 gisc);
diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h
index d6bb2f4f78d1..0e7bd3873907 100644
--- a/arch/s390/include/asm/uv.h
+++ b/arch/s390/include/asm/uv.h
@@ -99,6 +99,8 @@ enum uv_cmds_inst {
enum uv_feat_ind {
BIT_UV_FEAT_MISC = 0,
BIT_UV_FEAT_AIV = 1,
+ BIT_UV_FEAT_AP = 4,
+ BIT_UV_FEAT_AP_INTR = 5,
};
struct uv_cb_header {
@@ -159,7 +161,15 @@ struct uv_cb_cgc {
u64 guest_handle;
u64 conf_base_stor_origin;
u64 conf_virt_stor_origin;
- u64 reserved30;
+ u8 reserved30[6];
+ union {
+ struct {
+ u16 : 14;
+ u16 ap_instr_intr : 1;
+ u16 ap_allow_instr : 1;
+ };
+ u16 raw;
+ } flags;
u64 guest_stor_origin;
u64 guest_stor_len;
u64 guest_sca;
@@ -397,6 +407,13 @@ struct uv_info {
extern struct uv_info uv_info;
+static inline bool uv_has_feature(u8 feature_bit)
+{
+ if (feature_bit >= sizeof(uv_info.uv_feature_indications) * 8)
+ return false;
+ return test_bit_inv(feature_bit, &uv_info.uv_feature_indications);
+}
+
#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
extern int prot_virt_guest;
@@ -463,6 +480,7 @@ static inline int is_prot_virt_host(void)
return prot_virt_host;
}
+int uv_pin_shared(unsigned long paddr);
int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb);
int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr);
int uv_destroy_owned_page(unsigned long paddr);
@@ -475,6 +493,11 @@ void setup_uv(void);
#define is_prot_virt_host() 0
static inline void setup_uv(void) {}
+static inline int uv_pin_shared(unsigned long paddr)
+{
+ return 0;
+}
+
static inline int uv_destroy_owned_page(unsigned long paddr)
{
return 0;
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
index a73cf01a1606..abe926d43cbe 100644
--- a/arch/s390/include/uapi/asm/kvm.h
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -159,6 +159,22 @@ struct kvm_s390_vm_cpu_subfunc {
__u8 reserved[1728];
};
+#define KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST 6
+#define KVM_S390_VM_CPU_MACHINE_UV_FEAT_GUEST 7
+
+#define KVM_S390_VM_CPU_UV_FEAT_NR_BITS 64
+struct kvm_s390_vm_cpu_uv_feat {
+ union {
+ struct {
+ __u64 : 4;
+ __u64 ap : 1; /* bit 4 */
+ __u64 ap_intr : 1; /* bit 5 */
+ __u64 : 58;
+ };
+ __u64 feat;
+ };
+};
+
/* kvm attributes for crypto */
#define KVM_S390_VM_CRYPTO_ENABLE_AES_KW 0
#define KVM_S390_VM_CRYPTO_ENABLE_DEA_KW 1
diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c
index 66f0eb1c872b..fc07bc39e698 100644
--- a/arch/s390/kernel/uv.c
+++ b/arch/s390/kernel/uv.c
@@ -88,7 +88,7 @@ fail:
* Requests the Ultravisor to pin the page in the shared state. This will
* cause an intercept when the guest attempts to unshare the pinned page.
*/
-static int uv_pin_shared(unsigned long paddr)
+int uv_pin_shared(unsigned long paddr)
{
struct uv_cb_cfs uvcb = {
.header.cmd = UVC_CMD_PIN_PAGE_SHARED,
@@ -100,6 +100,7 @@ static int uv_pin_shared(unsigned long paddr)
return -EINVAL;
return 0;
}
+EXPORT_SYMBOL_GPL(uv_pin_shared);
/*
* Requests the Ultravisor to destroy a guest page and make it
@@ -257,7 +258,7 @@ static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_str
* shared page from a different protected VM will automatically also
* transfer its ownership.
*/
- if (test_bit_inv(BIT_UV_FEAT_MISC, &uv_info.uv_feature_indications))
+ if (uv_has_feature(BIT_UV_FEAT_MISC))
return false;
if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED)
return false;
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index 341abafb96e4..b16352083ff9 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -228,6 +228,21 @@ static int handle_itdb(struct kvm_vcpu *vcpu)
#define per_event(vcpu) (vcpu->arch.sie_block->iprcc & PGM_PER)
+static bool should_handle_per_event(const struct kvm_vcpu *vcpu)
+{
+ if (!guestdbg_enabled(vcpu) || !per_event(vcpu))
+ return false;
+ if (guestdbg_sstep_enabled(vcpu) &&
+ vcpu->arch.sie_block->iprcc != PGM_PER) {
+ /*
+ * __vcpu_run() will exit after delivering the concurrently
+ * indicated condition.
+ */
+ return false;
+ }
+ return true;
+}
+
static int handle_prog(struct kvm_vcpu *vcpu)
{
psw_t psw;
@@ -242,7 +257,7 @@ static int handle_prog(struct kvm_vcpu *vcpu)
if (kvm_s390_pv_cpu_is_protected(vcpu))
return -EOPNOTSUPP;
- if (guestdbg_enabled(vcpu) && per_event(vcpu)) {
+ if (should_handle_per_event(vcpu)) {
rc = kvm_s390_handle_per_event(vcpu);
if (rc)
return rc;
@@ -571,6 +586,19 @@ static int handle_pv_notification(struct kvm_vcpu *vcpu)
return handle_instruction(vcpu);
}
+static bool should_handle_per_ifetch(const struct kvm_vcpu *vcpu, int rc)
+{
+ /* Process PER, also if the instruction is processed in user space. */
+ if (!(vcpu->arch.sie_block->icptstatus & 0x02))
+ return false;
+ if (rc != 0 && rc != -EOPNOTSUPP)
+ return false;
+ if (guestdbg_sstep_enabled(vcpu) && vcpu->arch.local_int.pending_irqs)
+ /* __vcpu_run() will exit after delivering the interrupt. */
+ return false;
+ return true;
+}
+
int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
{
int rc, per_rc = 0;
@@ -605,8 +633,8 @@ int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
rc = handle_partial_execution(vcpu);
break;
case ICPT_KSS:
- rc = kvm_s390_skey_check_enable(vcpu);
- break;
+ /* Instruction will be redriven, skip the PER check. */
+ return kvm_s390_skey_check_enable(vcpu);
case ICPT_MCHKREQ:
case ICPT_INT_ENABLE:
/*
@@ -633,9 +661,7 @@ int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
return -EOPNOTSUPP;
}
- /* process PER, also if the instruction is processed in user space */
- if (vcpu->arch.sie_block->icptstatus & 0x02 &&
- (!rc || rc == -EOPNOTSUPP))
+ if (should_handle_per_ifetch(vcpu, rc))
per_rc = kvm_s390_handle_per_ifetch_icpt(vcpu);
return per_rc ? per_rc : rc;
}
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 9bd0a873f3b1..85e39f472bb4 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -1392,6 +1392,7 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
{
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
int rc = 0;
+ bool delivered = false;
unsigned long irq_type;
unsigned long irqs;
@@ -1465,6 +1466,19 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
WARN_ONCE(1, "Unknown pending irq type %ld", irq_type);
clear_bit(irq_type, &li->pending_irqs);
}
+ delivered |= !rc;
+ }
+
+ /*
+ * We delivered at least one interrupt and modified the PC. Force a
+ * singlestep event now.
+ */
+ if (delivered && guestdbg_sstep_enabled(vcpu)) {
+ struct kvm_debug_exit_arch *debug_exit = &vcpu->run->debug.arch;
+
+ debug_exit->addr = vcpu->arch.sie_block->gpsw.addr;
+ debug_exit->type = KVM_SINGLESTEP;
+ vcpu->guest_debug |= KVM_GUESTDBG_EXIT_PENDING;
}
set_intercept_indicators(vcpu);
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index d1e768bcfe1d..b3f17e014cab 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -1531,6 +1531,39 @@ static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
return 0;
}
+#define KVM_S390_VM_CPU_UV_FEAT_GUEST_MASK \
+( \
+ ((struct kvm_s390_vm_cpu_uv_feat){ \
+ .ap = 1, \
+ .ap_intr = 1, \
+ }) \
+ .feat \
+)
+
+static int kvm_s390_set_uv_feat(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+ struct kvm_s390_vm_cpu_uv_feat __user *ptr = (void __user *)attr->addr;
+ unsigned long data, filter;
+
+ filter = uv_info.uv_feature_indications & KVM_S390_VM_CPU_UV_FEAT_GUEST_MASK;
+ if (get_user(data, &ptr->feat))
+ return -EFAULT;
+ if (!bitmap_subset(&data, &filter, KVM_S390_VM_CPU_UV_FEAT_NR_BITS))
+ return -EINVAL;
+
+ mutex_lock(&kvm->lock);
+ if (kvm->created_vcpus) {
+ mutex_unlock(&kvm->lock);
+ return -EBUSY;
+ }
+ kvm->arch.model.uv_feat_guest.feat = data;
+ mutex_unlock(&kvm->lock);
+
+ VM_EVENT(kvm, 3, "SET: guest UV-feat: 0x%16.16lx", data);
+
+ return 0;
+}
+
static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
int ret = -ENXIO;
@@ -1545,6 +1578,9 @@ static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
ret = kvm_s390_set_processor_subfunc(kvm, attr);
break;
+ case KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST:
+ ret = kvm_s390_set_uv_feat(kvm, attr);
+ break;
}
return ret;
}
@@ -1777,6 +1813,33 @@ static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
return 0;
}
+static int kvm_s390_get_processor_uv_feat(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+ struct kvm_s390_vm_cpu_uv_feat __user *dst = (void __user *)attr->addr;
+ unsigned long feat = kvm->arch.model.uv_feat_guest.feat;
+
+ if (put_user(feat, &dst->feat))
+ return -EFAULT;
+ VM_EVENT(kvm, 3, "GET: guest UV-feat: 0x%16.16lx", feat);
+
+ return 0;
+}
+
+static int kvm_s390_get_machine_uv_feat(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+ struct kvm_s390_vm_cpu_uv_feat __user *dst = (void __user *)attr->addr;
+ unsigned long feat;
+
+ BUILD_BUG_ON(sizeof(*dst) != sizeof(uv_info.uv_feature_indications));
+
+ feat = uv_info.uv_feature_indications & KVM_S390_VM_CPU_UV_FEAT_GUEST_MASK;
+ if (put_user(feat, &dst->feat))
+ return -EFAULT;
+ VM_EVENT(kvm, 3, "GET: guest UV-feat: 0x%16.16lx", feat);
+
+ return 0;
+}
+
static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
int ret = -ENXIO;
@@ -1800,6 +1863,12 @@ static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
ret = kvm_s390_get_machine_subfunc(kvm, attr);
break;
+ case KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST:
+ ret = kvm_s390_get_processor_uv_feat(kvm, attr);
+ break;
+ case KVM_S390_VM_CPU_MACHINE_UV_FEAT_GUEST:
+ ret = kvm_s390_get_machine_uv_feat(kvm, attr);
+ break;
}
return ret;
}
@@ -1952,6 +2021,8 @@ static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
case KVM_S390_VM_CPU_MACHINE_FEAT:
case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
+ case KVM_S390_VM_CPU_MACHINE_UV_FEAT_GUEST:
+ case KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST:
ret = 0;
break;
default:
@@ -2406,7 +2477,7 @@ static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
struct kvm_vcpu *vcpu;
/* Disable the GISA if the ultravisor does not support AIV. */
- if (!test_bit_inv(BIT_UV_FEAT_AIV, &uv_info.uv_feature_indications))
+ if (!uv_has_feature(BIT_UV_FEAT_AIV))
kvm_s390_gisa_disable(kvm);
kvm_for_each_vcpu(i, vcpu, kvm) {
@@ -3296,6 +3367,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
kvm->arch.model.ibc = sclp.ibc & 0x0fff;
+ kvm->arch.model.uv_feat_guest.feat = 0;
+
kvm_s390_crypto_init(kvm);
if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) {
@@ -4611,7 +4684,7 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu)
if (!kvm_is_ucontrol(vcpu->kvm)) {
rc = kvm_s390_deliver_pending_interrupts(vcpu);
- if (rc)
+ if (rc || guestdbg_exit_pending(vcpu))
return rc;
}
@@ -4738,7 +4811,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
do {
rc = vcpu_pre_run(vcpu);
- if (rc)
+ if (rc || guestdbg_exit_pending(vcpu))
break;
kvm_vcpu_srcu_read_unlock(vcpu);
@@ -5383,6 +5456,7 @@ long kvm_arch_vcpu_async_ioctl(struct file *filp,
{
struct kvm_vcpu *vcpu = filp->private_data;
void __user *argp = (void __user *)arg;
+ int rc;
switch (ioctl) {
case KVM_S390_IRQ: {
@@ -5390,7 +5464,8 @@ long kvm_arch_vcpu_async_ioctl(struct file *filp,
if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
return -EFAULT;
- return kvm_s390_inject_vcpu(vcpu, &s390irq);
+ rc = kvm_s390_inject_vcpu(vcpu, &s390irq);
+ break;
}
case KVM_S390_INTERRUPT: {
struct kvm_s390_interrupt s390int;
@@ -5400,10 +5475,25 @@ long kvm_arch_vcpu_async_ioctl(struct file *filp,
return -EFAULT;
if (s390int_to_s390irq(&s390int, &s390irq))
return -EINVAL;
- return kvm_s390_inject_vcpu(vcpu, &s390irq);
+ rc = kvm_s390_inject_vcpu(vcpu, &s390irq);
+ break;
}
+ default:
+ rc = -ENOIOCTLCMD;
+ break;
}
- return -ENOIOCTLCMD;
+
+ /*
+ * To simplify single stepping of userspace-emulated instructions,
+ * KVM_EXIT_S390_SIEIC exit sets KVM_GUESTDBG_EXIT_PENDING (see
+ * should_handle_per_ifetch()). However, if userspace emulation injects
+ * an interrupt, it needs to be cleared, so that KVM_EXIT_DEBUG happens
+ * after (and not before) the interrupt delivery.
+ */
+ if (!rc)
+ vcpu->guest_debug &= ~KVM_GUESTDBG_EXIT_PENDING;
+
+ return rc;
}
static int kvm_s390_handle_pv_vcpu_dump(struct kvm_vcpu *vcpu,
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 0261d42c7d01..a7ea80cfa445 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -270,18 +270,6 @@ static inline u64 kvm_s390_pv_cpu_get_handle(struct kvm_vcpu *vcpu)
return vcpu->arch.pv.handle;
}
-static inline bool kvm_s390_pv_is_protected(struct kvm *kvm)
-{
- lockdep_assert_held(&kvm->lock);
- return !!kvm_s390_pv_get_handle(kvm);
-}
-
-static inline bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu)
-{
- lockdep_assert_held(&vcpu->mutex);
- return !!kvm_s390_pv_cpu_get_handle(vcpu);
-}
-
/* implemented in interrupt.c */
int kvm_s390_handle_wait(struct kvm_vcpu *vcpu);
void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu);
diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c
index bf1fdc7bf89e..75e81ba26d04 100644
--- a/arch/s390/kvm/pv.c
+++ b/arch/s390/kvm/pv.c
@@ -18,6 +18,20 @@
#include <linux/mmu_notifier.h>
#include "kvm-s390.h"
+bool kvm_s390_pv_is_protected(struct kvm *kvm)
+{
+ lockdep_assert_held(&kvm->lock);
+ return !!kvm_s390_pv_get_handle(kvm);
+}
+EXPORT_SYMBOL_GPL(kvm_s390_pv_is_protected);
+
+bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu)
+{
+ lockdep_assert_held(&vcpu->mutex);
+ return !!kvm_s390_pv_cpu_get_handle(vcpu);
+}
+EXPORT_SYMBOL_GPL(kvm_s390_pv_cpu_is_protected);
+
/**
* struct pv_vm_to_be_destroyed - Represents a protected VM that needs to
* be destroyed
@@ -271,7 +285,8 @@ static int kvm_s390_pv_deinit_vm_fast(struct kvm *kvm, u16 *rc, u16 *rrc)
WRITE_ONCE(kvm->arch.gmap->guest_handle, 0);
KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY VM FAST: rc %x rrc %x",
uvcb.header.rc, uvcb.header.rrc);
- WARN_ONCE(cc, "protvirt destroy vm fast failed handle %llx rc %x rrc %x",
+ WARN_ONCE(cc && uvcb.header.rc != 0x104,
+ "protvirt destroy vm fast failed handle %llx rc %x rrc %x",
kvm_s390_pv_get_handle(kvm), uvcb.header.rc, uvcb.header.rrc);
/* Intended memory leak on "impossible" error */
if (!cc)
@@ -561,12 +576,14 @@ int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
uvcb.conf_base_stor_origin =
virt_to_phys((void *)kvm->arch.pv.stor_base);
uvcb.conf_virt_stor_origin = (u64)kvm->arch.pv.stor_var;
+ uvcb.flags.ap_allow_instr = kvm->arch.model.uv_feat_guest.ap;
+ uvcb.flags.ap_instr_intr = kvm->arch.model.uv_feat_guest.ap_intr;
cc = uv_call_sched(0, (u64)&uvcb);
*rc = uvcb.header.rc;
*rrc = uvcb.header.rrc;
- KVM_UV_EVENT(kvm, 3, "PROTVIRT CREATE VM: handle %llx len %llx rc %x rrc %x",
- uvcb.guest_handle, uvcb.guest_stor_len, *rc, *rrc);
+ KVM_UV_EVENT(kvm, 3, "PROTVIRT CREATE VM: handle %llx len %llx rc %x rrc %x flags %04x",
+ uvcb.guest_handle, uvcb.guest_stor_len, *rc, *rrc, uvcb.flags.raw);
/* Outputs */
kvm->arch.pv.handle = uvcb.guest_handle;
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 2f123429a291..7474c2059eb3 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -827,7 +827,7 @@ void do_secure_storage_access(struct pt_regs *regs)
* reliable without the misc UV feature so we need to check
* for that as well.
*/
- if (test_bit_inv(BIT_UV_FEAT_MISC, &uv_info.uv_feature_indications) &&
+ if (uv_has_feature(BIT_UV_FEAT_MISC) &&
!test_bit_inv(61, &regs->int_parm_long)) {
/*
* When this happens, userspace did something that it
diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
index b441745b0418..0509f80622cd 100644
--- a/drivers/s390/crypto/vfio_ap_ops.c
+++ b/drivers/s390/crypto/vfio_ap_ops.c
@@ -30,13 +30,12 @@
#define AP_QUEUE_UNASSIGNED "unassigned"
#define AP_QUEUE_IN_USE "in use"
-#define MAX_RESET_CHECK_WAIT 200 /* Sleep max 200ms for reset check */
#define AP_RESET_INTERVAL 20 /* Reset sleep interval (20ms) */
static int vfio_ap_mdev_reset_queues(struct ap_queue_table *qtable);
static struct vfio_ap_queue *vfio_ap_find_queue(int apqn);
static const struct vfio_device_ops vfio_ap_matrix_dev_ops;
-static int vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q);
+static void vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q);
/**
* get_update_locks_for_kvm: Acquire the locks required to dynamically update a
@@ -360,6 +359,28 @@ static int vfio_ap_validate_nib(struct kvm_vcpu *vcpu, dma_addr_t *nib)
return 0;
}
+static int ensure_nib_shared(unsigned long addr, struct gmap *gmap)
+{
+ int ret;
+
+ /*
+ * The nib has to be located in shared storage since guest and
+ * host access it. vfio_pin_pages() will do a pin shared and
+ * if that fails (possibly because it's not a shared page) it
+ * calls export. We try to do a second pin shared here so that
+ * the UV gives us an error code if we try to pin a non-shared
+ * page.
+ *
+ * If the page is already pinned shared the UV will return a success.
+ */
+ ret = uv_pin_shared(addr);
+ if (ret) {
+ /* vfio_pin_pages() likely exported the page so let's re-import */
+ gmap_convert_to_secure(gmap, addr);
+ }
+ return ret;
+}
+
/**
* vfio_ap_irq_enable - Enable Interruption for a APQN
*
@@ -423,6 +444,14 @@ static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q,
h_nib = page_to_phys(h_page) | (nib & ~PAGE_MASK);
aqic_gisa.gisc = isc;
+ /* NIB in non-shared storage is a rc 6 for PV guests */
+ if (kvm_s390_pv_cpu_is_protected(vcpu) &&
+ ensure_nib_shared(h_nib & PAGE_MASK, kvm->arch.gmap)) {
+ vfio_unpin_pages(&q->matrix_mdev->vdev, nib, 1);
+ status.response_code = AP_RESPONSE_INVALID_ADDRESS;
+ return status;
+ }
+
nisc = kvm_s390_gisc_register(kvm, isc);
if (nisc < 0) {
VFIO_AP_DBF_WARN("%s: gisc registration failed: nisc=%d, isc=%d, apqn=%#04x\n",
@@ -675,7 +704,7 @@ static bool vfio_ap_mdev_filter_matrix(unsigned long *apm, unsigned long *aqm,
*/
apqn = AP_MKQID(apid, apqi);
q = vfio_ap_mdev_get_queue(matrix_mdev, apqn);
- if (!q || q->reset_rc) {
+ if (!q || q->reset_status.response_code) {
clear_bit_inv(apid,
matrix_mdev->shadow_apcb.apm);
break;
@@ -1608,19 +1637,21 @@ static int apq_status_check(int apqn, struct ap_queue_status *status)
{
switch (status->response_code) {
case AP_RESPONSE_NORMAL:
+ case AP_RESPONSE_DECONFIGURED:
+ return 0;
case AP_RESPONSE_RESET_IN_PROGRESS:
- if (status->queue_empty && !status->irq_enabled)
- return 0;
+ case AP_RESPONSE_BUSY:
return -EBUSY;
- case AP_RESPONSE_DECONFIGURED:
+ case AP_RESPONSE_ASSOC_SECRET_NOT_UNIQUE:
+ case AP_RESPONSE_ASSOC_FAILED:
/*
- * If the AP queue is deconfigured, any subsequent AP command
- * targeting the queue will fail with the same response code. On the
- * other hand, when an AP adapter is deconfigured, the associated
- * queues are reset, so let's return a value indicating the reset
- * for which we're waiting completed successfully.
+ * These asynchronous response codes indicate a PQAP(AAPQ)
+ * instruction to associate a secret with the guest failed. All
+ * subsequent AP instructions will end with the asynchronous
+ * response code until the AP queue is reset; so, let's return
+ * a value indicating a reset needs to be performed again.
*/
- return 0;
+ return -EAGAIN;
default:
WARN(true,
"failed to verify reset of queue %02x.%04x: TAPQ rc=%u\n",
@@ -1630,91 +1661,105 @@ static int apq_status_check(int apqn, struct ap_queue_status *status)
}
}
-static int apq_reset_check(struct vfio_ap_queue *q)
+#define WAIT_MSG "Waited %dms for reset of queue %02x.%04x (%u, %u, %u)"
+
+static void apq_reset_check(struct work_struct *reset_work)
{
- int ret;
- int iters = MAX_RESET_CHECK_WAIT / AP_RESET_INTERVAL;
+ int ret = -EBUSY, elapsed = 0;
struct ap_queue_status status;
+ struct vfio_ap_queue *q;
- for (; iters > 0; iters--) {
+ q = container_of(reset_work, struct vfio_ap_queue, reset_work);
+ memcpy(&status, &q->reset_status, sizeof(status));
+ while (true) {
msleep(AP_RESET_INTERVAL);
+ elapsed += AP_RESET_INTERVAL;
status = ap_tapq(q->apqn, NULL);
ret = apq_status_check(q->apqn, &status);
- if (ret != -EBUSY)
- return ret;
+ if (ret == -EIO)
+ return;
+ if (ret == -EBUSY) {
+ pr_notice_ratelimited(WAIT_MSG, elapsed,
+ AP_QID_CARD(q->apqn),
+ AP_QID_QUEUE(q->apqn),
+ status.response_code,
+ status.queue_empty,
+ status.irq_enabled);
+ } else {
+ if (q->reset_status.response_code == AP_RESPONSE_RESET_IN_PROGRESS ||
+ q->reset_status.response_code == AP_RESPONSE_BUSY ||
+ q->reset_status.response_code == AP_RESPONSE_STATE_CHANGE_IN_PROGRESS ||
+ ret == -EAGAIN) {
+ status = ap_zapq(q->apqn, 0);
+ memcpy(&q->reset_status, &status, sizeof(status));
+ continue;
+ }
+ /*
+ * When an AP adapter is deconfigured, the
+ * associated queues are reset, so let's set the
+ * status response code to 0 so the queue may be
+ * passed through (i.e., not filtered)
+ */
+ if (status.response_code == AP_RESPONSE_DECONFIGURED)
+ q->reset_status.response_code = 0;
+ if (q->saved_isc != VFIO_AP_ISC_INVALID)
+ vfio_ap_free_aqic_resources(q);
+ break;
+ }
}
- WARN_ONCE(iters <= 0,
- "timeout verifying reset of queue %02x.%04x (%u, %u, %u)",
- AP_QID_CARD(q->apqn), AP_QID_QUEUE(q->apqn),
- status.queue_empty, status.irq_enabled, status.response_code);
- return ret;
}
-static int vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q)
+static void vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q)
{
struct ap_queue_status status;
- int ret;
if (!q)
- return 0;
-retry_zapq:
+ return;
status = ap_zapq(q->apqn, 0);
- q->reset_rc = status.response_code;
+ memcpy(&q->reset_status, &status, sizeof(status));
switch (status.response_code) {
case AP_RESPONSE_NORMAL:
- ret = 0;
- /* if the reset has not completed, wait for it to take effect */
- if (!status.queue_empty || status.irq_enabled)
- ret = apq_reset_check(q);
- break;
case AP_RESPONSE_RESET_IN_PROGRESS:
+ case AP_RESPONSE_BUSY:
+ case AP_RESPONSE_STATE_CHANGE_IN_PROGRESS:
/*
- * There is a reset issued by another process in progress. Let's wait
- * for that to complete. Since we have no idea whether it was a RAPQ or
- * ZAPQ, then if it completes successfully, let's issue the ZAPQ.
+ * Let's verify whether the ZAPQ completed successfully on a work queue.
*/
- ret = apq_reset_check(q);
- if (ret)
- break;
- goto retry_zapq;
+ queue_work(system_long_wq, &q->reset_work);
+ break;
case AP_RESPONSE_DECONFIGURED:
/*
* When an AP adapter is deconfigured, the associated
- * queues are reset, so let's return a value indicating the reset
- * completed successfully.
+ * queues are reset, so let's set the status response code to 0
+ * so the queue may be passed through (i.e., not filtered).
*/
- ret = 0;
+ q->reset_status.response_code = 0;
+ vfio_ap_free_aqic_resources(q);
break;
default:
WARN(true,
"PQAP/ZAPQ for %02x.%04x failed with invalid rc=%u\n",
AP_QID_CARD(q->apqn), AP_QID_QUEUE(q->apqn),
status.response_code);
- return -EIO;
}
-
- vfio_ap_free_aqic_resources(q);
-
- return ret;
}
static int vfio_ap_mdev_reset_queues(struct ap_queue_table *qtable)
{
- int ret, loop_cursor, rc = 0;
+ int ret = 0, loop_cursor;
struct vfio_ap_queue *q;
+ hash_for_each(qtable->queues, loop_cursor, q, mdev_qnode)
+ vfio_ap_mdev_reset_queue(q);
+
hash_for_each(qtable->queues, loop_cursor, q, mdev_qnode) {
- ret = vfio_ap_mdev_reset_queue(q);
- /*
- * Regardless whether a queue turns out to be busy, or
- * is not operational, we need to continue resetting
- * the remaining queues.
- */
- if (ret)
- rc = ret;
+ flush_work(&q->reset_work);
+
+ if (q->reset_status.response_code)
+ ret = -EIO;
}
- return rc;
+ return ret;
}
static int vfio_ap_mdev_open_device(struct vfio_device *vdev)
@@ -2038,6 +2083,8 @@ int vfio_ap_mdev_probe_queue(struct ap_device *apdev)
q->apqn = to_ap_queue(&apdev->device)->qid;
q->saved_isc = VFIO_AP_ISC_INVALID;
+ memset(&q->reset_status, 0, sizeof(q->reset_status));
+ INIT_WORK(&q->reset_work, apq_reset_check);
matrix_mdev = get_update_locks_by_apqn(q->apqn);
if (matrix_mdev) {
@@ -2087,6 +2134,7 @@ void vfio_ap_mdev_remove_queue(struct ap_device *apdev)
}
vfio_ap_mdev_reset_queue(q);
+ flush_work(&q->reset_work);
dev_set_drvdata(&apdev->device, NULL);
kfree(q);
release_update_locks_for_mdev(matrix_mdev);
diff --git a/drivers/s390/crypto/vfio_ap_private.h b/drivers/s390/crypto/vfio_ap_private.h
index 4642bbdbd1b2..88aff8b81f2f 100644
--- a/drivers/s390/crypto/vfio_ap_private.h
+++ b/drivers/s390/crypto/vfio_ap_private.h
@@ -133,7 +133,8 @@ struct ap_matrix_mdev {
* @apqn: the APQN of the AP queue device
* @saved_isc: the guest ISC registered with the GIB interface
* @mdev_qnode: allows the vfio_ap_queue struct to be added to a hashtable
- * @reset_rc: the status response code from the last reset of the queue
+ * @reset_status: the status from the last reset of the queue
+ * @reset_work: work to wait for queue reset to complete
*/
struct vfio_ap_queue {
struct ap_matrix_mdev *matrix_mdev;
@@ -142,7 +143,8 @@ struct vfio_ap_queue {
#define VFIO_AP_ISC_INVALID 0xff
unsigned char saved_isc;
struct hlist_node mdev_qnode;
- unsigned int reset_rc;
+ struct ap_queue_status reset_status;
+ struct work_struct reset_work;
};
int vfio_ap_mdev_register(void);
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 77026907968f..6092ccfc49ac 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -169,6 +169,7 @@ TEST_GEN_PROGS_s390x += s390x/resets
TEST_GEN_PROGS_s390x += s390x/sync_regs_test
TEST_GEN_PROGS_s390x += s390x/tprot
TEST_GEN_PROGS_s390x += s390x/cmma_test
+TEST_GEN_PROGS_s390x += s390x/debug_test
TEST_GEN_PROGS_s390x += demand_paging_test
TEST_GEN_PROGS_s390x += dirty_log_test
TEST_GEN_PROGS_s390x += guest_print_test
diff --git a/tools/testing/selftests/kvm/s390x/debug_test.c b/tools/testing/selftests/kvm/s390x/debug_test.c
new file mode 100644
index 000000000000..84313fb27529
--- /dev/null
+++ b/tools/testing/selftests/kvm/s390x/debug_test.c
@@ -0,0 +1,160 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Test KVM debugging features. */
+#include "kvm_util.h"
+#include "test_util.h"
+
+#include <linux/kvm.h>
+
+#define __LC_SVC_NEW_PSW 0x1c0
+#define __LC_PGM_NEW_PSW 0x1d0
+#define ICPT_INSTRUCTION 0x04
+#define IPA0_DIAG 0x8300
+#define PGM_SPECIFICATION 0x06
+
+/* Common code for testing single-stepping interruptions. */
+extern char int_handler[];
+asm("int_handler:\n"
+ "j .\n");
+
+static struct kvm_vm *test_step_int_1(struct kvm_vcpu **vcpu, void *guest_code,
+ size_t new_psw_off, uint64_t *new_psw)
+{
+ struct kvm_guest_debug debug = {};
+ struct kvm_regs regs;
+ struct kvm_vm *vm;
+ char *lowcore;
+
+ vm = vm_create_with_one_vcpu(vcpu, guest_code);
+ lowcore = addr_gpa2hva(vm, 0);
+ new_psw[0] = (*vcpu)->run->psw_mask;
+ new_psw[1] = (uint64_t)int_handler;
+ memcpy(lowcore + new_psw_off, new_psw, 16);
+ vcpu_regs_get(*vcpu, &regs);
+ regs.gprs[2] = -1;
+ vcpu_regs_set(*vcpu, &regs);
+ debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
+ vcpu_guest_debug_set(*vcpu, &debug);
+ vcpu_run(*vcpu);
+
+ return vm;
+}
+
+static void test_step_int(void *guest_code, size_t new_psw_off)
+{
+ struct kvm_vcpu *vcpu;
+ uint64_t new_psw[2];
+ struct kvm_vm *vm;
+
+ vm = test_step_int_1(&vcpu, guest_code, new_psw_off, new_psw);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_DEBUG);
+ TEST_ASSERT_EQ(vcpu->run->psw_mask, new_psw[0]);
+ TEST_ASSERT_EQ(vcpu->run->psw_addr, new_psw[1]);
+ kvm_vm_free(vm);
+}
+
+/* Test single-stepping "boring" program interruptions. */
+extern char test_step_pgm_guest_code[];
+asm("test_step_pgm_guest_code:\n"
+ ".insn rr,0x1d00,%r1,%r0 /* dr %r1,%r0 */\n"
+ "j .\n");
+
+static void test_step_pgm(void)
+{
+ test_step_int(test_step_pgm_guest_code, __LC_PGM_NEW_PSW);
+}
+
+/*
+ * Test single-stepping program interruptions caused by DIAG.
+ * Userspace emulation must not interfere with single-stepping.
+ */
+extern char test_step_pgm_diag_guest_code[];
+asm("test_step_pgm_diag_guest_code:\n"
+ "diag %r0,%r0,0\n"
+ "j .\n");
+
+static void test_step_pgm_diag(void)
+{
+ struct kvm_s390_irq irq = {
+ .type = KVM_S390_PROGRAM_INT,
+ .u.pgm.code = PGM_SPECIFICATION,
+ };
+ struct kvm_vcpu *vcpu;
+ uint64_t new_psw[2];
+ struct kvm_vm *vm;
+
+ vm = test_step_int_1(&vcpu, test_step_pgm_diag_guest_code,
+ __LC_PGM_NEW_PSW, new_psw);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_INSTRUCTION);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa & 0xff00, IPA0_DIAG);
+ vcpu_ioctl(vcpu, KVM_S390_IRQ, &irq);
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_DEBUG);
+ TEST_ASSERT_EQ(vcpu->run->psw_mask, new_psw[0]);
+ TEST_ASSERT_EQ(vcpu->run->psw_addr, new_psw[1]);
+ kvm_vm_free(vm);
+}
+
+/*
+ * Test single-stepping program interruptions caused by ISKE.
+ * CPUSTAT_KSS handling must not interfere with single-stepping.
+ */
+extern char test_step_pgm_iske_guest_code[];
+asm("test_step_pgm_iske_guest_code:\n"
+ "iske %r2,%r2\n"
+ "j .\n");
+
+static void test_step_pgm_iske(void)
+{
+ test_step_int(test_step_pgm_iske_guest_code, __LC_PGM_NEW_PSW);
+}
+
+/*
+ * Test single-stepping program interruptions caused by LCTL.
+ * KVM emulation must not interfere with single-stepping.
+ */
+extern char test_step_pgm_lctl_guest_code[];
+asm("test_step_pgm_lctl_guest_code:\n"
+ "lctl %c0,%c0,1\n"
+ "j .\n");
+
+static void test_step_pgm_lctl(void)
+{
+ test_step_int(test_step_pgm_lctl_guest_code, __LC_PGM_NEW_PSW);
+}
+
+/* Test single-stepping supervisor-call interruptions. */
+extern char test_step_svc_guest_code[];
+asm("test_step_svc_guest_code:\n"
+ "svc 0\n"
+ "j .\n");
+
+static void test_step_svc(void)
+{
+ test_step_int(test_step_svc_guest_code, __LC_SVC_NEW_PSW);
+}
+
+/* Run all tests above. */
+static struct testdef {
+ const char *name;
+ void (*test)(void);
+} testlist[] = {
+ { "single-step pgm", test_step_pgm },
+ { "single-step pgm caused by diag", test_step_pgm_diag },
+ { "single-step pgm caused by iske", test_step_pgm_iske },
+ { "single-step pgm caused by lctl", test_step_pgm_lctl },
+ { "single-step svc", test_step_svc },
+};
+
+int main(int argc, char *argv[])
+{
+ int idx;
+
+ ksft_print_header();
+ ksft_set_plan(ARRAY_SIZE(testlist));
+ for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) {
+ testlist[idx].test();
+ ksft_test_result_pass("%s\n", testlist[idx].name);
+ }
+ ksft_finished();
+}