author     Palmer Dabbelt <palmer@rivosinc.com>    2024-07-12 03:23:22 -0700
committer  Palmer Dabbelt <palmer@rivosinc.com>    2024-07-12 08:55:29 -0700
commit     5ee121a39330e437cae0d64feeb459c7ec9e9500 (patch)
tree       eb18b10aa0a73c8deec48fd88874245f19816f42 /arch/riscv/kvm
parent     c9b8cd139c1dfb95eb86fd6a58cfe2089843d1d4 (diff)
parent     f2c43c61160ecba15f073b79abf1ea351e41203d (diff)
Merge patch series "riscv: Apply Zawrs when available"
Andrew Jones <ajones@ventanamicro.com> says:

Zawrs provides two instructions (wrs.nto and wrs.sto), where both are meant to allow the hart to enter a low-power state while waiting on a store to a memory location. The instructions also both wait an implementation-defined "short" duration (unless the implementation terminates the stall for another reason). The difference is that while wrs.sto will terminate when the duration elapses, wrs.nto, depending on configuration, will either just keep waiting or raise an illegal-instruction exception. Linux will use wrs.nto, so if platforms have an implementation which falls in the "just keep waiting" category (which is not expected), then they should _not_ advertise Zawrs in the hardware description.

Like wfi (and with the same {m,h}status bits to configure it), when wrs.nto is configured to raise exceptions it's expected that the higher privilege level will see the instruction was a wait instruction, do something, and then resume execution following the instruction. For example, KVM does configure exceptions for wfi (hstatus.VTW=1) and therefore also for wrs.nto. KVM does this for wfi since it's better to allow other tasks to be scheduled while a VCPU waits for an interrupt. For waits such as those where wrs.nto/sto would be used, which are typically locks, it is also a good idea for KVM to be involved, as it can attempt to schedule the lock-holding VCPU.

This series starts with Christoph's addition of the riscv smp_cond_load_relaxed function which applies wrs.sto when available. That patch has been reworked to use wrs.nto and to use the same approach as Arm for the wait loop, since we can't have arbitrary C code between the load-reserved and the wrs. Then, hwprobe support is added (since the instructions are also usable from usermode), and finally KVM is taught about wrs.nto, allowing guests to see and use the Zawrs extension.

We still don't have test results from hardware, and it's not possible to prove that using Zawrs is a win when testing on QEMU, not even when oversubscribing VCPUs to guests. However, it is possible to use KVM selftests to force a scenario where we can prove Zawrs does its job and does it well. [4] is a test which does this and, on my machine, without Zawrs it takes 16 seconds to complete and with Zawrs it takes 0.25 seconds.

This series is also available here [1]. In order to use QEMU for testing, a build with [2] is needed. In order to enable guests to use Zawrs with KVM using kvmtool, the branch at [3] may be used.

[1] https://github.com/jones-drew/linux/commits/riscv/zawrs-v3/
[2] https://lore.kernel.org/all/20240312152901.512001-2-ajones@ventanamicro.com/
[3] https://github.com/jones-drew/kvmtool/commits/riscv/zawrs/
[4] https://github.com/jones-drew/linux/commit/cb2beccebcece10881db842ed69bdd5715cfab5d

Link: https://lore.kernel.org/r/20240426100820.14762-8-ajones@ventanamicro.com

* b4-shazam-merge:
  KVM: riscv: selftests: Add Zawrs extension to get-reg-list test
  KVM: riscv: Support guest wrs.nto
  riscv: hwprobe: export Zawrs ISA extension
  riscv: Add Zawrs support for spinlocks
  dt-bindings: riscv: Add Zawrs ISA extension description
  riscv: Provide a definition for 'pause'

Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
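For reference, the wait-loop shape described in the cover letter (no arbitrary C code between the load-reserved and the wrs) can be sketched as a single inline-assembly block. This is only an illustration of the pattern, not the code added by the series: the helper name cmpwait_u32 is made up for this sketch, and wrs.nto is emitted via its raw encoding (0x00d00073, the same value used for INSN_MATCH_WRS in the KVM patch below) so the sketch does not depend on assembler support for Zawrs.

/*
 * Illustrative sketch only (not the code added by this series): the whole
 * lr/compare/wrs sequence sits in one asm block so that no C code can run
 * between the load-reserved and the wrs.nto, mirroring the Arm-style wait
 * loop described above.
 */
static inline void cmpwait_u32(volatile unsigned int *ptr, unsigned int val)
{
	unsigned int tmp;

	__asm__ __volatile__ (
		"	lr.w	%0, %1\n"	/* register a reservation on *ptr     */
		"	xor	%0, %0, %2\n"	/* compare with the last observed val */
		"	bnez	%0, 1f\n"	/* value already changed: return      */
		"	.4byte	0x00d00073\n"	/* wrs.nto: stall until the reservation
						   is invalidated by a store, or the
						   implementation-defined timeout     */
		"1:\n"
		: "=&r" (tmp), "+A" (*ptr)
		: "r" (val)
		: "memory");
}

A caller would then loop in C, re-reading the value and re-invoking the helper until its condition holds, which is the general shape of smp_cond_load_relaxed().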
Diffstat (limited to 'arch/riscv/kvm')
-rw-r--r--   arch/riscv/kvm/vcpu.c          1
-rw-r--r--   arch/riscv/kvm/vcpu_insn.c    15
-rw-r--r--   arch/riscv/kvm/vcpu_onereg.c   2
3 files changed, 18 insertions, 0 deletions
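Since the cover letter notes that hwprobe exposes Zawrs to usermode, a userspace check could look roughly like the sketch below. It assumes the bit name RISCV_HWPROBE_EXT_ZAWRS (following the existing RISCV_HWPROBE_EXT_* naming) and UAPI headers from a kernel that includes this series; everything else is the pre-existing hwprobe syscall ABI.

/* Hedged sketch: probe for Zawrs from usermode. RISCV_HWPROBE_EXT_ZAWRS is
 * assumed to be the bit added by "riscv: hwprobe: export Zawrs ISA extension". */
#include <asm/hwprobe.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	struct riscv_hwprobe pair = { .key = RISCV_HWPROBE_KEY_IMA_EXT_0 };

	/* One key/value pair, all CPUs (cpusetsize = 0, cpus = NULL), no flags. */
	if (syscall(__NR_riscv_hwprobe, &pair, 1, 0, NULL, 0))
		return 1;

	printf("Zawrs: %s\n",
	       (pair.value & RISCV_HWPROBE_EXT_ZAWRS) ? "supported" : "not supported");
	return 0;
}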
diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c
index 17e21df36cc1..c58a0a7f5e5f 100644
--- a/arch/riscv/kvm/vcpu.c
+++ b/arch/riscv/kvm/vcpu.c
@@ -25,6 +25,7 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
KVM_GENERIC_VCPU_STATS(),
STATS_DESC_COUNTER(VCPU, ecall_exit_stat),
STATS_DESC_COUNTER(VCPU, wfi_exit_stat),
+ STATS_DESC_COUNTER(VCPU, wrs_exit_stat),
STATS_DESC_COUNTER(VCPU, mmio_exit_user),
STATS_DESC_COUNTER(VCPU, mmio_exit_kernel),
STATS_DESC_COUNTER(VCPU, csr_exit_user),
diff --git a/arch/riscv/kvm/vcpu_insn.c b/arch/riscv/kvm/vcpu_insn.c
index ee7215f4071f..97dec18e6989 100644
--- a/arch/riscv/kvm/vcpu_insn.c
+++ b/arch/riscv/kvm/vcpu_insn.c
@@ -16,6 +16,9 @@
#define INSN_MASK_WFI 0xffffffff
#define INSN_MATCH_WFI 0x10500073
+#define INSN_MASK_WRS 0xffffffff
+#define INSN_MATCH_WRS 0x00d00073
+
#define INSN_MATCH_CSRRW 0x1073
#define INSN_MASK_CSRRW 0x707f
#define INSN_MATCH_CSRRS 0x2073
@@ -203,6 +206,13 @@ static int wfi_insn(struct kvm_vcpu *vcpu, struct kvm_run *run, ulong insn)
return KVM_INSN_CONTINUE_NEXT_SEPC;
}
+static int wrs_insn(struct kvm_vcpu *vcpu, struct kvm_run *run, ulong insn)
+{
+ vcpu->stat.wrs_exit_stat++;
+ kvm_vcpu_on_spin(vcpu, vcpu->arch.guest_context.sstatus & SR_SPP);
+ return KVM_INSN_CONTINUE_NEXT_SEPC;
+}
+
struct csr_func {
unsigned int base;
unsigned int count;
@@ -378,6 +388,11 @@ static const struct insn_func system_opcode_funcs[] = {
.match = INSN_MATCH_WFI,
.func = wfi_insn,
},
+ {
+ .mask = INSN_MASK_WRS,
+ .match = INSN_MATCH_WRS,
+ .func = wrs_insn,
+ },
};
static int system_opcode_insn(struct kvm_vcpu *vcpu, struct kvm_run *run,
diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c
index fa5ee544bc69..d9a9c1dc1636 100644
--- a/arch/riscv/kvm/vcpu_onereg.c
+++ b/arch/riscv/kvm/vcpu_onereg.c
@@ -42,6 +42,7 @@ static const unsigned long kvm_isa_ext_arr[] = {
KVM_ISA_EXT_ARR(SVNAPOT),
KVM_ISA_EXT_ARR(SVPBMT),
KVM_ISA_EXT_ARR(ZACAS),
+ KVM_ISA_EXT_ARR(ZAWRS),
KVM_ISA_EXT_ARR(ZBA),
KVM_ISA_EXT_ARR(ZBB),
KVM_ISA_EXT_ARR(ZBC),
@@ -132,6 +133,7 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext)
case KVM_RISCV_ISA_EXT_SVINVAL:
case KVM_RISCV_ISA_EXT_SVNAPOT:
case KVM_RISCV_ISA_EXT_ZACAS:
+ case KVM_RISCV_ISA_EXT_ZAWRS:
case KVM_RISCV_ISA_EXT_ZBA:
case KVM_RISCV_ISA_EXT_ZBB:
case KVM_RISCV_ISA_EXT_ZBC: