author     Linus Torvalds <torvalds@linux-foundation.org>  2023-07-03 15:32:22 -0700
committer  Linus Torvalds <torvalds@linux-foundation.org>  2023-07-03 15:32:22 -0700
commit     e8069f5a8e3bdb5fdeeff895780529388592ee7a (patch)
tree       ce35ab85db9b66a7e488707fccdb33ce54f696dd /arch/arm64/include
parent     eded37770c9f80ecd5ba842359c4f1058d9812c3 (diff)
parent     255006adb3da71bb75c334453786df781b415f54 (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm updates from Paolo Bonzini:
 "ARM64:

   - Eager page splitting optimization for dirty logging, optionally
     allowing for a VM to avoid the cost of hugepage splitting in the
     stage-2 fault path.

   - Arm FF-A proxy for pKVM, allowing a pKVM host to safely interact
     with services that live in the Secure world. pKVM intervenes on
     FF-A calls to guarantee the host doesn't misuse memory donated to
     the hyp or a pKVM guest.

   - Support for running the split hypervisor with VHE enabled, known
     as 'hVHE' mode. This is extremely useful for testing the split
     hypervisor on VHE-only systems, and paves the way for new use
     cases that depend on having two TTBRs available at EL2.

   - Generalized framework for configurable ID registers from
     userspace. KVM/arm64 currently prevents arbitrary CPU feature set
     configuration from userspace, but the intent is to relax this
     limitation and allow userspace to select a feature set consistent
     with the CPU.

   - Enable the use of Branch Target Identification (FEAT_BTI) in the
     hypervisor.

   - Use a separate set of pointer authentication keys for the
     hypervisor when running in protected mode, as the host is
     untrusted at runtime.

   - Ensure timer IRQs are consistently released in the init failure
     paths.

   - Avoid trapping CTR_EL0 on systems with Enhanced Virtualization
     Traps (FEAT_EVT), as it is a register commonly read from
     userspace.

   - Erratum workaround for the upcoming AmpereOne part, which has
     broken hardware A/D state management.

  RISC-V:

   - Redirect AMO load/store misaligned traps to KVM guest

   - Trap-n-emulate AIA in-kernel irqchip for KVM guest

   - Svnapot support for KVM Guest

  s390:

   - New uvdevice secret API

   - CMM selftest and fixes

   - fix racy access to target CPU for diag 9c

  x86:

   - Fix missing/incorrect #GP checks on ENCLS

   - Use standard mmu_notifier hooks for handling APIC access page

   - Drop now unnecessary TR/TSS load after VM-Exit on AMD

   - Print more descriptive information about the status of SEV and
     SEV-ES during module load

   - Add a test for splitting and reconstituting hugepages during and
     after dirty logging

   - Add support for CPU pinning in demand paging test

   - Add support for AMD PerfMonV2, with a variety of cleanups and
     minor fixes included along the way

   - Add a "nx_huge_pages=never" option to effectively avoid creating
     NX hugepage recovery threads (because nx_huge_pages=off can be
     toggled at runtime)

   - Move handling of PAT out of MTRR code and dedup SVM+VMX code

   - Fix output of PIC poll command emulation when there's an
     interrupt

   - Add a maintainer's handbook to document KVM x86 processes,
     preferred coding style, testing expectations, etc.

   - Misc cleanups, fixes and comments

  Generic:

   - Miscellaneous bugfixes and cleanups

  Selftests:

   - Generate dependency files so that partial rebuilds work as
     expected"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (153 commits)
  Documentation/process: Add a maintainer handbook for KVM x86
  Documentation/process: Add a label for the tip tree handbook's coding style
  KVM: arm64: Fix misuse of KVM_ARM_VCPU_POWER_OFF bit index
  RISC-V: KVM: Remove unneeded semicolon
  RISC-V: KVM: Allow Svnapot extension for Guest/VM
  riscv: kvm: define vcpu_sbi_ext_pmu in header
  RISC-V: KVM: Expose IMSIC registers as attributes of AIA irqchip
  RISC-V: KVM: Add in-kernel virtualization of AIA IMSIC
  RISC-V: KVM: Expose APLIC registers as attributes of AIA irqchip
  RISC-V: KVM: Add in-kernel emulation of AIA APLIC
  RISC-V: KVM: Implement device interface for AIA irqchip
  RISC-V: KVM: Skeletal in-kernel AIA irqchip support
  RISC-V: KVM: Set kvm_riscv_aia_nr_hgei to zero
  RISC-V: KVM: Add APLIC related defines
  RISC-V: KVM: Add IMSIC related defines
  RISC-V: KVM: Implement guest external interrupt line management
  KVM: x86: Remove PRIx* definitions as they are solely for user space
  s390/uv: Update query for secret-UVCs
  s390/uv: replace scnprintf with sysfs_emit
  s390/uvdevice: Add 'Lock Secret Store' UVC
  ...
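To make the eager page splitting item above concrete: a minimal userspace sketch of opting in, assuming only the KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE capability this pull introduces. The helper name and error handling are illustrative, not part of the merge.

#include <linux/kvm.h>
#include <sys/ioctl.h>

/* Hypothetical VMM helper: ask KVM to split huge pages eagerly, in
 * chunks of 'chunk_bytes', when dirty logging is enabled, rather than
 * splitting them lazily in the stage-2 fault path. 0 disables it. */
static int enable_eager_split(int vm_fd, __u64 chunk_bytes)
{
	struct kvm_enable_cap cap = {
		.cap  = KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE,
		.args = { chunk_bytes },	/* must be a supported stage-2 block size, or 0 */
	};

	return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
}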
Diffstat (limited to 'arch/arm64/include')
-rw-r--r--  arch/arm64/include/asm/cpufeature.h    6
-rw-r--r--  arch/arm64/include/asm/el2_setup.h    27
-rw-r--r--  arch/arm64/include/asm/kvm_arm.h       7
-rw-r--r--  arch/arm64/include/asm/kvm_asm.h       4
-rw-r--r--  arch/arm64/include/asm/kvm_emulate.h  46
-rw-r--r--  arch/arm64/include/asm/kvm_host.h     61
-rw-r--r--  arch/arm64/include/asm/kvm_hyp.h      37
-rw-r--r--  arch/arm64/include/asm/kvm_mmu.h       4
-rw-r--r--  arch/arm64/include/asm/kvm_pgtable.h  79
-rw-r--r--  arch/arm64/include/asm/kvm_pkvm.h     21
-rw-r--r--  arch/arm64/include/asm/sysreg.h        1
-rw-r--r--  arch/arm64/include/asm/virt.h         12
12 files changed, 256 insertions, 49 deletions
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 7a95c324e52a..96e50227f940 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -15,6 +15,9 @@
#define MAX_CPU_FEATURES 128
#define cpu_feature(x) KERNEL_HWCAP_ ## x
+#define ARM64_SW_FEATURE_OVERRIDE_NOKASLR 0
+#define ARM64_SW_FEATURE_OVERRIDE_HVHE 4
+
#ifndef __ASSEMBLY__
#include <linux/bug.h>
@@ -905,6 +908,7 @@ static inline unsigned int get_vmid_bits(u64 mmfr1)
return 8;
}
+s64 arm64_ftr_safe_value(const struct arm64_ftr_bits *ftrp, s64 new, s64 cur);
struct arm64_ftr_reg *get_arm64_ftr_reg(u32 sys_id);
extern struct arm64_ftr_override id_aa64mmfr1_override;
@@ -915,6 +919,8 @@ extern struct arm64_ftr_override id_aa64smfr0_override;
extern struct arm64_ftr_override id_aa64isar1_override;
extern struct arm64_ftr_override id_aa64isar2_override;
+extern struct arm64_ftr_override arm64_sw_feature_override;
+
u32 get_kvm_ipa_limit(void);
void dump_cpu_features(void);
diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h
index f4c3d30bf746..8e5ffb58f83e 100644
--- a/arch/arm64/include/asm/el2_setup.h
+++ b/arch/arm64/include/asm/el2_setup.h
@@ -43,6 +43,11 @@
*/
.macro __init_el2_timers
mov x0, #3 // Enable EL1 physical timers
+ mrs x1, hcr_el2
+ and x1, x1, #HCR_E2H
+ cbz x1, .LnVHE_\@
+ lsl x0, x0, #10
+.LnVHE_\@:
msr cnthctl_el2, x0
msr cntvoff_el2, xzr // Clear virtual offset
.endm
@@ -133,8 +138,15 @@
.endm
/* Coprocessor traps */
-.macro __init_el2_nvhe_cptr
+.macro __init_el2_cptr
+ mrs x1, hcr_el2
+ and x1, x1, #HCR_E2H
+ cbz x1, .LnVHE_\@
+ mov x0, #(CPACR_EL1_FPEN_EL1EN | CPACR_EL1_FPEN_EL0EN)
+ b .Lset_cptr_\@
+.LnVHE_\@:
mov x0, #0x33ff
+.Lset_cptr_\@:
msr cptr_el2, x0 // Disable copro. traps to EL2
.endm
@@ -210,9 +222,8 @@
__init_el2_gicv3
__init_el2_hstr
__init_el2_nvhe_idregs
- __init_el2_nvhe_cptr
+ __init_el2_cptr
__init_el2_fgt
- __init_el2_nvhe_prepare_eret
.endm
#ifndef __KVM_NVHE_HYPERVISOR__
@@ -258,7 +269,17 @@
.Linit_sve_\@: /* SVE register access */
mrs x0, cptr_el2 // Disable SVE traps
+ mrs x1, hcr_el2
+ and x1, x1, #HCR_E2H
+ cbz x1, .Lcptr_nvhe_\@
+
+ // VHE case
+ orr x0, x0, #(CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN)
+ b .Lset_cptr_\@
+
+.Lcptr_nvhe_\@: // nVHE case
bic x0, x0, #CPTR_EL2_TZ
+.Lset_cptr_\@:
msr cptr_el2, x0
isb
mov x1, #ZCR_ELx_LEN_MASK // SVE: Enable full vector
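The E2H check added to __init_el2_timers and __init_el2_cptr above is the recurring pattern of this series: with HCR_EL2.E2H set (hVHE), EL2 uses the EL1-format register layouts, so the same enable bits land at different positions. A hedged C restatement of the timer path, for illustration only:

/* Sketch of __init_el2_timers' decision; not part of the patch. Without
 * E2H, CNTHCTL_EL2.{EL1PCTEN,EL1PCEN} are bits 0/1; with E2H set, the
 * equivalent controls move up to bits 10/11, hence the shift. */
u64 cnthctl = 3;			/* enable EL1 physical timers */
if (read_sysreg(hcr_el2) & HCR_E2H)
	cnthctl <<= 10;			/* VHE/hVHE layout */
write_sysreg(cnthctl, cnthctl_el2);
write_sysreg(0, cntvoff_el2);		/* clear virtual offset */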
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index c6e12e8f2751..58e5eb27da68 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -19,6 +19,7 @@
#define HCR_ATA_SHIFT 56
#define HCR_ATA (UL(1) << HCR_ATA_SHIFT)
#define HCR_AMVOFFEN (UL(1) << 51)
+#define HCR_TID4 (UL(1) << 49)
#define HCR_FIEN (UL(1) << 47)
#define HCR_FWB (UL(1) << 46)
#define HCR_API (UL(1) << 41)
@@ -87,7 +88,7 @@
#define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \
HCR_BSU_IS | HCR_FB | HCR_TACR | \
HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW | HCR_TLOR | \
- HCR_FMO | HCR_IMO | HCR_PTW | HCR_TID3 | HCR_TID2)
+ HCR_FMO | HCR_IMO | HCR_PTW | HCR_TID3)
#define HCR_VIRT_EXCP_MASK (HCR_VSE | HCR_VI | HCR_VF)
#define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK | HCR_ATA)
#define HCR_HOST_NVHE_PROTECTED_FLAGS (HCR_HOST_NVHE_FLAGS | HCR_TSC)
@@ -289,7 +290,6 @@
#define CPTR_EL2_TFP (1 << CPTR_EL2_TFP_SHIFT)
#define CPTR_EL2_TZ (1 << 8)
#define CPTR_NVHE_EL2_RES1 0x000032ff /* known RES1 bits in CPTR_EL2 (nVHE) */
-#define CPTR_EL2_DEFAULT CPTR_NVHE_EL2_RES1
#define CPTR_NVHE_EL2_RES0 (GENMASK(63, 32) | \
GENMASK(29, 21) | \
GENMASK(19, 14) | \
@@ -351,8 +351,7 @@
ECN(SOFTSTP_CUR), ECN(WATCHPT_LOW), ECN(WATCHPT_CUR), \
ECN(BKPT32), ECN(VECTOR32), ECN(BRK64), ECN(ERET)
-#define CPACR_EL1_DEFAULT (CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN |\
- CPACR_EL1_ZEN_EL1EN)
+#define CPACR_EL1_TTA (1 << 28)
#define kvm_mode_names \
{ PSR_MODE_EL0t, "EL0t" }, \
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 86042afa86c3..7d170aaa2db4 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -68,6 +68,7 @@ enum __kvm_host_smccc_func {
__KVM_HOST_SMCCC_FUNC___kvm_vcpu_run,
__KVM_HOST_SMCCC_FUNC___kvm_flush_vm_context,
__KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa,
+ __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa_nsh,
__KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid,
__KVM_HOST_SMCCC_FUNC___kvm_flush_cpu_context,
__KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff,
@@ -225,6 +226,9 @@ extern void __kvm_flush_vm_context(void);
extern void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu);
extern void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa,
int level);
+extern void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
+ phys_addr_t ipa,
+ int level);
extern void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu);
extern void __kvm_timer_set_cntvoff(u64 cntvoff);
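A hedged note on the new _nsh hypercall above, inferred from the TLBI shareability naming and its use by the eager-splitting work rather than stated in the hunk:

/* Illustrative contrast, not from the patch:
 *   __kvm_tlb_flush_vmid_ipa()     - broadcast invalidation (TLBI
 *                                    IPAS2E1IS, inner-shareable);
 *   __kvm_tlb_flush_vmid_ipa_nsh() - local-PE-only invalidation (TLBI
 *                                    IPAS2E1), sufficient when only the
 *                                    invalidating CPU can hold the
 *                                    stale entry. */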
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index b31b32ecbe2d..efc0b45d79c3 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -62,19 +62,14 @@ static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu)
#else
static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu)
{
- struct kvm *kvm = vcpu->kvm;
-
- WARN_ON_ONCE(!test_bit(KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED,
- &kvm->arch.flags));
-
- return test_bit(KVM_ARCH_FLAG_EL1_32BIT, &kvm->arch.flags);
+ return test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features);
}
#endif
static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
{
vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
- if (is_kernel_in_hyp_mode())
+ if (has_vhe() || has_hvhe())
vcpu->arch.hcr_el2 |= HCR_E2H;
if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN)) {
/* route synchronous external abort exceptions to EL2 */
@@ -95,6 +90,12 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
vcpu->arch.hcr_el2 |= HCR_TVM;
}
+ if (cpus_have_final_cap(ARM64_HAS_EVT) &&
+ !cpus_have_final_cap(ARM64_MISMATCHED_CACHE_TYPE))
+ vcpu->arch.hcr_el2 |= HCR_TID4;
+ else
+ vcpu->arch.hcr_el2 |= HCR_TID2;
+
if (vcpu_el1_is_32bit(vcpu))
vcpu->arch.hcr_el2 &= ~HCR_RW;
@@ -570,4 +571,35 @@ static inline bool vcpu_has_feature(struct kvm_vcpu *vcpu, int feature)
return test_bit(feature, vcpu->arch.features);
}
+static __always_inline u64 kvm_get_reset_cptr_el2(struct kvm_vcpu *vcpu)
+{
+ u64 val;
+
+ if (has_vhe()) {
+ val = (CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN |
+ CPACR_EL1_ZEN_EL1EN);
+ } else if (has_hvhe()) {
+ val = (CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN);
+ } else {
+ val = CPTR_NVHE_EL2_RES1;
+
+ if (vcpu_has_sve(vcpu) &&
+ (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED))
+ val |= CPTR_EL2_TZ;
+ if (cpus_have_final_cap(ARM64_SME))
+ val &= ~CPTR_EL2_TSM;
+ }
+
+ return val;
+}
+
+static __always_inline void kvm_reset_cptr_el2(struct kvm_vcpu *vcpu)
+{
+ u64 val = kvm_get_reset_cptr_el2(vcpu);
+
+ if (has_vhe() || has_hvhe())
+ write_sysreg(val, cpacr_el1);
+ else
+ write_sysreg(val, cptr_el2);
+}
#endif /* __ARM64_KVM_EMULATE_H__ */
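As a worked example of kvm_get_reset_cptr_el2() above, computed from the constants in kvm_arm.h; the scenarios are illustrative:

/* nVHE, SVE-capable vcpu whose FP/SVE state is currently owned by the
 * guest: start from the RES1 bits and keep the SVE trap armed so the
 * hypervisor can lazily switch the Z-registers on next use. */
u64 nvhe_val = CPTR_NVHE_EL2_RES1 | CPTR_EL2_TZ;	/* 0x32ff | BIT(8) */

/* hVHE: CPACR_EL1 layout instead, FP enabled at EL0/EL1, SVE still
 * trapped (ZEN bits left clear). */
u64 hvhe_val = CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN;

kvm_reset_cptr_el2() then writes whichever value applies through the matching register: cpacr_el1 when E2H is set, cptr_el2 otherwise.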
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index d48609d95423..8b6096753740 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -39,6 +39,7 @@
#define KVM_MAX_VCPUS VGIC_V3_MAX_CPUS
#define KVM_VCPU_MAX_FEATURES 7
+#define KVM_VCPU_VALID_FEATURES (BIT(KVM_VCPU_MAX_FEATURES) - 1)
#define KVM_REQ_SLEEP \
KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
@@ -159,6 +160,21 @@ struct kvm_s2_mmu {
/* The last vcpu id that ran on each physical CPU */
int __percpu *last_vcpu_ran;
+#define KVM_ARM_EAGER_SPLIT_CHUNK_SIZE_DEFAULT 0
+ /*
+ * Memory cache used to split
+ * KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE worth of huge pages. It
+ * is used to allocate stage2 page tables while splitting huge
+ * pages. The choice of KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE
+ * influences both the capacity of the split page cache, and
+ * how often KVM reschedules. Be wary of raising CHUNK_SIZE
+ * too high.
+ *
+ * Protected by kvm->slots_lock.
+ */
+ struct kvm_mmu_memory_cache split_page_cache;
+ uint64_t split_page_chunk_size;
+
struct kvm_arch *arch;
};
@@ -214,25 +230,23 @@ struct kvm_arch {
#define KVM_ARCH_FLAG_MTE_ENABLED 1
/* At least one vCPU has ran in the VM */
#define KVM_ARCH_FLAG_HAS_RAN_ONCE 2
- /*
- * The following two bits are used to indicate the guest's EL1
- * register width configuration. A value of KVM_ARCH_FLAG_EL1_32BIT
- * bit is valid only when KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED is set.
- * Otherwise, the guest's EL1 register width has not yet been
- * determined yet.
- */
-#define KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED 3
-#define KVM_ARCH_FLAG_EL1_32BIT 4
+ /* The vCPU feature set for the VM is configured */
+#define KVM_ARCH_FLAG_VCPU_FEATURES_CONFIGURED 3
/* PSCI SYSTEM_SUSPEND enabled for the guest */
-#define KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED 5
+#define KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED 4
/* VM counter offset */
-#define KVM_ARCH_FLAG_VM_COUNTER_OFFSET 6
+#define KVM_ARCH_FLAG_VM_COUNTER_OFFSET 5
/* Timer PPIs made immutable */
-#define KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE 7
+#define KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE 6
/* SMCCC filter initialized for the VM */
-#define KVM_ARCH_FLAG_SMCCC_FILTER_CONFIGURED 8
+#define KVM_ARCH_FLAG_SMCCC_FILTER_CONFIGURED 7
+ /* Initial ID reg values loaded */
+#define KVM_ARCH_FLAG_ID_REGS_INITIALIZED 8
unsigned long flags;
+ /* VM-wide vCPU feature set */
+ DECLARE_BITMAP(vcpu_features, KVM_VCPU_MAX_FEATURES);
+
/*
* VM-wide PMU filter, implemented as a bitmap and big enough for
* up to 2^10 events (ARMv8.0) or 2^16 events (ARMv8.1+).
@@ -242,18 +256,24 @@ struct kvm_arch {
cpumask_var_t supported_cpus;
- u8 pfr0_csv2;
- u8 pfr0_csv3;
- struct {
- u8 imp:4;
- u8 unimp:4;
- } dfr0_pmuver;
-
/* Hypercall features firmware registers' descriptor */
struct kvm_smccc_features smccc_feat;
struct maple_tree smccc_filter;
/*
+ * Emulated CPU ID registers per VM
+ * (Op0, Op1, CRn, CRm, Op2) of the ID registers to be saved in it
+ * is (3, 0, 0, crm, op2), where 1<=crm<8, 0<=op2<8.
+ *
+ * These emulated idregs are VM-wide, but accessed from the context of a vCPU.
+ * Atomic access to multiple idregs is guarded by kvm_arch.config_lock.
+ */
+#define IDREG_IDX(id) (((sys_reg_CRm(id) - 1) << 3) | sys_reg_Op2(id))
+#define IDREG(kvm, id) ((kvm)->arch.id_regs[IDREG_IDX(id)])
+#define KVM_ARM_ID_REG_NUM (IDREG_IDX(sys_reg(3, 0, 0, 7, 7)) + 1)
+ u64 id_regs[KVM_ARM_ID_REG_NUM];
+
+ /*
* For an untrusted host VM, 'pkvm.handle' is used to lookup
* the associated pKVM instance in the hypervisor.
*/
@@ -410,6 +430,7 @@ struct kvm_host_data {
struct kvm_host_psci_config {
/* PSCI version used by host. */
u32 version;
+ u32 smccc_version;
/* Function IDs used by host if version is v0.1. */
struct psci_0_1_function_ids function_ids_0_1;
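A quick worked example of the ID-register indexing added above; the register choice is illustrative and the arithmetic follows directly from IDREG_IDX():

/* ID_AA64PFR0_EL1 is encoded as (Op0=3, Op1=0, CRn=0, CRm=4, Op2=0):
 *   IDREG_IDX(SYS_ID_AA64PFR0_EL1) = ((4 - 1) << 3) | 0 = 24
 * so the VM-wide emulated value lives in kvm->arch.id_regs[24]. The
 * array covers CRm 1..7 and Op2 0..7, giving KVM_ARM_ID_REG_NUM = 56. */
u64 pfr0 = IDREG(vcpu->kvm, SYS_ID_AA64PFR0_EL1);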
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index bdd9cf546d95..b7238c72a04c 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -16,12 +16,35 @@ DECLARE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt);
DECLARE_PER_CPU(unsigned long, kvm_hyp_vector);
DECLARE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
+/*
+ * Unified accessors for registers that have a different encoding
+ * between VHE and non-VHE. They must be specified without their "ELx"
+ * encoding, but with the SYS_ prefix, as defined in asm/sysreg.h.
+ */
+
+#if defined(__KVM_VHE_HYPERVISOR__)
+
+#define read_sysreg_el0(r) read_sysreg_s(r##_EL02)
+#define write_sysreg_el0(v,r) write_sysreg_s(v, r##_EL02)
+#define read_sysreg_el1(r) read_sysreg_s(r##_EL12)
+#define write_sysreg_el1(v,r) write_sysreg_s(v, r##_EL12)
+#define read_sysreg_el2(r) read_sysreg_s(r##_EL1)
+#define write_sysreg_el2(v,r) write_sysreg_s(v, r##_EL1)
+
+#else // !__KVM_VHE_HYPERVISOR__
+
+#if defined(__KVM_NVHE_HYPERVISOR__)
+#define VHE_ALT_KEY ARM64_KVM_HVHE
+#else
+#define VHE_ALT_KEY ARM64_HAS_VIRT_HOST_EXTN
+#endif
+
#define read_sysreg_elx(r,nvh,vh) \
({ \
u64 reg; \
- asm volatile(ALTERNATIVE(__mrs_s("%0", r##nvh), \
+ asm volatile(ALTERNATIVE(__mrs_s("%0", r##nvh), \
__mrs_s("%0", r##vh), \
- ARM64_HAS_VIRT_HOST_EXTN) \
+ VHE_ALT_KEY) \
: "=r" (reg)); \
reg; \
})
@@ -31,16 +54,10 @@ DECLARE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
u64 __val = (u64)(v); \
asm volatile(ALTERNATIVE(__msr_s(r##nvh, "%x0"), \
__msr_s(r##vh, "%x0"), \
- ARM64_HAS_VIRT_HOST_EXTN) \
+ VHE_ALT_KEY) \
: : "rZ" (__val)); \
} while (0)
-/*
- * Unified accessors for registers that have a different encoding
- * between VHE and non-VHE. They must be specified without their "ELx"
- * encoding, but with the SYS_ prefix, as defined in asm/sysreg.h.
- */
-
#define read_sysreg_el0(r) read_sysreg_elx(r, _EL0, _EL02)
#define write_sysreg_el0(v,r) write_sysreg_elx(v, r, _EL0, _EL02)
#define read_sysreg_el1(r) read_sysreg_elx(r, _EL1, _EL12)
@@ -48,6 +65,8 @@ DECLARE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
#define read_sysreg_el2(r) read_sysreg_elx(r, _EL2, _EL1)
#define write_sysreg_el2(v,r) write_sysreg_elx(v, r, _EL2, _EL1)
+#endif // __KVM_VHE_HYPERVISOR__
+
/*
* Without an __arch_swab32(), we fall back to ___constant_swab32(), but the
* static inline can allow the compiler to out-of-line this. KVM always wants
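A hedged usage sketch for the unified accessors above, modeled on KVM's sysreg save/restore helpers: in code shared between the VHE and nVHE objects, read_sysreg_el1(SYS_SCTLR) reads SCTLR_EL1 under the nVHE encoding and SCTLR_EL12 under the VHE one, the choice being patched in at boot via the alternatives key selected above (ARM64_KVM_HVHE in the nVHE hyp object, ARM64_HAS_VIRT_HOST_EXTN elsewhere).

/* Save, then restore, a guest EL1 register with whichever encoding the
 * current EL2 configuration requires. */
u64 sctlr = read_sysreg_el1(SYS_SCTLR);
write_sysreg_el1(sctlr, SYS_SCTLR);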
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 27e63c111f78..0e1e1ab17b4d 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -172,6 +172,7 @@ void __init free_hyp_pgds(void);
void stage2_unmap_vm(struct kvm *kvm);
int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long type);
+void kvm_uninit_stage2_mmu(struct kvm *kvm);
void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu);
int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
phys_addr_t pa, unsigned long size, bool writable);
@@ -227,7 +228,8 @@ static inline void __invalidate_icache_guest_page(void *va, size_t size)
if (icache_is_aliasing()) {
/* any kind of VIPT cache */
icache_inval_all_pou();
- } else if (is_kernel_in_hyp_mode() || !icache_is_vpipt()) {
+ } else if (read_sysreg(CurrentEL) != CurrentEL_EL1 ||
+ !icache_is_vpipt()) {
/* PIPT or VPIPT at EL2 (see comment in __kvm_tlb_flush_vmid_ipa) */
icache_inval_pou((unsigned long)va, (unsigned long)va + size);
}
diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index 93bd0975b15f..8294a9a7e566 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -92,6 +92,24 @@ static inline bool kvm_level_supports_block_mapping(u32 level)
return level >= KVM_PGTABLE_MIN_BLOCK_LEVEL;
}
+static inline u32 kvm_supported_block_sizes(void)
+{
+ u32 level = KVM_PGTABLE_MIN_BLOCK_LEVEL;
+ u32 r = 0;
+
+ for (; level < KVM_PGTABLE_MAX_LEVELS; level++)
+ r |= BIT(kvm_granule_shift(level));
+
+ return r;
+}
+
+static inline bool kvm_is_block_size_supported(u64 size)
+{
+ bool is_power_of_two = IS_ALIGNED(size, size);
+
+ return is_power_of_two && (size & kvm_supported_block_sizes());
+}
+
/**
* struct kvm_pgtable_mm_ops - Memory management callbacks.
* @zalloc_page: Allocate a single zeroed memory page.
@@ -104,7 +122,7 @@ static inline bool kvm_level_supports_block_mapping(u32 level)
* allocation is physically contiguous.
* @free_pages_exact: Free an exact number of memory pages previously
* allocated by zalloc_pages_exact.
- * @free_removed_table: Free a removed paging structure by unlinking and
+ * @free_unlinked_table: Free an unlinked paging structure by unlinking and
* dropping references.
* @get_page: Increment the refcount on a page.
* @put_page: Decrement the refcount on a page. When the
@@ -124,7 +142,7 @@ struct kvm_pgtable_mm_ops {
void* (*zalloc_page)(void *arg);
void* (*zalloc_pages_exact)(size_t size);
void (*free_pages_exact)(void *addr, size_t size);
- void (*free_removed_table)(void *addr, u32 level);
+ void (*free_unlinked_table)(void *addr, u32 level);
void (*get_page)(void *addr);
void (*put_page)(void *addr);
int (*page_count)(void *addr);
@@ -195,6 +213,12 @@ typedef bool (*kvm_pgtable_force_pte_cb_t)(u64 addr, u64 end,
* with other software walkers.
* @KVM_PGTABLE_WALK_HANDLE_FAULT: Indicates the page-table walk was
* invoked from a fault handler.
+ * @KVM_PGTABLE_WALK_SKIP_BBM_TLBI: Visit and update table entries
+ * without Break-before-make's
+ * TLB invalidation.
+ * @KVM_PGTABLE_WALK_SKIP_CMO: Visit and update table entries
+ * without Cache maintenance
+ * operations required.
*/
enum kvm_pgtable_walk_flags {
KVM_PGTABLE_WALK_LEAF = BIT(0),
@@ -202,6 +226,8 @@ enum kvm_pgtable_walk_flags {
KVM_PGTABLE_WALK_TABLE_POST = BIT(2),
KVM_PGTABLE_WALK_SHARED = BIT(3),
KVM_PGTABLE_WALK_HANDLE_FAULT = BIT(4),
+ KVM_PGTABLE_WALK_SKIP_BBM_TLBI = BIT(5),
+ KVM_PGTABLE_WALK_SKIP_CMO = BIT(6),
};
struct kvm_pgtable_visit_ctx {
@@ -441,7 +467,7 @@ int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
/**
- * kvm_pgtable_stage2_free_removed() - Free a removed stage-2 paging structure.
+ * kvm_pgtable_stage2_free_unlinked() - Free an unlinked stage-2 paging structure.
* @mm_ops: Memory management callbacks.
* @pgtable: Unlinked stage-2 paging structure to be freed.
* @level: Level of the stage-2 paging structure to be freed.
@@ -449,7 +475,33 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
* The page-table is assumed to be unreachable by any hardware walkers prior to
* freeing and therefore no TLB invalidation is performed.
*/
-void kvm_pgtable_stage2_free_removed(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, u32 level);
+void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, u32 level);
+
+/**
+ * kvm_pgtable_stage2_create_unlinked() - Create an unlinked stage-2 paging structure.
+ * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
+ * @phys: Physical address of the memory to map.
+ * @level: Starting level of the stage-2 paging structure to be created.
+ * @prot: Permissions and attributes for the mapping.
+ * @mc: Cache of pre-allocated and zeroed memory from which to allocate
+ * page-table pages.
+ * @force_pte: Force mappings to PAGE_SIZE granularity.
+ *
+ * Returns an unlinked page-table tree. This new page-table tree is
+ * not reachable (i.e., it is unlinked) from the root pgd and it's
+ * therefore unreachable by the hardware page-table walker. No TLB
+ * invalidation or CMOs are performed.
+ *
+ * If device attributes are not explicitly requested in @prot, then the
+ * mapping will be normal, cacheable.
+ *
+ * Return: The fully populated (unlinked) stage-2 paging structure, or
+ * an ERR_PTR(error) on failure.
+ */
+kvm_pte_t *kvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt,
+ u64 phys, u32 level,
+ enum kvm_pgtable_prot prot,
+ void *mc, bool force_pte);
/**
* kvm_pgtable_stage2_map() - Install a mapping in a guest stage-2 page-table.
@@ -621,6 +673,25 @@ bool kvm_pgtable_stage2_is_young(struct kvm_pgtable *pgt, u64 addr);
int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size);
/**
+ * kvm_pgtable_stage2_split() - Split a range of huge pages into leaf PTEs pointing
+ * to PAGE_SIZE guest pages.
+ * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init().
+ * @addr: Intermediate physical address from which to split.
+ * @size: Size of the range.
+ * @mc: Cache of pre-allocated and zeroed memory from which to allocate
+ * page-table pages.
+ *
+ * The function tries to split any level 1 or 2 entry that overlaps
+ * with the input range (given by @addr and @size).
+ *
+ * Return: 0 on success, negative error code on failure. Note that
+ * kvm_pgtable_stage2_split() is best effort: it tries to break as many
+ * blocks in the input range as allowed by the capacity of @mc.
+ */
+int kvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size,
+ struct kvm_mmu_memory_cache *mc);
+
+/**
* kvm_pgtable_walk() - Walk a page-table.
* @pgt: Page-table structure initialised by kvm_pgtable_*_init().
* @addr: Input address for the start of the walk.
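To illustrate kvm_supported_block_sizes() and kvm_is_block_size_supported() from the hunks above, a worked example assuming 4KiB pages and four page-table levels, so block mappings exist at levels 1-3:

/* kvm_granule_shift(level) yields 30, 21 and 12 for levels 1..3: */
u32 mask = BIT(30) | BIT(21) | BIT(12);	/* 1GiB, 2MiB, 4KiB */

/* SZ_2M passes both tests: IS_ALIGNED(size, size) reduces to
 * (size & (size - 1)) == 0, the usual power-of-two check, and bit 21
 * is set in the mask. SZ_16K is a power of two but not a supported
 * block size at this granule, so it fails. */
bool ok  = kvm_is_block_size_supported(SZ_2M);	/* true  */
bool bad = kvm_is_block_size_supported(SZ_16K);	/* false */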
diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h
index 01129b0d4c68..e46250a02017 100644
--- a/arch/arm64/include/asm/kvm_pkvm.h
+++ b/arch/arm64/include/asm/kvm_pkvm.h
@@ -6,7 +6,9 @@
#ifndef __ARM64_KVM_PKVM_H__
#define __ARM64_KVM_PKVM_H__
+#include <linux/arm_ffa.h>
#include <linux/memblock.h>
+#include <linux/scatterlist.h>
#include <asm/kvm_pgtable.h>
/* Maximum number of VMs that can co-exist under pKVM. */
@@ -106,4 +108,23 @@ static inline unsigned long host_s2_pgtable_pages(void)
return res;
}
+#define KVM_FFA_MBOX_NR_PAGES 1
+
+static inline unsigned long hyp_ffa_proxy_pages(void)
+{
+ size_t desc_max;
+
+ /*
+ * The hypervisor FFA proxy needs enough memory to buffer a fragmented
+ * descriptor returned from EL3 in response to a RETRIEVE_REQ call.
+ */
+ desc_max = sizeof(struct ffa_mem_region) +
+ sizeof(struct ffa_mem_region_attributes) +
+ sizeof(struct ffa_composite_mem_region) +
+ SG_MAX_SEGMENTS * sizeof(struct ffa_mem_region_addr_range);
+
+ /* Plus a page each for the hypervisor's RX and TX mailboxes. */
+ return (2 * KVM_FFA_MBOX_NR_PAGES) + DIV_ROUND_UP(desc_max, PAGE_SIZE);
+}
+
#endif /* __ARM64_KVM_PKVM_H__ */
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 7a1e62631814..b481935e9314 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -510,6 +510,7 @@
(BIT(18)) | (BIT(22)) | (BIT(23)) | (BIT(28)) | \
(BIT(29)))
+#define SCTLR_EL2_BT (BIT(36))
#ifdef CONFIG_CPU_BIG_ENDIAN
#define ENDIAN_SET_EL2 SCTLR_ELx_EE
#else
diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
index 4eb601e7de50..5227db7640c8 100644
--- a/arch/arm64/include/asm/virt.h
+++ b/arch/arm64/include/asm/virt.h
@@ -110,8 +110,10 @@ static inline bool is_hyp_mode_mismatched(void)
return __boot_cpu_mode[0] != __boot_cpu_mode[1];
}
-static inline bool is_kernel_in_hyp_mode(void)
+static __always_inline bool is_kernel_in_hyp_mode(void)
{
+ BUILD_BUG_ON(__is_defined(__KVM_NVHE_HYPERVISOR__) ||
+ __is_defined(__KVM_VHE_HYPERVISOR__));
return read_sysreg(CurrentEL) == CurrentEL_EL2;
}
@@ -140,6 +142,14 @@ static __always_inline bool is_protected_kvm_enabled(void)
return cpus_have_final_cap(ARM64_KVM_PROTECTED_MODE);
}
+static __always_inline bool has_hvhe(void)
+{
+ if (is_vhe_hyp_code())
+ return false;
+
+ return cpus_have_final_cap(ARM64_KVM_HVHE);
+}
+
static inline bool is_hyp_nvhe(void)
{
return is_hyp_mode_available() && !is_kernel_in_hyp_mode();
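Taken together with is_kernel_in_hyp_mode() above, these predicates distinguish three EL2 configurations; a hedged summary, since the distinctions are easy to confuse:

/* Summary (illustrative, not from the patch):
 *   is_kernel_in_hyp_mode() - the kernel itself runs at EL2 (VHE host);
 *   has_hvhe()              - split hypervisor whose EL2 object runs
 *                             with HCR_EL2.E2H set ('hVHE'), host at EL1;
 *   is_hyp_nvhe()           - classic split hypervisor without E2H.
 * is_vhe_hyp_code() folds has_hvhe() to false at compile time in the
 * VHE hyp object, which can never be in hVHE mode. */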