author		Linus Torvalds <torvalds@linux-foundation.org>	2023-07-03 15:32:22 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2023-07-03 15:32:22 -0700
commit		e8069f5a8e3bdb5fdeeff895780529388592ee7a (patch)
tree		ce35ab85db9b66a7e488707fccdb33ce54f696dd /arch/arm64/include
parent		eded37770c9f80ecd5ba842359c4f1058d9812c3 (diff)
parent		255006adb3da71bb75c334453786df781b415f54 (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm updates from Paolo Bonzini:
"ARM64:
- Eager page splitting optimization for dirty logging, optionally
allowing for a VM to avoid the cost of hugepage splitting in the
stage-2 fault path (a userspace usage sketch follows this message).
- Arm FF-A proxy for pKVM, allowing a pKVM host to safely interact
with services that live in the Secure world. pKVM intervenes on
FF-A calls to guarantee the host doesn't misuse memory donated to
the hyp or a pKVM guest.
- Support for running the split hypervisor with VHE enabled, known as
'hVHE' mode. This is extremely useful for testing the split
hypervisor on VHE-only systems, and paves the way for new use cases
that depend on having two TTBRs available at EL2.
- Generalized framework for configurable ID registers from userspace.
KVM/arm64 currently prevents arbitrary CPU feature set
configuration from userspace, but the intent is to relax this
limitation and allow userspace to select a feature set consistent
with the CPU.
- Enable the use of Branch Target Identification (FEAT_BTI) in the
hypervisor.
- Use a separate set of pointer authentication keys for the
hypervisor when running in protected mode, as the host is untrusted
at runtime.
- Ensure timer IRQs are consistently released in the init failure
paths.
- Avoid trapping CTR_EL0 on systems with Enhanced Virtualization
Traps (FEAT_EVT), as it is a register commonly read from userspace.
- Erratum workaround for the upcoming AmpereOne part, which has
broken hardware A/D state management.
RISC-V:
- Redirect AMO load/store misaligned traps to KVM guest
- Trap-n-emulate AIA in-kernel irqchip for KVM guest
- Svnapot support for KVM Guest
s390:
- New uvdevice secret API
- CMM selftest and fixes
- fix racy access to target CPU for diag 9c
x86:
- Fix missing/incorrect #GP checks on ENCLS
- Use standard mmu_notifier hooks for handling APIC access page
- Drop now unnecessary TR/TSS load after VM-Exit on AMD
- Print more descriptive information about the status of SEV and
SEV-ES during module load
- Add a test for splitting and reconstituting hugepages during and
after dirty logging
- Add support for CPU pinning in demand paging test
- Add support for AMD PerfMonV2, with a variety of cleanups and minor
fixes included along the way
- Add a "nx_huge_pages=never" option to effectively avoid creating NX
hugepage recovery threads (because nx_huge_pages=off can be toggled
at runtime)
- Move handling of PAT out of MTRR code and dedup SVM+VMX code
- Fix output of PIC poll command emulation when there's an interrupt
- Add a maintainer's handbook to document KVM x86 processes,
preferred coding style, testing expectations, etc.
- Misc cleanups, fixes and comments
Generic:
- Miscellaneous bugfixes and cleanups
Selftests:
- Generate dependency files so that partial rebuilds work as
expected"
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (153 commits)
Documentation/process: Add a maintainer handbook for KVM x86
Documentation/process: Add a label for the tip tree handbook's coding style
KVM: arm64: Fix misuse of KVM_ARM_VCPU_POWER_OFF bit index
RISC-V: KVM: Remove unneeded semicolon
RISC-V: KVM: Allow Svnapot extension for Guest/VM
riscv: kvm: define vcpu_sbi_ext_pmu in header
RISC-V: KVM: Expose IMSIC registers as attributes of AIA irqchip
RISC-V: KVM: Add in-kernel virtualization of AIA IMSIC
RISC-V: KVM: Expose APLIC registers as attributes of AIA irqchip
RISC-V: KVM: Add in-kernel emulation of AIA APLIC
RISC-V: KVM: Implement device interface for AIA irqchip
RISC-V: KVM: Skeletal in-kernel AIA irqchip support
RISC-V: KVM: Set kvm_riscv_aia_nr_hgei to zero
RISC-V: KVM: Add APLIC related defines
RISC-V: KVM: Add IMSIC related defines
RISC-V: KVM: Implement guest external interrupt line management
KVM: x86: Remove PRIx* definitions as they are solely for user space
s390/uv: Update query for secret-UVCs
s390/uv: replace scnprintf with sysfs_emit
s390/uvdevice: Add 'Lock Secret Store' UVC
...
Diffstat (limited to 'arch/arm64/include')
-rw-r--r--	arch/arm64/include/asm/cpufeature.h	6
-rw-r--r--	arch/arm64/include/asm/el2_setup.h	27
-rw-r--r--	arch/arm64/include/asm/kvm_arm.h	7
-rw-r--r--	arch/arm64/include/asm/kvm_asm.h	4
-rw-r--r--	arch/arm64/include/asm/kvm_emulate.h	46
-rw-r--r--	arch/arm64/include/asm/kvm_host.h	61
-rw-r--r--	arch/arm64/include/asm/kvm_hyp.h	37
-rw-r--r--	arch/arm64/include/asm/kvm_mmu.h	4
-rw-r--r--	arch/arm64/include/asm/kvm_pgtable.h	79
-rw-r--r--	arch/arm64/include/asm/kvm_pkvm.h	21
-rw-r--r--	arch/arm64/include/asm/sysreg.h	1
-rw-r--r--	arch/arm64/include/asm/virt.h	12
12 files changed, 256 insertions, 49 deletions
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 7a95c324e52a..96e50227f940 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -15,6 +15,9 @@
 #define MAX_CPU_FEATURES	128
 #define cpu_feature(x)		KERNEL_HWCAP_ ## x
 
+#define ARM64_SW_FEATURE_OVERRIDE_NOKASLR	0
+#define ARM64_SW_FEATURE_OVERRIDE_HVHE		4
+
 #ifndef __ASSEMBLY__
 
 #include <linux/bug.h>
@@ -905,6 +908,7 @@ static inline unsigned int get_vmid_bits(u64 mmfr1)
 	return 8;
 }
 
+s64 arm64_ftr_safe_value(const struct arm64_ftr_bits *ftrp, s64 new, s64 cur);
 struct arm64_ftr_reg *get_arm64_ftr_reg(u32 sys_id);
 
 extern struct arm64_ftr_override id_aa64mmfr1_override;
@@ -915,6 +919,8 @@ extern struct arm64_ftr_override id_aa64smfr0_override;
 extern struct arm64_ftr_override id_aa64isar1_override;
 extern struct arm64_ftr_override id_aa64isar2_override;
 
+extern struct arm64_ftr_override arm64_sw_feature_override;
+
 u32 get_kvm_ipa_limit(void);
 void dump_cpu_features(void);
diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h
index f4c3d30bf746..8e5ffb58f83e 100644
--- a/arch/arm64/include/asm/el2_setup.h
+++ b/arch/arm64/include/asm/el2_setup.h
@@ -43,6 +43,11 @@
  */
 .macro __init_el2_timers
 	mov	x0, #3				// Enable EL1 physical timers
+	mrs	x1, hcr_el2
+	and	x1, x1, #HCR_E2H
+	cbz	x1, .LnVHE_\@
+	lsl	x0, x0, #10
+.LnVHE_\@:
 	msr	cnthctl_el2, x0
 	msr	cntvoff_el2, xzr		// Clear virtual offset
 .endm
@@ -133,8 +138,15 @@
 .endm
 
 /* Coprocessor traps */
-.macro __init_el2_nvhe_cptr
+.macro __init_el2_cptr
+	mrs	x1, hcr_el2
+	and	x1, x1, #HCR_E2H
+	cbz	x1, .LnVHE_\@
+	mov	x0, #(CPACR_EL1_FPEN_EL1EN | CPACR_EL1_FPEN_EL0EN)
+	b	.Lset_cptr_\@
+.LnVHE_\@:
 	mov	x0, #0x33ff
+.Lset_cptr_\@:
 	msr	cptr_el2, x0			// Disable copro. traps to EL2
 .endm
@@ -210,9 +222,8 @@
 	__init_el2_gicv3
 	__init_el2_hstr
 	__init_el2_nvhe_idregs
-	__init_el2_nvhe_cptr
+	__init_el2_cptr
 	__init_el2_fgt
-	__init_el2_nvhe_prepare_eret
 .endm
 
 #ifndef __KVM_NVHE_HYPERVISOR__
@@ -258,7 +269,17 @@
 .Linit_sve_\@:	/* SVE register access */
 	mrs	x0, cptr_el2			// Disable SVE traps
+	mrs	x1, hcr_el2
+	and	x1, x1, #HCR_E2H
+	cbz	x1, .Lcptr_nvhe_\@
+
+	// VHE case
+	orr	x0, x0, #(CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN)
+	b	.Lset_cptr_\@
+
+.Lcptr_nvhe_\@: // nVHE case
 	bic	x0, x0, #CPTR_EL2_TZ
+.Lset_cptr_\@:
 	msr	cptr_el2, x0
 	isb
 	mov	x1, #ZCR_ELx_LEN_MASK		// SVE: Enable full vector
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index c6e12e8f2751..58e5eb27da68 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -19,6 +19,7 @@
 #define HCR_ATA_SHIFT	56
 #define HCR_ATA		(UL(1) << HCR_ATA_SHIFT)
 #define HCR_AMVOFFEN	(UL(1) << 51)
+#define HCR_TID4	(UL(1) << 49)
 #define HCR_FIEN	(UL(1) << 47)
 #define HCR_FWB		(UL(1) << 46)
 #define HCR_API		(UL(1) << 41)
@@ -87,7 +88,7 @@
 #define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \
 			 HCR_BSU_IS | HCR_FB | HCR_TACR | \
 			 HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW | HCR_TLOR | \
-			 HCR_FMO | HCR_IMO | HCR_PTW | HCR_TID3 | HCR_TID2)
+			 HCR_FMO | HCR_IMO | HCR_PTW | HCR_TID3)
 #define HCR_VIRT_EXCP_MASK (HCR_VSE | HCR_VI | HCR_VF)
 #define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK | HCR_ATA)
 #define HCR_HOST_NVHE_PROTECTED_FLAGS (HCR_HOST_NVHE_FLAGS | HCR_TSC)
@@ -289,7 +290,6 @@
 #define CPTR_EL2_TFP	(1 << CPTR_EL2_TFP_SHIFT)
 #define CPTR_EL2_TZ	(1 << 8)
 #define CPTR_NVHE_EL2_RES1	0x000032ff /* known RES1 bits in CPTR_EL2 (nVHE) */
-#define CPTR_EL2_DEFAULT	CPTR_NVHE_EL2_RES1
 #define CPTR_NVHE_EL2_RES0	(GENMASK(63, 32) |	\
 				 GENMASK(29, 21) |	\
 				 GENMASK(19, 14) |	\
@@ -351,8 +351,7 @@
 	ECN(SOFTSTP_CUR), ECN(WATCHPT_LOW), ECN(WATCHPT_CUR), \
 	ECN(BKPT32), ECN(VECTOR32), ECN(BRK64), ECN(ERET)
 
-#define CPACR_EL1_DEFAULT	(CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN |\
-				 CPACR_EL1_ZEN_EL1EN)
+#define CPACR_EL1_TTA		(1 << 28)
 
 #define kvm_mode_names				\
 	{ PSR_MODE_EL0t,	"EL0t" },	\
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 86042afa86c3..7d170aaa2db4 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -68,6 +68,7 @@ enum __kvm_host_smccc_func {
 	__KVM_HOST_SMCCC_FUNC___kvm_vcpu_run,
 	__KVM_HOST_SMCCC_FUNC___kvm_flush_vm_context,
 	__KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa,
+	__KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa_nsh,
 	__KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid,
 	__KVM_HOST_SMCCC_FUNC___kvm_flush_cpu_context,
 	__KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff,
@@ -225,6 +226,9 @@ extern void __kvm_flush_vm_context(void);
 extern void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu);
 extern void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa,
 				     int level);
+extern void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
+					 phys_addr_t ipa,
+					 int level);
 extern void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu);
 
 extern void __kvm_timer_set_cntvoff(u64 cntvoff);
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index b31b32ecbe2d..efc0b45d79c3 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -62,19 +62,14 @@ static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu)
 #else
 static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu)
 {
-	struct kvm *kvm = vcpu->kvm;
-
-	WARN_ON_ONCE(!test_bit(KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED,
-			       &kvm->arch.flags));
-
-	return test_bit(KVM_ARCH_FLAG_EL1_32BIT, &kvm->arch.flags);
+	return test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features);
 }
 #endif
 
 static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
 {
 	vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
-	if (is_kernel_in_hyp_mode())
+	if (has_vhe() || has_hvhe())
 		vcpu->arch.hcr_el2 |= HCR_E2H;
 	if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN)) {
 		/* route synchronous external abort exceptions to EL2 */
@@ -95,6 +90,12 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
 		vcpu->arch.hcr_el2 |= HCR_TVM;
 	}
 
+	if (cpus_have_final_cap(ARM64_HAS_EVT) &&
+	    !cpus_have_final_cap(ARM64_MISMATCHED_CACHE_TYPE))
+		vcpu->arch.hcr_el2 |= HCR_TID4;
+	else
+		vcpu->arch.hcr_el2 |= HCR_TID2;
+
 	if (vcpu_el1_is_32bit(vcpu))
 		vcpu->arch.hcr_el2 &= ~HCR_RW;
 
@@ -570,4 +571,35 @@ static inline bool vcpu_has_feature(struct kvm_vcpu *vcpu, int feature)
 	return test_bit(feature, vcpu->arch.features);
 }
 
+static __always_inline u64 kvm_get_reset_cptr_el2(struct kvm_vcpu *vcpu)
+{
+	u64 val;
+
+	if (has_vhe()) {
+		val = (CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN |
+		       CPACR_EL1_ZEN_EL1EN);
+	} else if (has_hvhe()) {
+		val = (CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN);
+	} else {
+		val = CPTR_NVHE_EL2_RES1;
+
+		if (vcpu_has_sve(vcpu) &&
+		    (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED))
+			val |= CPTR_EL2_TZ;
+		if (cpus_have_final_cap(ARM64_SME))
+			val &= ~CPTR_EL2_TSM;
+	}
+
+	return val;
+}
+
+static __always_inline void kvm_reset_cptr_el2(struct kvm_vcpu *vcpu)
+{
+	u64 val = kvm_get_reset_cptr_el2(vcpu);
+
+	if (has_vhe() || has_hvhe())
+		write_sysreg(val, cpacr_el1);
+	else
+		write_sysreg(val, cptr_el2);
+}
 #endif /* __ARM64_KVM_EMULATE_H__ */
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index d48609d95423..8b6096753740 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -39,6 +39,7 @@
 
 #define KVM_MAX_VCPUS VGIC_V3_MAX_CPUS
 #define KVM_VCPU_MAX_FEATURES 7
+#define KVM_VCPU_VALID_FEATURES	(BIT(KVM_VCPU_MAX_FEATURES) - 1)
 
 #define KVM_REQ_SLEEP \
 	KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
@@ -159,6 +160,21 @@ struct kvm_s2_mmu {
 	/* The last vcpu id that ran on each physical CPU */
 	int __percpu *last_vcpu_ran;
 
+#define KVM_ARM_EAGER_SPLIT_CHUNK_SIZE_DEFAULT 0
+	/*
+	 * Memory cache used to split
+	 * KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE worth of huge pages. It
+	 * is used to allocate stage2 page tables while splitting huge
+	 * pages. The choice of KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE
+	 * influences both the capacity of the split page cache, and
+	 * how often KVM reschedules. Be wary of raising CHUNK_SIZE
+	 * too high.
+	 *
+	 * Protected by kvm->slots_lock.
+	 */
+	struct kvm_mmu_memory_cache split_page_cache;
+	uint64_t split_page_chunk_size;
+
 	struct kvm_arch *arch;
 };
 
@@ -214,25 +230,23 @@ struct kvm_arch {
 #define KVM_ARCH_FLAG_MTE_ENABLED			1
 	/* At least one vCPU has ran in the VM */
 #define KVM_ARCH_FLAG_HAS_RAN_ONCE			2
-	/*
-	 * The following two bits are used to indicate the guest's EL1
-	 * register width configuration. A value of KVM_ARCH_FLAG_EL1_32BIT
-	 * bit is valid only when KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED is set.
-	 * Otherwise, the guest's EL1 register width has not yet been
-	 * determined yet.
-	 */
-#define KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED		3
-#define KVM_ARCH_FLAG_EL1_32BIT				4
+	/* The vCPU feature set for the VM is configured */
+#define KVM_ARCH_FLAG_VCPU_FEATURES_CONFIGURED		3
 	/* PSCI SYSTEM_SUSPEND enabled for the guest */
-#define KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED		5
+#define KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED		4
 	/* VM counter offset */
-#define KVM_ARCH_FLAG_VM_COUNTER_OFFSET			6
+#define KVM_ARCH_FLAG_VM_COUNTER_OFFSET			5
 	/* Timer PPIs made immutable */
-#define KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE		7
+#define KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE		6
 	/* SMCCC filter initialized for the VM */
-#define KVM_ARCH_FLAG_SMCCC_FILTER_CONFIGURED		8
+#define KVM_ARCH_FLAG_SMCCC_FILTER_CONFIGURED		7
+	/* Initial ID reg values loaded */
+#define KVM_ARCH_FLAG_ID_REGS_INITIALIZED		8
 	unsigned long flags;
 
+	/* VM-wide vCPU feature set */
+	DECLARE_BITMAP(vcpu_features, KVM_VCPU_MAX_FEATURES);
+
 	/*
 	 * VM-wide PMU filter, implemented as a bitmap and big enough for
 	 * up to 2^10 events (ARMv8.0) or 2^16 events (ARMv8.1+).
@@ -242,18 +256,24 @@ struct kvm_arch {
 
 	cpumask_var_t supported_cpus;
 
-	u8 pfr0_csv2;
-	u8 pfr0_csv3;
-	struct {
-		u8 imp:4;
-		u8 unimp:4;
-	} dfr0_pmuver;
-
 	/* Hypercall features firmware registers' descriptor */
 	struct kvm_smccc_features smccc_feat;
 	struct maple_tree smccc_filter;
 
 	/*
+	 * Emulated CPU ID registers per VM
+	 * (Op0, Op1, CRn, CRm, Op2) of the ID registers to be saved in it
+	 * is (3, 0, 0, crm, op2), where 1<=crm<8, 0<=op2<8.
+	 *
+	 * These emulated idregs are VM-wide, but accessed from the context of a vCPU.
+	 * Atomic access to multiple idregs are guarded by kvm_arch.config_lock.
+	 */
+#define IDREG_IDX(id)		(((sys_reg_CRm(id) - 1) << 3) | sys_reg_Op2(id))
+#define IDREG(kvm, id)		((kvm)->arch.id_regs[IDREG_IDX(id)])
+#define KVM_ARM_ID_REG_NUM	(IDREG_IDX(sys_reg(3, 0, 0, 7, 7)) + 1)
+	u64 id_regs[KVM_ARM_ID_REG_NUM];
+
+	/*
 	 * For an untrusted host VM, 'pkvm.handle' is used to lookup
 	 * the associated pKVM instance in the hypervisor.
 	 */
@@ -410,6 +430,7 @@ struct kvm_host_data {
 struct kvm_host_psci_config {
 	/* PSCI version used by host. */
 	u32 version;
+	u32 smccc_version;
 
 	/* Function IDs used by host if version is v0.1. */
 	struct psci_0_1_function_ids function_ids_0_1;
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index bdd9cf546d95..b7238c72a04c 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -16,12 +16,35 @@ DECLARE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt);
 DECLARE_PER_CPU(unsigned long, kvm_hyp_vector);
 DECLARE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
 
+/*
+ * Unified accessors for registers that have a different encoding
+ * between VHE and non-VHE. They must be specified without their "ELx"
+ * encoding, but with the SYS_ prefix, as defined in asm/sysreg.h.
+ */
+
+#if defined(__KVM_VHE_HYPERVISOR__)
+
+#define read_sysreg_el0(r)	read_sysreg_s(r##_EL02)
+#define write_sysreg_el0(v,r)	write_sysreg_s(v, r##_EL02)
+#define read_sysreg_el1(r)	read_sysreg_s(r##_EL12)
+#define write_sysreg_el1(v,r)	write_sysreg_s(v, r##_EL12)
+#define read_sysreg_el2(r)	read_sysreg_s(r##_EL1)
+#define write_sysreg_el2(v,r)	write_sysreg_s(v, r##_EL1)
+
+#else // !__KVM_VHE_HYPERVISOR__
+
+#if defined(__KVM_NVHE_HYPERVISOR__)
+#define VHE_ALT_KEY	ARM64_KVM_HVHE
+#else
+#define VHE_ALT_KEY	ARM64_HAS_VIRT_HOST_EXTN
+#endif
+
 #define read_sysreg_elx(r,nvh,vh)					\
 	({								\
 		u64 reg;						\
-		asm volatile(ALTERNATIVE(__mrs_s("%0", r##nvh),	\
+		asm volatile(ALTERNATIVE(__mrs_s("%0", r##nvh),		\
 					 __mrs_s("%0", r##vh),		\
-					 ARM64_HAS_VIRT_HOST_EXTN)	\
+					 VHE_ALT_KEY)			\
 			     : "=r" (reg));				\
 		reg;							\
 	})
@@ -31,16 +54,10 @@ DECLARE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
 		u64 __val = (u64)(v);					\
 		asm volatile(ALTERNATIVE(__msr_s(r##nvh, "%x0"),	\
 					 __msr_s(r##vh, "%x0"),		\
-					 ARM64_HAS_VIRT_HOST_EXTN)	\
+					 VHE_ALT_KEY)			\
 					 : : "rZ" (__val));		\
 	} while (0)
 
-/*
- * Unified accessors for registers that have a different encoding
- * between VHE and non-VHE. They must be specified without their "ELx"
- * encoding, but with the SYS_ prefix, as defined in asm/sysreg.h.
- */
-
 #define read_sysreg_el0(r)	read_sysreg_elx(r, _EL0, _EL02)
 #define write_sysreg_el0(v,r)	write_sysreg_elx(v, r, _EL0, _EL02)
 #define read_sysreg_el1(r)	read_sysreg_elx(r, _EL1, _EL12)
@@ -48,6 +65,8 @@ DECLARE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
 #define read_sysreg_el2(r)	read_sysreg_elx(r, _EL2, _EL1)
 #define write_sysreg_el2(v,r)	write_sysreg_elx(v, r, _EL2, _EL1)
 
+#endif // __KVM_VHE_HYPERVISOR__
+
 /*
  * Without an __arch_swab32(), we fall back to ___constant_swab32(), but the
  * static inline can allow the compiler to out-of-line this. KVM always wants
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 27e63c111f78..0e1e1ab17b4d 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -172,6 +172,7 @@ void __init free_hyp_pgds(void);
 
 void stage2_unmap_vm(struct kvm *kvm);
 int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long type);
+void kvm_uninit_stage2_mmu(struct kvm *kvm);
 void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu);
 int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
 			  phys_addr_t pa, unsigned long size, bool writable);
@@ -227,7 +228,8 @@ static inline void __invalidate_icache_guest_page(void *va, size_t size)
 	if (icache_is_aliasing()) {
 		/* any kind of VIPT cache */
 		icache_inval_all_pou();
-	} else if (is_kernel_in_hyp_mode() || !icache_is_vpipt()) {
+	} else if (read_sysreg(CurrentEL) != CurrentEL_EL1 ||
+		   !icache_is_vpipt()) {
 		/* PIPT or VPIPT at EL2 (see comment in __kvm_tlb_flush_vmid_ipa) */
 		icache_inval_pou((unsigned long)va, (unsigned long)va + size);
 	}
diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index 93bd0975b15f..8294a9a7e566 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -92,6 +92,24 @@ static inline bool kvm_level_supports_block_mapping(u32 level)
 	return level >= KVM_PGTABLE_MIN_BLOCK_LEVEL;
 }
 
+static inline u32 kvm_supported_block_sizes(void)
+{
+	u32 level = KVM_PGTABLE_MIN_BLOCK_LEVEL;
+	u32 r = 0;
+
+	for (; level < KVM_PGTABLE_MAX_LEVELS; level++)
+		r |= BIT(kvm_granule_shift(level));
+
+	return r;
+}
+
+static inline bool kvm_is_block_size_supported(u64 size)
+{
+	bool is_power_of_two = IS_ALIGNED(size, size);
+
+	return is_power_of_two && (size & kvm_supported_block_sizes());
+}
+
 /**
  * struct kvm_pgtable_mm_ops - Memory management callbacks.
  * @zalloc_page:		Allocate a single zeroed memory page.
@@ -104,7 +122,7 @@ static inline bool kvm_level_supports_block_mapping(u32 level)
  *				allocation is physically contiguous.
  * @free_pages_exact:		Free an exact number of memory pages previously
  *				allocated by zalloc_pages_exact.
- * @free_removed_table:		Free a removed paging structure by unlinking and
+ * @free_unlinked_table:	Free an unlinked paging structure by unlinking and
  *				dropping references.
  * @get_page:			Increment the refcount on a page.
  * @put_page:			Decrement the refcount on a page. When the
@@ -124,7 +142,7 @@ struct kvm_pgtable_mm_ops {
 	void*		(*zalloc_page)(void *arg);
 	void*		(*zalloc_pages_exact)(size_t size);
 	void		(*free_pages_exact)(void *addr, size_t size);
-	void		(*free_removed_table)(void *addr, u32 level);
+	void		(*free_unlinked_table)(void *addr, u32 level);
 	void		(*get_page)(void *addr);
 	void		(*put_page)(void *addr);
 	int		(*page_count)(void *addr);
@@ -195,6 +213,12 @@ typedef bool (*kvm_pgtable_force_pte_cb_t)(u64 addr, u64 end,
 *					with other software walkers.
 * @KVM_PGTABLE_WALK_HANDLE_FAULT:	Indicates the page-table walk was
 *					invoked from a fault handler.
+ * @KVM_PGTABLE_WALK_SKIP_BBM_TLBI:	Visit and update table entries
+ *					without Break-before-make's
+ *					TLB invalidation.
+ * @KVM_PGTABLE_WALK_SKIP_CMO:		Visit and update table entries
+ *					without Cache maintenance
+ *					operations required.
 */
 enum kvm_pgtable_walk_flags {
 	KVM_PGTABLE_WALK_LEAF			= BIT(0),
@@ -202,6 +226,8 @@ enum kvm_pgtable_walk_flags {
 	KVM_PGTABLE_WALK_TABLE_POST		= BIT(2),
 	KVM_PGTABLE_WALK_SHARED			= BIT(3),
 	KVM_PGTABLE_WALK_HANDLE_FAULT		= BIT(4),
+	KVM_PGTABLE_WALK_SKIP_BBM_TLBI		= BIT(5),
+	KVM_PGTABLE_WALK_SKIP_CMO		= BIT(6),
 };
 
 struct kvm_pgtable_visit_ctx {
@@ -441,7 +467,7 @@ int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
 void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
 
 /**
- * kvm_pgtable_stage2_free_removed() - Free a removed stage-2 paging structure.
+ * kvm_pgtable_stage2_free_unlinked() - Free an unlinked stage-2 paging structure.
 * @mm_ops:	Memory management callbacks.
 * @pgtable:	Unlinked stage-2 paging structure to be freed.
 * @level:	Level of the stage-2 paging structure to be freed.
@@ -449,7 +475,33 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
 * The page-table is assumed to be unreachable by any hardware walkers prior to
 * freeing and therefore no TLB invalidation is performed.
 */
-void kvm_pgtable_stage2_free_removed(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, u32 level);
+void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, u32 level);
+
+/**
+ * kvm_pgtable_stage2_create_unlinked() - Create an unlinked stage-2 paging structure.
+ * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init*().
+ * @phys:	Physical address of the memory to map.
+ * @level:	Starting level of the stage-2 paging structure to be created.
+ * @prot:	Permissions and attributes for the mapping.
+ * @mc:		Cache of pre-allocated and zeroed memory from which to allocate
+ *		page-table pages.
+ * @force_pte:	Force mappings to PAGE_SIZE granularity.
+ *
+ * Returns an unlinked page-table tree. This new page-table tree is
+ * not reachable (i.e., it is unlinked) from the root pgd and it's
+ * therefore unreachable by the hardware page-table walker. No TLB
+ * invalidation or CMOs are performed.
+ *
+ * If device attributes are not explicitly requested in @prot, then the
+ * mapping will be normal, cacheable.
+ *
+ * Return: The fully populated (unlinked) stage-2 paging structure, or
+ * an ERR_PTR(error) on failure.
+ */
+kvm_pte_t *kvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt,
+					      u64 phys, u32 level,
+					      enum kvm_pgtable_prot prot,
+					      void *mc, bool force_pte);
 
 /**
 * kvm_pgtable_stage2_map() - Install a mapping in a guest stage-2 page-table.
@@ -621,6 +673,25 @@ bool kvm_pgtable_stage2_is_young(struct kvm_pgtable *pgt, u64 addr);
 int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size);
 
 /**
+ * kvm_pgtable_stage2_split() - Split a range of huge pages into leaf PTEs pointing
+ *				to PAGE_SIZE guest pages.
+ * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init().
+ * @addr:	Intermediate physical address from which to split.
+ * @size:	Size of the range.
+ * @mc:		Cache of pre-allocated and zeroed memory from which to allocate
+ *		page-table pages.
+ *
+ * The function tries to split any level 1 or 2 entry that overlaps
+ * with the input range (given by @addr and @size).
+ *
+ * Return: 0 on success, negative error code on failure. Note that
+ * kvm_pgtable_stage2_split() is best effort: it tries to break as many
+ * blocks in the input range as allowed by @mc_capacity.
+ */
+int kvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size,
+			     struct kvm_mmu_memory_cache *mc);
+
+/**
 * kvm_pgtable_walk() - Walk a page-table.
 * @pgt:	Page-table structure initialised by kvm_pgtable_*_init().
 * @addr:	Input address for the start of the walk.
diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h
index 01129b0d4c68..e46250a02017 100644
--- a/arch/arm64/include/asm/kvm_pkvm.h
+++ b/arch/arm64/include/asm/kvm_pkvm.h
@@ -6,7 +6,9 @@
 #ifndef __ARM64_KVM_PKVM_H__
 #define __ARM64_KVM_PKVM_H__
 
+#include <linux/arm_ffa.h>
 #include <linux/memblock.h>
+#include <linux/scatterlist.h>
 #include <asm/kvm_pgtable.h>
 
 /* Maximum number of VMs that can co-exist under pKVM. */
@@ -106,4 +108,23 @@ static inline unsigned long host_s2_pgtable_pages(void)
 	return res;
 }
 
+#define KVM_FFA_MBOX_NR_PAGES	1
+
+static inline unsigned long hyp_ffa_proxy_pages(void)
+{
+	size_t desc_max;
+
+	/*
+	 * The hypervisor FFA proxy needs enough memory to buffer a
+	 * fragmented descriptor returned from EL3 in response to a
+	 * RETRIEVE_REQ call.
+	 */
+	desc_max = sizeof(struct ffa_mem_region) +
+		   sizeof(struct ffa_mem_region_attributes) +
+		   sizeof(struct ffa_composite_mem_region) +
+		   SG_MAX_SEGMENTS * sizeof(struct ffa_mem_region_addr_range);
+
+	/* Plus a page each for the hypervisor's RX and TX mailboxes. */
+	return (2 * KVM_FFA_MBOX_NR_PAGES) + DIV_ROUND_UP(desc_max, PAGE_SIZE);
+}
+
 #endif	/* __ARM64_KVM_PKVM_H__ */
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 7a1e62631814..b481935e9314 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -510,6 +510,7 @@
 			 (BIT(18)) | (BIT(22)) | (BIT(23)) | (BIT(28)) | \
 			 (BIT(29)))
 
+#define SCTLR_EL2_BT	(BIT(36))
 #ifdef CONFIG_CPU_BIG_ENDIAN
 #define ENDIAN_SET_EL2		SCTLR_ELx_EE
 #else
diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
index 4eb601e7de50..5227db7640c8 100644
--- a/arch/arm64/include/asm/virt.h
+++ b/arch/arm64/include/asm/virt.h
@@ -110,8 +110,10 @@ static inline bool is_hyp_mode_mismatched(void)
 	return __boot_cpu_mode[0] != __boot_cpu_mode[1];
 }
 
-static inline bool is_kernel_in_hyp_mode(void)
+static __always_inline bool is_kernel_in_hyp_mode(void)
 {
+	BUILD_BUG_ON(__is_defined(__KVM_NVHE_HYPERVISOR__) ||
+		     __is_defined(__KVM_VHE_HYPERVISOR__));
 	return read_sysreg(CurrentEL) == CurrentEL_EL2;
 }
 
@@ -140,6 +142,14 @@ static __always_inline bool is_protected_kvm_enabled(void)
 	return cpus_have_final_cap(ARM64_KVM_PROTECTED_MODE);
 }
 
+static __always_inline bool has_hvhe(void)
+{
+	if (is_vhe_hyp_code())
+		return false;
+
+	return cpus_have_final_cap(ARM64_KVM_HVHE);
+}
+
 static inline bool is_hyp_nvhe(void)
 {
 	return is_hyp_mode_available() && !is_kernel_in_hyp_mode();
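
A closing note on the ID-register storage introduced in the kvm_host.h hunk
above: IDREG_IDX() flattens the (3, 0, 0, crm, op2) encoding space into
((crm - 1) << 3) | op2, so CRm values 1..7 combined with Op2 values 0..7
yield 56 slots, which is exactly KVM_ARM_ID_REG_NUM. A standalone sketch of
that arithmetic follows; plain integers stand in for the kernel's
sys_reg_CRm()/sys_reg_Op2() field extractors:

	#include <assert.h>
	#include <stdio.h>

	/* Mirrors IDREG_IDX() from the hunk above, taking CRm/Op2 directly. */
	static unsigned int idreg_idx(unsigned int crm, unsigned int op2)
	{
		return ((crm - 1) << 3) | op2;
	}

	int main(void)
	{
		/* ID_AA64PFR0_EL1 is (Op0,Op1,CRn,CRm,Op2) = (3,0,0,4,0). */
		assert(idreg_idx(4, 0) == 24);

		/* The last encoding, (3,0,0,7,7), pins the array size:
		 * KVM_ARM_ID_REG_NUM = IDREG_IDX(sys_reg(3,0,0,7,7)) + 1 = 56. */
		printf("KVM_ARM_ID_REG_NUM = %u\n", idreg_idx(7, 7) + 1);
		return 0;
	}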