24 files changed, 1658 insertions, 189 deletions
diff --git a/Documentation/ABI/testing/configfs-tsm b/Documentation/ABI/testing/configfs-tsm index dd24202b5ba5..534408bc1408 100644 --- a/Documentation/ABI/testing/configfs-tsm +++ b/Documentation/ABI/testing/configfs-tsm @@ -31,6 +31,18 @@ Description: Standardization v2.03 Section 4.1.8.1 MSG_REPORT_REQ. https://www.amd.com/content/dam/amd/en/documents/epyc-technical-docs/specifications/56421.pdf +What: /sys/kernel/config/tsm/report/$name/manifestblob +Date: January, 2024 +KernelVersion: v6.10 +Contact: linux-coco@lists.linux.dev +Description: + (RO) Optional supplemental data that a TSM may emit, visibility + of this attribute depends on TSM, and may be empty if no + manifest data is available. + + See 'service_provider' for information on the format of the + manifest blob. + What: /sys/kernel/config/tsm/report/$name/provider Date: September, 2023 KernelVersion: v6.7 @@ -80,3 +92,54 @@ Contact: linux-coco@lists.linux.dev Description: (RO) Indicates the minimum permissible value that can be written to @privlevel. + +What: /sys/kernel/config/tsm/report/$name/service_provider +Date: January, 2024 +KernelVersion: v6.10 +Contact: linux-coco@lists.linux.dev +Description: + (WO) Attribute is visible if a TSM implementation provider + supports the concept of attestation reports from a service + provider for TVMs, like SEV-SNP running under an SVSM. + Specifying the service provider via this attribute will create + an attestation report as specified by the service provider. + The only currently supported service provider is "svsm". + + For the "svsm" service provider, see the Secure VM Service Module + for SEV-SNP Guests v1.00 Section 7. For the doc, search for + "site:amd.com "Secure VM Service Module for SEV-SNP + Guests", docID: 58019" + +What: /sys/kernel/config/tsm/report/$name/service_guid +Date: January, 2024 +KernelVersion: v6.10 +Contact: linux-coco@lists.linux.dev +Description: + (WO) Attribute is visible if a TSM implementation provider + supports the concept of attestation reports from a service + provider for TVMs, like SEV-SNP running under an SVSM. + Specifying an empty/null GUID (00000000-0000-0000-0000-000000) + requests all active services within the service provider be + part of the attestation report. Specifying a GUID request + an attestation report of just the specified service using the + manifest form specified by the service_manifest_version + attribute. + + See 'service_provider' for information on the format of the + service guid. + +What: /sys/kernel/config/tsm/report/$name/service_manifest_version +Date: January, 2024 +KernelVersion: v6.10 +Contact: linux-coco@lists.linux.dev +Description: + (WO) Attribute is visible if a TSM implementation provider + supports the concept of attestation reports from a service + provider for TVMs, like SEV-SNP running under an SVSM. + Indicates the service manifest version requested for the + attestation report (default 0). If this field is not set by + the user, the default manifest version of the service (the + service's initial/first manifest version) is returned. + + See 'service_provider' for information on the format of the + service manifest version. diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index 53ed1a803422..325873385b71 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -605,6 +605,18 @@ Description: Umwait control Note that a value of zero means there is no limit. 
Low order two bits must be zero. +What: /sys/devices/system/cpu/sev + /sys/devices/system/cpu/sev/vmpl +Date: May 2024 +Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org> +Description: Secure Encrypted Virtualization (SEV) information + + This directory is only present when running as an SEV-SNP guest. + + vmpl: Reports the Virtual Machine Privilege Level (VMPL) at which + the SEV-SNP guest is running. + + What: /sys/devices/system/cpu/svm Date: August 2019 Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org> diff --git a/Documentation/arch/x86/amd-memory-encryption.rst b/Documentation/arch/x86/amd-memory-encryption.rst index 414bc7402ae7..6df3264f23b9 100644 --- a/Documentation/arch/x86/amd-memory-encryption.rst +++ b/Documentation/arch/x86/amd-memory-encryption.rst @@ -130,4 +130,31 @@ SNP feature support. More details in AMD64 APM[1] Vol 2: 15.34.10 SEV_STATUS MSR -[1] https://www.amd.com/content/dam/amd/en/documents/processor-tech-docs/programmer-references/24593.pdf +Secure VM Service Module (SVSM) +=============================== +SNP provides a feature called Virtual Machine Privilege Levels (VMPL) which +defines four privilege levels at which guest software can run. The most +privileged level is 0 and numerically higher numbers have lesser privileges. +More details in the AMD64 APM Vol 2, section "15.35.7 Virtual Machine +Privilege Levels", docID: 24593. + +When using that feature, different services can run at different protection +levels, apart from the guest OS but still within the secure SNP environment. +They can provide services to the guest, like a vTPM, for example. + +When a guest is not running at VMPL0, it needs to communicate with the software +running at VMPL0 to perform privileged operations or to interact with secure +services. An example fur such a privileged operation is PVALIDATE which is +*required* to be executed at VMPL0. + +In this scenario, the software running at VMPL0 is usually called a Secure VM +Service Module (SVSM). Discovery of an SVSM and the API used to communicate +with it is documented in "Secure VM Service Module for SEV-SNP Guests", docID: +58019. + +(Latest versions of the above-mentioned documents can be found by using +a search engine like duckduckgo.com and typing in: + + site:amd.com "Secure VM Service Module for SEV-SNP Guests", docID: 58019 + +for example.) diff --git a/Documentation/virt/coco/sev-guest.rst b/Documentation/virt/coco/sev-guest.rst index e1eaf6a830ce..9d00967a5b2b 100644 --- a/Documentation/virt/coco/sev-guest.rst +++ b/Documentation/virt/coco/sev-guest.rst @@ -204,6 +204,17 @@ has taken care to make use of the SEV-SNP CPUID throughout all stages of boot. Otherwise, guest owner attestation provides no assurance that the kernel wasn't fed incorrect values at some point during boot. +4. SEV Guest Driver Communication Key +===================================== + +Communication between an SEV guest and the SEV firmware in the AMD Secure +Processor (ASP, aka PSP) is protected by a VM Platform Communication Key +(VMPCK). By default, the sev-guest driver uses the VMPCK associated with the +VM Privilege Level (VMPL) at which the guest is running. Should this key be +wiped by the sev-guest driver (see the driver for reasons why a VMPCK can be +wiped), a different key can be used by reloading the sev-guest driver and +specifying the desired key using the vmpck_id module parameter. 
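
The new /sys/devices/system/cpu/sev/vmpl attribute, the SVSM description above and the vmpck_id default tie together: a guest at VMPL0 talks to the SEV firmware directly, while a guest running under an SVSM (non-zero VMPL) defaults to the VMPCK of the VMPL it executes at. As a rough illustration only (not part of this patch), a userspace check might read the documented vmpl attribute before deciding whether to pass a different vmpck_id when reloading sev-guest:

        /*
         * Illustrative sketch: read the VMPL exported by this series via
         * /sys/devices/system/cpu/sev/vmpl (present only in SEV-SNP guests).
         */
        #include <stdio.h>

        int main(void)
        {
                FILE *f = fopen("/sys/devices/system/cpu/sev/vmpl", "r");
                int vmpl;

                if (!f) {
                        /* Attribute absent: not an SEV-SNP guest or older kernel. */
                        perror("open vmpl");
                        return 1;
                }

                if (fscanf(f, "%d", &vmpl) != 1) {
                        fclose(f);
                        return 1;
                }
                fclose(f);

                /*
                 * The sev-guest driver now defaults to the VMPCK matching this
                 * VMPL; a different key could be selected by reloading the
                 * driver with, e.g., "modprobe sev-guest vmpck_id=<n>".
                 */
                printf("SNP guest running at VMPL%d\n", vmpl);
                return 0;
        }
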
+ Reference --------- diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c index 0457a9d7e515..cd44e120fe53 100644 --- a/arch/x86/boot/compressed/sev.c +++ b/arch/x86/boot/compressed/sev.c @@ -127,7 +127,35 @@ static bool fault_in_kernel_space(unsigned long address) #include "../../lib/insn.c" /* Include code for early handlers */ -#include "../../kernel/sev-shared.c" +#include "../../coco/sev/shared.c" + +static struct svsm_ca *svsm_get_caa(void) +{ + return boot_svsm_caa; +} + +static u64 svsm_get_caa_pa(void) +{ + return boot_svsm_caa_pa; +} + +static int svsm_perform_call_protocol(struct svsm_call *call) +{ + struct ghcb *ghcb; + int ret; + + if (boot_ghcb) + ghcb = boot_ghcb; + else + ghcb = NULL; + + do { + ret = ghcb ? svsm_perform_ghcb_protocol(ghcb, call) + : svsm_perform_msr_protocol(call); + } while (ret == -EAGAIN); + + return ret; +} bool sev_snp_enabled(void) { @@ -145,8 +173,8 @@ static void __page_state_change(unsigned long paddr, enum psc_op op) * If private -> shared then invalidate the page before requesting the * state change in the RMP table. */ - if (op == SNP_PAGE_STATE_SHARED && pvalidate(paddr, RMP_PG_SIZE_4K, 0)) - sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE); + if (op == SNP_PAGE_STATE_SHARED) + pvalidate_4k_page(paddr, paddr, false); /* Issue VMGEXIT to change the page state in RMP table. */ sev_es_wr_ghcb_msr(GHCB_MSR_PSC_REQ_GFN(paddr >> PAGE_SHIFT, op)); @@ -161,8 +189,8 @@ static void __page_state_change(unsigned long paddr, enum psc_op op) * Now that page state is changed in the RMP table, validate it so that it is * consistent with the RMP entry. */ - if (op == SNP_PAGE_STATE_PRIVATE && pvalidate(paddr, RMP_PG_SIZE_4K, 1)) - sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE); + if (op == SNP_PAGE_STATE_PRIVATE) + pvalidate_4k_page(paddr, paddr, true); } void snp_set_page_private(unsigned long paddr) @@ -256,6 +284,16 @@ void sev_es_shutdown_ghcb(void) error("SEV-ES CPU Features missing."); /* + * This denotes whether to use the GHCB MSR protocol or the GHCB + * shared page to perform a GHCB request. Since the GHCB page is + * being changed to encrypted, it can't be used to perform GHCB + * requests. Clear the boot_ghcb variable so that the GHCB MSR + * protocol is used to change the GHCB page over to an encrypted + * page. + */ + boot_ghcb = NULL; + + /* * GHCB Page must be flushed from the cache and mapped encrypted again. * Otherwise the running kernel will see strange cache effects when * trying to use that page. @@ -463,6 +501,13 @@ static bool early_snp_init(struct boot_params *bp) setup_cpuid_table(cc_info); /* + * Record the SVSM Calling Area (CA) address if the guest is not + * running at VMPL0. The CA will be used to communicate with the + * SVSM and request its services. + */ + svsm_setup_ca(cc_info); + + /* * Pass run-time kernel a pointer to CC info via boot_params so EFI * config table doesn't need to be searched again during early startup * phase. @@ -565,22 +610,31 @@ void sev_enable(struct boot_params *bp) * features. */ if (sev_status & MSR_AMD64_SEV_SNP_ENABLED) { - if (!(get_hv_features() & GHCB_HV_FT_SNP)) + u64 hv_features; + int ret; + + hv_features = get_hv_features(); + if (!(hv_features & GHCB_HV_FT_SNP)) sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED); /* - * Enforce running at VMPL0. - * - * RMPADJUST modifies RMP permissions of a lesser-privileged (numerically - * higher) privilege level. Here, clear the VMPL1 permission mask of the - * GHCB page. 
If the guest is not running at VMPL0, this will fail. + * Enforce running at VMPL0 or with an SVSM. * - * If the guest is running at VMPL0, it will succeed. Even if that operation - * modifies permission bits, it is still ok to do so currently because Linux - * SNP guests running at VMPL0 only run at VMPL0, so VMPL1 or higher - * permission mask changes are a don't-care. + * Use RMPADJUST (see the rmpadjust() function for a description of + * what the instruction does) to update the VMPL1 permissions of a + * page. If the guest is running at VMPL0, this will succeed. If the + * guest is running at any other VMPL, this will fail. Linux SNP guests + * only ever run at a single VMPL level so permission mask changes of a + * lesser-privileged VMPL are a don't-care. + */ + ret = rmpadjust((unsigned long)&boot_ghcb_page, RMP_PG_SIZE_4K, 1); + + /* + * Running at VMPL0 is not required if an SVSM is present and the hypervisor + * supports the required SVSM GHCB events. */ - if (rmpadjust((unsigned long)&boot_ghcb_page, RMP_PG_SIZE_4K, 1)) + if (ret && + !(snp_vmpl && (hv_features & GHCB_HV_FT_SNP_MULTI_VMPL))) sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_NOT_VMPL0); } diff --git a/arch/x86/coco/Makefile b/arch/x86/coco/Makefile index c816acf78b6a..eabdc7486538 100644 --- a/arch/x86/coco/Makefile +++ b/arch/x86/coco/Makefile @@ -6,3 +6,4 @@ CFLAGS_core.o += -fno-stack-protector obj-y += core.o obj-$(CONFIG_INTEL_TDX_GUEST) += tdx/ +obj-$(CONFIG_AMD_MEM_ENCRYPT) += sev/ diff --git a/arch/x86/coco/sev/Makefile b/arch/x86/coco/sev/Makefile new file mode 100644 index 000000000000..4e375e7305ac --- /dev/null +++ b/arch/x86/coco/sev/Makefile @@ -0,0 +1,15 @@ +# SPDX-License-Identifier: GPL-2.0 + +obj-y += core.o + +ifdef CONFIG_FUNCTION_TRACER +CFLAGS_REMOVE_core.o = -pg +endif + +KASAN_SANITIZE_core.o := n +KMSAN_SANITIZE_core.o := n +KCOV_INSTRUMENT_core.o := n + +# With some compiler versions the generated code results in boot hangs, caused +# by several compilation units. To be safe, disable all instrumentation. +KCSAN_SANITIZE := n diff --git a/arch/x86/kernel/sev.c b/arch/x86/coco/sev/core.c index 3342ed58e168..082d61d85dfc 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/coco/sev/core.c @@ -133,16 +133,20 @@ struct ghcb_state { struct ghcb *ghcb; }; +/* For early boot SVSM communication */ +static struct svsm_ca boot_svsm_ca_page __aligned(PAGE_SIZE); + static DEFINE_PER_CPU(struct sev_es_runtime_data*, runtime_data); static DEFINE_PER_CPU(struct sev_es_save_area *, sev_vmsa); +static DEFINE_PER_CPU(struct svsm_ca *, svsm_caa); +static DEFINE_PER_CPU(u64, svsm_caa_pa); struct sev_config { __u64 debug : 1, /* - * A flag used by __set_pages_state() that indicates when the - * per-CPU GHCB has been created and registered and thus can be - * used by the BSP instead of the early boot GHCB. + * Indicates when the per-CPU GHCB has been created and registered + * and thus can be used by the BSP instead of the early boot GHCB. * * For APs, the per-CPU GHCB is created before they are started * and registered upon startup, so this flag can be used globally @@ -150,6 +154,15 @@ struct sev_config { */ ghcbs_initialized : 1, + /* + * Indicates when the per-CPU SVSM CA is to be used instead of the + * boot SVSM CA. + * + * For APs, the per-CPU SVSM CA is created as part of the AP + * bringup, so this flag can be used globally for the BSP and APs. 
+ */ + use_cas : 1, + __reserved : 62; }; @@ -572,8 +585,61 @@ fault: return ES_EXCEPTION; } +static __always_inline void vc_forward_exception(struct es_em_ctxt *ctxt) +{ + long error_code = ctxt->fi.error_code; + int trapnr = ctxt->fi.vector; + + ctxt->regs->orig_ax = ctxt->fi.error_code; + + switch (trapnr) { + case X86_TRAP_GP: + exc_general_protection(ctxt->regs, error_code); + break; + case X86_TRAP_UD: + exc_invalid_op(ctxt->regs); + break; + case X86_TRAP_PF: + write_cr2(ctxt->fi.cr2); + exc_page_fault(ctxt->regs, error_code); + break; + case X86_TRAP_AC: + exc_alignment_check(ctxt->regs, error_code); + break; + default: + pr_emerg("Unsupported exception in #VC instruction emulation - can't continue\n"); + BUG(); + } +} + /* Include code shared with pre-decompression boot stage */ -#include "sev-shared.c" +#include "shared.c" + +static inline struct svsm_ca *svsm_get_caa(void) +{ + /* + * Use rIP-relative references when called early in the boot. If + * ->use_cas is set, then it is late in the boot and no need + * to worry about rIP-relative references. + */ + if (RIP_REL_REF(sev_cfg).use_cas) + return this_cpu_read(svsm_caa); + else + return RIP_REL_REF(boot_svsm_caa); +} + +static u64 svsm_get_caa_pa(void) +{ + /* + * Use rIP-relative references when called early in the boot. If + * ->use_cas is set, then it is late in the boot and no need + * to worry about rIP-relative references. + */ + if (RIP_REL_REF(sev_cfg).use_cas) + return this_cpu_read(svsm_caa_pa); + else + return RIP_REL_REF(boot_svsm_caa_pa); +} static noinstr void __sev_put_ghcb(struct ghcb_state *state) { @@ -600,6 +666,44 @@ static noinstr void __sev_put_ghcb(struct ghcb_state *state) } } +static int svsm_perform_call_protocol(struct svsm_call *call) +{ + struct ghcb_state state; + unsigned long flags; + struct ghcb *ghcb; + int ret; + + /* + * This can be called very early in the boot, use native functions in + * order to avoid paravirt issues. + */ + flags = native_local_irq_save(); + + /* + * Use rip-relative references when called early in the boot. If + * ghcbs_initialized is set, then it is late in the boot and no need + * to worry about rip-relative references in called functions. + */ + if (RIP_REL_REF(sev_cfg).ghcbs_initialized) + ghcb = __sev_get_ghcb(&state); + else if (RIP_REL_REF(boot_ghcb)) + ghcb = RIP_REL_REF(boot_ghcb); + else + ghcb = NULL; + + do { + ret = ghcb ? 
svsm_perform_ghcb_protocol(ghcb, call) + : svsm_perform_msr_protocol(call); + } while (ret == -EAGAIN); + + if (RIP_REL_REF(sev_cfg).ghcbs_initialized) + __sev_put_ghcb(&state); + + native_local_irq_restore(flags); + + return ret; +} + void noinstr __sev_es_nmi_complete(void) { struct ghcb_state state; @@ -709,7 +813,6 @@ early_set_pages_state(unsigned long vaddr, unsigned long paddr, { unsigned long paddr_end; u64 val; - int ret; vaddr = vaddr & PAGE_MASK; @@ -717,12 +820,9 @@ early_set_pages_state(unsigned long vaddr, unsigned long paddr, paddr_end = paddr + (npages << PAGE_SHIFT); while (paddr < paddr_end) { - if (op == SNP_PAGE_STATE_SHARED) { - /* Page validation must be rescinded before changing to shared */ - ret = pvalidate(vaddr, RMP_PG_SIZE_4K, false); - if (WARN(ret, "Failed to validate address 0x%lx ret %d", paddr, ret)) - goto e_term; - } + /* Page validation must be rescinded before changing to shared */ + if (op == SNP_PAGE_STATE_SHARED) + pvalidate_4k_page(vaddr, paddr, false); /* * Use the MSR protocol because this function can be called before @@ -744,12 +844,9 @@ early_set_pages_state(unsigned long vaddr, unsigned long paddr, paddr, GHCB_MSR_PSC_RESP_VAL(val))) goto e_term; - if (op == SNP_PAGE_STATE_PRIVATE) { - /* Page validation must be performed after changing to private */ - ret = pvalidate(vaddr, RMP_PG_SIZE_4K, true); - if (WARN(ret, "Failed to validate address 0x%lx ret %d", paddr, ret)) - goto e_term; - } + /* Page validation must be performed after changing to private */ + if (op == SNP_PAGE_STATE_PRIVATE) + pvalidate_4k_page(vaddr, paddr, true); vaddr += PAGE_SIZE; paddr += PAGE_SIZE; @@ -913,22 +1010,49 @@ void snp_accept_memory(phys_addr_t start, phys_addr_t end) set_pages_state(vaddr, npages, SNP_PAGE_STATE_PRIVATE); } -static int snp_set_vmsa(void *va, bool vmsa) +static int snp_set_vmsa(void *va, void *caa, int apic_id, bool make_vmsa) { - u64 attrs; + int ret; - /* - * Running at VMPL0 allows the kernel to change the VMSA bit for a page - * using the RMPADJUST instruction. However, for the instruction to - * succeed it must target the permissions of a lesser privileged - * (higher numbered) VMPL level, so use VMPL1 (refer to the RMPADJUST - * instruction in the AMD64 APM Volume 3). - */ - attrs = 1; - if (vmsa) - attrs |= RMPADJUST_VMSA_PAGE_BIT; + if (snp_vmpl) { + struct svsm_call call = {}; + unsigned long flags; + + local_irq_save(flags); - return rmpadjust((unsigned long)va, RMP_PG_SIZE_4K, attrs); + call.caa = this_cpu_read(svsm_caa); + call.rcx = __pa(va); + + if (make_vmsa) { + /* Protocol 0, Call ID 2 */ + call.rax = SVSM_CORE_CALL(SVSM_CORE_CREATE_VCPU); + call.rdx = __pa(caa); + call.r8 = apic_id; + } else { + /* Protocol 0, Call ID 3 */ + call.rax = SVSM_CORE_CALL(SVSM_CORE_DELETE_VCPU); + } + + ret = svsm_perform_call_protocol(&call); + + local_irq_restore(flags); + } else { + /* + * If the kernel runs at VMPL0, it can change the VMSA + * bit for a page using the RMPADJUST instruction. + * However, for the instruction to succeed it must + * target the permissions of a lesser privileged (higher + * numbered) VMPL level, so use VMPL1. 
+ */ + u64 attrs = 1; + + if (make_vmsa) + attrs |= RMPADJUST_VMSA_PAGE_BIT; + + ret = rmpadjust((unsigned long)va, RMP_PG_SIZE_4K, attrs); + } + + return ret; } #define __ATTR_BASE (SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK) @@ -962,11 +1086,11 @@ static void *snp_alloc_vmsa_page(int cpu) return page_address(p + 1); } -static void snp_cleanup_vmsa(struct sev_es_save_area *vmsa) +static void snp_cleanup_vmsa(struct sev_es_save_area *vmsa, int apic_id) { int err; - err = snp_set_vmsa(vmsa, false); + err = snp_set_vmsa(vmsa, NULL, apic_id, false); if (err) pr_err("clear VMSA page failed (%u), leaking page\n", err); else @@ -977,6 +1101,7 @@ static int wakeup_cpu_via_vmgexit(u32 apic_id, unsigned long start_ip) { struct sev_es_save_area *cur_vmsa, *vmsa; struct ghcb_state state; + struct svsm_ca *caa; unsigned long flags; struct ghcb *ghcb; u8 sipi_vector; @@ -1023,6 +1148,9 @@ static int wakeup_cpu_via_vmgexit(u32 apic_id, unsigned long start_ip) if (!vmsa) return -ENOMEM; + /* If an SVSM is present, the SVSM per-CPU CAA will be !NULL */ + caa = per_cpu(svsm_caa, cpu); + /* CR4 should maintain the MCE value */ cr4 = native_read_cr4() & X86_CR4_MCE; @@ -1070,11 +1198,11 @@ static int wakeup_cpu_via_vmgexit(u32 apic_id, unsigned long start_ip) * VMPL level * SEV_FEATURES (matches the SEV STATUS MSR right shifted 2 bits) */ - vmsa->vmpl = 0; + vmsa->vmpl = snp_vmpl; vmsa->sev_features = sev_status >> 2; /* Switch the page over to a VMSA page now that it is initialized */ - ret = snp_set_vmsa(vmsa, true); + ret = snp_set_vmsa(vmsa, caa, apic_id, true); if (ret) { pr_err("set VMSA page failed (%u)\n", ret); free_page((unsigned long)vmsa); @@ -1090,7 +1218,10 @@ static int wakeup_cpu_via_vmgexit(u32 apic_id, unsigned long start_ip) vc_ghcb_invalidate(ghcb); ghcb_set_rax(ghcb, vmsa->sev_features); ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_CREATION); - ghcb_set_sw_exit_info_1(ghcb, ((u64)apic_id << 32) | SVM_VMGEXIT_AP_CREATE); + ghcb_set_sw_exit_info_1(ghcb, + ((u64)apic_id << 32) | + ((u64)snp_vmpl << 16) | + SVM_VMGEXIT_AP_CREATE); ghcb_set_sw_exit_info_2(ghcb, __pa(vmsa)); sev_es_wr_ghcb_msr(__pa(ghcb)); @@ -1108,13 +1239,13 @@ static int wakeup_cpu_via_vmgexit(u32 apic_id, unsigned long start_ip) /* Perform cleanup if there was an error */ if (ret) { - snp_cleanup_vmsa(vmsa); + snp_cleanup_vmsa(vmsa, apic_id); vmsa = NULL; } /* Free up any previous VMSA page */ if (cur_vmsa) - snp_cleanup_vmsa(cur_vmsa); + snp_cleanup_vmsa(cur_vmsa, apic_id); /* Record the current VMSA page */ per_cpu(sev_vmsa, cpu) = vmsa; @@ -1209,6 +1340,17 @@ static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt) /* Is it a WRMSR? */ exit_info_1 = (ctxt->insn.opcode.bytes[1] == 0x30) ? 
1 : 0; + if (regs->cx == MSR_SVSM_CAA) { + /* Writes to the SVSM CAA msr are ignored */ + if (exit_info_1) + return ES_OK; + + regs->ax = lower_32_bits(this_cpu_read(svsm_caa_pa)); + regs->dx = upper_32_bits(this_cpu_read(svsm_caa_pa)); + + return ES_OK; + } + ghcb_set_rcx(ghcb, regs->cx); if (exit_info_1) { ghcb_set_rax(ghcb, regs->ax); @@ -1346,6 +1488,18 @@ static void __init alloc_runtime_data(int cpu) panic("Can't allocate SEV-ES runtime data"); per_cpu(runtime_data, cpu) = data; + + if (snp_vmpl) { + struct svsm_ca *caa; + + /* Allocate the SVSM CA page if an SVSM is present */ + caa = memblock_alloc(sizeof(*caa), PAGE_SIZE); + if (!caa) + panic("Can't allocate SVSM CA page\n"); + + per_cpu(svsm_caa, cpu) = caa; + per_cpu(svsm_caa_pa, cpu) = __pa(caa); + } } static void __init init_ghcb(int cpu) @@ -1395,6 +1549,32 @@ void __init sev_es_init_vc_handling(void) init_ghcb(cpu); } + /* If running under an SVSM, switch to the per-cpu CA */ + if (snp_vmpl) { + struct svsm_call call = {}; + unsigned long flags; + int ret; + + local_irq_save(flags); + + /* + * SVSM_CORE_REMAP_CA call: + * RAX = 0 (Protocol=0, CallID=0) + * RCX = New CA GPA + */ + call.caa = svsm_get_caa(); + call.rax = SVSM_CORE_CALL(SVSM_CORE_REMAP_CA); + call.rcx = this_cpu_read(svsm_caa_pa); + ret = svsm_perform_call_protocol(&call); + if (ret) + panic("Can't remap the SVSM CA, ret=%d, rax_out=0x%llx\n", + ret, call.rax_out); + + sev_cfg.use_cas = true; + + local_irq_restore(flags); + } + sev_es_setup_play_dead(); /* Secondary CPUs use the runtime #VC handler */ @@ -1819,33 +1999,6 @@ static enum es_result vc_handle_exitcode(struct es_em_ctxt *ctxt, return result; } -static __always_inline void vc_forward_exception(struct es_em_ctxt *ctxt) -{ - long error_code = ctxt->fi.error_code; - int trapnr = ctxt->fi.vector; - - ctxt->regs->orig_ax = ctxt->fi.error_code; - - switch (trapnr) { - case X86_TRAP_GP: - exc_general_protection(ctxt->regs, error_code); - break; - case X86_TRAP_UD: - exc_invalid_op(ctxt->regs); - break; - case X86_TRAP_PF: - write_cr2(ctxt->fi.cr2); - exc_page_fault(ctxt->regs, error_code); - break; - case X86_TRAP_AC: - exc_alignment_check(ctxt->regs, error_code); - break; - default: - pr_emerg("Unsupported exception in #VC instruction emulation - can't continue\n"); - BUG(); - } -} - static __always_inline bool is_vc2_stack(unsigned long sp) { return (sp >= __this_cpu_ist_bottom_va(VC2) && sp < __this_cpu_ist_top_va(VC2)); @@ -2095,6 +2248,47 @@ found_cc_info: return cc_info; } +static __head void svsm_setup(struct cc_blob_sev_info *cc_info) +{ + struct svsm_call call = {}; + int ret; + u64 pa; + + /* + * Record the SVSM Calling Area address (CAA) if the guest is not + * running at VMPL0. The CA will be used to communicate with the + * SVSM to perform the SVSM services. + */ + if (!svsm_setup_ca(cc_info)) + return; + + /* + * It is very early in the boot and the kernel is running identity + * mapped but without having adjusted the pagetables to where the + * kernel was loaded (physbase), so the get the CA address using + * RIP-relative addressing. + */ + pa = (u64)&RIP_REL_REF(boot_svsm_ca_page); + + /* + * Switch over to the boot SVSM CA while the current CA is still + * addressable. There is no GHCB at this point so use the MSR protocol. 
+ * + * SVSM_CORE_REMAP_CA call: + * RAX = 0 (Protocol=0, CallID=0) + * RCX = New CA GPA + */ + call.caa = svsm_get_caa(); + call.rax = SVSM_CORE_CALL(SVSM_CORE_REMAP_CA); + call.rcx = pa; + ret = svsm_perform_call_protocol(&call); + if (ret) + panic("Can't remap the SVSM CA, ret=%d, rax_out=0x%llx\n", ret, call.rax_out); + + RIP_REL_REF(boot_svsm_caa) = (struct svsm_ca *)pa; + RIP_REL_REF(boot_svsm_caa_pa) = pa; +} + bool __head snp_init(struct boot_params *bp) { struct cc_blob_sev_info *cc_info; @@ -2108,6 +2302,8 @@ bool __head snp_init(struct boot_params *bp) setup_cpuid_table(cc_info); + svsm_setup(cc_info); + /* * The CC blob will be used later to access the secrets page. Cache * it here like the boot kernel does. @@ -2156,23 +2352,27 @@ static void dump_cpuid_table(void) * expected, but that initialization happens too early in boot to print any * sort of indicator, and there's not really any other good place to do it, * so do it here. + * + * If running as an SNP guest, report the current VM privilege level (VMPL). */ -static int __init report_cpuid_table(void) +static int __init report_snp_info(void) { const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table(); - if (!cpuid_table->count) - return 0; + if (cpuid_table->count) { + pr_info("Using SNP CPUID table, %d entries present.\n", + cpuid_table->count); - pr_info("Using SNP CPUID table, %d entries present.\n", - cpuid_table->count); + if (sev_cfg.debug) + dump_cpuid_table(); + } - if (sev_cfg.debug) - dump_cpuid_table(); + if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) + pr_info("SNP running at VMPL%u.\n", snp_vmpl); return 0; } -arch_initcall(report_cpuid_table); +arch_initcall(report_snp_info); static int __init init_sev_config(char *str) { @@ -2191,6 +2391,56 @@ static int __init init_sev_config(char *str) } __setup("sev=", init_sev_config); +static void update_attest_input(struct svsm_call *call, struct svsm_attest_call *input) +{ + /* If (new) lengths have been returned, propagate them up */ + if (call->rcx_out != call->rcx) + input->manifest_buf.len = call->rcx_out; + + if (call->rdx_out != call->rdx) + input->certificates_buf.len = call->rdx_out; + + if (call->r8_out != call->r8) + input->report_buf.len = call->r8_out; +} + +int snp_issue_svsm_attest_req(u64 call_id, struct svsm_call *call, + struct svsm_attest_call *input) +{ + struct svsm_attest_call *ac; + unsigned long flags; + u64 attest_call_pa; + int ret; + + if (!snp_vmpl) + return -EINVAL; + + local_irq_save(flags); + + call->caa = svsm_get_caa(); + + ac = (struct svsm_attest_call *)call->caa->svsm_buffer; + attest_call_pa = svsm_get_caa_pa() + offsetof(struct svsm_ca, svsm_buffer); + + *ac = *input; + + /* + * Set input registers for the request and set RDX and R8 to known + * values in order to detect length values being returned in them. 
+ */ + call->rax = call_id; + call->rcx = attest_call_pa; + call->rdx = -1; + call->r8 = -1; + ret = svsm_perform_call_protocol(call); + update_attest_input(call, input); + + local_irq_restore(flags); + + return ret; +} +EXPORT_SYMBOL_GPL(snp_issue_svsm_attest_req); + int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio) { struct ghcb_state state; @@ -2299,3 +2549,58 @@ void sev_show_status(void) } pr_cont("\n"); } + +void __init snp_update_svsm_ca(void) +{ + if (!snp_vmpl) + return; + + /* Update the CAA to a proper kernel address */ + boot_svsm_caa = &boot_svsm_ca_page; +} + +#ifdef CONFIG_SYSFS +static ssize_t vmpl_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%d\n", snp_vmpl); +} + +static struct kobj_attribute vmpl_attr = __ATTR_RO(vmpl); + +static struct attribute *vmpl_attrs[] = { + &vmpl_attr.attr, + NULL +}; + +static struct attribute_group sev_attr_group = { + .attrs = vmpl_attrs, +}; + +static int __init sev_sysfs_init(void) +{ + struct kobject *sev_kobj; + struct device *dev_root; + int ret; + + if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) + return -ENODEV; + + dev_root = bus_get_dev_root(&cpu_subsys); + if (!dev_root) + return -ENODEV; + + sev_kobj = kobject_create_and_add("sev", &dev_root->kobj); + put_device(dev_root); + + if (!sev_kobj) + return -ENOMEM; + + ret = sysfs_create_group(sev_kobj, &sev_attr_group); + if (ret) + kobject_put(sev_kobj); + + return ret; +} +arch_initcall(sev_sysfs_init); +#endif // CONFIG_SYSFS diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/coco/sev/shared.c index b4f8fa0f722c..71de53194089 100644 --- a/arch/x86/kernel/sev-shared.c +++ b/arch/x86/coco/sev/shared.c @@ -21,8 +21,30 @@ #define WARN(condition, format...) (!!(condition)) #define sev_printk(fmt, ...) #define sev_printk_rtl(fmt, ...) +#undef vc_forward_exception +#define vc_forward_exception(c) panic("SNP: Hypervisor requested exception\n") #endif +/* + * SVSM related information: + * When running under an SVSM, the VMPL that Linux is executing at must be + * non-zero. The VMPL is therefore used to indicate the presence of an SVSM. + * + * During boot, the page tables are set up as identity mapped and later + * changed to use kernel virtual addresses. Maintain separate virtual and + * physical addresses for the CAA to allow SVSM functions to be used during + * early boot, both with identity mapped virtual addresses and proper kernel + * virtual addresses. 
+ */ +u8 snp_vmpl __ro_after_init; +EXPORT_SYMBOL_GPL(snp_vmpl); +static struct svsm_ca *boot_svsm_caa __ro_after_init; +static u64 boot_svsm_caa_pa __ro_after_init; + +static struct svsm_ca *svsm_get_caa(void); +static u64 svsm_get_caa_pa(void); +static int svsm_perform_call_protocol(struct svsm_call *call); + /* I/O parameters for CPUID-related helpers */ struct cpuid_leaf { u32 fn; @@ -229,6 +251,126 @@ static enum es_result verify_exception_info(struct ghcb *ghcb, struct es_em_ctxt return ES_VMM_ERROR; } +static inline int svsm_process_result_codes(struct svsm_call *call) +{ + switch (call->rax_out) { + case SVSM_SUCCESS: + return 0; + case SVSM_ERR_INCOMPLETE: + case SVSM_ERR_BUSY: + return -EAGAIN; + default: + return -EINVAL; + } +} + +/* + * Issue a VMGEXIT to call the SVSM: + * - Load the SVSM register state (RAX, RCX, RDX, R8 and R9) + * - Set the CA call pending field to 1 + * - Issue VMGEXIT + * - Save the SVSM return register state (RAX, RCX, RDX, R8 and R9) + * - Perform atomic exchange of the CA call pending field + * + * - See the "Secure VM Service Module for SEV-SNP Guests" specification for + * details on the calling convention. + * - The calling convention loosely follows the Microsoft X64 calling + * convention by putting arguments in RCX, RDX, R8 and R9. + * - RAX specifies the SVSM protocol/callid as input and the return code + * as output. + */ +static __always_inline void svsm_issue_call(struct svsm_call *call, u8 *pending) +{ + register unsigned long rax asm("rax") = call->rax; + register unsigned long rcx asm("rcx") = call->rcx; + register unsigned long rdx asm("rdx") = call->rdx; + register unsigned long r8 asm("r8") = call->r8; + register unsigned long r9 asm("r9") = call->r9; + + call->caa->call_pending = 1; + + asm volatile("rep; vmmcall\n\t" + : "+r" (rax), "+r" (rcx), "+r" (rdx), "+r" (r8), "+r" (r9) + : : "memory"); + + *pending = xchg(&call->caa->call_pending, *pending); + + call->rax_out = rax; + call->rcx_out = rcx; + call->rdx_out = rdx; + call->r8_out = r8; + call->r9_out = r9; +} + +static int svsm_perform_msr_protocol(struct svsm_call *call) +{ + u8 pending = 0; + u64 val, resp; + + /* + * When using the MSR protocol, be sure to save and restore + * the current MSR value. + */ + val = sev_es_rd_ghcb_msr(); + + sev_es_wr_ghcb_msr(GHCB_MSR_VMPL_REQ_LEVEL(0)); + + svsm_issue_call(call, &pending); + + resp = sev_es_rd_ghcb_msr(); + + sev_es_wr_ghcb_msr(val); + + if (pending) + return -EINVAL; + + if (GHCB_RESP_CODE(resp) != GHCB_MSR_VMPL_RESP) + return -EINVAL; + + if (GHCB_MSR_VMPL_RESP_VAL(resp)) + return -EINVAL; + + return svsm_process_result_codes(call); +} + +static int svsm_perform_ghcb_protocol(struct ghcb *ghcb, struct svsm_call *call) +{ + struct es_em_ctxt ctxt; + u8 pending = 0; + + vc_ghcb_invalidate(ghcb); + + /* + * Fill in protocol and format specifiers. This can be called very early + * in the boot, so use rip-relative references as needed. 
+ */ + ghcb->protocol_version = RIP_REL_REF(ghcb_version); + ghcb->ghcb_usage = GHCB_DEFAULT_USAGE; + + ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_SNP_RUN_VMPL); + ghcb_set_sw_exit_info_1(ghcb, 0); + ghcb_set_sw_exit_info_2(ghcb, 0); + + sev_es_wr_ghcb_msr(__pa(ghcb)); + + svsm_issue_call(call, &pending); + + if (pending) + return -EINVAL; + + switch (verify_exception_info(ghcb, &ctxt)) { + case ES_OK: + break; + case ES_EXCEPTION: + vc_forward_exception(&ctxt); + fallthrough; + default: + return -EINVAL; + } + + return svsm_process_result_codes(call); +} + static enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb, struct es_em_ctxt *ctxt, u64 exit_code, u64 exit_info_1, @@ -1079,38 +1221,268 @@ static void __head setup_cpuid_table(const struct cc_blob_sev_info *cc_info) } } -static void pvalidate_pages(struct snp_psc_desc *desc) +static inline void __pval_terminate(u64 pfn, bool action, unsigned int page_size, + int ret, u64 svsm_ret) +{ + WARN(1, "PVALIDATE failure: pfn: 0x%llx, action: %u, size: %u, ret: %d, svsm_ret: 0x%llx\n", + pfn, action, page_size, ret, svsm_ret); + + sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE); +} + +static void svsm_pval_terminate(struct svsm_pvalidate_call *pc, int ret, u64 svsm_ret) +{ + unsigned int page_size; + bool action; + u64 pfn; + + pfn = pc->entry[pc->cur_index].pfn; + action = pc->entry[pc->cur_index].action; + page_size = pc->entry[pc->cur_index].page_size; + + __pval_terminate(pfn, action, page_size, ret, svsm_ret); +} + +static void svsm_pval_4k_page(unsigned long paddr, bool validate) +{ + struct svsm_pvalidate_call *pc; + struct svsm_call call = {}; + unsigned long flags; + u64 pc_pa; + int ret; + + /* + * This can be called very early in the boot, use native functions in + * order to avoid paravirt issues. + */ + flags = native_local_irq_save(); + + call.caa = svsm_get_caa(); + + pc = (struct svsm_pvalidate_call *)call.caa->svsm_buffer; + pc_pa = svsm_get_caa_pa() + offsetof(struct svsm_ca, svsm_buffer); + + pc->num_entries = 1; + pc->cur_index = 0; + pc->entry[0].page_size = RMP_PG_SIZE_4K; + pc->entry[0].action = validate; + pc->entry[0].ignore_cf = 0; + pc->entry[0].pfn = paddr >> PAGE_SHIFT; + + /* Protocol 0, Call ID 1 */ + call.rax = SVSM_CORE_CALL(SVSM_CORE_PVALIDATE); + call.rcx = pc_pa; + + ret = svsm_perform_call_protocol(&call); + if (ret) + svsm_pval_terminate(pc, ret, call.rax_out); + + native_local_irq_restore(flags); +} + +static void pvalidate_4k_page(unsigned long vaddr, unsigned long paddr, bool validate) +{ + int ret; + + /* + * This can be called very early during boot, so use rIP-relative + * references as needed. + */ + if (RIP_REL_REF(snp_vmpl)) { + svsm_pval_4k_page(paddr, validate); + } else { + ret = pvalidate(vaddr, RMP_PG_SIZE_4K, validate); + if (ret) + __pval_terminate(PHYS_PFN(paddr), validate, RMP_PG_SIZE_4K, ret, 0); + } +} + +static void pval_pages(struct snp_psc_desc *desc) { struct psc_entry *e; unsigned long vaddr; unsigned int size; unsigned int i; bool validate; + u64 pfn; int rc; for (i = 0; i <= desc->hdr.end_entry; i++) { e = &desc->entries[i]; - vaddr = (unsigned long)pfn_to_kaddr(e->gfn); + pfn = e->gfn; + vaddr = (unsigned long)pfn_to_kaddr(pfn); size = e->pagesize ? 
RMP_PG_SIZE_2M : RMP_PG_SIZE_4K; validate = e->operation == SNP_PAGE_STATE_PRIVATE; rc = pvalidate(vaddr, size, validate); + if (!rc) + continue; + if (rc == PVALIDATE_FAIL_SIZEMISMATCH && size == RMP_PG_SIZE_2M) { unsigned long vaddr_end = vaddr + PMD_SIZE; - for (; vaddr < vaddr_end; vaddr += PAGE_SIZE) { + for (; vaddr < vaddr_end; vaddr += PAGE_SIZE, pfn++) { rc = pvalidate(vaddr, RMP_PG_SIZE_4K, validate); if (rc) - break; + __pval_terminate(pfn, validate, RMP_PG_SIZE_4K, rc, 0); } + } else { + __pval_terminate(pfn, validate, size, rc, 0); } + } +} + +static u64 svsm_build_ca_from_pfn_range(u64 pfn, u64 pfn_end, bool action, + struct svsm_pvalidate_call *pc) +{ + struct svsm_pvalidate_entry *pe; + + /* Nothing in the CA yet */ + pc->num_entries = 0; + pc->cur_index = 0; + + pe = &pc->entry[0]; + + while (pfn < pfn_end) { + pe->page_size = RMP_PG_SIZE_4K; + pe->action = action; + pe->ignore_cf = 0; + pe->pfn = pfn; + + pe++; + pfn++; + + pc->num_entries++; + if (pc->num_entries == SVSM_PVALIDATE_MAX_COUNT) + break; + } + + return pfn; +} + +static int svsm_build_ca_from_psc_desc(struct snp_psc_desc *desc, unsigned int desc_entry, + struct svsm_pvalidate_call *pc) +{ + struct svsm_pvalidate_entry *pe; + struct psc_entry *e; + + /* Nothing in the CA yet */ + pc->num_entries = 0; + pc->cur_index = 0; + + pe = &pc->entry[0]; + e = &desc->entries[desc_entry]; + + while (desc_entry <= desc->hdr.end_entry) { + pe->page_size = e->pagesize ? RMP_PG_SIZE_2M : RMP_PG_SIZE_4K; + pe->action = e->operation == SNP_PAGE_STATE_PRIVATE; + pe->ignore_cf = 0; + pe->pfn = e->gfn; + + pe++; + e++; + + desc_entry++; + pc->num_entries++; + if (pc->num_entries == SVSM_PVALIDATE_MAX_COUNT) + break; + } + + return desc_entry; +} + +static void svsm_pval_pages(struct snp_psc_desc *desc) +{ + struct svsm_pvalidate_entry pv_4k[VMGEXIT_PSC_MAX_ENTRY]; + unsigned int i, pv_4k_count = 0; + struct svsm_pvalidate_call *pc; + struct svsm_call call = {}; + unsigned long flags; + bool action; + u64 pc_pa; + int ret; + + /* + * This can be called very early in the boot, use native functions in + * order to avoid paravirt issues. + */ + flags = native_local_irq_save(); + + /* + * The SVSM calling area (CA) can support processing 510 entries at a + * time. Loop through the Page State Change descriptor until the CA is + * full or the last entry in the descriptor is reached, at which time + * the SVSM is invoked. This repeats until all entries in the descriptor + * are processed. + */ + call.caa = svsm_get_caa(); + + pc = (struct svsm_pvalidate_call *)call.caa->svsm_buffer; + pc_pa = svsm_get_caa_pa() + offsetof(struct svsm_ca, svsm_buffer); + + /* Protocol 0, Call ID 1 */ + call.rax = SVSM_CORE_CALL(SVSM_CORE_PVALIDATE); + call.rcx = pc_pa; + + for (i = 0; i <= desc->hdr.end_entry;) { + i = svsm_build_ca_from_psc_desc(desc, i, pc); + + do { + ret = svsm_perform_call_protocol(&call); + if (!ret) + continue; + + /* + * Check if the entry failed because of an RMP mismatch (a + * PVALIDATE at 2M was requested, but the page is mapped in + * the RMP as 4K). 
+ */ - if (rc) { - WARN(1, "Failed to validate address 0x%lx ret %d", vaddr, rc); - sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE); + if (call.rax_out == SVSM_PVALIDATE_FAIL_SIZEMISMATCH && + pc->entry[pc->cur_index].page_size == RMP_PG_SIZE_2M) { + /* Save this entry for post-processing at 4K */ + pv_4k[pv_4k_count++] = pc->entry[pc->cur_index]; + + /* Skip to the next one unless at the end of the list */ + pc->cur_index++; + if (pc->cur_index < pc->num_entries) + ret = -EAGAIN; + else + ret = 0; + } + } while (ret == -EAGAIN); + + if (ret) + svsm_pval_terminate(pc, ret, call.rax_out); + } + + /* Process any entries that failed to be validated at 2M and validate them at 4K */ + for (i = 0; i < pv_4k_count; i++) { + u64 pfn, pfn_end; + + action = pv_4k[i].action; + pfn = pv_4k[i].pfn; + pfn_end = pfn + 512; + + while (pfn < pfn_end) { + pfn = svsm_build_ca_from_pfn_range(pfn, pfn_end, action, pc); + + ret = svsm_perform_call_protocol(&call); + if (ret) + svsm_pval_terminate(pc, ret, call.rax_out); } } + + native_local_irq_restore(flags); +} + +static void pvalidate_pages(struct snp_psc_desc *desc) +{ + if (snp_vmpl) + svsm_pval_pages(desc); + else + pval_pages(desc); } static int vmgexit_psc(struct ghcb *ghcb, struct snp_psc_desc *desc) @@ -1269,3 +1641,77 @@ static enum es_result vc_check_opcode_bytes(struct es_em_ctxt *ctxt, return ES_UNSUPPORTED; } + +/* + * Maintain the GPA of the SVSM Calling Area (CA) in order to utilize the SVSM + * services needed when not running in VMPL0. + */ +static bool __head svsm_setup_ca(const struct cc_blob_sev_info *cc_info) +{ + struct snp_secrets_page *secrets_page; + struct snp_cpuid_table *cpuid_table; + unsigned int i; + u64 caa; + + BUILD_BUG_ON(sizeof(*secrets_page) != PAGE_SIZE); + + /* + * Check if running at VMPL0. + * + * Use RMPADJUST (see the rmpadjust() function for a description of what + * the instruction does) to update the VMPL1 permissions of a page. If + * the guest is running at VMPL0, this will succeed and implies there is + * no SVSM. If the guest is running at any other VMPL, this will fail. + * Linux SNP guests only ever run at a single VMPL level so permission mask + * changes of a lesser-privileged VMPL are a don't-care. + * + * Use a rip-relative reference to obtain the proper address, since this + * routine is running identity mapped when called, both by the decompressor + * code and the early kernel code. + */ + if (!rmpadjust((unsigned long)&RIP_REL_REF(boot_ghcb_page), RMP_PG_SIZE_4K, 1)) + return false; + + /* + * Not running at VMPL0, ensure everything has been properly supplied + * for running under an SVSM. + */ + if (!cc_info || !cc_info->secrets_phys || cc_info->secrets_len != PAGE_SIZE) + sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SECRETS_PAGE); + + secrets_page = (struct snp_secrets_page *)cc_info->secrets_phys; + if (!secrets_page->svsm_size) + sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_NO_SVSM); + + if (!secrets_page->svsm_guest_vmpl) + sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SVSM_VMPL0); + + RIP_REL_REF(snp_vmpl) = secrets_page->svsm_guest_vmpl; + + caa = secrets_page->svsm_caa; + + /* + * An open-coded PAGE_ALIGNED() in order to avoid including + * kernel-proper headers into the decompressor. + */ + if (caa & (PAGE_SIZE - 1)) + sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SVSM_CAA); + + /* + * The CA is identity mapped when this routine is called, both by the + * decompressor code and the early kernel code. 
+ */ + RIP_REL_REF(boot_svsm_caa) = (struct svsm_ca *)caa; + RIP_REL_REF(boot_svsm_caa_pa) = caa; + + /* Advertise the SVSM presence via CPUID. */ + cpuid_table = (struct snp_cpuid_table *)snp_cpuid_get_table(); + for (i = 0; i < cpuid_table->count; i++) { + struct snp_cpuid_fn *fn = &cpuid_table->fn[i]; + + if (fn->eax_in == 0x8000001f) + fn->eax |= BIT(28); + } + + return true; +} diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index a1dd81027c2d..b51e88d01b21 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -446,6 +446,7 @@ #define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* Virtual TSC_AUX */ #define X86_FEATURE_SME_COHERENT (19*32+10) /* AMD hardware-enforced cache coherency */ #define X86_FEATURE_DEBUG_SWAP (19*32+14) /* "debug_swap" AMD SEV-ES full debug state swap support */ +#define X86_FEATURE_SVSM (19*32+28) /* "svsm" SVSM present */ /* AMD-defined Extended Feature 2 EAX, CPUID level 0x80000021 (EAX), word 20 */ #define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* No Nested Data Breakpoints */ diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index 8c5ae649d2df..cf7fc2b8e3ce 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -54,6 +54,26 @@ static __always_inline void native_halt(void) asm volatile("hlt": : :"memory"); } +static __always_inline int native_irqs_disabled_flags(unsigned long flags) +{ + return !(flags & X86_EFLAGS_IF); +} + +static __always_inline unsigned long native_local_irq_save(void) +{ + unsigned long flags = native_save_fl(); + + native_irq_disable(); + + return flags; +} + +static __always_inline void native_local_irq_restore(unsigned long flags) +{ + if (!native_irqs_disabled_flags(flags)) + native_irq_enable(); +} + #endif #ifdef CONFIG_PARAVIRT_XXL diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 3cb8dd6311c3..01342963011e 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -660,6 +660,8 @@ #define MSR_AMD64_RMP_BASE 0xc0010132 #define MSR_AMD64_RMP_END 0xc0010133 +#define MSR_SVSM_CAA 0xc001f000 + /* AMD Collaborative Processor Performance Control MSRs */ #define MSR_AMD_CPPC_CAP1 0xc00102b0 #define MSR_AMD_CPPC_ENABLE 0xc00102b1 diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h index 5a8246dd532f..e90d403f2068 100644 --- a/arch/x86/include/asm/sev-common.h +++ b/arch/x86/include/asm/sev-common.h @@ -98,6 +98,19 @@ enum psc_op { /* GHCBData[63:32] */ \ (((u64)(val) & GENMASK_ULL(63, 32)) >> 32) +/* GHCB Run at VMPL Request/Response */ +#define GHCB_MSR_VMPL_REQ 0x016 +#define GHCB_MSR_VMPL_REQ_LEVEL(v) \ + /* GHCBData[39:32] */ \ + (((u64)(v) & GENMASK_ULL(7, 0) << 32) | \ + /* GHCBDdata[11:0] */ \ + GHCB_MSR_VMPL_REQ) + +#define GHCB_MSR_VMPL_RESP 0x017 +#define GHCB_MSR_VMPL_RESP_VAL(v) \ + /* GHCBData[63:32] */ \ + (((u64)(v) & GENMASK_ULL(63, 32)) >> 32) + /* GHCB Hypervisor Feature Request/Response */ #define GHCB_MSR_HV_FT_REQ 0x080 #define GHCB_MSR_HV_FT_RESP 0x081 @@ -109,6 +122,7 @@ enum psc_op { #define GHCB_HV_FT_SNP BIT_ULL(0) #define GHCB_HV_FT_SNP_AP_CREATION BIT_ULL(1) +#define GHCB_HV_FT_SNP_MULTI_VMPL BIT_ULL(5) /* * SNP Page State Change NAE event @@ -163,6 +177,10 @@ struct snp_psc_desc { #define GHCB_TERM_NOT_VMPL0 3 /* SNP guest is not running at VMPL-0 */ #define GHCB_TERM_CPUID 4 /* CPUID-validation failure */ #define GHCB_TERM_CPUID_HV 5 /* CPUID failure during hypervisor fallback */ +#define 
GHCB_TERM_SECRETS_PAGE 6 /* Secrets page failure */ +#define GHCB_TERM_NO_SVSM 7 /* SVSM is not advertised in the secrets page */ +#define GHCB_TERM_SVSM_VMPL0 8 /* SVSM is present but has set VMPL to 0 */ +#define GHCB_TERM_SVSM_CAA 9 /* SVSM is present but CAA is not page aligned */ #define GHCB_RESP_CODE(v) ((v) & GHCB_MSR_INFO_MASK) diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index ca20cc4e5826..ac5886ce252e 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -152,10 +152,119 @@ struct snp_secrets_page { u8 vmpck2[VMPCK_KEY_LEN]; u8 vmpck3[VMPCK_KEY_LEN]; struct secrets_os_area os_area; - u8 rsvd3[3840]; + + u8 vmsa_tweak_bitmap[64]; + + /* SVSM fields */ + u64 svsm_base; + u64 svsm_size; + u64 svsm_caa; + u32 svsm_max_version; + u8 svsm_guest_vmpl; + u8 rsvd3[3]; + + /* Remainder of page */ + u8 rsvd4[3744]; } __packed; +/* + * The SVSM Calling Area (CA) related structures. + */ +struct svsm_ca { + u8 call_pending; + u8 mem_available; + u8 rsvd1[6]; + + u8 svsm_buffer[PAGE_SIZE - 8]; +}; + +#define SVSM_SUCCESS 0 +#define SVSM_ERR_INCOMPLETE 0x80000000 +#define SVSM_ERR_UNSUPPORTED_PROTOCOL 0x80000001 +#define SVSM_ERR_UNSUPPORTED_CALL 0x80000002 +#define SVSM_ERR_INVALID_ADDRESS 0x80000003 +#define SVSM_ERR_INVALID_FORMAT 0x80000004 +#define SVSM_ERR_INVALID_PARAMETER 0x80000005 +#define SVSM_ERR_INVALID_REQUEST 0x80000006 +#define SVSM_ERR_BUSY 0x80000007 +#define SVSM_PVALIDATE_FAIL_SIZEMISMATCH 0x80001006 + +/* + * The SVSM PVALIDATE related structures + */ +struct svsm_pvalidate_entry { + u64 page_size : 2, + action : 1, + ignore_cf : 1, + rsvd : 8, + pfn : 52; +}; + +struct svsm_pvalidate_call { + u16 num_entries; + u16 cur_index; + + u8 rsvd1[4]; + + struct svsm_pvalidate_entry entry[]; +}; + +#define SVSM_PVALIDATE_MAX_COUNT ((sizeof_field(struct svsm_ca, svsm_buffer) - \ + offsetof(struct svsm_pvalidate_call, entry)) / \ + sizeof(struct svsm_pvalidate_entry)) + +/* + * The SVSM Attestation related structures + */ +struct svsm_loc_entry { + u64 pa; + u32 len; + u8 rsvd[4]; +}; + +struct svsm_attest_call { + struct svsm_loc_entry report_buf; + struct svsm_loc_entry nonce; + struct svsm_loc_entry manifest_buf; + struct svsm_loc_entry certificates_buf; + + /* For attesting a single service */ + u8 service_guid[16]; + u32 service_manifest_ver; + u8 rsvd[4]; +}; + +/* + * SVSM protocol structure + */ +struct svsm_call { + struct svsm_ca *caa; + u64 rax; + u64 rcx; + u64 rdx; + u64 r8; + u64 r9; + u64 rax_out; + u64 rcx_out; + u64 rdx_out; + u64 r8_out; + u64 r9_out; +}; + +#define SVSM_CORE_CALL(x) ((0ULL << 32) | (x)) +#define SVSM_CORE_REMAP_CA 0 +#define SVSM_CORE_PVALIDATE 1 +#define SVSM_CORE_CREATE_VCPU 2 +#define SVSM_CORE_DELETE_VCPU 3 + +#define SVSM_ATTEST_CALL(x) ((1ULL << 32) | (x)) +#define SVSM_ATTEST_SERVICES 0 +#define SVSM_ATTEST_SINGLE_SERVICE 1 + #ifdef CONFIG_AMD_MEM_ENCRYPT + +extern u8 snp_vmpl; + extern void __sev_es_ist_enter(struct pt_regs *regs); extern void __sev_es_ist_exit(void); static __always_inline void sev_es_ist_enter(struct pt_regs *regs) @@ -181,6 +290,14 @@ static __always_inline void sev_es_nmi_complete(void) extern int __init sev_es_efi_map_ghcbs(pgd_t *pgd); extern void sev_enable(struct boot_params *bp); +/* + * RMPADJUST modifies the RMP permissions of a page of a lesser- + * privileged (numerically higher) VMPL. + * + * If the guest is running at a higher-privilege than the privilege + * level the instruction is targeting, the instruction will succeed, + * otherwise, it will fail. 
+ */ static inline int rmpadjust(unsigned long vaddr, bool rmp_psize, unsigned long attrs) { int rc; @@ -225,11 +342,16 @@ bool snp_init(struct boot_params *bp); void __noreturn snp_abort(void); void snp_dmi_setup(void); int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio); +int snp_issue_svsm_attest_req(u64 call_id, struct svsm_call *call, struct svsm_attest_call *input); void snp_accept_memory(phys_addr_t start, phys_addr_t end); u64 snp_get_unsupported_features(u64 status); u64 sev_get_status(void); void sev_show_status(void); -#else +void snp_update_svsm_ca(void); + +#else /* !CONFIG_AMD_MEM_ENCRYPT */ + +#define snp_vmpl 0 static inline void sev_es_ist_enter(struct pt_regs *regs) { } static inline void sev_es_ist_exit(void) { } static inline int sev_es_setup_ap_jump_table(struct real_mode_header *rmh) { return 0; } @@ -253,12 +375,17 @@ static inline int snp_issue_guest_request(u64 exit_code, struct snp_req_data *in { return -ENOTTY; } - +static inline int snp_issue_svsm_attest_req(u64 call_id, struct svsm_call *call, struct svsm_attest_call *input) +{ + return -ENOTTY; +} static inline void snp_accept_memory(phys_addr_t start, phys_addr_t end) { } static inline u64 snp_get_unsupported_features(u64 status) { return 0; } static inline u64 sev_get_status(void) { return 0; } static inline void sev_show_status(void) { } -#endif +static inline void snp_update_svsm_ca(void) { } + +#endif /* CONFIG_AMD_MEM_ENCRYPT */ #ifdef CONFIG_KVM_AMD_SEV bool snp_probe_rmptable_info(void); diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h index 80e1df482337..1814b413fd57 100644 --- a/arch/x86/include/uapi/asm/svm.h +++ b/arch/x86/include/uapi/asm/svm.h @@ -115,6 +115,7 @@ #define SVM_VMGEXIT_AP_CREATE_ON_INIT 0 #define SVM_VMGEXIT_AP_CREATE 1 #define SVM_VMGEXIT_AP_DESTROY 2 +#define SVM_VMGEXIT_SNP_RUN_VMPL 0x80000018 #define SVM_VMGEXIT_HV_FEATURES 0x8000fffd #define SVM_VMGEXIT_TERM_REQUEST 0x8000fffe #define SVM_VMGEXIT_TERM_REASON(reason_set, reason_code) \ diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 20a0dd51700a..a847180836e4 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -17,7 +17,6 @@ CFLAGS_REMOVE_ftrace.o = -pg CFLAGS_REMOVE_early_printk.o = -pg CFLAGS_REMOVE_head64.o = -pg CFLAGS_REMOVE_head32.o = -pg -CFLAGS_REMOVE_sev.o = -pg CFLAGS_REMOVE_rethook.o = -pg endif @@ -26,19 +25,16 @@ KASAN_SANITIZE_dumpstack.o := n KASAN_SANITIZE_dumpstack_$(BITS).o := n KASAN_SANITIZE_stacktrace.o := n KASAN_SANITIZE_paravirt.o := n -KASAN_SANITIZE_sev.o := n # With some compiler versions the generated code results in boot hangs, caused # by several compilation units. To be safe, disable all instrumentation. KCSAN_SANITIZE := n KMSAN_SANITIZE_head$(BITS).o := n KMSAN_SANITIZE_nmi.o := n -KMSAN_SANITIZE_sev.o := n # If instrumentation of the following files is enabled, boot hangs during # first second. 
KCOV_INSTRUMENT_head$(BITS).o := n -KCOV_INSTRUMENT_sev.o := n CFLAGS_irq.o := -I $(src)/../include/asm/trace @@ -142,8 +138,6 @@ obj-$(CONFIG_UNWINDER_ORC) += unwind_orc.o obj-$(CONFIG_UNWINDER_FRAME_POINTER) += unwind_frame.o obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o -obj-$(CONFIG_AMD_MEM_ENCRYPT) += sev.o - obj-$(CONFIG_CFI_CLANG) += cfi.o obj-$(CONFIG_CALL_THUNKS) += callthunks.o diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c index e7b67519ddb5..86a476a426c2 100644 --- a/arch/x86/mm/mem_encrypt_amd.c +++ b/arch/x86/mm/mem_encrypt_amd.c @@ -2,7 +2,7 @@ /* * AMD Memory Encryption Support * - * Copyright (C) 2016 Advanced Micro Devices, Inc. + * Copyright (C) 2016-2024 Advanced Micro Devices, Inc. * * Author: Tom Lendacky <thomas.lendacky@amd.com> */ @@ -510,6 +510,12 @@ void __init sme_early_init(void) */ x86_init.resources.dmi_setup = snp_dmi_setup; } + + /* + * Switch the SVSM CA mapping (if active) from identity mapped to + * kernel mapped. + */ + snp_update_svsm_ca(); } void __init mem_encrypt_free_decrypted_mem(void) diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c index 0ae10535c699..0ce17766c0e5 100644 --- a/arch/x86/virt/svm/sev.c +++ b/arch/x86/virt/svm/sev.c @@ -120,7 +120,7 @@ static __init void snp_enable(void *arg) bool snp_probe_rmptable_info(void) { - u64 max_rmp_pfn, calc_rmp_sz, rmp_sz, rmp_base, rmp_end; + u64 rmp_sz, rmp_base, rmp_end; rdmsrl(MSR_AMD64_RMP_BASE, rmp_base); rdmsrl(MSR_AMD64_RMP_END, rmp_end); @@ -137,28 +137,11 @@ bool snp_probe_rmptable_info(void) rmp_sz = rmp_end - rmp_base + 1; - /* - * Calculate the amount the memory that must be reserved by the BIOS to - * address the whole RAM, including the bookkeeping area. The RMP itself - * must also be covered. - */ - max_rmp_pfn = max_pfn; - if (PHYS_PFN(rmp_end) > max_pfn) - max_rmp_pfn = PHYS_PFN(rmp_end); - - calc_rmp_sz = (max_rmp_pfn << 4) + RMPTABLE_CPU_BOOKKEEPING_SZ; - - if (calc_rmp_sz > rmp_sz) { - pr_err("Memory reserved for the RMP table does not cover full system RAM (expected 0x%llx got 0x%llx)\n", - calc_rmp_sz, rmp_sz); - return false; - } - probed_rmp_base = rmp_base; probed_rmp_size = rmp_sz; pr_info("RMP table physical range [0x%016llx - 0x%016llx]\n", - probed_rmp_base, probed_rmp_base + probed_rmp_size - 1); + rmp_base, rmp_end); return true; } @@ -206,9 +189,8 @@ void __init snp_fixup_e820_tables(void) */ static int __init snp_rmptable_init(void) { + u64 max_rmp_pfn, calc_rmp_sz, rmptable_size, rmp_end, val; void *rmptable_start; - u64 rmptable_size; - u64 val; if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) return 0; @@ -219,10 +201,28 @@ static int __init snp_rmptable_init(void) if (!probed_rmp_size) goto nosnp; + rmp_end = probed_rmp_base + probed_rmp_size - 1; + + /* + * Calculate the amount the memory that must be reserved by the BIOS to + * address the whole RAM, including the bookkeeping area. The RMP itself + * must also be covered. 
+ */ + max_rmp_pfn = max_pfn; + if (PFN_UP(rmp_end) > max_pfn) + max_rmp_pfn = PFN_UP(rmp_end); + + calc_rmp_sz = (max_rmp_pfn << 4) + RMPTABLE_CPU_BOOKKEEPING_SZ; + if (calc_rmp_sz > probed_rmp_size) { + pr_err("Memory reserved for the RMP table does not cover full system RAM (expected 0x%llx got 0x%llx)\n", + calc_rmp_sz, probed_rmp_size); + goto nosnp; + } + rmptable_start = memremap(probed_rmp_base, probed_rmp_size, MEMREMAP_WB); if (!rmptable_start) { pr_err("Failed to map RMP table\n"); - return 1; + goto nosnp; } /* diff --git a/drivers/virt/coco/sev-guest/sev-guest.c b/drivers/virt/coco/sev-guest/sev-guest.c index 654290a8e1ba..f714009b9ff7 100644 --- a/drivers/virt/coco/sev-guest/sev-guest.c +++ b/drivers/virt/coco/sev-guest/sev-guest.c @@ -2,7 +2,7 @@ /* * AMD Secure Encrypted Virtualization (SEV) guest driver interface * - * Copyright (C) 2021 Advanced Micro Devices, Inc. + * Copyright (C) 2021-2024 Advanced Micro Devices, Inc. * * Author: Brijesh Singh <brijesh.singh@amd.com> */ @@ -23,6 +23,7 @@ #include <linux/sockptr.h> #include <linux/cleanup.h> #include <linux/uuid.h> +#include <linux/configfs.h> #include <uapi/linux/sev-guest.h> #include <uapi/linux/psp-sev.h> @@ -38,6 +39,8 @@ #define SNP_REQ_MAX_RETRY_DURATION (60*HZ) #define SNP_REQ_RETRY_DELAY (2*HZ) +#define SVSM_MAX_RETRIES 3 + struct snp_guest_crypto { struct crypto_aead *tfm; u8 *iv, *authtag; @@ -70,8 +73,15 @@ struct snp_guest_dev { u8 *vmpck; }; -static u32 vmpck_id; -module_param(vmpck_id, uint, 0444); +/* + * The VMPCK ID represents the key used by the SNP guest to communicate with the + * SEV firmware in the AMD Secure Processor (ASP, aka PSP). By default, the key + * used will be the key associated with the VMPL at which the guest is running. + * Should the default key be wiped (see snp_disable_vmpck()), this parameter + * allows for using one of the remaining VMPCKs. + */ +static int vmpck_id = -1; +module_param(vmpck_id, int, 0444); MODULE_PARM_DESC(vmpck_id, "The VMPCK ID to use when communicating with the PSP."); /* Mutex to serialize the shared buffer access and command handling. 
*/ @@ -783,6 +793,143 @@ struct snp_msg_cert_entry { u32 length; }; +static int sev_svsm_report_new(struct tsm_report *report, void *data) +{ + unsigned int rep_len, man_len, certs_len; + struct tsm_desc *desc = &report->desc; + struct svsm_attest_call ac = {}; + unsigned int retry_count; + void *rep, *man, *certs; + struct svsm_call call; + unsigned int size; + bool try_again; + void *buffer; + u64 call_id; + int ret; + + /* + * Allocate pages for the request: + * - Report blob (4K) + * - Manifest blob (4K) + * - Certificate blob (16K) + * + * Above addresses must be 4K aligned + */ + rep_len = SZ_4K; + man_len = SZ_4K; + certs_len = SEV_FW_BLOB_MAX_SIZE; + + guard(mutex)(&snp_cmd_mutex); + + if (guid_is_null(&desc->service_guid)) { + call_id = SVSM_ATTEST_CALL(SVSM_ATTEST_SERVICES); + } else { + export_guid(ac.service_guid, &desc->service_guid); + ac.service_manifest_ver = desc->service_manifest_version; + + call_id = SVSM_ATTEST_CALL(SVSM_ATTEST_SINGLE_SERVICE); + } + + retry_count = 0; + +retry: + memset(&call, 0, sizeof(call)); + + size = rep_len + man_len + certs_len; + buffer = alloc_pages_exact(size, __GFP_ZERO); + if (!buffer) + return -ENOMEM; + + rep = buffer; + ac.report_buf.pa = __pa(rep); + ac.report_buf.len = rep_len; + + man = rep + rep_len; + ac.manifest_buf.pa = __pa(man); + ac.manifest_buf.len = man_len; + + certs = man + man_len; + ac.certificates_buf.pa = __pa(certs); + ac.certificates_buf.len = certs_len; + + ac.nonce.pa = __pa(desc->inblob); + ac.nonce.len = desc->inblob_len; + + ret = snp_issue_svsm_attest_req(call_id, &call, &ac); + if (ret) { + free_pages_exact(buffer, size); + + switch (call.rax_out) { + case SVSM_ERR_INVALID_PARAMETER: + try_again = false; + + if (ac.report_buf.len > rep_len) { + rep_len = PAGE_ALIGN(ac.report_buf.len); + try_again = true; + } + + if (ac.manifest_buf.len > man_len) { + man_len = PAGE_ALIGN(ac.manifest_buf.len); + try_again = true; + } + + if (ac.certificates_buf.len > certs_len) { + certs_len = PAGE_ALIGN(ac.certificates_buf.len); + try_again = true; + } + + /* If one of the buffers wasn't large enough, retry the request */ + if (try_again && retry_count < SVSM_MAX_RETRIES) { + retry_count++; + goto retry; + } + + return -EINVAL; + default: + pr_err_ratelimited("SVSM attestation request failed (%d / 0x%llx)\n", + ret, call.rax_out); + return -EINVAL; + } + } + + /* + * Allocate all the blob memory buffers at once so that the cleanup is + * done for errors that occur after the first allocation (i.e. before + * using no_free_ptr()). + */ + rep_len = ac.report_buf.len; + void *rbuf __free(kvfree) = kvzalloc(rep_len, GFP_KERNEL); + + man_len = ac.manifest_buf.len; + void *mbuf __free(kvfree) = kvzalloc(man_len, GFP_KERNEL); + + certs_len = ac.certificates_buf.len; + void *cbuf __free(kvfree) = certs_len ? 
kvzalloc(certs_len, GFP_KERNEL) : NULL; + + if (!rbuf || !mbuf || (certs_len && !cbuf)) { + free_pages_exact(buffer, size); + return -ENOMEM; + } + + memcpy(rbuf, rep, rep_len); + report->outblob = no_free_ptr(rbuf); + report->outblob_len = rep_len; + + memcpy(mbuf, man, man_len); + report->manifestblob = no_free_ptr(mbuf); + report->manifestblob_len = man_len; + + if (certs_len) { + memcpy(cbuf, certs, certs_len); + report->auxblob = no_free_ptr(cbuf); + report->auxblob_len = certs_len; + } + + free_pages_exact(buffer, size); + + return 0; +} + static int sev_report_new(struct tsm_report *report, void *data) { struct snp_msg_cert_entry *cert_table; @@ -797,6 +944,13 @@ static int sev_report_new(struct tsm_report *report, void *data) if (desc->inblob_len != SNP_REPORT_USER_DATA_SIZE) return -EINVAL; + if (desc->service_provider) { + if (strcmp(desc->service_provider, "svsm")) + return -EINVAL; + + return sev_svsm_report_new(report, data); + } + void *buf __free(kvfree) = kvzalloc(size, GFP_KERNEL); if (!buf) return -ENOMEM; @@ -885,9 +1039,42 @@ static int sev_report_new(struct tsm_report *report, void *data) return 0; } -static const struct tsm_ops sev_tsm_ops = { +static bool sev_report_attr_visible(int n) +{ + switch (n) { + case TSM_REPORT_GENERATION: + case TSM_REPORT_PROVIDER: + case TSM_REPORT_PRIVLEVEL: + case TSM_REPORT_PRIVLEVEL_FLOOR: + return true; + case TSM_REPORT_SERVICE_PROVIDER: + case TSM_REPORT_SERVICE_GUID: + case TSM_REPORT_SERVICE_MANIFEST_VER: + return snp_vmpl; + } + + return false; +} + +static bool sev_report_bin_attr_visible(int n) +{ + switch (n) { + case TSM_REPORT_INBLOB: + case TSM_REPORT_OUTBLOB: + case TSM_REPORT_AUXBLOB: + return true; + case TSM_REPORT_MANIFESTBLOB: + return snp_vmpl; + } + + return false; +} + +static struct tsm_ops sev_tsm_ops = { .name = KBUILD_MODNAME, .report_new = sev_report_new, + .report_attr_visible = sev_report_attr_visible, + .report_bin_attr_visible = sev_report_bin_attr_visible, }; static void unregister_sev_tsm(void *data) @@ -923,6 +1110,10 @@ static int __init sev_guest_probe(struct platform_device *pdev) if (!snp_dev) goto e_unmap; + /* Adjust the default VMPCK key based on the executing VMPL level */ + if (vmpck_id == -1) + vmpck_id = snp_vmpl; + ret = -EINVAL; snp_dev->vmpck = get_vmpck(vmpck_id, secrets, &snp_dev->os_area_msg_seqno); if (!snp_dev->vmpck) { @@ -968,7 +1159,10 @@ static int __init sev_guest_probe(struct platform_device *pdev) snp_dev->input.resp_gpa = __pa(snp_dev->response); snp_dev->input.data_gpa = __pa(snp_dev->certs_data); - ret = tsm_register(&sev_tsm_ops, snp_dev, &tsm_report_extra_type); + /* Set the privlevel_floor attribute based on the vmpck_id */ + sev_tsm_ops.privlevel_floor = vmpck_id; + + ret = tsm_register(&sev_tsm_ops, snp_dev); if (ret) goto e_free_cert_data; @@ -1009,8 +1203,13 @@ static void __exit sev_guest_remove(struct platform_device *pdev) * This driver is meant to be a common SEV guest interface driver and to * support any SEV guest API. As such, even though it has been introduced * with the SEV-SNP support, it is named "sev-guest". + * + * sev_guest_remove() lives in .exit.text. For drivers registered via + * module_platform_driver_probe() this is ok because they cannot get unbound + * at runtime. So mark the driver struct with __refdata to prevent modpost + * triggering a section mismatch warning. 
*/ -static struct platform_driver sev_guest_driver = { +static struct platform_driver sev_guest_driver __refdata = { .remove_new = __exit_p(sev_guest_remove), .driver = { .name = "sev-guest", diff --git a/drivers/virt/coco/tdx-guest/tdx-guest.c b/drivers/virt/coco/tdx-guest/tdx-guest.c index 1253bf76b570..2acba56ad42e 100644 --- a/drivers/virt/coco/tdx-guest/tdx-guest.c +++ b/drivers/virt/coco/tdx-guest/tdx-guest.c @@ -249,6 +249,28 @@ done: return ret; } +static bool tdx_report_attr_visible(int n) +{ + switch (n) { + case TSM_REPORT_GENERATION: + case TSM_REPORT_PROVIDER: + return true; + } + + return false; +} + +static bool tdx_report_bin_attr_visible(int n) +{ + switch (n) { + case TSM_REPORT_INBLOB: + case TSM_REPORT_OUTBLOB: + return true; + } + + return false; +} + static long tdx_guest_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -281,6 +303,8 @@ MODULE_DEVICE_TABLE(x86cpu, tdx_guest_ids); static const struct tsm_ops tdx_tsm_ops = { .name = KBUILD_MODNAME, .report_new = tdx_report_new, + .report_attr_visible = tdx_report_attr_visible, + .report_bin_attr_visible = tdx_report_bin_attr_visible, }; static int __init tdx_guest_init(void) @@ -301,7 +325,7 @@ static int __init tdx_guest_init(void) goto free_misc; } - ret = tsm_register(&tdx_tsm_ops, NULL, NULL); + ret = tsm_register(&tdx_tsm_ops, NULL); if (ret) goto free_quote; diff --git a/drivers/virt/coco/tsm.c b/drivers/virt/coco/tsm.c index d1c2db83a8ca..9432d4e303f1 100644 --- a/drivers/virt/coco/tsm.c +++ b/drivers/virt/coco/tsm.c @@ -14,7 +14,6 @@ static struct tsm_provider { const struct tsm_ops *ops; - const struct config_item_type *type; void *data; } provider; static DECLARE_RWSEM(tsm_rwsem); @@ -35,7 +34,7 @@ static DECLARE_RWSEM(tsm_rwsem); * The attestation report format is TSM provider specific, when / if a standard * materializes that can be published instead of the vendor layout. Until then * the 'provider' attribute indicates the format of 'outblob', and optionally - * 'auxblob'. + * 'auxblob' and 'manifestblob'. */ struct tsm_report_state { @@ -48,6 +47,7 @@ struct tsm_report_state { enum tsm_data_select { TSM_REPORT, TSM_CERTS, + TSM_MANIFEST, }; static struct tsm_report *to_tsm_report(struct config_item *cfg) @@ -119,6 +119,74 @@ static ssize_t tsm_report_privlevel_floor_show(struct config_item *cfg, } CONFIGFS_ATTR_RO(tsm_report_, privlevel_floor); +static ssize_t tsm_report_service_provider_store(struct config_item *cfg, + const char *buf, size_t len) +{ + struct tsm_report *report = to_tsm_report(cfg); + size_t sp_len; + char *sp; + int rc; + + guard(rwsem_write)(&tsm_rwsem); + rc = try_advance_write_generation(report); + if (rc) + return rc; + + sp_len = (buf[len - 1] != '\n') ? 
len : len - 1; + + sp = kstrndup(buf, sp_len, GFP_KERNEL); + if (!sp) + return -ENOMEM; + kfree(report->desc.service_provider); + + report->desc.service_provider = sp; + + return len; +} +CONFIGFS_ATTR_WO(tsm_report_, service_provider); + +static ssize_t tsm_report_service_guid_store(struct config_item *cfg, + const char *buf, size_t len) +{ + struct tsm_report *report = to_tsm_report(cfg); + int rc; + + guard(rwsem_write)(&tsm_rwsem); + rc = try_advance_write_generation(report); + if (rc) + return rc; + + report->desc.service_guid = guid_null; + + rc = guid_parse(buf, &report->desc.service_guid); + if (rc) + return rc; + + return len; +} +CONFIGFS_ATTR_WO(tsm_report_, service_guid); + +static ssize_t tsm_report_service_manifest_version_store(struct config_item *cfg, + const char *buf, size_t len) +{ + struct tsm_report *report = to_tsm_report(cfg); + unsigned int val; + int rc; + + rc = kstrtouint(buf, 0, &val); + if (rc) + return rc; + + guard(rwsem_write)(&tsm_rwsem); + rc = try_advance_write_generation(report); + if (rc) + return rc; + report->desc.service_manifest_version = val; + + return len; +} +CONFIGFS_ATTR_WO(tsm_report_, service_manifest_version); + static ssize_t tsm_report_inblob_write(struct config_item *cfg, const void *buf, size_t count) { @@ -163,6 +231,9 @@ static ssize_t __read_report(struct tsm_report *report, void *buf, size_t count, if (select == TSM_REPORT) { out = report->outblob; len = report->outblob_len; + } else if (select == TSM_MANIFEST) { + out = report->manifestblob; + len = report->manifestblob_len; } else { out = report->auxblob; len = report->auxblob_len; @@ -188,7 +259,7 @@ static ssize_t read_cached_report(struct tsm_report *report, void *buf, /* * A given TSM backend always fills in ->outblob regardless of - * whether the report includes an auxblob or not. + * whether the report includes an auxblob/manifestblob or not. 
*/ if (!report->outblob || state->read_generation != state->write_generation) @@ -224,8 +295,10 @@ static ssize_t tsm_report_read(struct tsm_report *report, void *buf, kvfree(report->outblob); kvfree(report->auxblob); + kvfree(report->manifestblob); report->outblob = NULL; report->auxblob = NULL; + report->manifestblob = NULL; rc = ops->report_new(report, provider.data); if (rc < 0) return rc; @@ -252,34 +325,31 @@ static ssize_t tsm_report_auxblob_read(struct config_item *cfg, void *buf, } CONFIGFS_BIN_ATTR_RO(tsm_report_, auxblob, NULL, TSM_OUTBLOB_MAX); -#define TSM_DEFAULT_ATTRS() \ - &tsm_report_attr_generation, \ - &tsm_report_attr_provider +static ssize_t tsm_report_manifestblob_read(struct config_item *cfg, void *buf, + size_t count) +{ + struct tsm_report *report = to_tsm_report(cfg); -static struct configfs_attribute *tsm_report_attrs[] = { - TSM_DEFAULT_ATTRS(), - NULL, -}; + return tsm_report_read(report, buf, count, TSM_MANIFEST); +} +CONFIGFS_BIN_ATTR_RO(tsm_report_, manifestblob, NULL, TSM_OUTBLOB_MAX); -static struct configfs_attribute *tsm_report_extra_attrs[] = { - TSM_DEFAULT_ATTRS(), - &tsm_report_attr_privlevel, - &tsm_report_attr_privlevel_floor, +static struct configfs_attribute *tsm_report_attrs[] = { + [TSM_REPORT_GENERATION] = &tsm_report_attr_generation, + [TSM_REPORT_PROVIDER] = &tsm_report_attr_provider, + [TSM_REPORT_PRIVLEVEL] = &tsm_report_attr_privlevel, + [TSM_REPORT_PRIVLEVEL_FLOOR] = &tsm_report_attr_privlevel_floor, + [TSM_REPORT_SERVICE_PROVIDER] = &tsm_report_attr_service_provider, + [TSM_REPORT_SERVICE_GUID] = &tsm_report_attr_service_guid, + [TSM_REPORT_SERVICE_MANIFEST_VER] = &tsm_report_attr_service_manifest_version, NULL, }; -#define TSM_DEFAULT_BIN_ATTRS() \ - &tsm_report_attr_inblob, \ - &tsm_report_attr_outblob - static struct configfs_bin_attribute *tsm_report_bin_attrs[] = { - TSM_DEFAULT_BIN_ATTRS(), - NULL, -}; - -static struct configfs_bin_attribute *tsm_report_bin_extra_attrs[] = { - TSM_DEFAULT_BIN_ATTRS(), - &tsm_report_attr_auxblob, + [TSM_REPORT_INBLOB] = &tsm_report_attr_inblob, + [TSM_REPORT_OUTBLOB] = &tsm_report_attr_outblob, + [TSM_REPORT_AUXBLOB] = &tsm_report_attr_auxblob, + [TSM_REPORT_MANIFESTBLOB] = &tsm_report_attr_manifestblob, NULL, }; @@ -288,8 +358,10 @@ static void tsm_report_item_release(struct config_item *cfg) struct tsm_report *report = to_tsm_report(cfg); struct tsm_report_state *state = to_state(report); + kvfree(report->manifestblob); kvfree(report->auxblob); kvfree(report->outblob); + kfree(report->desc.service_provider); kfree(state); } @@ -297,21 +369,44 @@ static struct configfs_item_operations tsm_report_item_ops = { .release = tsm_report_item_release, }; -const struct config_item_type tsm_report_default_type = { - .ct_owner = THIS_MODULE, - .ct_bin_attrs = tsm_report_bin_attrs, - .ct_attrs = tsm_report_attrs, - .ct_item_ops = &tsm_report_item_ops, +static bool tsm_report_is_visible(struct config_item *item, + struct configfs_attribute *attr, int n) +{ + guard(rwsem_read)(&tsm_rwsem); + if (!provider.ops) + return false; + + if (!provider.ops->report_attr_visible) + return true; + + return provider.ops->report_attr_visible(n); +} + +static bool tsm_report_is_bin_visible(struct config_item *item, + struct configfs_bin_attribute *attr, int n) +{ + guard(rwsem_read)(&tsm_rwsem); + if (!provider.ops) + return false; + + if (!provider.ops->report_bin_attr_visible) + return true; + + return provider.ops->report_bin_attr_visible(n); +} + +static struct configfs_group_operations tsm_report_attr_group_ops = { + 
.is_visible = tsm_report_is_visible, + .is_bin_visible = tsm_report_is_bin_visible, }; -EXPORT_SYMBOL_GPL(tsm_report_default_type); -const struct config_item_type tsm_report_extra_type = { +static const struct config_item_type tsm_report_type = { .ct_owner = THIS_MODULE, - .ct_bin_attrs = tsm_report_bin_extra_attrs, - .ct_attrs = tsm_report_extra_attrs, + .ct_bin_attrs = tsm_report_bin_attrs, + .ct_attrs = tsm_report_attrs, .ct_item_ops = &tsm_report_item_ops, + .ct_group_ops = &tsm_report_attr_group_ops, }; -EXPORT_SYMBOL_GPL(tsm_report_extra_type); static struct config_item *tsm_report_make_item(struct config_group *group, const char *name) @@ -326,7 +421,7 @@ static struct config_item *tsm_report_make_item(struct config_group *group, if (!state) return ERR_PTR(-ENOMEM); - config_item_init_type_name(&state->cfg, name, provider.type); + config_item_init_type_name(&state->cfg, name, &tsm_report_type); return &state->cfg; } @@ -353,16 +448,10 @@ static struct configfs_subsystem tsm_configfs = { .su_mutex = __MUTEX_INITIALIZER(tsm_configfs.su_mutex), }; -int tsm_register(const struct tsm_ops *ops, void *priv, - const struct config_item_type *type) +int tsm_register(const struct tsm_ops *ops, void *priv) { const struct tsm_ops *conflict; - if (!type) - type = &tsm_report_default_type; - if (!(type == &tsm_report_default_type || type == &tsm_report_extra_type)) - return -EINVAL; - guard(rwsem_write)(&tsm_rwsem); conflict = provider.ops; if (conflict) { @@ -372,7 +461,6 @@ int tsm_register(const struct tsm_ops *ops, void *priv, provider.ops = ops; provider.data = priv; - provider.type = type; return 0; } EXPORT_SYMBOL_GPL(tsm_register); @@ -384,7 +472,6 @@ int tsm_unregister(const struct tsm_ops *ops) return -EBUSY; provider.ops = NULL; provider.data = NULL; - provider.type = NULL; return 0; } EXPORT_SYMBOL_GPL(tsm_unregister); diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 18677cd4e62f..43d6bde1adcc 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -580,6 +580,7 @@ static void detach_attrs(struct config_item * item) static int populate_attrs(struct config_item *item) { const struct config_item_type *t = item->ci_type; + struct configfs_group_operations *ops; struct configfs_attribute *attr; struct configfs_bin_attribute *bin_attr; int error = 0; @@ -587,14 +588,23 @@ static int populate_attrs(struct config_item *item) if (!t) return -EINVAL; + + ops = t->ct_group_ops; + if (t->ct_attrs) { for (i = 0; (attr = t->ct_attrs[i]) != NULL; i++) { + if (ops && ops->is_visible && !ops->is_visible(item, attr, i)) + continue; + if ((error = configfs_create_file(item, attr))) break; } } if (t->ct_bin_attrs) { for (i = 0; (bin_attr = t->ct_bin_attrs[i]) != NULL; i++) { + if (ops && ops->is_bin_visible && !ops->is_bin_visible(item, bin_attr, i)) + continue; + error = configfs_create_bin_file(item, bin_attr); if (error) break; diff --git a/include/linux/configfs.h b/include/linux/configfs.h index 2606711adb18..c771e9d0d0b9 100644 --- a/include/linux/configfs.h +++ b/include/linux/configfs.h @@ -216,6 +216,9 @@ struct configfs_group_operations { struct config_group *(*make_group)(struct config_group *group, const char *name); void (*disconnect_notify)(struct config_group *group, struct config_item *item); void (*drop_item)(struct config_group *group, struct config_item *item); + bool (*is_visible)(struct config_item *item, struct configfs_attribute *attr, int n); + bool (*is_bin_visible)(struct config_item *item, struct configfs_bin_attribute *attr, + int n); }; struct configfs_subsystem { 
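The two configfs hunks above add an optional per-attribute visibility hook: populate_attrs() now skips any attribute whose ->is_visible()/->is_bin_visible() callback returns false when the item directory is created. As a rough sketch of how a consumer might use the new hook (hypothetical names throughout, not part of this patch, assuming a module parameter controls the hidden attribute):

	#include <linux/configfs.h>
	#include <linux/module.h>
	#include <linux/sysfs.h>

	static bool expose_debug;	/* hypothetical knob controlling visibility */
	module_param(expose_debug, bool, 0444);

	static ssize_t example_value_show(struct config_item *item, char *page)
	{
		return sysfs_emit(page, "%d\n", 42);
	}
	CONFIGFS_ATTR_RO(example_, value);

	static ssize_t example_debug_show(struct config_item *item, char *page)
	{
		return sysfs_emit(page, "debug\n");
	}
	CONFIGFS_ATTR_RO(example_, debug);

	/* Array positions are the 'n' passed to ->is_visible() */
	enum { EXAMPLE_VALUE, EXAMPLE_DEBUG };

	static struct configfs_attribute *example_attrs[] = {
		[EXAMPLE_VALUE] = &example_attr_value,
		[EXAMPLE_DEBUG] = &example_attr_debug,
		NULL,
	};

	static bool example_is_visible(struct config_item *item,
				       struct configfs_attribute *attr, int n)
	{
		/* Suppress the "debug" file unless the knob was set at load time */
		return n != EXAMPLE_DEBUG || expose_debug;
	}

	static struct configfs_group_operations example_group_ops = {
		.is_visible	= example_is_visible,
	};

	/* Passed to config_item_init_type_name() by the subsystem's make_item() */
	static const struct config_item_type example_type = {
		.ct_owner	= THIS_MODULE,
		.ct_attrs	= example_attrs,
		.ct_group_ops	= &example_group_ops,
	};

The index 'n' handed to the callback is simply the attribute's position in the ct_attrs[]/ct_bin_attrs[] array, which is why the TSM code above switches over explicit enum indices.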
diff --git a/include/linux/tsm.h b/include/linux/tsm.h index de8324a2223c..11b0c525be30 100644 --- a/include/linux/tsm.h +++ b/include/linux/tsm.h @@ -4,6 +4,7 @@ #include <linux/sizes.h> #include <linux/types.h> +#include <linux/uuid.h> #define TSM_INBLOB_MAX 64 #define TSM_OUTBLOB_MAX SZ_32K @@ -19,11 +20,17 @@ * @privlevel: optional privilege level to associate with @outblob * @inblob_len: sizeof @inblob * @inblob: arbitrary input data + * @service_provider: optional name of where to obtain the tsm report blob + * @service_guid: optional service-provider service guid to attest + * @service_manifest_version: optional service-provider service manifest version requested */ struct tsm_desc { unsigned int privlevel; size_t inblob_len; u8 inblob[TSM_INBLOB_MAX]; + char *service_provider; + guid_t service_guid; + unsigned int service_manifest_version; }; /** @@ -33,6 +40,8 @@ struct tsm_desc { * @outblob: generated evidence to provider to the attestation agent * @auxblob_len: sizeof(@auxblob) * @auxblob: (optional) auxiliary data to the report (e.g. certificate data) + * @manifestblob_len: sizeof(@manifestblob) + * @manifestblob: (optional) manifest data associated with the report */ struct tsm_report { struct tsm_desc desc; @@ -40,6 +49,42 @@ struct tsm_report { u8 *outblob; size_t auxblob_len; u8 *auxblob; + size_t manifestblob_len; + u8 *manifestblob; +}; + +/** + * enum tsm_attr_index - index used to reference report attributes + * @TSM_REPORT_GENERATION: index of the report generation number attribute + * @TSM_REPORT_PROVIDER: index of the provider name attribute + * @TSM_REPORT_PRIVLEVEL: index of the desired privilege level attribute + * @TSM_REPORT_PRIVLEVEL_FLOOR: index of the minimum allowed privilege level attribute + * @TSM_REPORT_SERVICE_PROVIDER: index of the service provider identifier attribute + * @TSM_REPORT_SERVICE_GUID: index of the service GUID attribute + * @TSM_REPORT_SERVICE_MANIFEST_VER: index of the service manifest version attribute + */ +enum tsm_attr_index { + TSM_REPORT_GENERATION, + TSM_REPORT_PROVIDER, + TSM_REPORT_PRIVLEVEL, + TSM_REPORT_PRIVLEVEL_FLOOR, + TSM_REPORT_SERVICE_PROVIDER, + TSM_REPORT_SERVICE_GUID, + TSM_REPORT_SERVICE_MANIFEST_VER, +}; + +/** + * enum tsm_bin_attr_index - index used to reference binary report attributes + * @TSM_REPORT_INBLOB: index of the binary report input attribute + * @TSM_REPORT_OUTBLOB: index of the binary report output attribute + * @TSM_REPORT_AUXBLOB: index of the binary auxiliary data attribute + * @TSM_REPORT_MANIFESTBLOB: index of the binary manifest data attribute + */ +enum tsm_bin_attr_index { + TSM_REPORT_INBLOB, + TSM_REPORT_OUTBLOB, + TSM_REPORT_AUXBLOB, + TSM_REPORT_MANIFESTBLOB, }; /** @@ -48,22 +93,20 @@ struct tsm_report { * @privlevel_floor: convey base privlevel for nested scenarios * @report_new: Populate @report with the report blob and auxblob * (optional), return 0 on successful population, or -errno otherwise + * @report_attr_visible: show or hide a report attribute entry + * @report_bin_attr_visible: show or hide a report binary attribute entry * * Implementation specific ops, only one is expected to be registered at * a time i.e. only one of "sev-guest", "tdx-guest", etc. 
*/ struct tsm_ops { const char *name; - const unsigned int privlevel_floor; + unsigned int privlevel_floor; int (*report_new)(struct tsm_report *report, void *data); + bool (*report_attr_visible)(int n); + bool (*report_bin_attr_visible)(int n); }; -extern const struct config_item_type tsm_report_default_type; - -/* publish @privlevel, @privlevel_floor, and @auxblob attributes */ -extern const struct config_item_type tsm_report_extra_type; - -int tsm_register(const struct tsm_ops *ops, void *priv, - const struct config_item_type *type); +int tsm_register(const struct tsm_ops *ops, void *priv); int tsm_unregister(const struct tsm_ops *ops); #endif /* __TSM_H */
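With the config_item_type selection removed from the interface, a backend now registers only its ops and private data; the core always uses the single tsm_report_type and consults the visibility callbacks per attribute index. A minimal, hypothetical provider skeleton against the updated API might look like the following (only the tsm.h symbols are real; everything named example_* is illustrative):

	#include <linux/module.h>
	#include <linux/tsm.h>

	static int example_report_new(struct tsm_report *report, void *data)
	{
		/*
		 * A real provider fills report->outblob/outblob_len (and
		 * optionally auxblob/manifestblob) here; this stub just fails.
		 */
		return -ENXIO;
	}

	static bool example_report_attr_visible(int n)
	{
		switch (n) {
		case TSM_REPORT_GENERATION:
		case TSM_REPORT_PROVIDER:
			return true;
		}
		return false;
	}

	static bool example_report_bin_attr_visible(int n)
	{
		switch (n) {
		case TSM_REPORT_INBLOB:
		case TSM_REPORT_OUTBLOB:
			return true;
		}
		return false;
	}

	static struct tsm_ops example_tsm_ops = {
		.name			 = KBUILD_MODNAME,
		.report_new		 = example_report_new,
		.report_attr_visible	 = example_report_attr_visible,
		.report_bin_attr_visible = example_report_bin_attr_visible,
	};

	static int __init example_tsm_init(void)
	{
		/* privlevel_floor is no longer const, so it can be set here */
		example_tsm_ops.privlevel_floor = 0;
		return tsm_register(&example_tsm_ops, NULL);
	}
	module_init(example_tsm_init);

	static void __exit example_tsm_exit(void)
	{
		tsm_unregister(&example_tsm_ops);
	}
	module_exit(example_tsm_exit);

	MODULE_LICENSE("GPL");

If either visibility callback is left NULL, tsm_report_is_visible()/tsm_report_is_bin_visible() default to exposing every attribute, so existing providers that want the full set need no per-attribute logic.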