diff options
Diffstat (limited to 'arch/x86/kernel/acpi/madt_wakeup.c')
-rw-r--r-- | arch/x86/kernel/acpi/madt_wakeup.c | 292 |
1 files changed, 292 insertions, 0 deletions
diff --git a/arch/x86/kernel/acpi/madt_wakeup.c b/arch/x86/kernel/acpi/madt_wakeup.c new file mode 100644 index 000000000000..6cfe762be28b --- /dev/null +++ b/arch/x86/kernel/acpi/madt_wakeup.c @@ -0,0 +1,292 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +#include <linux/acpi.h> +#include <linux/cpu.h> +#include <linux/delay.h> +#include <linux/io.h> +#include <linux/kexec.h> +#include <linux/memblock.h> +#include <linux/pgtable.h> +#include <linux/sched/hotplug.h> +#include <asm/apic.h> +#include <asm/barrier.h> +#include <asm/init.h> +#include <asm/intel_pt.h> +#include <asm/nmi.h> +#include <asm/processor.h> +#include <asm/reboot.h> + +/* Physical address of the Multiprocessor Wakeup Structure mailbox */ +static u64 acpi_mp_wake_mailbox_paddr __ro_after_init; + +/* Virtual address of the Multiprocessor Wakeup Structure mailbox */ +static struct acpi_madt_multiproc_wakeup_mailbox *acpi_mp_wake_mailbox __ro_after_init; + +static u64 acpi_mp_pgd __ro_after_init; +static u64 acpi_mp_reset_vector_paddr __ro_after_init; + +static void acpi_mp_stop_this_cpu(void) +{ + asm_acpi_mp_play_dead(acpi_mp_reset_vector_paddr, acpi_mp_pgd); +} + +static void acpi_mp_play_dead(void) +{ + play_dead_common(); + asm_acpi_mp_play_dead(acpi_mp_reset_vector_paddr, acpi_mp_pgd); +} + +static void acpi_mp_cpu_die(unsigned int cpu) +{ + u32 apicid = per_cpu(x86_cpu_to_apicid, cpu); + unsigned long timeout; + + /* + * Use TEST mailbox command to prove that BIOS got control over + * the CPU before declaring it dead. + * + * BIOS has to clear 'command' field of the mailbox. + */ + acpi_mp_wake_mailbox->apic_id = apicid; + smp_store_release(&acpi_mp_wake_mailbox->command, + ACPI_MP_WAKE_COMMAND_TEST); + + /* Don't wait longer than a second. */ + timeout = USEC_PER_SEC; + while (READ_ONCE(acpi_mp_wake_mailbox->command) && --timeout) + udelay(1); + + if (!timeout) + pr_err("Failed to hand over CPU %d to BIOS\n", cpu); +} + +/* The argument is required to match type of x86_mapping_info::alloc_pgt_page */ +static void __init *alloc_pgt_page(void *dummy) +{ + return memblock_alloc(PAGE_SIZE, PAGE_SIZE); +} + +static void __init free_pgt_page(void *pgt, void *dummy) +{ + return memblock_free(pgt, PAGE_SIZE); +} + +/* + * Make sure asm_acpi_mp_play_dead() is present in the identity mapping at + * the same place as in the kernel page tables. asm_acpi_mp_play_dead() switches + * to the identity mapping and the function has be present at the same spot in + * the virtual address space before and after switching page tables. + */ +static int __init init_transition_pgtable(pgd_t *pgd) +{ + pgprot_t prot = PAGE_KERNEL_EXEC_NOENC; + unsigned long vaddr, paddr; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + vaddr = (unsigned long)asm_acpi_mp_play_dead; + pgd += pgd_index(vaddr); + if (!pgd_present(*pgd)) { + p4d = (p4d_t *)alloc_pgt_page(NULL); + if (!p4d) + return -ENOMEM; + set_pgd(pgd, __pgd(__pa(p4d) | _KERNPG_TABLE)); + } + p4d = p4d_offset(pgd, vaddr); + if (!p4d_present(*p4d)) { + pud = (pud_t *)alloc_pgt_page(NULL); + if (!pud) + return -ENOMEM; + set_p4d(p4d, __p4d(__pa(pud) | _KERNPG_TABLE)); + } + pud = pud_offset(p4d, vaddr); + if (!pud_present(*pud)) { + pmd = (pmd_t *)alloc_pgt_page(NULL); + if (!pmd) + return -ENOMEM; + set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE)); + } + pmd = pmd_offset(pud, vaddr); + if (!pmd_present(*pmd)) { + pte = (pte_t *)alloc_pgt_page(NULL); + if (!pte) + return -ENOMEM; + set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE)); + } + pte = pte_offset_kernel(pmd, vaddr); + + paddr = __pa(vaddr); + set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, prot)); + + return 0; +} + +static int __init acpi_mp_setup_reset(u64 reset_vector) +{ + struct x86_mapping_info info = { + .alloc_pgt_page = alloc_pgt_page, + .free_pgt_page = free_pgt_page, + .page_flag = __PAGE_KERNEL_LARGE_EXEC, + .kernpg_flag = _KERNPG_TABLE_NOENC, + }; + pgd_t *pgd; + + pgd = alloc_pgt_page(NULL); + if (!pgd) + return -ENOMEM; + + for (int i = 0; i < nr_pfn_mapped; i++) { + unsigned long mstart, mend; + + mstart = pfn_mapped[i].start << PAGE_SHIFT; + mend = pfn_mapped[i].end << PAGE_SHIFT; + if (kernel_ident_mapping_init(&info, pgd, mstart, mend)) { + kernel_ident_mapping_free(&info, pgd); + return -ENOMEM; + } + } + + if (kernel_ident_mapping_init(&info, pgd, + PAGE_ALIGN_DOWN(reset_vector), + PAGE_ALIGN(reset_vector + 1))) { + kernel_ident_mapping_free(&info, pgd); + return -ENOMEM; + } + + if (init_transition_pgtable(pgd)) { + kernel_ident_mapping_free(&info, pgd); + return -ENOMEM; + } + + smp_ops.play_dead = acpi_mp_play_dead; + smp_ops.stop_this_cpu = acpi_mp_stop_this_cpu; + smp_ops.cpu_die = acpi_mp_cpu_die; + + acpi_mp_reset_vector_paddr = reset_vector; + acpi_mp_pgd = __pa(pgd); + + return 0; +} + +static int acpi_wakeup_cpu(u32 apicid, unsigned long start_ip) +{ + if (!acpi_mp_wake_mailbox_paddr) { + pr_warn_once("No MADT mailbox: cannot bringup secondary CPUs. Booting with kexec?\n"); + return -EOPNOTSUPP; + } + + /* + * Remap mailbox memory only for the first call to acpi_wakeup_cpu(). + * + * Wakeup of secondary CPUs is fully serialized in the core code. + * No need to protect acpi_mp_wake_mailbox from concurrent accesses. + */ + if (!acpi_mp_wake_mailbox) { + acpi_mp_wake_mailbox = memremap(acpi_mp_wake_mailbox_paddr, + sizeof(*acpi_mp_wake_mailbox), + MEMREMAP_WB); + } + + /* + * Mailbox memory is shared between the firmware and OS. Firmware will + * listen on mailbox command address, and once it receives the wakeup + * command, the CPU associated with the given apicid will be booted. + * + * The value of 'apic_id' and 'wakeup_vector' must be visible to the + * firmware before the wakeup command is visible. smp_store_release() + * ensures ordering and visibility. + */ + acpi_mp_wake_mailbox->apic_id = apicid; + acpi_mp_wake_mailbox->wakeup_vector = start_ip; + smp_store_release(&acpi_mp_wake_mailbox->command, + ACPI_MP_WAKE_COMMAND_WAKEUP); + + /* + * Wait for the CPU to wake up. + * + * The CPU being woken up is essentially in a spin loop waiting to be + * woken up. It should not take long for it wake up and acknowledge by + * zeroing out ->command. + * + * ACPI specification doesn't provide any guidance on how long kernel + * has to wait for a wake up acknowledgment. It also doesn't provide + * a way to cancel a wake up request if it takes too long. + * + * In TDX environment, the VMM has control over how long it takes to + * wake up secondary. It can postpone scheduling secondary vCPU + * indefinitely. Giving up on wake up request and reporting error opens + * possible attack vector for VMM: it can wake up a secondary CPU when + * kernel doesn't expect it. Wait until positive result of the wake up + * request. + */ + while (READ_ONCE(acpi_mp_wake_mailbox->command)) + cpu_relax(); + + return 0; +} + +static void acpi_mp_disable_offlining(struct acpi_madt_multiproc_wakeup *mp_wake) +{ + cpu_hotplug_disable_offlining(); + + /* + * ACPI MADT doesn't allow to offline a CPU after it was onlined. This + * limits kexec: the second kernel won't be able to use more than one CPU. + * + * To prevent a kexec kernel from onlining secondary CPUs invalidate the + * mailbox address in the ACPI MADT wakeup structure which prevents a + * kexec kernel to use it. + * + * This is safe as the booting kernel has the mailbox address cached + * already and acpi_wakeup_cpu() uses the cached value to bring up the + * secondary CPUs. + * + * Note: This is a Linux specific convention and not covered by the + * ACPI specification. + */ + mp_wake->mailbox_address = 0; +} + +int __init acpi_parse_mp_wake(union acpi_subtable_headers *header, + const unsigned long end) +{ + struct acpi_madt_multiproc_wakeup *mp_wake; + + mp_wake = (struct acpi_madt_multiproc_wakeup *)header; + + /* + * Cannot use the standard BAD_MADT_ENTRY() to sanity check the @mp_wake + * entry. 'sizeof (struct acpi_madt_multiproc_wakeup)' can be larger + * than the actual size of the MP wakeup entry in ACPI table because the + * 'reset_vector' is only available in the V1 MP wakeup structure. + */ + if (!mp_wake) + return -EINVAL; + if (end - (unsigned long)mp_wake < ACPI_MADT_MP_WAKEUP_SIZE_V0) + return -EINVAL; + if (mp_wake->header.length < ACPI_MADT_MP_WAKEUP_SIZE_V0) + return -EINVAL; + + acpi_table_print_madt_entry(&header->common); + + acpi_mp_wake_mailbox_paddr = mp_wake->mailbox_address; + + if (mp_wake->version >= ACPI_MADT_MP_WAKEUP_VERSION_V1 && + mp_wake->header.length >= ACPI_MADT_MP_WAKEUP_SIZE_V1) { + if (acpi_mp_setup_reset(mp_wake->reset_vector)) { + pr_warn("Failed to setup MADT reset vector\n"); + acpi_mp_disable_offlining(mp_wake); + } + } else { + /* + * CPU offlining requires version 1 of the ACPI MADT wakeup + * structure. + */ + acpi_mp_disable_offlining(mp_wake); + } + + apic_update_callback(wakeup_secondary_cpu_64, acpi_wakeup_cpu); + + return 0; +} |