diff options
Diffstat (limited to 'arch/x86/mm')
-rw-r--r-- | arch/x86/mm/fault.c | 32 | ||||
-rw-r--r-- | arch/x86/mm/kaslr.c | 26 | ||||
-rw-r--r-- | arch/x86/mm/tlb.c | 15 |
3 files changed, 71 insertions, 2 deletions
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index dc8023060456..0b92fce3e6c0 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -753,6 +753,38 @@ no_context(struct pt_regs *regs, unsigned long error_code, return; } +#ifdef CONFIG_VMAP_STACK + /* + * Stack overflow? During boot, we can fault near the initial + * stack in the direct map, but that's not an overflow -- check + * that we're in vmalloc space to avoid this. + */ + if (is_vmalloc_addr((void *)address) && + (((unsigned long)tsk->stack - 1 - address < PAGE_SIZE) || + address - ((unsigned long)tsk->stack + THREAD_SIZE) < PAGE_SIZE)) { + register void *__sp asm("rsp"); + unsigned long stack = this_cpu_read(orig_ist.ist[DOUBLEFAULT_STACK]) - sizeof(void *); + /* + * We're likely to be running with very little stack space + * left. It's plausible that we'd hit this condition but + * double-fault even before we get this far, in which case + * we're fine: the double-fault handler will deal with it. + * + * We don't want to make it all the way into the oops code + * and then double-fault, though, because we're likely to + * break the console driver and lose most of the stack dump. + */ + asm volatile ("movq %[stack], %%rsp\n\t" + "call handle_stack_overflow\n\t" + "1: jmp 1b" + : "+r" (__sp) + : "D" ("kernel stack overflow (page fault)"), + "S" (regs), "d" (address), + [stack] "rm" (stack)); + unreachable(); + } +#endif + /* * 32-bit: * diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c index bda8d5eef04d..ddd2661c4502 100644 --- a/arch/x86/mm/kaslr.c +++ b/arch/x86/mm/kaslr.c @@ -40,17 +40,26 @@ * You need to add an if/def entry if you introduce a new memory region * compatible with KASLR. Your entry must be in logical order with memory * layout. For example, ESPFIX is before EFI because its virtual address is - * before. You also need to add a BUILD_BUG_ON in kernel_randomize_memory to + * before. You also need to add a BUILD_BUG_ON() in kernel_randomize_memory() to * ensure that this order is correct and won't be changed. */ static const unsigned long vaddr_start = __PAGE_OFFSET_BASE; -static const unsigned long vaddr_end = VMEMMAP_START; + +#if defined(CONFIG_X86_ESPFIX64) +static const unsigned long vaddr_end = ESPFIX_BASE_ADDR; +#elif defined(CONFIG_EFI) +static const unsigned long vaddr_end = EFI_VA_START; +#else +static const unsigned long vaddr_end = __START_KERNEL_map; +#endif /* Default values */ unsigned long page_offset_base = __PAGE_OFFSET_BASE; EXPORT_SYMBOL(page_offset_base); unsigned long vmalloc_base = __VMALLOC_BASE; EXPORT_SYMBOL(vmalloc_base); +unsigned long vmemmap_base = __VMEMMAP_BASE; +EXPORT_SYMBOL(vmemmap_base); /* * Memory regions randomized by KASLR (except modules that use a separate logic @@ -63,6 +72,7 @@ static __initdata struct kaslr_memory_region { } kaslr_regions[] = { { &page_offset_base, 64/* Maximum */ }, { &vmalloc_base, VMALLOC_SIZE_TB }, + { &vmemmap_base, 1 }, }; /* Get size in bytes used by the memory region */ @@ -89,6 +99,18 @@ void __init kernel_randomize_memory(void) struct rnd_state rand_state; unsigned long remain_entropy; + /* + * All these BUILD_BUG_ON checks ensures the memory layout is + * consistent with the vaddr_start/vaddr_end variables. + */ + BUILD_BUG_ON(vaddr_start >= vaddr_end); + BUILD_BUG_ON(config_enabled(CONFIG_X86_ESPFIX64) && + vaddr_end >= EFI_VA_START); + BUILD_BUG_ON((config_enabled(CONFIG_X86_ESPFIX64) || + config_enabled(CONFIG_EFI)) && + vaddr_end >= __START_KERNEL_map); + BUILD_BUG_ON(vaddr_end > __START_KERNEL_map); + if (!kaslr_memory_enabled()) return; diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 4dbe65622810..a7655f6caf7d 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -77,10 +77,25 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, unsigned cpu = smp_processor_id(); if (likely(prev != next)) { + if (IS_ENABLED(CONFIG_VMAP_STACK)) { + /* + * If our current stack is in vmalloc space and isn't + * mapped in the new pgd, we'll double-fault. Forcibly + * map it. + */ + unsigned int stack_pgd_index = pgd_index(current_stack_pointer()); + + pgd_t *pgd = next->pgd + stack_pgd_index; + + if (unlikely(pgd_none(*pgd))) + set_pgd(pgd, init_mm.pgd[stack_pgd_index]); + } + #ifdef CONFIG_SMP this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK); this_cpu_write(cpu_tlbstate.active_mm, next); #endif + cpumask_set_cpu(cpu, mm_cpumask(next)); /* |