Diffstat (limited to 'arch/s390/mm')
 arch/s390/mm/dump_pagetables.c |  16
 arch/s390/mm/extable.c         |   9
 arch/s390/mm/fault.c           |  63
 arch/s390/mm/init.c            |  35
 arch/s390/mm/kasan_init.c      | 246
 arch/s390/mm/maccess.c         |  28
 arch/s390/mm/pgtable.c         |  25
 arch/s390/mm/vmem.c            | 103
8 files changed, 250 insertions, 275 deletions
diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c
index 9953819d7959..ba5f80268878 100644
--- a/arch/s390/mm/dump_pagetables.c
+++ b/arch/s390/mm/dump_pagetables.c
@@ -33,10 +33,6 @@ enum address_markers_idx {
 #endif
         IDENTITY_AFTER_NR,
         IDENTITY_AFTER_END_NR,
-#ifdef CONFIG_KASAN
-        KASAN_SHADOW_START_NR,
-        KASAN_SHADOW_END_NR,
-#endif
         VMEMMAP_NR,
         VMEMMAP_END_NR,
         VMALLOC_NR,
@@ -47,6 +43,10 @@ enum address_markers_idx {
         ABS_LOWCORE_END_NR,
         MEMCPY_REAL_NR,
         MEMCPY_REAL_END_NR,
+#ifdef CONFIG_KASAN
+        KASAN_SHADOW_START_NR,
+        KASAN_SHADOW_END_NR,
+#endif
 };
 
 static struct addr_marker address_markers[] = {
@@ -62,10 +62,6 @@ static struct addr_marker address_markers[] = {
 #endif
         [IDENTITY_AFTER_NR] = {(unsigned long)_end, "Identity Mapping Start"},
         [IDENTITY_AFTER_END_NR] = {0, "Identity Mapping End"},
-#ifdef CONFIG_KASAN
-        [KASAN_SHADOW_START_NR] = {KASAN_SHADOW_START, "Kasan Shadow Start"},
-        [KASAN_SHADOW_END_NR] = {KASAN_SHADOW_END, "Kasan Shadow End"},
-#endif
         [VMEMMAP_NR] = {0, "vmemmap Area Start"},
         [VMEMMAP_END_NR] = {0, "vmemmap Area End"},
         [VMALLOC_NR] = {0, "vmalloc Area Start"},
@@ -76,6 +72,10 @@ static struct addr_marker address_markers[] = {
         [ABS_LOWCORE_END_NR] = {0, "Lowcore Area End"},
         [MEMCPY_REAL_NR] = {0, "Real Memory Copy Area Start"},
         [MEMCPY_REAL_END_NR] = {0, "Real Memory Copy Area End"},
+#ifdef CONFIG_KASAN
+        [KASAN_SHADOW_START_NR] = {KASAN_SHADOW_START, "Kasan Shadow Start"},
+        [KASAN_SHADOW_END_NR] = {KASAN_SHADOW_END, "Kasan Shadow End"},
+#endif
         { -1, NULL }
 };
diff --git a/arch/s390/mm/extable.c b/arch/s390/mm/extable.c
index 1e4d2187541a..fe87291df95d 100644
--- a/arch/s390/mm/extable.c
+++ b/arch/s390/mm/extable.c
@@ -47,13 +47,16 @@ static bool ex_handler_ua_load_mem(const struct exception_table_entry *ex, struc
         return true;
 }
 
-static bool ex_handler_ua_load_reg(const struct exception_table_entry *ex, struct pt_regs *regs)
+static bool ex_handler_ua_load_reg(const struct exception_table_entry *ex,
+                                   bool pair, struct pt_regs *regs)
 {
         unsigned int reg_zero = FIELD_GET(EX_DATA_REG_ADDR, ex->data);
         unsigned int reg_err = FIELD_GET(EX_DATA_REG_ERR, ex->data);
 
         regs->gprs[reg_err] = -EFAULT;
         regs->gprs[reg_zero] = 0;
+        if (pair)
+                regs->gprs[reg_zero + 1] = 0;
         regs->psw.addr = extable_fixup(ex);
         return true;
 }
@@ -75,7 +78,9 @@ bool fixup_exception(struct pt_regs *regs)
         case EX_TYPE_UA_LOAD_MEM:
                 return ex_handler_ua_load_mem(ex, regs);
         case EX_TYPE_UA_LOAD_REG:
-                return ex_handler_ua_load_reg(ex, regs);
+                return ex_handler_ua_load_reg(ex, false, regs);
+        case EX_TYPE_UA_LOAD_REGPAIR:
+                return ex_handler_ua_load_reg(ex, true, regs);
         }
         panic("invalid exception table entry");
 }
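The new EX_TYPE_UA_LOAD_REGPAIR case above serves uaccess instructions whose target is an even/odd register pair (presumably loads such as LPQ, load pair from quadword), so a faulting access must zero both halves of the pair before execution resumes at the fixup label. Below is a minimal user-space model of the fixup semantics, not kernel code: the register numbers and fixup address are made-up values, and the real handler decodes them from the packed ex->data field with FIELD_GET().

    /*
     * Model of the UA_LOAD_REG/UA_LOAD_REGPAIR fixup semantics: reg_err
     * receives -EFAULT, reg_zero (plus its pair sibling for the regpair
     * variant) is cleared, and the PSW address is redirected.
     */
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define EFAULT 14

    struct model_regs {
            uint64_t gprs[16];
            uint64_t psw_addr;
    };

    static void ua_load_reg_fixup(struct model_regs *regs, unsigned int reg_err,
                                  unsigned int reg_zero, bool pair, uint64_t fixup)
    {
            regs->gprs[reg_err] = -(uint64_t)EFAULT;
            regs->gprs[reg_zero] = 0;
            if (pair)                       /* even/odd pair: clear the sibling too */
                    regs->gprs[reg_zero + 1] = 0;
            regs->psw_addr = fixup;         /* continue at the fixup label */
    }

    int main(void)
    {
            struct model_regs regs = { .gprs = { [4] = ~0ULL, [5] = ~0ULL } };

            ua_load_reg_fixup(&regs, 2, 4, true, 0x1000);
            printf("r2=%lld r4=%llu r5=%llu psw=%#llx\n",
                   (long long)regs.gprs[2], (unsigned long long)regs.gprs[4],
                   (unsigned long long)regs.gprs[5],
                   (unsigned long long)regs.psw_addr);
            return 0;
    }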
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 9649d9382e0a..a2632fd97d00 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -46,11 +46,15 @@
 #define __SUBCODE_MASK 0x0600
 #define __PF_RES_FIELD 0x8000000000000000ULL
 
-#define VM_FAULT_BADCONTEXT     ((__force vm_fault_t) 0x010000)
-#define VM_FAULT_BADMAP         ((__force vm_fault_t) 0x020000)
-#define VM_FAULT_BADACCESS      ((__force vm_fault_t) 0x040000)
-#define VM_FAULT_SIGNAL         ((__force vm_fault_t) 0x080000)
-#define VM_FAULT_PFAULT         ((__force vm_fault_t) 0x100000)
+/*
+ * Allocate private vm_fault_reason from top. Please make sure it won't
+ * collide with vm_fault_reason.
+ */
+#define VM_FAULT_BADCONTEXT     ((__force vm_fault_t)0x80000000)
+#define VM_FAULT_BADMAP         ((__force vm_fault_t)0x40000000)
+#define VM_FAULT_BADACCESS      ((__force vm_fault_t)0x20000000)
+#define VM_FAULT_SIGNAL         ((__force vm_fault_t)0x10000000)
+#define VM_FAULT_PFAULT         ((__force vm_fault_t)0x8000000)
 
 enum fault_type {
         KERNEL_FAULT,
@@ -96,6 +100,20 @@ static enum fault_type get_fault_type(struct pt_regs *regs)
         return KERNEL_FAULT;
 }
 
+static unsigned long get_fault_address(struct pt_regs *regs)
+{
+        unsigned long trans_exc_code = regs->int_parm_long;
+
+        return trans_exc_code & __FAIL_ADDR_MASK;
+}
+
+static bool fault_is_write(struct pt_regs *regs)
+{
+        unsigned long trans_exc_code = regs->int_parm_long;
+
+        return (trans_exc_code & store_indication) == 0x400;
+}
+
 static int bad_address(void *p)
 {
         unsigned long dummy;
@@ -228,15 +246,26 @@ static noinline void do_sigsegv(struct pt_regs *regs, int si_code)
                         (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK));
 }
 
-static noinline void do_no_context(struct pt_regs *regs)
+static noinline void do_no_context(struct pt_regs *regs, vm_fault_t fault)
 {
+        enum fault_type fault_type;
+        unsigned long address;
+        bool is_write;
+
         if (fixup_exception(regs))
                 return;
+        fault_type = get_fault_type(regs);
+        if ((fault_type == KERNEL_FAULT) && (fault == VM_FAULT_BADCONTEXT)) {
+                address = get_fault_address(regs);
+                is_write = fault_is_write(regs);
+                if (kfence_handle_page_fault(address, is_write, regs))
+                        return;
+        }
         /*
          * Oops. The kernel tried to access some bad page. We'll have to
          * terminate things with extreme prejudice.
          */
-        if (get_fault_type(regs) == KERNEL_FAULT)
+        if (fault_type == KERNEL_FAULT)
                 printk(KERN_ALERT "Unable to handle kernel pointer dereference"
                        " in virtual kernel address space\n");
         else
@@ -255,7 +284,7 @@ static noinline void do_low_address(struct pt_regs *regs)
                 die (regs, "Low-address protection");
         }
 
-        do_no_context(regs);
+        do_no_context(regs, VM_FAULT_BADACCESS);
 }
 
 static noinline void do_sigbus(struct pt_regs *regs)
@@ -286,28 +315,28 @@ static noinline void do_fault_error(struct pt_regs *regs, vm_fault_t fault)
                 fallthrough;
         case VM_FAULT_BADCONTEXT:
         case VM_FAULT_PFAULT:
-                do_no_context(regs);
+                do_no_context(regs, fault);
                 break;
         case VM_FAULT_SIGNAL:
                 if (!user_mode(regs))
-                        do_no_context(regs);
+                        do_no_context(regs, fault);
                 break;
         default: /* fault & VM_FAULT_ERROR */
                 if (fault & VM_FAULT_OOM) {
                         if (!user_mode(regs))
-                                do_no_context(regs);
+                                do_no_context(regs, fault);
                         else
                                 pagefault_out_of_memory();
                 } else if (fault & VM_FAULT_SIGSEGV) {
                         /* Kernel mode? Handle exceptions or die */
                         if (!user_mode(regs))
-                                do_no_context(regs);
+                                do_no_context(regs, fault);
                         else
                                 do_sigsegv(regs, SEGV_MAPERR);
                 } else if (fault & VM_FAULT_SIGBUS) {
                         /* Kernel mode? Handle exceptions or die */
                         if (!user_mode(regs))
-                                do_no_context(regs);
+                                do_no_context(regs, fault);
                         else
                                 do_sigbus(regs);
                 } else
@@ -334,7 +363,6 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access)
         struct mm_struct *mm;
         struct vm_area_struct *vma;
         enum fault_type type;
-        unsigned long trans_exc_code;
         unsigned long address;
         unsigned int flags;
         vm_fault_t fault;
@@ -351,9 +379,8 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access)
                 return 0;
 
         mm = tsk->mm;
-        trans_exc_code = regs->int_parm_long;
-        address = trans_exc_code & __FAIL_ADDR_MASK;
-        is_write = (trans_exc_code & store_indication) == 0x400;
+        address = get_fault_address(regs);
+        is_write = fault_is_write(regs);
 
         /*
          * Verify that the fault happened in user space, that
@@ -364,8 +391,6 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access)
         type = get_fault_type(regs);
         switch (type) {
         case KERNEL_FAULT:
-                if (kfence_handle_page_fault(address, is_write, regs))
-                        return 0;
                 goto out;
         case USER_FAULT:
         case GMAP_FAULT:
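The relocated private fault codes above depend on the top bits of vm_fault_t staying free: the generic vm_fault_reason codes in include/linux/mm_types.h are allocated from bit 0 upward, so arch-private codes taken from bit 31 downward cannot collide as long as the two ranges never meet. A sketch of how the "make sure it won't collide" request could become a compile-time check follows; it is illustrative only and not part of the patch, though the generic flags named here do exist.

    /* illustrative only: private codes must stay disjoint from generic ones */
    #define VM_FAULT_ARCH_PRIVATE   (0x80000000 | 0x40000000 | 0x20000000 | \
                                     0x10000000 | 0x8000000)

    static_assert(!(VM_FAULT_ARCH_PRIVATE &
                    (__force unsigned int)(VM_FAULT_ERROR | VM_FAULT_NOPAGE |
                                           VM_FAULT_LOCKED | VM_FAULT_RETRY |
                                           VM_FAULT_FALLBACK | VM_FAULT_DONE_COW |
                                           VM_FAULT_NEEDDSYNC)),
                  "arch-private fault codes collide with vm_fault_reason");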
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 30ab55f868f6..144447d5cb4c 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -52,9 +52,9 @@
 #include <linux/virtio_config.h>
 
 pgd_t swapper_pg_dir[PTRS_PER_PGD] __section(".bss..swapper_pg_dir");
-static pgd_t invalid_pg_dir[PTRS_PER_PGD] __section(".bss..invalid_pg_dir");
+pgd_t invalid_pg_dir[PTRS_PER_PGD] __section(".bss..invalid_pg_dir");
 
-unsigned long s390_invalid_asce;
+unsigned long __bootdata_preserved(s390_invalid_asce);
 
 unsigned long empty_zero_page, zero_page_mask;
 EXPORT_SYMBOL(empty_zero_page);
@@ -93,37 +93,8 @@ static void __init setup_zero_pages(void)
 void __init paging_init(void)
 {
         unsigned long max_zone_pfns[MAX_NR_ZONES];
-        unsigned long pgd_type, asce_bits;
-        psw_t psw;
-
-        s390_invalid_asce = (unsigned long)invalid_pg_dir;
-        s390_invalid_asce |= _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
-        crst_table_init((unsigned long *)invalid_pg_dir, _REGION3_ENTRY_EMPTY);
-        init_mm.pgd = swapper_pg_dir;
-        if (VMALLOC_END > _REGION2_SIZE) {
-                asce_bits = _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH;
-                pgd_type = _REGION2_ENTRY_EMPTY;
-        } else {
-                asce_bits = _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
-                pgd_type = _REGION3_ENTRY_EMPTY;
-        }
-        init_mm.context.asce = (__pa(init_mm.pgd) & PAGE_MASK) | asce_bits;
-        S390_lowcore.kernel_asce = init_mm.context.asce;
-        S390_lowcore.user_asce = s390_invalid_asce;
-        crst_table_init((unsigned long *) init_mm.pgd, pgd_type);
-        vmem_map_init();
-        kasan_copy_shadow_mapping();
-
-        /* enable virtual mapping in kernel mode */
-        __ctl_load(S390_lowcore.kernel_asce, 1, 1);
-        __ctl_load(S390_lowcore.user_asce, 7, 7);
-        __ctl_load(S390_lowcore.kernel_asce, 13, 13);
-        psw.mask = __extract_psw();
-        psw_bits(psw).dat = 1;
-        psw_bits(psw).as = PSW_BITS_AS_HOME;
-        __load_psw_mask(psw.mask);
-        kasan_free_early_identity();
 
+        vmem_map_init();
         sparse_init();
         zone_dma_bits = 31;
         memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
diff --git a/arch/s390/mm/kasan_init.c b/arch/s390/mm/kasan_init.c
index 9f988d4582ed..ef89a5f26853 100644
--- a/arch/s390/mm/kasan_init.c
+++ b/arch/s390/mm/kasan_init.c
@@ -1,7 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/kasan.h>
 #include <linux/sched/task.h>
-#include <linux/memblock.h>
 #include <linux/pgtable.h>
 #include <asm/pgalloc.h>
 #include <asm/kasan.h>
@@ -15,16 +14,11 @@
 
 static unsigned long segment_pos __initdata;
 static unsigned long segment_low __initdata;
-static unsigned long pgalloc_pos __initdata;
-static unsigned long pgalloc_low __initdata;
-static unsigned long pgalloc_freeable __initdata;
 static bool has_edat __initdata;
 static bool has_nx __initdata;
 
 #define __sha(x) ((unsigned long)kasan_mem_to_shadow((void *)x))
 
-static pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE);
-
 static void __init kasan_early_panic(const char *reason)
 {
         sclp_early_printk("The Linux kernel failed to boot with the KernelAddressSanitizer:\n");
@@ -39,7 +33,7 @@ static void * __init kasan_early_alloc_segment(void)
         if (segment_pos < segment_low)
                 kasan_early_panic("out of memory during initialisation\n");
 
-        return (void *)segment_pos;
+        return __va(segment_pos);
 }
 
 static void * __init kasan_early_alloc_pages(unsigned int order)
@@ -49,7 +43,7 @@ static void * __init kasan_early_alloc_pages(unsigned int order)
         if (pgalloc_pos < pgalloc_low)
                 kasan_early_panic("out of memory during initialisation\n");
 
-        return (void *)pgalloc_pos;
+        return __va(pgalloc_pos);
 }
 
 static void * __init kasan_early_crst_alloc(unsigned long val)
@@ -81,35 +75,37 @@ static pte_t * __init kasan_early_pte_alloc(void)
 }
 
 enum populate_mode {
-        POPULATE_ONE2ONE,
         POPULATE_MAP,
         POPULATE_ZERO_SHADOW,
         POPULATE_SHALLOW
 };
+
+static inline pgprot_t pgprot_clear_bit(pgprot_t pgprot, unsigned long bit)
+{
+        return __pgprot(pgprot_val(pgprot) & ~bit);
+}
+
 static void __init kasan_early_pgtable_populate(unsigned long address,
                                                 unsigned long end,
                                                 enum populate_mode mode)
 {
-        unsigned long pgt_prot_zero, pgt_prot, sgt_prot;
+        pgprot_t pgt_prot_zero = PAGE_KERNEL_RO;
+        pgprot_t pgt_prot = PAGE_KERNEL;
+        pgprot_t sgt_prot = SEGMENT_KERNEL;
         pgd_t *pg_dir;
         p4d_t *p4_dir;
         pud_t *pu_dir;
         pmd_t *pm_dir;
         pte_t *pt_dir;
+        pmd_t pmd;
+        pte_t pte;
 
-        pgt_prot_zero = pgprot_val(PAGE_KERNEL_RO);
-        if (!has_nx)
-                pgt_prot_zero &= ~_PAGE_NOEXEC;
-        pgt_prot = pgprot_val(PAGE_KERNEL);
-        sgt_prot = pgprot_val(SEGMENT_KERNEL);
-        if (!has_nx || mode == POPULATE_ONE2ONE) {
-                pgt_prot &= ~_PAGE_NOEXEC;
-                sgt_prot &= ~_SEGMENT_ENTRY_NOEXEC;
+        if (!has_nx) {
+                pgt_prot_zero = pgprot_clear_bit(pgt_prot_zero, _PAGE_NOEXEC);
+                pgt_prot = pgprot_clear_bit(pgt_prot, _PAGE_NOEXEC);
+                sgt_prot = pgprot_clear_bit(sgt_prot, _SEGMENT_ENTRY_NOEXEC);
         }
-        /*
-         * The first 1MB of 1:1 mapping is mapped with 4KB pages
-         */
+
         while (address < end) {
                 pg_dir = pgd_offset_k(address);
                 if (pgd_none(*pg_dir)) {
@@ -166,16 +162,13 @@ static void __init kasan_early_pgtable_populate(unsigned long address,
                                 pmd_populate(&init_mm, pm_dir, kasan_early_shadow_pte);
                                 address = (address + PMD_SIZE) & PMD_MASK;
                                 continue;
-                        } else if (has_edat && address) {
-                                void *page;
-
-                                if (mode == POPULATE_ONE2ONE) {
-                                        page = (void *)address;
-                                } else {
-                                        page = kasan_early_alloc_segment();
-                                        memset(page, 0, _SEGMENT_SIZE);
-                                }
-                                set_pmd(pm_dir, __pmd(__pa(page) | sgt_prot));
+                        } else if (has_edat) {
+                                void *page = kasan_early_alloc_segment();
+
+                                memset(page, 0, _SEGMENT_SIZE);
+                                pmd = __pmd(__pa(page));
+                                pmd = set_pmd_bit(pmd, sgt_prot);
+                                set_pmd(pm_dir, pmd);
                                 address = (address + PMD_SIZE) & PMD_MASK;
                                 continue;
                         }
@@ -192,18 +185,18 @@ static void __init kasan_early_pgtable_populate(unsigned long address,
                         void *page;
 
                         switch (mode) {
-                        case POPULATE_ONE2ONE:
-                                page = (void *)address;
-                                set_pte(pt_dir, __pte(__pa(page) | pgt_prot));
-                                break;
                         case POPULATE_MAP:
                                 page = kasan_early_alloc_pages(0);
                                 memset(page, 0, PAGE_SIZE);
-                                set_pte(pt_dir, __pte(__pa(page) | pgt_prot));
+                                pte = __pte(__pa(page));
+                                pte = set_pte_bit(pte, pgt_prot);
+                                set_pte(pt_dir, pte);
                                 break;
                         case POPULATE_ZERO_SHADOW:
                                 page = kasan_early_shadow_page;
-                                set_pte(pt_dir, __pte(__pa(page) | pgt_prot_zero));
+                                pte = __pte(__pa(page));
+                                pte = set_pte_bit(pte, pgt_prot_zero);
+                                set_pte(pt_dir, pte);
                                 break;
                         case POPULATE_SHALLOW:
                                 /* should never happen */
@@ -214,29 +207,6 @@ static void __init kasan_early_pgtable_populate(unsigned long address,
         }
 }
 
-static void __init kasan_set_pgd(pgd_t *pgd, unsigned long asce_type)
-{
-        unsigned long asce_bits;
-
-        asce_bits = asce_type | _ASCE_TABLE_LENGTH;
-        S390_lowcore.kernel_asce = (__pa(pgd) & PAGE_MASK) | asce_bits;
-        S390_lowcore.user_asce = S390_lowcore.kernel_asce;
-
-        __ctl_load(S390_lowcore.kernel_asce, 1, 1);
-        __ctl_load(S390_lowcore.kernel_asce, 7, 7);
-        __ctl_load(S390_lowcore.kernel_asce, 13, 13);
-}
-
-static void __init kasan_enable_dat(void)
-{
-        psw_t psw;
-
-        psw.mask = __extract_psw();
-        psw_bits(psw).dat = 1;
-        psw_bits(psw).as = PSW_BITS_AS_HOME;
-        __load_psw_mask(psw.mask);
-}
-
 static void __init kasan_early_detect_facilities(void)
 {
         if (test_facility(8)) {
@@ -251,153 +221,81 @@ static void __init kasan_early_detect_facilities(void)
 
 void __init kasan_early_init(void)
 {
-        unsigned long shadow_alloc_size;
-        unsigned long initrd_end;
-        unsigned long memsize;
-        unsigned long pgt_prot = pgprot_val(PAGE_KERNEL_RO);
-        pte_t pte_z;
+        pte_t pte_z = __pte(__pa(kasan_early_shadow_page) | pgprot_val(PAGE_KERNEL_RO));
         pmd_t pmd_z = __pmd(__pa(kasan_early_shadow_pte) | _SEGMENT_ENTRY);
         pud_t pud_z = __pud(__pa(kasan_early_shadow_pmd) | _REGION3_ENTRY);
         p4d_t p4d_z = __p4d(__pa(kasan_early_shadow_pud) | _REGION2_ENTRY);
+        unsigned long untracked_end = MODULES_VADDR;
+        unsigned long shadow_alloc_size;
+        unsigned long start, end;
+        int i;
 
         kasan_early_detect_facilities();
         if (!has_nx)
-                pgt_prot &= ~_PAGE_NOEXEC;
-        pte_z = __pte(__pa(kasan_early_shadow_page) | pgt_prot);
-
-        memsize = get_mem_detect_end();
-        if (!memsize)
-                kasan_early_panic("cannot detect physical memory size\n");
-        /*
-         * Kasan currently supports standby memory but only if it follows
-         * online memory (default allocation), i.e. no memory holes.
-         * - memsize represents end of online memory
-         * - ident_map_size represents online + standby and memory limits
-         *   accounted.
-         * Kasan maps "memsize" right away.
-         * [0, memsize]                 - as identity mapping
-         * [__sha(0), __sha(memsize)]   - shadow memory for identity mapping
-         * The rest [memsize, ident_map_size] if memsize < ident_map_size
-         * could be mapped/unmapped dynamically later during memory hotplug.
-         */
-        memsize = min(memsize, ident_map_size);
+                pte_z = clear_pte_bit(pte_z, __pgprot(_PAGE_NOEXEC));
 
         BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, P4D_SIZE));
         BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, P4D_SIZE));
-        crst_table_init((unsigned long *)early_pg_dir, _REGION2_ENTRY_EMPTY);
 
         /* init kasan zero shadow */
-        crst_table_init((unsigned long *)kasan_early_shadow_p4d,
-                        p4d_val(p4d_z));
-        crst_table_init((unsigned long *)kasan_early_shadow_pud,
-                        pud_val(pud_z));
-        crst_table_init((unsigned long *)kasan_early_shadow_pmd,
-                        pmd_val(pmd_z));
+        crst_table_init((unsigned long *)kasan_early_shadow_p4d, p4d_val(p4d_z));
+        crst_table_init((unsigned long *)kasan_early_shadow_pud, pud_val(pud_z));
+        crst_table_init((unsigned long *)kasan_early_shadow_pmd, pmd_val(pmd_z));
         memset64((u64 *)kasan_early_shadow_pte, pte_val(pte_z), PTRS_PER_PTE);
 
-        shadow_alloc_size = memsize >> KASAN_SHADOW_SCALE_SHIFT;
-        pgalloc_low = round_up((unsigned long)_end, _SEGMENT_SIZE);
-        if (IS_ENABLED(CONFIG_BLK_DEV_INITRD)) {
-                initrd_end =
-                    round_up(initrd_data.start + initrd_data.size, _SEGMENT_SIZE);
-                pgalloc_low = max(pgalloc_low, initrd_end);
-        }
-
-        if (pgalloc_low + shadow_alloc_size > memsize)
-                kasan_early_panic("out of memory during initialisation\n");
-
         if (has_edat) {
-                segment_pos = round_down(memsize, _SEGMENT_SIZE);
+                shadow_alloc_size = get_mem_detect_usable_total() >> KASAN_SHADOW_SCALE_SHIFT;
+                segment_pos = round_down(pgalloc_pos, _SEGMENT_SIZE);
                 segment_low = segment_pos - shadow_alloc_size;
+                segment_low = round_down(segment_low, _SEGMENT_SIZE);
                 pgalloc_pos = segment_low;
-        } else {
-                pgalloc_pos = memsize;
         }
-        init_mm.pgd = early_pg_dir;
+
         /*
          * Current memory layout:
-         * +- 0 -------------+     +- shadow start -+
-         * | 1:1 ram mapping |    /| 1/8 ram        |
-         * |                 |   / |                |
-         * +- end of ram ----+  /  +----------------+
-         * | ... gap ...     | /   |                |
-         * |                 |/    |    kasan       |
-         * +- shadow start --+     |    zero        |
-         * | 1/8 addr space  |     |    page        |
-         * +- shadow end    -+     |    mapping     |
-         * | ... gap ...     |\    |  (untracked)   |
-         * +- vmalloc area  -+ \   |                |
-         * | vmalloc_size    |  \  |                |
-         * +- modules vaddr -+   \ +----------------+
-         * | 2Gb             |    \|    unmapped    | allocated per module
-         * +-----------------+     +- shadow end ---+
+         * +- 0 -------------+     +- shadow start -+
+         * |1:1 ident mapping|    /|1/8 of ident map|
+         * |                 |   / |                |
+         * +-end of ident map+  /  +----------------+
+         * | ... gap ...     | /   |     kasan      |
+         * |                 |/    |   zero page    |
+         * +- vmalloc area  -+     |    mapping     |
+         * | vmalloc_size    |     |  (untracked)   |
+         * +- modules vaddr -+     +----------------+
+         * | 2Gb             |     |    unmapped    | allocated per module
+         * +- shadow start  -+     +----------------+
+         * | 1/8 addr space  |     | zero pg mapping| (untracked)
+         * +- shadow end ----+---------+- shadow end ---+
          *
         * Current memory layout (KASAN_VMALLOC):
-         * +- 0 -------------+     +- shadow start -+
-         * | 1:1 ram mapping |    /| 1/8 ram        |
-         * |                 |   / |                |
-         * +- end of ram ----+  /  +----------------+
-         * | ... gap ...     | /   |    kasan       |
-         * |                 |/    |    zero        |
-         * +- shadow start --+     |    page        |
-         * | 1/8 addr space  |     |    mapping     |
-         * +- shadow end    -+     |  (untracked)   |
-         * | ... gap ...     |\    |                |
-         * +- vmalloc area  -+ \   +- vmalloc area -+
-         * | vmalloc_size    |  \  |shallow populate|
-         * +- modules vaddr -+   \ +- modules area -+
-         * | 2Gb             |    \|shallow populate|
-         * +-----------------+     +- shadow end ---+
+         * +- 0 -------------+     +- shadow start -+
+         * |1:1 ident mapping|    /|1/8 of ident map|
+         * |                 |   / |                |
+         * +-end of ident map+  /  +----------------+
+         * | ... gap ...     | /   | kasan zero page| (untracked)
+         * |                 |/    |    mapping     |
+         * +- vmalloc area  -+     +----------------+
+         * | vmalloc_size    |     |shallow populate|
+         * +- modules vaddr -+     +----------------+
+         * | 2Gb             |     |shallow populate|
+         * +- shadow start  -+     +----------------+
+         * | 1/8 addr space  |     | zero pg mapping| (untracked)
+         * +- shadow end ----+---------+- shadow end ---+
          */
         /* populate kasan shadow (for identity mapping and zero page mapping) */
-        kasan_early_pgtable_populate(__sha(0), __sha(memsize), POPULATE_MAP);
+        for_each_mem_detect_usable_block(i, &start, &end)
+                kasan_early_pgtable_populate(__sha(start), __sha(end), POPULATE_MAP);
         if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) {
+                untracked_end = VMALLOC_START;
                 /* shallowly populate kasan shadow for vmalloc and modules */
                 kasan_early_pgtable_populate(__sha(VMALLOC_START), __sha(MODULES_END),
                                              POPULATE_SHALLOW);
         }
         /* populate kasan shadow for untracked memory */
-        kasan_early_pgtable_populate(__sha(ident_map_size),
-                                     IS_ENABLED(CONFIG_KASAN_VMALLOC) ?
-                                                __sha(VMALLOC_START) :
-                                                __sha(MODULES_VADDR),
+        kasan_early_pgtable_populate(__sha(ident_map_size), __sha(untracked_end),
                                      POPULATE_ZERO_SHADOW);
         kasan_early_pgtable_populate(__sha(MODULES_END), __sha(_REGION1_SIZE),
                                      POPULATE_ZERO_SHADOW);
-        /* memory allocated for identity mapping structs will be freed later */
-        pgalloc_freeable = pgalloc_pos;
-        /* populate identity mapping */
-        kasan_early_pgtable_populate(0, memsize, POPULATE_ONE2ONE);
-        kasan_set_pgd(early_pg_dir, _ASCE_TYPE_REGION2);
-        kasan_enable_dat();
         /* enable kasan */
         init_task.kasan_depth = 0;
-        memblock_reserve(pgalloc_pos, memsize - pgalloc_pos);
         sclp_early_printk("KernelAddressSanitizer initialized\n");
 }
-
-void __init kasan_copy_shadow_mapping(void)
-{
-        /*
-         * At this point we are still running on early pages setup early_pg_dir,
-         * while swapper_pg_dir has just been initialized with identity mapping.
-         * Carry over shadow memory region from early_pg_dir to swapper_pg_dir.
-         */
-
-        pgd_t *pg_dir_src;
-        pgd_t *pg_dir_dst;
-        p4d_t *p4_dir_src;
-        p4d_t *p4_dir_dst;
-
-        pg_dir_src = pgd_offset_raw(early_pg_dir, KASAN_SHADOW_START);
-        pg_dir_dst = pgd_offset_raw(init_mm.pgd, KASAN_SHADOW_START);
-        p4_dir_src = p4d_offset(pg_dir_src, KASAN_SHADOW_START);
-        p4_dir_dst = p4d_offset(pg_dir_dst, KASAN_SHADOW_START);
-        memcpy(p4_dir_dst, p4_dir_src,
-               (KASAN_SHADOW_SIZE >> P4D_SHIFT) * sizeof(p4d_t));
-}
-
-void __init kasan_free_early_identity(void)
-{
-        memblock_phys_free(pgalloc_pos, pgalloc_freeable - pgalloc_pos);
-}
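One shadow byte tracks 2^KASAN_SHADOW_SCALE_SHIFT == 8 bytes of memory, which is why kasan_early_init() above sizes its allocation as the usable-memory total shifted right by the scale, and why __sha() (a wrapper around the generic kasan_mem_to_shadow()) is applied to both ends of each populated block. A standalone sketch of that arithmetic follows; the shadow offset value is illustrative, since the real one is fixed per architecture at configuration time.

    #include <stdint.h>
    #include <stdio.h>

    #define KASAN_SHADOW_SCALE_SHIFT 3
    #define KASAN_SHADOW_OFFSET 0x0000001800000000UL    /* illustrative value */

    /* same formula as the generic kasan_mem_to_shadow() */
    static uintptr_t mem_to_shadow(uintptr_t addr)
    {
            return (addr >> KASAN_SHADOW_SCALE_SHIFT) + KASAN_SHADOW_OFFSET;
    }

    int main(void)
    {
            uintptr_t start = 0x100000, end = 0x200000; /* a 1 MiB block */

            /* the shadow for a block is 1/8 of the block's size */
            printf("shadow bytes: %lu\n",
                   (unsigned long)(mem_to_shadow(end) - mem_to_shadow(start)));
            return 0;
    }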
diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c
index 4824d1cd33d8..d02a61620cfa 100644
--- a/arch/s390/mm/maccess.c
+++ b/arch/s390/mm/maccess.c
@@ -21,7 +21,7 @@
 #include <asm/maccess.h>
 
 unsigned long __bootdata_preserved(__memcpy_real_area);
-static __ro_after_init pte_t *memcpy_real_ptep;
+pte_t *__bootdata_preserved(memcpy_real_ptep);
 static DEFINE_MUTEX(memcpy_real_mutex);
 
 static notrace long s390_kernel_write_odd(void *dst, const void *src, size_t size)
@@ -68,28 +68,17 @@ notrace void *s390_kernel_write(void *dst, const void *src, size_t size)
         long copied;
 
         spin_lock_irqsave(&s390_kernel_write_lock, flags);
-        if (!(flags & PSW_MASK_DAT)) {
-                memcpy(dst, src, size);
-        } else {
-                while (size) {
-                        copied = s390_kernel_write_odd(tmp, src, size);
-                        tmp += copied;
-                        src += copied;
-                        size -= copied;
-                }
+        while (size) {
+                copied = s390_kernel_write_odd(tmp, src, size);
+                tmp += copied;
+                src += copied;
+                size -= copied;
         }
         spin_unlock_irqrestore(&s390_kernel_write_lock, flags);
 
         return dst;
 }
 
-void __init memcpy_real_init(void)
-{
-        memcpy_real_ptep = vmem_get_alloc_pte(__memcpy_real_area, true);
-        if (!memcpy_real_ptep)
-                panic("Couldn't setup memcpy real area");
-}
-
 size_t memcpy_real_iter(struct iov_iter *iter, unsigned long src, size_t count)
 {
         size_t len, copied, res = 0;
@@ -162,7 +151,6 @@ void *xlate_dev_mem_ptr(phys_addr_t addr)
         void *ptr = phys_to_virt(addr);
         void *bounce = ptr;
         struct lowcore *abs_lc;
-        unsigned long flags;
         unsigned long size;
         int this_cpu, cpu;
 
@@ -178,10 +166,10 @@ void *xlate_dev_mem_ptr(phys_addr_t addr)
                 goto out;
         size = PAGE_SIZE - (addr & ~PAGE_MASK);
         if (addr < sizeof(struct lowcore)) {
-                abs_lc = get_abs_lowcore(&flags);
+                abs_lc = get_abs_lowcore();
                 ptr = (void *)abs_lc + addr;
                 memcpy(bounce, ptr, size);
-                put_abs_lowcore(abs_lc, flags);
+                put_abs_lowcore(abs_lc);
         } else if (cpu == this_cpu) {
                 ptr = (void *)(addr - virt_to_phys(lowcore_ptr[cpu]));
                 memcpy(bounce, ptr, size);
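With the DAT-off shortcut removed (the kernel is presumably guaranteed to run with DAT enabled by the time s390_kernel_write() is reachable), the chunked copy loop is the only path left. Its invariant is that the underlying primitive may store fewer bytes than requested, so destination, source and remaining size all advance by the amount actually copied. A standalone model follows, where write_chunk() stands in for s390_kernel_write_odd() (which stores 1 to 8 bytes per call depending on alignment).

    #include <stddef.h>
    #include <stdio.h>
    #include <string.h>

    static size_t write_chunk(char *dst, const char *src, size_t size)
    {
            size_t n = size < 8 ? size : 8; /* pretend: at most 8 bytes per call */

            memcpy(dst, src, n);
            return n;
    }

    int main(void)
    {
            char src[] = "dst, src and size advance in lockstep";
            char dst[sizeof(src)];
            char *tmp = dst;
            const char *from = src;
            size_t size = sizeof(src);

            while (size) {                  /* same shape as the kernel loop */
                    size_t copied = write_chunk(tmp, from, size);

                    tmp += copied;
                    from += copied;
                    size -= copied;
            }
            puts(dst);
            return 0;
    }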
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 4909dcd762e8..6effb24de6d9 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -302,6 +302,31 @@ pte_t ptep_xchg_direct(struct mm_struct *mm, unsigned long addr,
 }
 EXPORT_SYMBOL(ptep_xchg_direct);
 
+/*
+ * Caller must check that new PTE only differs in _PAGE_PROTECT HW bit, so that
+ * RDP can be used instead of IPTE. See also comments at pte_allow_rdp().
+ */
+void ptep_reset_dat_prot(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
+                         pte_t new)
+{
+        preempt_disable();
+        atomic_inc(&mm->context.flush_count);
+        if (cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
+                __ptep_rdp(addr, ptep, 0, 0, 1);
+        else
+                __ptep_rdp(addr, ptep, 0, 0, 0);
+        /*
+         * PTE is not invalidated by RDP, only _PAGE_PROTECT is cleared. That
+         * means it is still valid and active, and must not be changed according
+         * to the architecture. But writing a new value that only differs in SW
+         * bits is allowed.
+         */
+        set_pte(ptep, new);
+        atomic_dec(&mm->context.flush_count);
+        preempt_enable();
+}
+EXPORT_SYMBOL(ptep_reset_dat_prot);
+
 pte_t ptep_xchg_lazy(struct mm_struct *mm, unsigned long addr,
                      pte_t *ptep, pte_t new)
 {
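The comment above defers the exact precondition to pte_allow_rdp(): RDP (Reset DAT-Protection) skips the full TLB invalidation that IPTE would perform, but it only clears the protection bit, so old and new PTE must agree in every other hardware-visible bit. Below is a sketch of such a predicate under assumed names; _PAGE_SW_BITS is a hypothetical stand-in for the mask of software-defined PTE bits, and the real check lives with the s390 pgtable definitions.

    /* sketch, not the patch's code: when is RDP a valid substitute for IPTE? */
    static inline bool pte_allow_rdp_sketch(pte_t old, pte_t new)
    {
            unsigned long hw_old = pte_val(old) & ~_PAGE_SW_BITS;
            unsigned long hw_new = pte_val(new) & ~_PAGE_SW_BITS;

            /*
             * The only hardware bit allowed to change is _PAGE_PROTECT,
             * and it must go from set (read-only) to clear (writable).
             */
            return (hw_old ^ hw_new) == _PAGE_PROTECT &&
                   (hw_old & _PAGE_PROTECT);
    }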
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index ee1a97078527..4113a7ffa149 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -11,6 +11,7 @@
 #include <linux/list.h>
 #include <linux/hugetlb.h>
 #include <linux/slab.h>
+#include <linux/sort.h>
 #include <asm/cacheflush.h>
 #include <asm/nospec-branch.h>
 #include <asm/pgalloc.h>
@@ -296,10 +297,7 @@ static void try_free_pmd_table(pud_t *pud, unsigned long start)
         /* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
         if (end > VMALLOC_START)
                 return;
-#ifdef CONFIG_KASAN
-        if (start < KASAN_SHADOW_END && KASAN_SHADOW_START > end)
-                return;
-#endif
+
         pmd = pmd_offset(pud, start);
         for (i = 0; i < PTRS_PER_PMD; i++, pmd++)
                 if (!pmd_none(*pmd))
@@ -371,10 +369,6 @@ static void try_free_pud_table(p4d_t *p4d, unsigned long start)
         /* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
         if (end > VMALLOC_START)
                 return;
-#ifdef CONFIG_KASAN
-        if (start < KASAN_SHADOW_END && KASAN_SHADOW_START > end)
-                return;
-#endif
 
         pud = pud_offset(p4d, start);
         for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
@@ -425,10 +419,6 @@ static void try_free_p4d_table(pgd_t *pgd, unsigned long start)
         /* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
         if (end > VMALLOC_START)
                 return;
-#ifdef CONFIG_KASAN
-        if (start < KASAN_SHADOW_END && KASAN_SHADOW_START > end)
-                return;
-#endif
 
         p4d = p4d_offset(pgd, start);
         for (i = 0; i < PTRS_PER_P4D; i++, p4d++) {
@@ -657,6 +647,23 @@ void vmem_unmap_4k_page(unsigned long addr)
         mutex_unlock(&vmem_mutex);
 }
 
+static int __init memblock_region_cmp(const void *a, const void *b)
+{
+        const struct memblock_region *r1 = a;
+        const struct memblock_region *r2 = b;
+
+        if (r1->base < r2->base)
+                return -1;
+        if (r1->base > r2->base)
+                return 1;
+        return 0;
+}
+
+static void __init memblock_region_swap(void *a, void *b, int size)
+{
+        swap(*(struct memblock_region *)a, *(struct memblock_region *)b);
+}
+
 /*
  * map whole physical memory to virtual memory (identity mapping)
  * we reserve enough space in the vmalloc area for vmemmap to hotplug
@@ -664,11 +671,68 @@ void vmem_unmap_4k_page(unsigned long addr)
  */
 void __init vmem_map_init(void)
 {
+        struct memblock_region memory_rwx_regions[] = {
+                {
+                        .base = 0,
+                        .size = sizeof(struct lowcore),
+                        .flags = MEMBLOCK_NONE,
+#ifdef CONFIG_NUMA
+                        .nid = NUMA_NO_NODE,
+#endif
+                },
+                {
+                        .base = __pa(_stext),
+                        .size = _etext - _stext,
+                        .flags = MEMBLOCK_NONE,
+#ifdef CONFIG_NUMA
+                        .nid = NUMA_NO_NODE,
+#endif
+                },
+                {
+                        .base = __pa(_sinittext),
+                        .size = _einittext - _sinittext,
+                        .flags = MEMBLOCK_NONE,
+#ifdef CONFIG_NUMA
+                        .nid = NUMA_NO_NODE,
+#endif
+                },
+                {
+                        .base = __stext_amode31,
+                        .size = __etext_amode31 - __stext_amode31,
+                        .flags = MEMBLOCK_NONE,
+#ifdef CONFIG_NUMA
+                        .nid = NUMA_NO_NODE,
+#endif
+                },
+        };
+        struct memblock_type memory_rwx = {
+                .regions = memory_rwx_regions,
+                .cnt = ARRAY_SIZE(memory_rwx_regions),
+                .max = ARRAY_SIZE(memory_rwx_regions),
+        };
         phys_addr_t base, end;
         u64 i;
 
-        for_each_mem_range(i, &base, &end)
-                vmem_add_range(base, end - base);
+        /*
+         * Set RW+NX attribute on all memory, except regions enumerated with
+         * memory_rwx exclude type. These regions need different attributes,
+         * which are enforced afterwards.
+         *
+         * __for_each_mem_range() iterate and exclude types should be sorted.
+         * The relative location of _stext and _sinittext is hardcoded in the
+         * linker script. However a location of __stext_amode31 and the kernel
+         * image itself are chosen dynamically. Thus, sort the exclude type.
+         */
+        sort(&memory_rwx_regions,
+             ARRAY_SIZE(memory_rwx_regions), sizeof(memory_rwx_regions[0]),
+             memblock_region_cmp, memblock_region_swap);
+        __for_each_mem_range(i, &memblock.memory, &memory_rwx,
+                             NUMA_NO_NODE, MEMBLOCK_NONE, &base, &end, NULL) {
+                __set_memory((unsigned long)__va(base),
+                             (end - base) >> PAGE_SHIFT,
+                             SET_MEMORY_RW | SET_MEMORY_NX);
+        }
+
         __set_memory((unsigned long)_stext,
                      (unsigned long)(_etext - _stext) >> PAGE_SHIFT,
                      SET_MEMORY_RO | SET_MEMORY_X);
@@ -678,15 +742,14 @@ void __init vmem_map_init(void)
         __set_memory((unsigned long)_sinittext,
                      (unsigned long)(_einittext - _sinittext) >> PAGE_SHIFT,
                      SET_MEMORY_RO | SET_MEMORY_X);
-        __set_memory(__stext_amode31, (__etext_amode31 - __stext_amode31) >> PAGE_SHIFT,
+        __set_memory(__stext_amode31,
+                     (__etext_amode31 - __stext_amode31) >> PAGE_SHIFT,
                      SET_MEMORY_RO | SET_MEMORY_X);
 
-        /* lowcore requires 4k mapping for real addresses / prefixing */
-        set_memory_4k(0, LC_PAGES);
-
-        /* lowcore must be executable for LPSWE */
-        if (!static_key_enabled(&cpu_has_bear))
-                set_memory_x(0, 1);
+        if (static_key_enabled(&cpu_has_bear))
+                set_memory_nx(0, 1);
+        set_memory_nx(PAGE_SIZE, 1);
 
         pr_info("Write protected kernel read-only data: %luk\n",
                 (unsigned long)(__end_rodata - _stext) >> 10);
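__for_each_mem_range() walks the iterate type and the exclude type in lockstep, which only works if both are sorted by base address; memblock keeps its own types sorted, but the on-stack exclude list above is built in declaration order, hence the sort() call (lib/sort.c, which takes a comparator plus an optional swap callback, exactly as the hunk shows). A standalone model of the same step follows, with qsort() standing in for the kernel's sort() and made-up region addresses.

    #include <stdio.h>
    #include <stdlib.h>

    struct region {
            unsigned long base;
            unsigned long size;
    };

    static int region_cmp(const void *a, const void *b)
    {
            const struct region *r1 = a, *r2 = b;

            return (r1->base > r2->base) - (r1->base < r2->base);
    }

    int main(void)
    {
            /* deliberately out of base order, like amode31 vs. kernel image */
            struct region regions[] = {
                    { 0x100000, 0x80000 },  /* stands in for kernel text */
                    { 0x0,      0x2000  },  /* stands in for the lowcore */
                    { 0x300000, 0x10000 },  /* stands in for the amode31 section */
            };
            size_t n = sizeof(regions) / sizeof(regions[0]);

            qsort(regions, n, sizeof(regions[0]), region_cmp);
            for (size_t i = 0; i < n; i++)
                    printf("base=%#lx size=%#lx\n",
                           regions[i].base, regions[i].size);
            return 0;
    }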