Diffstat (limited to 'arch/s390/mm')
 arch/s390/mm/dump_pagetables.c |  16
 arch/s390/mm/extable.c         |   9
 arch/s390/mm/fault.c           |  63
 arch/s390/mm/init.c            |  35
 arch/s390/mm/kasan_init.c      | 246
 arch/s390/mm/maccess.c         |  28
 arch/s390/mm/pgtable.c         |  25
 arch/s390/mm/vmem.c            | 103
 8 files changed, 250 insertions(+), 275 deletions(-)
diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c
index 9953819d7959..ba5f80268878 100644
--- a/arch/s390/mm/dump_pagetables.c
+++ b/arch/s390/mm/dump_pagetables.c
@@ -33,10 +33,6 @@ enum address_markers_idx {
#endif
IDENTITY_AFTER_NR,
IDENTITY_AFTER_END_NR,
-#ifdef CONFIG_KASAN
- KASAN_SHADOW_START_NR,
- KASAN_SHADOW_END_NR,
-#endif
VMEMMAP_NR,
VMEMMAP_END_NR,
VMALLOC_NR,
@@ -47,6 +43,10 @@ enum address_markers_idx {
ABS_LOWCORE_END_NR,
MEMCPY_REAL_NR,
MEMCPY_REAL_END_NR,
+#ifdef CONFIG_KASAN
+ KASAN_SHADOW_START_NR,
+ KASAN_SHADOW_END_NR,
+#endif
};
static struct addr_marker address_markers[] = {
@@ -62,10 +62,6 @@ static struct addr_marker address_markers[] = {
#endif
[IDENTITY_AFTER_NR] = {(unsigned long)_end, "Identity Mapping Start"},
[IDENTITY_AFTER_END_NR] = {0, "Identity Mapping End"},
-#ifdef CONFIG_KASAN
- [KASAN_SHADOW_START_NR] = {KASAN_SHADOW_START, "Kasan Shadow Start"},
- [KASAN_SHADOW_END_NR] = {KASAN_SHADOW_END, "Kasan Shadow End"},
-#endif
[VMEMMAP_NR] = {0, "vmemmap Area Start"},
[VMEMMAP_END_NR] = {0, "vmemmap Area End"},
[VMALLOC_NR] = {0, "vmalloc Area Start"},
@@ -76,6 +72,10 @@ static struct addr_marker address_markers[] = {
[ABS_LOWCORE_END_NR] = {0, "Lowcore Area End"},
[MEMCPY_REAL_NR] = {0, "Real Memory Copy Area Start"},
[MEMCPY_REAL_END_NR] = {0, "Real Memory Copy Area End"},
+#ifdef CONFIG_KASAN
+ [KASAN_SHADOW_START_NR] = {KASAN_SHADOW_START, "Kasan Shadow Start"},
+ [KASAN_SHADOW_END_NR] = {KASAN_SHADOW_END, "Kasan Shadow End"},
+#endif
{ -1, NULL }
};
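The two KASAN markers move to the end of the enum and the table because the dumper emits markers in ascending address order, and the shadow area now sits above the real-memory-copy area in the virtual address map. A minimal sketch of that consumption pattern (simplified; the kernel's actual note_page() logic carries more state):

	#include <stdio.h>

	struct addr_marker {
		unsigned long start_address;
		const char *name;
	};

	/* Emit every marker whose start address has been reached or passed. */
	static void note_address(const struct addr_marker **marker, unsigned long addr)
	{
		while ((*marker)->name && addr >= (*marker)->start_address) {
			printf("---[ %s ]---\n", (*marker)->name);
			(*marker)++;
		}
	}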
diff --git a/arch/s390/mm/extable.c b/arch/s390/mm/extable.c
index 1e4d2187541a..fe87291df95d 100644
--- a/arch/s390/mm/extable.c
+++ b/arch/s390/mm/extable.c
@@ -47,13 +47,16 @@ static bool ex_handler_ua_load_mem(const struct exception_table_entry *ex, struct pt_regs *regs)
return true;
}
-static bool ex_handler_ua_load_reg(const struct exception_table_entry *ex, struct pt_regs *regs)
+static bool ex_handler_ua_load_reg(const struct exception_table_entry *ex,
+ bool pair, struct pt_regs *regs)
{
unsigned int reg_zero = FIELD_GET(EX_DATA_REG_ADDR, ex->data);
unsigned int reg_err = FIELD_GET(EX_DATA_REG_ERR, ex->data);
regs->gprs[reg_err] = -EFAULT;
regs->gprs[reg_zero] = 0;
+ if (pair)
+ regs->gprs[reg_zero + 1] = 0;
regs->psw.addr = extable_fixup(ex);
return true;
}
@@ -75,7 +78,9 @@ bool fixup_exception(struct pt_regs *regs)
case EX_TYPE_UA_LOAD_MEM:
return ex_handler_ua_load_mem(ex, regs);
case EX_TYPE_UA_LOAD_REG:
- return ex_handler_ua_load_reg(ex, regs);
+ return ex_handler_ua_load_reg(ex, false, regs);
+ case EX_TYPE_UA_LOAD_REGPAIR:
+ return ex_handler_ua_load_reg(ex, true, regs);
}
panic("invalid exception table entry");
}
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 9649d9382e0a..a2632fd97d00 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -46,11 +46,15 @@
#define __SUBCODE_MASK 0x0600
#define __PF_RES_FIELD 0x8000000000000000ULL
-#define VM_FAULT_BADCONTEXT ((__force vm_fault_t) 0x010000)
-#define VM_FAULT_BADMAP ((__force vm_fault_t) 0x020000)
-#define VM_FAULT_BADACCESS ((__force vm_fault_t) 0x040000)
-#define VM_FAULT_SIGNAL ((__force vm_fault_t) 0x080000)
-#define VM_FAULT_PFAULT ((__force vm_fault_t) 0x100000)
+/*
+ * Allocate private vm_fault_reason from top. Please make sure it won't
+ * collide with vm_fault_reason.
+ */
+#define VM_FAULT_BADCONTEXT ((__force vm_fault_t)0x80000000)
+#define VM_FAULT_BADMAP ((__force vm_fault_t)0x40000000)
+#define VM_FAULT_BADACCESS ((__force vm_fault_t)0x20000000)
+#define VM_FAULT_SIGNAL ((__force vm_fault_t)0x10000000)
+#define VM_FAULT_PFAULT ((__force vm_fault_t)0x8000000)
enum fault_type {
KERNEL_FAULT,
@@ -96,6 +100,20 @@ static enum fault_type get_fault_type(struct pt_regs *regs)
return KERNEL_FAULT;
}
+static unsigned long get_fault_address(struct pt_regs *regs)
+{
+ unsigned long trans_exc_code = regs->int_parm_long;
+
+ return trans_exc_code & __FAIL_ADDR_MASK;
+}
+
+static bool fault_is_write(struct pt_regs *regs)
+{
+ unsigned long trans_exc_code = regs->int_parm_long;
+
+ return (trans_exc_code & store_indication) == 0x400;
+}
+
static int bad_address(void *p)
{
unsigned long dummy;
@@ -228,15 +246,26 @@ static noinline void do_sigsegv(struct pt_regs *regs, int si_code)
(void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK));
}
-static noinline void do_no_context(struct pt_regs *regs)
+static noinline void do_no_context(struct pt_regs *regs, vm_fault_t fault)
{
+ enum fault_type fault_type;
+ unsigned long address;
+ bool is_write;
+
if (fixup_exception(regs))
return;
+ fault_type = get_fault_type(regs);
+ if ((fault_type == KERNEL_FAULT) && (fault == VM_FAULT_BADCONTEXT)) {
+ address = get_fault_address(regs);
+ is_write = fault_is_write(regs);
+ if (kfence_handle_page_fault(address, is_write, regs))
+ return;
+ }
/*
* Oops. The kernel tried to access some bad page. We'll have to
* terminate things with extreme prejudice.
*/
- if (get_fault_type(regs) == KERNEL_FAULT)
+ if (fault_type == KERNEL_FAULT)
printk(KERN_ALERT "Unable to handle kernel pointer dereference"
" in virtual kernel address space\n");
else
@@ -255,7 +284,7 @@ static noinline void do_low_address(struct pt_regs *regs)
die (regs, "Low-address protection");
}
- do_no_context(regs);
+ do_no_context(regs, VM_FAULT_BADACCESS);
}
static noinline void do_sigbus(struct pt_regs *regs)
@@ -286,28 +315,28 @@ static noinline void do_fault_error(struct pt_regs *regs, vm_fault_t fault)
fallthrough;
case VM_FAULT_BADCONTEXT:
case VM_FAULT_PFAULT:
- do_no_context(regs);
+ do_no_context(regs, fault);
break;
case VM_FAULT_SIGNAL:
if (!user_mode(regs))
- do_no_context(regs);
+ do_no_context(regs, fault);
break;
default: /* fault & VM_FAULT_ERROR */
if (fault & VM_FAULT_OOM) {
if (!user_mode(regs))
- do_no_context(regs);
+ do_no_context(regs, fault);
else
pagefault_out_of_memory();
} else if (fault & VM_FAULT_SIGSEGV) {
/* Kernel mode? Handle exceptions or die */
if (!user_mode(regs))
- do_no_context(regs);
+ do_no_context(regs, fault);
else
do_sigsegv(regs, SEGV_MAPERR);
} else if (fault & VM_FAULT_SIGBUS) {
/* Kernel mode? Handle exceptions or die */
if (!user_mode(regs))
- do_no_context(regs);
+ do_no_context(regs, fault);
else
do_sigbus(regs);
} else
@@ -334,7 +363,6 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access)
struct mm_struct *mm;
struct vm_area_struct *vma;
enum fault_type type;
- unsigned long trans_exc_code;
unsigned long address;
unsigned int flags;
vm_fault_t fault;
@@ -351,9 +379,8 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access)
return 0;
mm = tsk->mm;
- trans_exc_code = regs->int_parm_long;
- address = trans_exc_code & __FAIL_ADDR_MASK;
- is_write = (trans_exc_code & store_indication) == 0x400;
+ address = get_fault_address(regs);
+ is_write = fault_is_write(regs);
/*
* Verify that the fault happened in user space, that
@@ -364,8 +391,6 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access)
type = get_fault_type(regs);
switch (type) {
case KERNEL_FAULT:
- if (kfence_handle_page_fault(address, is_write, regs))
- return 0;
goto out;
case USER_FAULT:
case GMAP_FAULT:
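The private fault codes are now allocated downward from bit 31 so they stay clear of the generic vm_fault_reason bits, which grow upward from bit 0; kfence handling moves from do_exception() into do_no_context(), where it only triggers for genuine kernel faults. A hedged compile-time collision check (VM_FAULT_GENERIC_MASK is a hypothetical name for the span of generic bits, assumed here to fit below bit 24):

	#define S390_VM_FAULT_BADCONTEXT 0x80000000u
	#define S390_VM_FAULT_BADMAP     0x40000000u
	#define S390_VM_FAULT_BADACCESS  0x20000000u
	#define S390_VM_FAULT_SIGNAL     0x10000000u
	#define S390_VM_FAULT_PFAULT     0x08000000u
	#define VM_FAULT_GENERIC_MASK    0x00ffffffu	/* assumption, see lead-in */

	_Static_assert(((S390_VM_FAULT_BADCONTEXT | S390_VM_FAULT_BADMAP |
			 S390_VM_FAULT_BADACCESS | S390_VM_FAULT_SIGNAL |
			 S390_VM_FAULT_PFAULT) & VM_FAULT_GENERIC_MASK) == 0,
		       "private fault codes collide with vm_fault_reason");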
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 30ab55f868f6..144447d5cb4c 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -52,9 +52,9 @@
#include <linux/virtio_config.h>
pgd_t swapper_pg_dir[PTRS_PER_PGD] __section(".bss..swapper_pg_dir");
-static pgd_t invalid_pg_dir[PTRS_PER_PGD] __section(".bss..invalid_pg_dir");
+pgd_t invalid_pg_dir[PTRS_PER_PGD] __section(".bss..invalid_pg_dir");
-unsigned long s390_invalid_asce;
+unsigned long __bootdata_preserved(s390_invalid_asce);
unsigned long empty_zero_page, zero_page_mask;
EXPORT_SYMBOL(empty_zero_page);
@@ -93,37 +93,8 @@ static void __init setup_zero_pages(void)
void __init paging_init(void)
{
unsigned long max_zone_pfns[MAX_NR_ZONES];
- unsigned long pgd_type, asce_bits;
- psw_t psw;
-
- s390_invalid_asce = (unsigned long)invalid_pg_dir;
- s390_invalid_asce |= _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
- crst_table_init((unsigned long *)invalid_pg_dir, _REGION3_ENTRY_EMPTY);
- init_mm.pgd = swapper_pg_dir;
- if (VMALLOC_END > _REGION2_SIZE) {
- asce_bits = _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH;
- pgd_type = _REGION2_ENTRY_EMPTY;
- } else {
- asce_bits = _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
- pgd_type = _REGION3_ENTRY_EMPTY;
- }
- init_mm.context.asce = (__pa(init_mm.pgd) & PAGE_MASK) | asce_bits;
- S390_lowcore.kernel_asce = init_mm.context.asce;
- S390_lowcore.user_asce = s390_invalid_asce;
- crst_table_init((unsigned long *) init_mm.pgd, pgd_type);
- vmem_map_init();
- kasan_copy_shadow_mapping();
-
- /* enable virtual mapping in kernel mode */
- __ctl_load(S390_lowcore.kernel_asce, 1, 1);
- __ctl_load(S390_lowcore.user_asce, 7, 7);
- __ctl_load(S390_lowcore.kernel_asce, 13, 13);
- psw.mask = __extract_psw();
- psw_bits(psw).dat = 1;
- psw_bits(psw).as = PSW_BITS_AS_HOME;
- __load_psw_mask(psw.mask);
- kasan_free_early_identity();
+ vmem_map_init();
sparse_init();
zone_dma_bits = 31;
memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
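paging_init() loses the ASCE setup and the DAT switch-on; the __bootdata_preserved() marker on s390_invalid_asce (and the newly exported invalid_pg_dir) suggests these are now prepared earlier in boot and handed over. For reference, the shape of an ASCE as composed by the removed line (__pa(init_mm.pgd) & PAGE_MASK) | asce_bits — a sketch of the removed logic, not the current implementation:

	/* Table origin in the high bits, designation-type and table-length
	 * bits in the low bits; ~0xfffUL stands in for PAGE_MASK (4K pages). */
	static unsigned long make_asce(unsigned long pgd_phys, unsigned long asce_bits)
	{
		return (pgd_phys & ~0xfffUL) | asce_bits;
	}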
diff --git a/arch/s390/mm/kasan_init.c b/arch/s390/mm/kasan_init.c
index 9f988d4582ed..ef89a5f26853 100644
--- a/arch/s390/mm/kasan_init.c
+++ b/arch/s390/mm/kasan_init.c
@@ -1,7 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/kasan.h>
#include <linux/sched/task.h>
-#include <linux/memblock.h>
#include <linux/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/kasan.h>
@@ -15,16 +14,11 @@
static unsigned long segment_pos __initdata;
static unsigned long segment_low __initdata;
-static unsigned long pgalloc_pos __initdata;
-static unsigned long pgalloc_low __initdata;
-static unsigned long pgalloc_freeable __initdata;
static bool has_edat __initdata;
static bool has_nx __initdata;
#define __sha(x) ((unsigned long)kasan_mem_to_shadow((void *)x))
-static pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE);
-
static void __init kasan_early_panic(const char *reason)
{
sclp_early_printk("The Linux kernel failed to boot with the KernelAddressSanitizer:\n");
@@ -39,7 +33,7 @@ static void * __init kasan_early_alloc_segment(void)
if (segment_pos < segment_low)
kasan_early_panic("out of memory during initialisation\n");
- return (void *)segment_pos;
+ return __va(segment_pos);
}
static void * __init kasan_early_alloc_pages(unsigned int order)
@@ -49,7 +43,7 @@ static void * __init kasan_early_alloc_pages(unsigned int order)
if (pgalloc_pos < pgalloc_low)
kasan_early_panic("out of memory during initialisation\n");
- return (void *)pgalloc_pos;
+ return __va(pgalloc_pos);
}
static void * __init kasan_early_crst_alloc(unsigned long val)
@@ -81,35 +75,37 @@ static pte_t * __init kasan_early_pte_alloc(void)
}
enum populate_mode {
- POPULATE_ONE2ONE,
POPULATE_MAP,
POPULATE_ZERO_SHADOW,
POPULATE_SHALLOW
};
+
+static inline pgprot_t pgprot_clear_bit(pgprot_t pgprot, unsigned long bit)
+{
+ return __pgprot(pgprot_val(pgprot) & ~bit);
+}
+
static void __init kasan_early_pgtable_populate(unsigned long address,
unsigned long end,
enum populate_mode mode)
{
- unsigned long pgt_prot_zero, pgt_prot, sgt_prot;
+ pgprot_t pgt_prot_zero = PAGE_KERNEL_RO;
+ pgprot_t pgt_prot = PAGE_KERNEL;
+ pgprot_t sgt_prot = SEGMENT_KERNEL;
pgd_t *pg_dir;
p4d_t *p4_dir;
pud_t *pu_dir;
pmd_t *pm_dir;
pte_t *pt_dir;
+ pmd_t pmd;
+ pte_t pte;
- pgt_prot_zero = pgprot_val(PAGE_KERNEL_RO);
- if (!has_nx)
- pgt_prot_zero &= ~_PAGE_NOEXEC;
- pgt_prot = pgprot_val(PAGE_KERNEL);
- sgt_prot = pgprot_val(SEGMENT_KERNEL);
- if (!has_nx || mode == POPULATE_ONE2ONE) {
- pgt_prot &= ~_PAGE_NOEXEC;
- sgt_prot &= ~_SEGMENT_ENTRY_NOEXEC;
+ if (!has_nx) {
+ pgt_prot_zero = pgprot_clear_bit(pgt_prot_zero, _PAGE_NOEXEC);
+ pgt_prot = pgprot_clear_bit(pgt_prot, _PAGE_NOEXEC);
+ sgt_prot = pgprot_clear_bit(sgt_prot, _SEGMENT_ENTRY_NOEXEC);
}
- /*
- * The first 1MB of 1:1 mapping is mapped with 4KB pages
- */
while (address < end) {
pg_dir = pgd_offset_k(address);
if (pgd_none(*pg_dir)) {
@@ -166,16 +162,13 @@ static void __init kasan_early_pgtable_populate(unsigned long address,
pmd_populate(&init_mm, pm_dir, kasan_early_shadow_pte);
address = (address + PMD_SIZE) & PMD_MASK;
continue;
- } else if (has_edat && address) {
- void *page;
-
- if (mode == POPULATE_ONE2ONE) {
- page = (void *)address;
- } else {
- page = kasan_early_alloc_segment();
- memset(page, 0, _SEGMENT_SIZE);
- }
- set_pmd(pm_dir, __pmd(__pa(page) | sgt_prot));
+ } else if (has_edat) {
+ void *page = kasan_early_alloc_segment();
+
+ memset(page, 0, _SEGMENT_SIZE);
+ pmd = __pmd(__pa(page));
+ pmd = set_pmd_bit(pmd, sgt_prot);
+ set_pmd(pm_dir, pmd);
address = (address + PMD_SIZE) & PMD_MASK;
continue;
}
@@ -192,18 +185,18 @@ static void __init kasan_early_pgtable_populate(unsigned long address,
void *page;
switch (mode) {
- case POPULATE_ONE2ONE:
- page = (void *)address;
- set_pte(pt_dir, __pte(__pa(page) | pgt_prot));
- break;
case POPULATE_MAP:
page = kasan_early_alloc_pages(0);
memset(page, 0, PAGE_SIZE);
- set_pte(pt_dir, __pte(__pa(page) | pgt_prot));
+ pte = __pte(__pa(page));
+ pte = set_pte_bit(pte, pgt_prot);
+ set_pte(pt_dir, pte);
break;
case POPULATE_ZERO_SHADOW:
page = kasan_early_shadow_page;
- set_pte(pt_dir, __pte(__pa(page) | pgt_prot_zero));
+ pte = __pte(__pa(page));
+ pte = set_pte_bit(pte, pgt_prot_zero);
+ set_pte(pt_dir, pte);
break;
case POPULATE_SHALLOW:
/* should never happen */
@@ -214,29 +207,6 @@ static void __init kasan_early_pgtable_populate(unsigned long address,
}
}
-static void __init kasan_set_pgd(pgd_t *pgd, unsigned long asce_type)
-{
- unsigned long asce_bits;
-
- asce_bits = asce_type | _ASCE_TABLE_LENGTH;
- S390_lowcore.kernel_asce = (__pa(pgd) & PAGE_MASK) | asce_bits;
- S390_lowcore.user_asce = S390_lowcore.kernel_asce;
-
- __ctl_load(S390_lowcore.kernel_asce, 1, 1);
- __ctl_load(S390_lowcore.kernel_asce, 7, 7);
- __ctl_load(S390_lowcore.kernel_asce, 13, 13);
-}
-
-static void __init kasan_enable_dat(void)
-{
- psw_t psw;
-
- psw.mask = __extract_psw();
- psw_bits(psw).dat = 1;
- psw_bits(psw).as = PSW_BITS_AS_HOME;
- __load_psw_mask(psw.mask);
-}
-
static void __init kasan_early_detect_facilities(void)
{
if (test_facility(8)) {
@@ -251,153 +221,81 @@ static void __init kasan_early_detect_facilities(void)
void __init kasan_early_init(void)
{
- unsigned long shadow_alloc_size;
- unsigned long initrd_end;
- unsigned long memsize;
- unsigned long pgt_prot = pgprot_val(PAGE_KERNEL_RO);
- pte_t pte_z;
+ pte_t pte_z = __pte(__pa(kasan_early_shadow_page) | pgprot_val(PAGE_KERNEL_RO));
pmd_t pmd_z = __pmd(__pa(kasan_early_shadow_pte) | _SEGMENT_ENTRY);
pud_t pud_z = __pud(__pa(kasan_early_shadow_pmd) | _REGION3_ENTRY);
p4d_t p4d_z = __p4d(__pa(kasan_early_shadow_pud) | _REGION2_ENTRY);
+ unsigned long untracked_end = MODULES_VADDR;
+ unsigned long shadow_alloc_size;
+ unsigned long start, end;
+ int i;
kasan_early_detect_facilities();
if (!has_nx)
- pgt_prot &= ~_PAGE_NOEXEC;
- pte_z = __pte(__pa(kasan_early_shadow_page) | pgt_prot);
-
- memsize = get_mem_detect_end();
- if (!memsize)
- kasan_early_panic("cannot detect physical memory size\n");
- /*
- * Kasan currently supports standby memory but only if it follows
- * online memory (default allocation), i.e. no memory holes.
- * - memsize represents end of online memory
- * - ident_map_size represents online + standby and memory limits
- * accounted.
- * Kasan maps "memsize" right away.
- * [0, memsize] - as identity mapping
- * [__sha(0), __sha(memsize)] - shadow memory for identity mapping
- * The rest [memsize, ident_map_size] if memsize < ident_map_size
- * could be mapped/unmapped dynamically later during memory hotplug.
- */
- memsize = min(memsize, ident_map_size);
+ pte_z = clear_pte_bit(pte_z, __pgprot(_PAGE_NOEXEC));
BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, P4D_SIZE));
BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, P4D_SIZE));
- crst_table_init((unsigned long *)early_pg_dir, _REGION2_ENTRY_EMPTY);
/* init kasan zero shadow */
- crst_table_init((unsigned long *)kasan_early_shadow_p4d,
- p4d_val(p4d_z));
- crst_table_init((unsigned long *)kasan_early_shadow_pud,
- pud_val(pud_z));
- crst_table_init((unsigned long *)kasan_early_shadow_pmd,
- pmd_val(pmd_z));
+ crst_table_init((unsigned long *)kasan_early_shadow_p4d, p4d_val(p4d_z));
+ crst_table_init((unsigned long *)kasan_early_shadow_pud, pud_val(pud_z));
+ crst_table_init((unsigned long *)kasan_early_shadow_pmd, pmd_val(pmd_z));
memset64((u64 *)kasan_early_shadow_pte, pte_val(pte_z), PTRS_PER_PTE);
- shadow_alloc_size = memsize >> KASAN_SHADOW_SCALE_SHIFT;
- pgalloc_low = round_up((unsigned long)_end, _SEGMENT_SIZE);
- if (IS_ENABLED(CONFIG_BLK_DEV_INITRD)) {
- initrd_end =
- round_up(initrd_data.start + initrd_data.size, _SEGMENT_SIZE);
- pgalloc_low = max(pgalloc_low, initrd_end);
- }
-
- if (pgalloc_low + shadow_alloc_size > memsize)
- kasan_early_panic("out of memory during initialisation\n");
-
if (has_edat) {
- segment_pos = round_down(memsize, _SEGMENT_SIZE);
+ shadow_alloc_size = get_mem_detect_usable_total() >> KASAN_SHADOW_SCALE_SHIFT;
+ segment_pos = round_down(pgalloc_pos, _SEGMENT_SIZE);
segment_low = segment_pos - shadow_alloc_size;
+ segment_low = round_down(segment_low, _SEGMENT_SIZE);
pgalloc_pos = segment_low;
- } else {
- pgalloc_pos = memsize;
}
- init_mm.pgd = early_pg_dir;
/*
* Current memory layout:
- * +- 0 -------------+ +- shadow start -+
- * | 1:1 ram mapping | /| 1/8 ram |
- * | | / | |
- * +- end of ram ----+ / +----------------+
- * | ... gap ... | / | |
- * | |/ | kasan |
- * +- shadow start --+ | zero |
- * | 1/8 addr space | | page |
- * +- shadow end -+ | mapping |
- * | ... gap ... |\ | (untracked) |
- * +- vmalloc area -+ \ | |
- * | vmalloc_size | \ | |
- * +- modules vaddr -+ \ +----------------+
- * | 2Gb | \| unmapped | allocated per module
- * +-----------------+ +- shadow end ---+
+ * +- 0 -------------+ +- shadow start -+
+ * |1:1 ident mapping| /|1/8 of ident map|
+ * | | / | |
+ * +-end of ident map+ / +----------------+
+ * | ... gap ... | / | kasan |
+ * | | / | zero page |
+ * +- vmalloc area -+ / | mapping |
+ * | vmalloc_size | / | (untracked) |
+ * +- modules vaddr -+ / +----------------+
+ * | 2Gb |/ | unmapped | allocated per module
+ * +- shadow start -+ +----------------+
+ * | 1/8 addr space | | zero pg mapping| (untracked)
+ * +- shadow end ----+---------+- shadow end ---+
*
* Current memory layout (KASAN_VMALLOC):
- * +- 0 -------------+ +- shadow start -+
- * | 1:1 ram mapping | /| 1/8 ram |
- * | | / | |
- * +- end of ram ----+ / +----------------+
- * | ... gap ... | / | kasan |
- * | |/ | zero |
- * +- shadow start --+ | page |
- * | 1/8 addr space | | mapping |
- * +- shadow end -+ | (untracked) |
- * | ... gap ... |\ | |
- * +- vmalloc area -+ \ +- vmalloc area -+
- * | vmalloc_size | \ |shallow populate|
- * +- modules vaddr -+ \ +- modules area -+
- * | 2Gb | \|shallow populate|
- * +-----------------+ +- shadow end ---+
+ * +- 0 -------------+ +- shadow start -+
+ * |1:1 ident mapping| /|1/8 of ident map|
+ * | | / | |
+ * +-end of ident map+ / +----------------+
+ * | ... gap ... | / | kasan zero page| (untracked)
+ * | | / | mapping |
+ * +- vmalloc area -+ / +----------------+
+ * | vmalloc_size | / |shallow populate|
+ * +- modules vaddr -+ / +----------------+
+ * | 2Gb |/ |shallow populate|
+ * +- shadow start -+ +----------------+
+ * | 1/8 addr space | | zero pg mapping| (untracked)
+ * +- shadow end ----+---------+- shadow end ---+
*/
/* populate kasan shadow (for identity mapping and zero page mapping) */
- kasan_early_pgtable_populate(__sha(0), __sha(memsize), POPULATE_MAP);
+ for_each_mem_detect_usable_block(i, &start, &end)
+ kasan_early_pgtable_populate(__sha(start), __sha(end), POPULATE_MAP);
if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) {
+ untracked_end = VMALLOC_START;
/* shallowly populate kasan shadow for vmalloc and modules */
kasan_early_pgtable_populate(__sha(VMALLOC_START), __sha(MODULES_END),
POPULATE_SHALLOW);
}
/* populate kasan shadow for untracked memory */
- kasan_early_pgtable_populate(__sha(ident_map_size),
- IS_ENABLED(CONFIG_KASAN_VMALLOC) ?
- __sha(VMALLOC_START) :
- __sha(MODULES_VADDR),
+ kasan_early_pgtable_populate(__sha(ident_map_size), __sha(untracked_end),
POPULATE_ZERO_SHADOW);
kasan_early_pgtable_populate(__sha(MODULES_END), __sha(_REGION1_SIZE),
POPULATE_ZERO_SHADOW);
- /* memory allocated for identity mapping structs will be freed later */
- pgalloc_freeable = pgalloc_pos;
- /* populate identity mapping */
- kasan_early_pgtable_populate(0, memsize, POPULATE_ONE2ONE);
- kasan_set_pgd(early_pg_dir, _ASCE_TYPE_REGION2);
- kasan_enable_dat();
/* enable kasan */
init_task.kasan_depth = 0;
- memblock_reserve(pgalloc_pos, memsize - pgalloc_pos);
sclp_early_printk("KernelAddressSanitizer initialized\n");
}
-
-void __init kasan_copy_shadow_mapping(void)
-{
- /*
- * At this point we are still running on early pages setup early_pg_dir,
- * while swapper_pg_dir has just been initialized with identity mapping.
- * Carry over shadow memory region from early_pg_dir to swapper_pg_dir.
- */
-
- pgd_t *pg_dir_src;
- pgd_t *pg_dir_dst;
- p4d_t *p4_dir_src;
- p4d_t *p4_dir_dst;
-
- pg_dir_src = pgd_offset_raw(early_pg_dir, KASAN_SHADOW_START);
- pg_dir_dst = pgd_offset_raw(init_mm.pgd, KASAN_SHADOW_START);
- p4_dir_src = p4d_offset(pg_dir_src, KASAN_SHADOW_START);
- p4_dir_dst = p4d_offset(pg_dir_dst, KASAN_SHADOW_START);
- memcpy(p4_dir_dst, p4_dir_src,
- (KASAN_SHADOW_SIZE >> P4D_SHIFT) * sizeof(p4d_t));
-}
-
-void __init kasan_free_early_identity(void)
-{
- memblock_phys_free(pgalloc_pos, pgalloc_freeable - pgalloc_pos);
-}
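With generic KASAN, one shadow byte covers eight bytes of memory, so the allocator reserves exactly 1/8 of the usable memory for shadow; that is the shadow_alloc_size = ... >> KASAN_SHADOW_SCALE_SHIFT computation above. A worked sketch:

	#define KASAN_SHADOW_SCALE_SHIFT 3	/* generic KASAN: 1 shadow byte per 8 bytes */

	static unsigned long shadow_size(unsigned long usable_bytes)
	{
		/* e.g. 16 GiB of usable memory -> 2 GiB of shadow */
		return usable_bytes >> KASAN_SHADOW_SCALE_SHIFT;
	}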
diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c
index 4824d1cd33d8..d02a61620cfa 100644
--- a/arch/s390/mm/maccess.c
+++ b/arch/s390/mm/maccess.c
@@ -21,7 +21,7 @@
#include <asm/maccess.h>
unsigned long __bootdata_preserved(__memcpy_real_area);
-static __ro_after_init pte_t *memcpy_real_ptep;
+pte_t *__bootdata_preserved(memcpy_real_ptep);
static DEFINE_MUTEX(memcpy_real_mutex);
static notrace long s390_kernel_write_odd(void *dst, const void *src, size_t size)
@@ -68,28 +68,17 @@ notrace void *s390_kernel_write(void *dst, const void *src, size_t size)
long copied;
spin_lock_irqsave(&s390_kernel_write_lock, flags);
- if (!(flags & PSW_MASK_DAT)) {
- memcpy(dst, src, size);
- } else {
- while (size) {
- copied = s390_kernel_write_odd(tmp, src, size);
- tmp += copied;
- src += copied;
- size -= copied;
- }
+ while (size) {
+ copied = s390_kernel_write_odd(tmp, src, size);
+ tmp += copied;
+ src += copied;
+ size -= copied;
}
spin_unlock_irqrestore(&s390_kernel_write_lock, flags);
return dst;
}
-void __init memcpy_real_init(void)
-{
- memcpy_real_ptep = vmem_get_alloc_pte(__memcpy_real_area, true);
- if (!memcpy_real_ptep)
- panic("Couldn't setup memcpy real area");
-}
-
size_t memcpy_real_iter(struct iov_iter *iter, unsigned long src, size_t count)
{
size_t len, copied, res = 0;
@@ -162,7 +151,6 @@ void *xlate_dev_mem_ptr(phys_addr_t addr)
void *ptr = phys_to_virt(addr);
void *bounce = ptr;
struct lowcore *abs_lc;
- unsigned long flags;
unsigned long size;
int this_cpu, cpu;
@@ -178,10 +166,10 @@ void *xlate_dev_mem_ptr(phys_addr_t addr)
goto out;
size = PAGE_SIZE - (addr & ~PAGE_MASK);
if (addr < sizeof(struct lowcore)) {
- abs_lc = get_abs_lowcore(&flags);
+ abs_lc = get_abs_lowcore();
ptr = (void *)abs_lc + addr;
memcpy(bounce, ptr, size);
- put_abs_lowcore(abs_lc, flags);
+ put_abs_lowcore(abs_lc);
} else if (cpu == this_cpu) {
ptr = (void *)(addr - virt_to_phys(lowcore_ptr[cpu]));
memcpy(bounce, ptr, size);
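xlate_dev_mem_ptr() bounces at most one page fragment per call, sized up to the next page boundary: size = PAGE_SIZE - (addr & ~PAGE_MASK). A small worked sketch, assuming 4 KiB pages:

	#define MY_PAGE_SIZE 4096UL
	#define MY_PAGE_MASK (~(MY_PAGE_SIZE - 1))

	/* Bytes left in the page containing addr; e.g. addr = 0x1ffc -> 4. */
	static unsigned long bytes_to_page_end(unsigned long addr)
	{
		return MY_PAGE_SIZE - (addr & ~MY_PAGE_MASK);
	}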
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 4909dcd762e8..6effb24de6d9 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -302,6 +302,31 @@ pte_t ptep_xchg_direct(struct mm_struct *mm, unsigned long addr,
}
EXPORT_SYMBOL(ptep_xchg_direct);
+/*
+ * Caller must check that new PTE only differs in _PAGE_PROTECT HW bit, so that
+ * RDP can be used instead of IPTE. See also comments at pte_allow_rdp().
+ */
+void ptep_reset_dat_prot(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
+ pte_t new)
+{
+ preempt_disable();
+ atomic_inc(&mm->context.flush_count);
+ if (cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
+ __ptep_rdp(addr, ptep, 0, 0, 1);
+ else
+ __ptep_rdp(addr, ptep, 0, 0, 0);
+ /*
+ * PTE is not invalidated by RDP, only _PAGE_PROTECT is cleared. That
+ * means it is still valid and active, and must not be changed according
+ * to the architecture. But writing a new value that only differs in SW
+ * bits is allowed.
+ */
+ set_pte(ptep, new);
+ atomic_dec(&mm->context.flush_count);
+ preempt_enable();
+}
+EXPORT_SYMBOL(ptep_reset_dat_prot);
+
pte_t ptep_xchg_lazy(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t new)
{
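A plausible call pattern for the new helper, as the comment at pte_allow_rdp() implies: use RDP only when old and new PTE differ in nothing but the hardware _PAGE_PROTECT bit, and otherwise fall back to a full invalidate-and-set. A kernel-context sketch (the pte_allow_rdp() signature is an assumption, not taken from this patch):

	static void update_prot(struct mm_struct *mm, unsigned long addr,
				pte_t *ptep, pte_t old, pte_t new)
	{
		if (pte_allow_rdp(old, new))
			ptep_reset_dat_prot(mm, addr, ptep, new); /* no IPTE needed */
		else
			ptep_xchg_direct(mm, addr, ptep, new);    /* invalidate + set */
	}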
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index ee1a97078527..4113a7ffa149 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -11,6 +11,7 @@
#include <linux/list.h>
#include <linux/hugetlb.h>
#include <linux/slab.h>
+#include <linux/sort.h>
#include <asm/cacheflush.h>
#include <asm/nospec-branch.h>
#include <asm/pgalloc.h>
@@ -296,10 +297,7 @@ static void try_free_pmd_table(pud_t *pud, unsigned long start)
/* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
if (end > VMALLOC_START)
return;
-#ifdef CONFIG_KASAN
- if (start < KASAN_SHADOW_END && KASAN_SHADOW_START > end)
- return;
-#endif
+
pmd = pmd_offset(pud, start);
for (i = 0; i < PTRS_PER_PMD; i++, pmd++)
if (!pmd_none(*pmd))
@@ -371,10 +369,6 @@ static void try_free_pud_table(p4d_t *p4d, unsigned long start)
/* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
if (end > VMALLOC_START)
return;
-#ifdef CONFIG_KASAN
- if (start < KASAN_SHADOW_END && KASAN_SHADOW_START > end)
- return;
-#endif
pud = pud_offset(p4d, start);
for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
@@ -425,10 +419,6 @@ static void try_free_p4d_table(pgd_t *pgd, unsigned long start)
/* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
if (end > VMALLOC_START)
return;
-#ifdef CONFIG_KASAN
- if (start < KASAN_SHADOW_END && KASAN_SHADOW_START > end)
- return;
-#endif
p4d = p4d_offset(pgd, start);
for (i = 0; i < PTRS_PER_P4D; i++, p4d++) {
@@ -657,6 +647,23 @@ void vmem_unmap_4k_page(unsigned long addr)
mutex_unlock(&vmem_mutex);
}
+static int __init memblock_region_cmp(const void *a, const void *b)
+{
+ const struct memblock_region *r1 = a;
+ const struct memblock_region *r2 = b;
+
+ if (r1->base < r2->base)
+ return -1;
+ if (r1->base > r2->base)
+ return 1;
+ return 0;
+}
+
+static void __init memblock_region_swap(void *a, void *b, int size)
+{
+ swap(*(struct memblock_region *)a, *(struct memblock_region *)b);
+}
+
/*
* map whole physical memory to virtual memory (identity mapping)
* we reserve enough space in the vmalloc area for vmemmap to hotplug
@@ -664,11 +671,68 @@ void vmem_unmap_4k_page(unsigned long addr)
*/
void __init vmem_map_init(void)
{
+ struct memblock_region memory_rwx_regions[] = {
+ {
+ .base = 0,
+ .size = sizeof(struct lowcore),
+ .flags = MEMBLOCK_NONE,
+#ifdef CONFIG_NUMA
+ .nid = NUMA_NO_NODE,
+#endif
+ },
+ {
+ .base = __pa(_stext),
+ .size = _etext - _stext,
+ .flags = MEMBLOCK_NONE,
+#ifdef CONFIG_NUMA
+ .nid = NUMA_NO_NODE,
+#endif
+ },
+ {
+ .base = __pa(_sinittext),
+ .size = _einittext - _sinittext,
+ .flags = MEMBLOCK_NONE,
+#ifdef CONFIG_NUMA
+ .nid = NUMA_NO_NODE,
+#endif
+ },
+ {
+ .base = __stext_amode31,
+ .size = __etext_amode31 - __stext_amode31,
+ .flags = MEMBLOCK_NONE,
+#ifdef CONFIG_NUMA
+ .nid = NUMA_NO_NODE,
+#endif
+ },
+ };
+ struct memblock_type memory_rwx = {
+ .regions = memory_rwx_regions,
+ .cnt = ARRAY_SIZE(memory_rwx_regions),
+ .max = ARRAY_SIZE(memory_rwx_regions),
+ };
phys_addr_t base, end;
u64 i;
- for_each_mem_range(i, &base, &end)
- vmem_add_range(base, end - base);
+ /*
+ * Set RW+NX attribute on all memory, except regions enumerated with
+ * memory_rwx exclude type. These regions need different attributes,
+ * which are enforced afterwards.
+ *
+ * __for_each_mem_range() iterate and exclude types should be sorted.
+ * The relative location of _stext and _sinittext is hardcoded in the
+ * linker script. However a location of __stext_amode31 and the kernel
+ * image itself are chosen dynamically. Thus, sort the exclude type.
+ */
+ sort(&memory_rwx_regions,
+ ARRAY_SIZE(memory_rwx_regions), sizeof(memory_rwx_regions[0]),
+ memblock_region_cmp, memblock_region_swap);
+ __for_each_mem_range(i, &memblock.memory, &memory_rwx,
+ NUMA_NO_NODE, MEMBLOCK_NONE, &base, &end, NULL) {
+ __set_memory((unsigned long)__va(base),
+ (end - base) >> PAGE_SHIFT,
+ SET_MEMORY_RW | SET_MEMORY_NX);
+ }
+
__set_memory((unsigned long)_stext,
(unsigned long)(_etext - _stext) >> PAGE_SHIFT,
SET_MEMORY_RO | SET_MEMORY_X);
@@ -678,15 +742,14 @@ void __init vmem_map_init(void)
__set_memory((unsigned long)_sinittext,
(unsigned long)(_einittext - _sinittext) >> PAGE_SHIFT,
SET_MEMORY_RO | SET_MEMORY_X);
- __set_memory(__stext_amode31, (__etext_amode31 - __stext_amode31) >> PAGE_SHIFT,
+ __set_memory(__stext_amode31,
+ (__etext_amode31 - __stext_amode31) >> PAGE_SHIFT,
SET_MEMORY_RO | SET_MEMORY_X);
- /* lowcore requires 4k mapping for real addresses / prefixing */
- set_memory_4k(0, LC_PAGES);
-
/* lowcore must be executable for LPSWE */
- if (!static_key_enabled(&cpu_has_bear))
- set_memory_x(0, 1);
+ if (static_key_enabled(&cpu_has_bear))
+ set_memory_nx(0, 1);
+ set_memory_nx(PAGE_SIZE, 1);
pr_info("Write protected kernel read-only data: %luk\n",
(unsigned long)(__end_rodata - _stext) >> 10);
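The rewritten vmem_map_init() inverts the old flow: instead of leaving memory writable and protecting text afterwards, it first forces RW+NX onto everything outside the exclude list, then re-applies RO+X to the text regions. A standalone sketch of the sort-then-walk-the-complement idea (plain arrays instead of the memblock API; assumes non-overlapping regions):

	#include <stdlib.h>

	struct region { unsigned long base, size; };

	static int region_cmp(const void *a, const void *b)
	{
		const struct region *r1 = a, *r2 = b;

		return (r1->base > r2->base) - (r1->base < r2->base);
	}

	/* Call cb() on every part of [start, end) not covered by excl[]. */
	static void for_each_gap(unsigned long start, unsigned long end,
				 struct region *excl, int n,
				 void (*cb)(unsigned long, unsigned long))
	{
		qsort(excl, n, sizeof(*excl), region_cmp);
		for (int i = 0; i < n; i++) {
			if (start < excl[i].base)
				cb(start, excl[i].base);
			start = excl[i].base + excl[i].size;
		}
		if (start < end)
			cb(start, end);
	}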