summaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/kasan/common.c233
-rw-r--r--mm/kasan/generic_report.c3
-rw-r--r--mm/kasan/kasan.h1
-rw-r--r--mm/vmalloc.c56
4 files changed, 285 insertions, 8 deletions
diff --git a/mm/kasan/common.c b/mm/kasan/common.c
index 6814d6d6a023..df3371d5c572 100644
--- a/mm/kasan/common.c
+++ b/mm/kasan/common.c
@@ -36,6 +36,8 @@
#include <linux/bug.h>
#include <linux/uaccess.h>
+#include <asm/tlbflush.h>
+
#include "kasan.h"
#include "../slab.h"
@@ -590,6 +592,7 @@ void kasan_kfree_large(void *ptr, unsigned long ip)
/* The object will be poisoned by page_alloc. */
}
+#ifndef CONFIG_KASAN_VMALLOC
int kasan_module_alloc(void *addr, size_t size)
{
void *ret;
@@ -625,6 +628,7 @@ void kasan_free_shadow(const struct vm_struct *vm)
if (vm->flags & VM_KASAN)
vfree(kasan_mem_to_shadow(vm->addr));
}
+#endif
extern void __kasan_report(unsigned long addr, size_t size, bool is_write, unsigned long ip);
@@ -744,3 +748,232 @@ static int __init kasan_memhotplug_init(void)
core_initcall(kasan_memhotplug_init);
#endif
+
+#ifdef CONFIG_KASAN_VMALLOC
+static int kasan_populate_vmalloc_pte(pte_t *ptep, unsigned long addr,
+ void *unused)
+{
+ unsigned long page;
+ pte_t pte;
+
+ if (likely(!pte_none(*ptep)))
+ return 0;
+
+ page = __get_free_page(GFP_KERNEL);
+ if (!page)
+ return -ENOMEM;
+
+ memset((void *)page, KASAN_VMALLOC_INVALID, PAGE_SIZE);
+ pte = pfn_pte(PFN_DOWN(__pa(page)), PAGE_KERNEL);
+
+ spin_lock(&init_mm.page_table_lock);
+ if (likely(pte_none(*ptep))) {
+ set_pte_at(&init_mm, addr, ptep, pte);
+ page = 0;
+ }
+ spin_unlock(&init_mm.page_table_lock);
+ if (page)
+ free_page(page);
+ return 0;
+}
+
+int kasan_populate_vmalloc(unsigned long requested_size, struct vm_struct *area)
+{
+ unsigned long shadow_start, shadow_end;
+ int ret;
+
+ shadow_start = (unsigned long)kasan_mem_to_shadow(area->addr);
+ shadow_start = ALIGN_DOWN(shadow_start, PAGE_SIZE);
+ shadow_end = (unsigned long)kasan_mem_to_shadow(area->addr +
+ area->size);
+ shadow_end = ALIGN(shadow_end, PAGE_SIZE);
+
+ ret = apply_to_page_range(&init_mm, shadow_start,
+ shadow_end - shadow_start,
+ kasan_populate_vmalloc_pte, NULL);
+ if (ret)
+ return ret;
+
+ flush_cache_vmap(shadow_start, shadow_end);
+
+ kasan_unpoison_shadow(area->addr, requested_size);
+
+ area->flags |= VM_KASAN;
+
+ /*
+ * We need to be careful about inter-cpu effects here. Consider:
+ *
+ * CPU#0 CPU#1
+ * WRITE_ONCE(p, vmalloc(100)); while (x = READ_ONCE(p)) ;
+ * p[99] = 1;
+ *
+ * With compiler instrumentation, that ends up looking like this:
+ *
+ * CPU#0 CPU#1
+ * // vmalloc() allocates memory
+ * // let a = area->addr
+ * // we reach kasan_populate_vmalloc
+ * // and call kasan_unpoison_shadow:
+ * STORE shadow(a), unpoison_val
+ * ...
+ * STORE shadow(a+99), unpoison_val x = LOAD p
+ * // rest of vmalloc process <data dependency>
+ * STORE p, a LOAD shadow(x+99)
+ *
+ * If there is no barrier between the end of unpoisioning the shadow
+ * and the store of the result to p, the stores could be committed
+ * in a different order by CPU#0, and CPU#1 could erroneously observe
+ * poison in the shadow.
+ *
+ * We need some sort of barrier between the stores.
+ *
+ * In the vmalloc() case, this is provided by a smp_wmb() in
+ * clear_vm_uninitialized_flag(). In the per-cpu allocator and in
+ * get_vm_area() and friends, the caller gets shadow allocated but
+ * doesn't have any pages mapped into the virtual address space that
+ * has been reserved. Mapping those pages in will involve taking and
+ * releasing a page-table lock, which will provide the barrier.
+ */
+
+ return 0;
+}
+
+/*
+ * Poison the shadow for a vmalloc region. Called as part of the
+ * freeing process at the time the region is freed.
+ */
+void kasan_poison_vmalloc(void *start, unsigned long size)
+{
+ size = round_up(size, KASAN_SHADOW_SCALE_SIZE);
+ kasan_poison_shadow(start, size, KASAN_VMALLOC_INVALID);
+}
+
+static int kasan_depopulate_vmalloc_pte(pte_t *ptep, unsigned long addr,
+ void *unused)
+{
+ unsigned long page;
+
+ page = (unsigned long)__va(pte_pfn(*ptep) << PAGE_SHIFT);
+
+ spin_lock(&init_mm.page_table_lock);
+
+ if (likely(!pte_none(*ptep))) {
+ pte_clear(&init_mm, addr, ptep);
+ free_page(page);
+ }
+ spin_unlock(&init_mm.page_table_lock);
+
+ return 0;
+}
+
+/*
+ * Release the backing for the vmalloc region [start, end), which
+ * lies within the free region [free_region_start, free_region_end).
+ *
+ * This can be run lazily, long after the region was freed. It runs
+ * under vmap_area_lock, so it's not safe to interact with the vmalloc/vmap
+ * infrastructure.
+ *
+ * How does this work?
+ * -------------------
+ *
+ * We have a region that is page aligned, labelled as A.
+ * That might not map onto the shadow in a way that is page-aligned:
+ *
+ * start end
+ * v v
+ * |????????|????????|AAAAAAAA|AA....AA|AAAAAAAA|????????| < vmalloc
+ * -------- -------- -------- -------- --------
+ * | | | | |
+ * | | | /-------/ |
+ * \-------\|/------/ |/---------------/
+ * ||| ||
+ * |??AAAAAA|AAAAAAAA|AA??????| < shadow
+ * (1) (2) (3)
+ *
+ * First we align the start upwards and the end downwards, so that the
+ * shadow of the region aligns with shadow page boundaries. In the
+ * example, this gives us the shadow page (2). This is the shadow entirely
+ * covered by this allocation.
+ *
+ * Then we have the tricky bits. We want to know if we can free the
+ * partially covered shadow pages - (1) and (3) in the example. For this,
+ * we are given the start and end of the free region that contains this
+ * allocation. Extending our previous example, we could have:
+ *
+ * free_region_start free_region_end
+ * | start end |
+ * v v v v
+ * |FFFFFFFF|FFFFFFFF|AAAAAAAA|AA....AA|AAAAAAAA|FFFFFFFF| < vmalloc
+ * -------- -------- -------- -------- --------
+ * | | | | |
+ * | | | /-------/ |
+ * \-------\|/------/ |/---------------/
+ * ||| ||
+ * |FFAAAAAA|AAAAAAAA|AAF?????| < shadow
+ * (1) (2) (3)
+ *
+ * Once again, we align the start of the free region up, and the end of
+ * the free region down so that the shadow is page aligned. So we can free
+ * page (1) - we know no allocation currently uses anything in that page,
+ * because all of it is in the vmalloc free region. But we cannot free
+ * page (3), because we can't be sure that the rest of it is unused.
+ *
+ * We only consider pages that contain part of the original region for
+ * freeing: we don't try to free other pages from the free region or we'd
+ * end up trying to free huge chunks of virtual address space.
+ *
+ * Concurrency
+ * -----------
+ *
+ * How do we know that we're not freeing a page that is simultaneously
+ * being used for a fresh allocation in kasan_populate_vmalloc(_pte)?
+ *
+ * We _can_ have kasan_release_vmalloc and kasan_populate_vmalloc running
+ * at the same time. While we run under free_vmap_area_lock, the population
+ * code does not.
+ *
+ * free_vmap_area_lock instead operates to ensure that the larger range
+ * [free_region_start, free_region_end) is safe: because __alloc_vmap_area and
+ * the per-cpu region-finding algorithm both run under free_vmap_area_lock,
+ * no space identified as free will become used while we are running. This
+ * means that so long as we are careful with alignment and only free shadow
+ * pages entirely covered by the free region, we will not run in to any
+ * trouble - any simultaneous allocations will be for disjoint regions.
+ */
+void kasan_release_vmalloc(unsigned long start, unsigned long end,
+ unsigned long free_region_start,
+ unsigned long free_region_end)
+{
+ void *shadow_start, *shadow_end;
+ unsigned long region_start, region_end;
+
+ region_start = ALIGN(start, PAGE_SIZE * KASAN_SHADOW_SCALE_SIZE);
+ region_end = ALIGN_DOWN(end, PAGE_SIZE * KASAN_SHADOW_SCALE_SIZE);
+
+ free_region_start = ALIGN(free_region_start,
+ PAGE_SIZE * KASAN_SHADOW_SCALE_SIZE);
+
+ if (start != region_start &&
+ free_region_start < region_start)
+ region_start -= PAGE_SIZE * KASAN_SHADOW_SCALE_SIZE;
+
+ free_region_end = ALIGN_DOWN(free_region_end,
+ PAGE_SIZE * KASAN_SHADOW_SCALE_SIZE);
+
+ if (end != region_end &&
+ free_region_end > region_end)
+ region_end += PAGE_SIZE * KASAN_SHADOW_SCALE_SIZE;
+
+ shadow_start = kasan_mem_to_shadow((void *)region_start);
+ shadow_end = kasan_mem_to_shadow((void *)region_end);
+
+ if (shadow_end > shadow_start) {
+ apply_to_page_range(&init_mm, (unsigned long)shadow_start,
+ (unsigned long)(shadow_end - shadow_start),
+ kasan_depopulate_vmalloc_pte, NULL);
+ flush_tlb_kernel_range((unsigned long)shadow_start,
+ (unsigned long)shadow_end);
+ }
+}
+#endif
diff --git a/mm/kasan/generic_report.c b/mm/kasan/generic_report.c
index 36c645939bc9..2d97efd4954f 100644
--- a/mm/kasan/generic_report.c
+++ b/mm/kasan/generic_report.c
@@ -86,6 +86,9 @@ static const char *get_shadow_bug_type(struct kasan_access_info *info)
case KASAN_ALLOCA_RIGHT:
bug_type = "alloca-out-of-bounds";
break;
+ case KASAN_VMALLOC_INVALID:
+ bug_type = "vmalloc-out-of-bounds";
+ break;
}
return bug_type;
diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h
index 35cff6bbb716..3a083274628e 100644
--- a/mm/kasan/kasan.h
+++ b/mm/kasan/kasan.h
@@ -25,6 +25,7 @@
#endif
#define KASAN_GLOBAL_REDZONE 0xFA /* redzone for global variable */
+#define KASAN_VMALLOC_INVALID 0xF9 /* unallocated space in vmapped page */
/*
* Stack redzone shadow values
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 33e245ebe70c..4d3b3d60d893 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -683,7 +683,7 @@ insert_vmap_area_augment(struct vmap_area *va,
* free area is inserted. If VA has been merged, it is
* freed.
*/
-static __always_inline void
+static __always_inline struct vmap_area *
merge_or_add_vmap_area(struct vmap_area *va,
struct rb_root *root, struct list_head *head)
{
@@ -750,7 +750,10 @@ merge_or_add_vmap_area(struct vmap_area *va,
/* Free vmap_area object. */
kmem_cache_free(vmap_area_cachep, va);
- return;
+
+ /* Point to the new merged area. */
+ va = sibling;
+ merged = true;
}
}
@@ -759,6 +762,8 @@ insert:
link_va(va, root, parent, link, head);
augment_tree_propagate_from(va);
}
+
+ return va;
}
static __always_inline bool
@@ -1196,8 +1201,7 @@ static void free_vmap_area(struct vmap_area *va)
* Insert/Merge it back to the free tree/list.
*/
spin_lock(&free_vmap_area_lock);
- merge_or_add_vmap_area(va,
- &free_vmap_area_root, &free_vmap_area_list);
+ merge_or_add_vmap_area(va, &free_vmap_area_root, &free_vmap_area_list);
spin_unlock(&free_vmap_area_lock);
}
@@ -1294,14 +1298,20 @@ static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end)
spin_lock(&free_vmap_area_lock);
llist_for_each_entry_safe(va, n_va, valist, purge_list) {
unsigned long nr = (va->va_end - va->va_start) >> PAGE_SHIFT;
+ unsigned long orig_start = va->va_start;
+ unsigned long orig_end = va->va_end;
/*
* Finally insert or merge lazily-freed area. It is
* detached and there is no need to "unlink" it from
* anything.
*/
- merge_or_add_vmap_area(va,
- &free_vmap_area_root, &free_vmap_area_list);
+ va = merge_or_add_vmap_area(va, &free_vmap_area_root,
+ &free_vmap_area_list);
+
+ if (is_vmalloc_or_module_addr((void *)orig_start))
+ kasan_release_vmalloc(orig_start, orig_end,
+ va->va_start, va->va_end);
atomic_long_sub(nr, &vmap_lazy_nr);
@@ -2090,6 +2100,22 @@ static struct vm_struct *__get_vm_area_node(unsigned long size,
setup_vmalloc_vm(area, va, flags, caller);
+ /*
+ * For KASAN, if we are in vmalloc space, we need to cover the shadow
+ * area with real memory. If we come here through VM_ALLOC, this is
+ * done by a higher level function that has access to the true size,
+ * which might not be a full page.
+ *
+ * We assume module space comes via VM_ALLOC path.
+ */
+ if (is_vmalloc_addr(area->addr) && !(area->flags & VM_ALLOC)) {
+ if (kasan_populate_vmalloc(area->size, area)) {
+ unmap_vmap_area(va);
+ kfree(area);
+ return NULL;
+ }
+ }
+
return area;
}
@@ -2267,6 +2293,9 @@ static void __vunmap(const void *addr, int deallocate_pages)
debug_check_no_locks_freed(area->addr, get_vm_area_size(area));
debug_check_no_obj_freed(area->addr, get_vm_area_size(area));
+ if (area->flags & VM_KASAN)
+ kasan_poison_vmalloc(area->addr, area->size);
+
vm_remove_mappings(area, deallocate_pages);
if (deallocate_pages) {
@@ -2519,6 +2548,11 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
if (!addr)
return NULL;
+ if (is_vmalloc_or_module_addr(area->addr)) {
+ if (kasan_populate_vmalloc(real_size, area))
+ return NULL;
+ }
+
/*
* In this function, newly allocated vm_struct has VM_UNINITIALIZED
* flag. It means that vm_struct is not fully initialized.
@@ -3400,6 +3434,12 @@ retry:
}
spin_unlock(&vmap_area_lock);
+ /* populate the shadow space outside of the lock */
+ for (area = 0; area < nr_vms; area++) {
+ /* assume success here */
+ kasan_populate_vmalloc(sizes[area], vms[area]);
+ }
+
kfree(vas);
return vms;
@@ -3411,8 +3451,8 @@ recovery:
* and when pcpu_get_vm_areas() is success.
*/
while (area--) {
- merge_or_add_vmap_area(vas[area],
- &free_vmap_area_root, &free_vmap_area_list);
+ merge_or_add_vmap_area(vas[area], &free_vmap_area_root,
+ &free_vmap_area_list);
vas[area] = NULL;
}