author     Pavel Tikhomirov <ptikhomirov@virtuozzo.com>  2024-07-25 12:12:15 +0800
committer  Andrew Morton <akpm@linux-foundation.org>     2024-09-01 20:25:49 -0700
commit     6c99d4eb7c5e0999349cdb9d824ea0ac450d0c8f (patch)
tree       afd55a353f15f9b1c8091a6af6446f5d042c1c57 /mm/kmemleak.c
parent     fbe76a6557a83af5ef3819fd7b7ffd0a5d3b4e51 (diff)
kmemleak: enable tracking for percpu pointers
Patch series "kmemleak: support for percpu memory leak detect".
This is a rework of this series:
https://lore.kernel.org/lkml/20200921020007.35803-1-chenjun102@huawei.com/
Originally I was investigating a percpu leak on our customer nodes, and
having this functionality was a huge help, which led to this fix [1].
So it is probably a good idea to have it in mainline too, especially as
after [2] it became much easier to implement (we already have a separate
tree for percpu pointers).
[1] commit 0af8c09c89681 ("netfilter: x_tables: fix percpu counter block leak on error path when creating new netns")
[2] commit 39042079a0c24 ("kmemleak: avoid RCU stalls when freeing metadata for per-CPU pointers")
This patch (of 2):
This basically does:
- Add min_percpu_addr and max_percpu_addr to filter out unrelated data,
  similar to min_addr and max_addr;
- Set min_count for percpu pointers to 1 to start tracking them (a
  hypothetical reproducer sketch follows this list);
- Calculate the checksum of a percpu area as the xor of the crc32 of each
  per-CPU copy;
- Split the pointer lookup and update-refs code into a separate helper and
  use it twice: once as if the pointer is a virtual pointer and once as if
  it is percpu.
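To make the min_count change concrete, here is a minimal, hypothetical
reproducer sketch (module and symbol names are made up; this is not the
test-module patch of this series): it allocates a per-CPU area and keeps
the only pointer on the init stack, so nothing references it once init
returns. With min_count now 1, the allocation is expected to show up after
"echo scan > /sys/kernel/debug/kmemleak" followed by
"cat /sys/kernel/debug/kmemleak".

#include <linux/init.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/printk.h>

static int __init percpu_leak_demo_init(void)
{
        /* the only copy of the pointer lives on the init stack */
        unsigned long __percpu *p = alloc_percpu(unsigned long);

        if (!p)
                return -ENOMEM;

        pr_info("percpu_leak_demo: deliberately leaking a percpu area\n");
        /* no free_percpu(p) and no global keeps the area reachable */
        return 0;
}

static void __exit percpu_leak_demo_exit(void)
{
}

module_init(percpu_leak_demo_init);
module_exit(percpu_leak_demo_exit);
MODULE_LICENSE("GPL");

Before this patch such an allocation was only scanned (min_count 0) and
therefore never reported, even though nothing references it.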
[ptikhomirov@virtuozzo.com: v2]
Link: https://lkml.kernel.org/r/20240731025526.157529-2-ptikhomirov@virtuozzo.com
Link: https://lkml.kernel.org/r/20240725041223.872472-1-ptikhomirov@virtuozzo.com
Link: https://lkml.kernel.org/r/20240725041223.872472-2-ptikhomirov@virtuozzo.com
Signed-off-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Wei Yongjun <weiyongjun1@huawei.com>
Cc: Chen Jun <chenjun102@huawei.com>
Cc: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@canonical.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Diffstat (limited to 'mm/kmemleak.c')
-rw-r--r--   mm/kmemleak.c   153
1 file changed, 94 insertions(+), 59 deletions(-)
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 764b08100570..6b498c6d9c34 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -224,6 +224,10 @@ static int kmemleak_error;
 static unsigned long min_addr = ULONG_MAX;
 static unsigned long max_addr;
 
+/* minimum and maximum address that may be valid per-CPU pointers */
+static unsigned long min_percpu_addr = ULONG_MAX;
+static unsigned long max_percpu_addr;
+
 static struct task_struct *scan_thread;
 /* used to avoid reporting of recently allocated objects */
 static unsigned long jiffies_min_age;
@@ -294,13 +298,20 @@ static void hex_dump_object(struct seq_file *seq,
         const u8 *ptr = (const u8 *)object->pointer;
         size_t len;
 
-        if (WARN_ON_ONCE(object->flags & (OBJECT_PHYS | OBJECT_PERCPU)))
+        if (WARN_ON_ONCE(object->flags & OBJECT_PHYS))
                 return;
 
+        if (object->flags & OBJECT_PERCPU)
+                ptr = (const u8 *)this_cpu_ptr((void __percpu *)object->pointer);
+
         /* limit the number of lines to HEX_MAX_LINES */
         len = min_t(size_t, object->size, HEX_MAX_LINES * HEX_ROW_SIZE);
 
-        warn_or_seq_printf(seq, "  hex dump (first %zu bytes):\n", len);
+        if (object->flags & OBJECT_PERCPU)
+                warn_or_seq_printf(seq, "  hex dump (first %zu bytes on cpu %d):\n",
+                                   len, raw_smp_processor_id());
+        else
+                warn_or_seq_printf(seq, "  hex dump (first %zu bytes):\n", len);
         kasan_disable_current();
         warn_or_seq_hex_dump(seq, DUMP_PREFIX_NONE, HEX_ROW_SIZE,
                              HEX_GROUP_SIZE, kasan_reset_tag((void *)ptr), len, HEX_ASCII);
@@ -695,10 +706,14 @@ static int __link_object(struct kmemleak_object *object, unsigned long ptr,
 
         untagged_ptr = (unsigned long)kasan_reset_tag((void *)ptr);
         /*
-         * Only update min_addr and max_addr with object
-         * storing virtual address.
+         * Only update min_addr and max_addr with object storing virtual
+         * address. And update min_percpu_addr max_percpu_addr for per-CPU
+         * objects.
          */
-        if (!(objflags & (OBJECT_PHYS | OBJECT_PERCPU))) {
+        if (objflags & OBJECT_PERCPU) {
+                min_percpu_addr = min(min_percpu_addr, untagged_ptr);
+                max_percpu_addr = max(max_percpu_addr, untagged_ptr + size);
+        } else if (!(objflags & OBJECT_PHYS)) {
                 min_addr = min(min_addr, untagged_ptr);
                 max_addr = max(max_addr, untagged_ptr + size);
         }
@@ -1055,12 +1070,8 @@ void __ref kmemleak_alloc_percpu(const void __percpu *ptr, size_t size,
 {
         pr_debug("%s(0x%px, %zu)\n", __func__, ptr, size);
 
-        /*
-         * Percpu allocations are only scanned and not reported as leaks
-         * (min_count is set to 0).
-         */
         if (kmemleak_enabled && ptr && !IS_ERR(ptr))
-                create_object_percpu((unsigned long)ptr, size, 0, gfp);
+                create_object_percpu((unsigned long)ptr, size, 1, gfp);
 }
 
 EXPORT_SYMBOL_GPL(kmemleak_alloc_percpu);
@@ -1304,12 +1315,23 @@ static bool update_checksum(struct kmemleak_object *object)
 {
         u32 old_csum = object->checksum;
 
-        if (WARN_ON_ONCE(object->flags & (OBJECT_PHYS | OBJECT_PERCPU)))
+        if (WARN_ON_ONCE(object->flags & OBJECT_PHYS))
                 return false;
 
         kasan_disable_current();
         kcsan_disable_current();
-        object->checksum = crc32(0, kasan_reset_tag((void *)object->pointer), object->size);
+        if (object->flags & OBJECT_PERCPU) {
+                unsigned int cpu;
+
+                object->checksum = 0;
+                for_each_possible_cpu(cpu) {
+                        void *ptr = per_cpu_ptr((void __percpu *)object->pointer, cpu);
+
+                        object->checksum ^= crc32(0, kasan_reset_tag((void *)ptr), object->size);
+                }
+        } else {
+                object->checksum = crc32(0, kasan_reset_tag((void *)object->pointer), object->size);
+        }
         kasan_enable_current();
         kcsan_enable_current();
 
@@ -1340,6 +1362,64 @@ static void update_refs(struct kmemleak_object *object)
         }
 }
 
+static void pointer_update_refs(struct kmemleak_object *scanned,
+                                unsigned long pointer, unsigned int objflags)
+{
+        struct kmemleak_object *object;
+        unsigned long untagged_ptr;
+        unsigned long excess_ref;
+
+        untagged_ptr = (unsigned long)kasan_reset_tag((void *)pointer);
+        if (objflags & OBJECT_PERCPU) {
+                if (untagged_ptr < min_percpu_addr || untagged_ptr >= max_percpu_addr)
+                        return;
+        } else {
+                if (untagged_ptr < min_addr || untagged_ptr >= max_addr)
+                        return;
+        }
+
+        /*
+         * No need for get_object() here since we hold kmemleak_lock.
+         * object->use_count cannot be dropped to 0 while the object
+         * is still present in object_tree_root and object_list
+         * (with updates protected by kmemleak_lock).
+         */
+        object = __lookup_object(pointer, 1, objflags);
+        if (!object)
+                return;
+        if (object == scanned)
+                /* self referenced, ignore */
+                return;
+
+        /*
+         * Avoid the lockdep recursive warning on object->lock being
+         * previously acquired in scan_object(). These locks are
+         * enclosed by scan_mutex.
+         */
+        raw_spin_lock_nested(&object->lock, SINGLE_DEPTH_NESTING);
+        /* only pass surplus references (object already gray) */
+        if (color_gray(object)) {
+                excess_ref = object->excess_ref;
+                /* no need for update_refs() if object already gray */
+        } else {
+                excess_ref = 0;
+                update_refs(object);
+        }
+        raw_spin_unlock(&object->lock);
+
+        if (excess_ref) {
+                object = lookup_object(excess_ref, 0);
+                if (!object)
+                        return;
+                if (object == scanned)
+                        /* circular reference, ignore */
+                        return;
+                raw_spin_lock_nested(&object->lock, SINGLE_DEPTH_NESTING);
+                update_refs(object);
+                raw_spin_unlock(&object->lock);
+        }
+}
+
 /*
  * Memory scanning is a long process and it needs to be interruptible. This
  * function checks whether such interrupt condition occurred.
@@ -1372,13 +1452,10 @@ static void scan_block(void *_start, void *_end,
         unsigned long *start = PTR_ALIGN(_start, BYTES_PER_POINTER);
         unsigned long *end = _end - (BYTES_PER_POINTER - 1);
         unsigned long flags;
-        unsigned long untagged_ptr;
 
         raw_spin_lock_irqsave(&kmemleak_lock, flags);
         for (ptr = start; ptr < end; ptr++) {
-                struct kmemleak_object *object;
                 unsigned long pointer;
-                unsigned long excess_ref;
 
                 if (scan_should_stop())
                         break;
@@ -1387,50 +1464,8 @@ static void scan_block(void *_start, void *_end,
                 pointer = *(unsigned long *)kasan_reset_tag((void *)ptr);
                 kasan_enable_current();
 
-                untagged_ptr = (unsigned long)kasan_reset_tag((void *)pointer);
-                if (untagged_ptr < min_addr || untagged_ptr >= max_addr)
-                        continue;
-
-                /*
-                 * No need for get_object() here since we hold kmemleak_lock.
-                 * object->use_count cannot be dropped to 0 while the object
-                 * is still present in object_tree_root and object_list
-                 * (with updates protected by kmemleak_lock).
-                 */
-                object = lookup_object(pointer, 1);
-                if (!object)
-                        continue;
-                if (object == scanned)
-                        /* self referenced, ignore */
-                        continue;
-
-                /*
-                 * Avoid the lockdep recursive warning on object->lock being
-                 * previously acquired in scan_object(). These locks are
-                 * enclosed by scan_mutex.
-                 */
-                raw_spin_lock_nested(&object->lock, SINGLE_DEPTH_NESTING);
-                /* only pass surplus references (object already gray) */
-                if (color_gray(object)) {
-                        excess_ref = object->excess_ref;
-                        /* no need for update_refs() if object already gray */
-                } else {
-                        excess_ref = 0;
-                        update_refs(object);
-                }
-                raw_spin_unlock(&object->lock);
-
-                if (excess_ref) {
-                        object = lookup_object(excess_ref, 0);
-                        if (!object)
-                                continue;
-                        if (object == scanned)
-                                /* circular reference, ignore */
-                                continue;
-                        raw_spin_lock_nested(&object->lock, SINGLE_DEPTH_NESTING);
-                        update_refs(object);
-                        raw_spin_unlock(&object->lock);
-                }
+                pointer_update_refs(scanned, pointer, 0);
+                pointer_update_refs(scanned, pointer, OBJECT_PERCPU);
         }
         raw_spin_unlock_irqrestore(&kmemleak_lock, flags);
 }
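A side note on the update_checksum() hunk above: for OBJECT_PERCPU objects
the checksum is now the xor of one crc32 per CPU copy, so a write to any
single copy changes the combined value and kmemleak keeps deferring the
report until the checksum is stable between scans. A small userspace sketch
of the same folding scheme, assuming zlib's crc32() and the made-up
constants below (build with something like `cc xor_crc32.c -lz`):

#include <stdio.h>
#include <string.h>
#include <zlib.h>

#define NR_CPUS_SIM 4   /* stand-in for the number of possible CPUs */
#define OBJ_SIZE    32  /* stand-in for object->size */

int main(void)
{
        unsigned char percpu_copy[NR_CPUS_SIM][OBJ_SIZE];
        unsigned long csum = 0;
        int cpu;

        memset(percpu_copy, 0, sizeof(percpu_copy));
        percpu_copy[2][5] = 0xab;       /* pretend CPU 2 touched its copy */

        /* same folding as update_checksum(): one crc32 per copy, combined with xor */
        for (cpu = 0; cpu < NR_CPUS_SIM; cpu++)
                csum ^= crc32(0L, percpu_copy[cpu], OBJ_SIZE);

        printf("combined percpu checksum: %08lx\n", csum);
        return 0;
}

Xor keeps the combination cheap and order-independent; the trade-off is
that, in principle, identical changes on an even number of copies cancel
out, which seems acceptable for the heuristic purpose the checksum serves
here.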