From 799fb82aa132fa3a3886b7872997a5a84e820062 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Tue, 3 Jan 2023 18:07:52 +0000 Subject: tools/vm: rename tools/vm to tools/mm Rename tools/vm to tools/mm so that it is more consistent with the code and documentation directories, and so that it is not confused with virtual machines. Link: https://lkml.kernel.org/r/20230103180754.129637-4-sj@kernel.org Signed-off-by: SeongJae Park Cc: Jonathan Corbet Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/mm/.gitignore | 4 + tools/mm/Makefile | 32 + tools/mm/page-types.c | 1396 ++++++++++++++++++++++++++++++++++++++ tools/mm/page_owner_sort.c | 897 ++++++++++++++++++++++++ tools/mm/slabinfo-gnuplot.sh | 268 ++++++++ tools/mm/slabinfo.c | 1544 ++++++++++++++++++++++++++++++++++++++++++ tools/vm/.gitignore | 4 - tools/vm/Makefile | 32 - tools/vm/page-types.c | 1396 -------------------------------------- tools/vm/page_owner_sort.c | 897 ------------------------ tools/vm/slabinfo-gnuplot.sh | 268 -------- tools/vm/slabinfo.c | 1544 ------------------------------------------ 12 files changed, 4141 insertions(+), 4141 deletions(-) create mode 100644 tools/mm/.gitignore create mode 100644 tools/mm/Makefile create mode 100644 tools/mm/page-types.c create mode 100644 tools/mm/page_owner_sort.c create mode 100644 tools/mm/slabinfo-gnuplot.sh create mode 100644 tools/mm/slabinfo.c delete mode 100644 tools/vm/.gitignore delete mode 100644 tools/vm/Makefile delete mode 100644 tools/vm/page-types.c delete mode 100644 tools/vm/page_owner_sort.c delete mode 100644 tools/vm/slabinfo-gnuplot.sh delete mode 100644 tools/vm/slabinfo.c (limited to 'tools') diff --git a/tools/mm/.gitignore b/tools/mm/.gitignore new file mode 100644 index 000000000000..922879f93fc8 --- /dev/null +++ b/tools/mm/.gitignore @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only +slabinfo +page-types +page_owner_sort diff --git a/tools/mm/Makefile b/tools/mm/Makefile new file mode 100644 index 000000000000..9860622cbb15 --- 
/dev/null +++ b/tools/mm/Makefile @@ -0,0 +1,32 @@ +# SPDX-License-Identifier: GPL-2.0 +# Makefile for vm tools +# +include ../scripts/Makefile.include + +TARGETS=page-types slabinfo page_owner_sort + +LIB_DIR = ../lib/api +LIBS = $(LIB_DIR)/libapi.a + +CFLAGS = -Wall -Wextra -I../lib/ +LDFLAGS = $(LIBS) + +all: $(TARGETS) + +$(TARGETS): $(LIBS) + +$(LIBS): + make -C $(LIB_DIR) + +%: %.c + $(CC) $(CFLAGS) -o $@ $< $(LDFLAGS) + +clean: + $(RM) page-types slabinfo page_owner_sort + make -C $(LIB_DIR) clean + +sbindir ?= /usr/sbin + +install: all + install -d $(DESTDIR)$(sbindir) + install -m 755 -p $(TARGETS) $(DESTDIR)$(sbindir) diff --git a/tools/mm/page-types.c b/tools/mm/page-types.c new file mode 100644 index 000000000000..381dcc00cb62 --- /dev/null +++ b/tools/mm/page-types.c @@ -0,0 +1,1396 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * page-types: Tool for querying page flags + * + * Copyright (C) 2009 Intel corporation + * + * Authors: Wu Fengguang + */ + +#define _FILE_OFFSET_BITS 64 +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "../../include/uapi/linux/magic.h" +#include "../../include/uapi/linux/kernel-page-flags.h" +#include + +#ifndef MAX_PATH +# define MAX_PATH 256 +#endif + +#ifndef STR +# define _STR(x) #x +# define STR(x) _STR(x) +#endif + +/* + * pagemap kernel ABI bits + */ + +#define PM_ENTRY_BYTES 8 +#define PM_PFRAME_BITS 55 +#define PM_PFRAME_MASK ((1LL << PM_PFRAME_BITS) - 1) +#define PM_PFRAME(x) ((x) & PM_PFRAME_MASK) +#define MAX_SWAPFILES_SHIFT 5 +#define PM_SWAP_OFFSET(x) (((x) & PM_PFRAME_MASK) >> MAX_SWAPFILES_SHIFT) +#define PM_SOFT_DIRTY (1ULL << 55) +#define PM_MMAP_EXCLUSIVE (1ULL << 56) +#define PM_FILE (1ULL << 61) +#define PM_SWAP (1ULL << 62) +#define PM_PRESENT (1ULL << 63) + +/* + * kernel page flags + */ + +#define KPF_BYTES 8 +#define 
PROC_KPAGEFLAGS "/proc/kpageflags" +#define PROC_KPAGECOUNT "/proc/kpagecount" +#define PROC_KPAGECGROUP "/proc/kpagecgroup" + +#define SYS_KERNEL_MM_PAGE_IDLE "/sys/kernel/mm/page_idle/bitmap" + +/* [32-] kernel hacking assistances */ +#define KPF_RESERVED 32 +#define KPF_MLOCKED 33 +#define KPF_MAPPEDTODISK 34 +#define KPF_PRIVATE 35 +#define KPF_PRIVATE_2 36 +#define KPF_OWNER_PRIVATE 37 +#define KPF_ARCH 38 +#define KPF_UNCACHED 39 +#define KPF_SOFTDIRTY 40 +#define KPF_ARCH_2 41 + +/* [47-] take some arbitrary free slots for expanding overloaded flags + * not part of kernel API + */ +#define KPF_ANON_EXCLUSIVE 47 +#define KPF_READAHEAD 48 +#define KPF_SLOB_FREE 49 +#define KPF_SLUB_FROZEN 50 +#define KPF_SLUB_DEBUG 51 +#define KPF_FILE 61 +#define KPF_SWAP 62 +#define KPF_MMAP_EXCLUSIVE 63 + +#define KPF_ALL_BITS ((uint64_t)~0ULL) +#define KPF_HACKERS_BITS (0xffffULL << 32) +#define KPF_OVERLOADED_BITS (0xffffULL << 48) +#define BIT(name) (1ULL << KPF_##name) +#define BITS_COMPOUND (BIT(COMPOUND_HEAD) | BIT(COMPOUND_TAIL)) + +static const char * const page_flag_names[] = { + [KPF_LOCKED] = "L:locked", + [KPF_ERROR] = "E:error", + [KPF_REFERENCED] = "R:referenced", + [KPF_UPTODATE] = "U:uptodate", + [KPF_DIRTY] = "D:dirty", + [KPF_LRU] = "l:lru", + [KPF_ACTIVE] = "A:active", + [KPF_SLAB] = "S:slab", + [KPF_WRITEBACK] = "W:writeback", + [KPF_RECLAIM] = "I:reclaim", + [KPF_BUDDY] = "B:buddy", + + [KPF_MMAP] = "M:mmap", + [KPF_ANON] = "a:anonymous", + [KPF_SWAPCACHE] = "s:swapcache", + [KPF_SWAPBACKED] = "b:swapbacked", + [KPF_COMPOUND_HEAD] = "H:compound_head", + [KPF_COMPOUND_TAIL] = "T:compound_tail", + [KPF_HUGE] = "G:huge", + [KPF_UNEVICTABLE] = "u:unevictable", + [KPF_HWPOISON] = "X:hwpoison", + [KPF_NOPAGE] = "n:nopage", + [KPF_KSM] = "x:ksm", + [KPF_THP] = "t:thp", + [KPF_OFFLINE] = "o:offline", + [KPF_PGTABLE] = "g:pgtable", + [KPF_ZERO_PAGE] = "z:zero_page", + [KPF_IDLE] = "i:idle_page", + + [KPF_RESERVED] = "r:reserved", + [KPF_MLOCKED] = "m:mlocked", + 
[KPF_MAPPEDTODISK] = "d:mappedtodisk", + [KPF_PRIVATE] = "P:private", + [KPF_PRIVATE_2] = "p:private_2", + [KPF_OWNER_PRIVATE] = "O:owner_private", + [KPF_ARCH] = "h:arch", + [KPF_UNCACHED] = "c:uncached", + [KPF_SOFTDIRTY] = "f:softdirty", + [KPF_ARCH_2] = "H:arch_2", + + [KPF_ANON_EXCLUSIVE] = "d:anon_exclusive", + [KPF_READAHEAD] = "I:readahead", + [KPF_SLOB_FREE] = "P:slob_free", + [KPF_SLUB_FROZEN] = "A:slub_frozen", + [KPF_SLUB_DEBUG] = "E:slub_debug", + + [KPF_FILE] = "F:file", + [KPF_SWAP] = "w:swap", + [KPF_MMAP_EXCLUSIVE] = "1:mmap_exclusive", +}; + + +/* + * data structures + */ + +static int opt_raw; /* for kernel developers */ +static int opt_list; /* list pages (in ranges) */ +static int opt_mark_idle; /* set accessed bit */ +static int opt_no_summary; /* don't show summary */ +static pid_t opt_pid; /* process to walk */ +const char *opt_file; /* file or directory path */ +static uint64_t opt_cgroup; /* cgroup inode */ +static int opt_list_cgroup;/* list page cgroup */ +static int opt_list_mapcnt;/* list page map count */ +static const char *opt_kpageflags;/* kpageflags file to parse */ + +#define MAX_ADDR_RANGES 1024 +static int nr_addr_ranges; +static unsigned long opt_offset[MAX_ADDR_RANGES]; +static unsigned long opt_size[MAX_ADDR_RANGES]; + +#define MAX_VMAS 10240 +static int nr_vmas; +static unsigned long pg_start[MAX_VMAS]; +static unsigned long pg_end[MAX_VMAS]; + +#define MAX_BIT_FILTERS 64 +static int nr_bit_filters; +static uint64_t opt_mask[MAX_BIT_FILTERS]; +static uint64_t opt_bits[MAX_BIT_FILTERS]; + +static int page_size; + +static int pagemap_fd; +static int kpageflags_fd; +static int kpagecount_fd = -1; +static int kpagecgroup_fd = -1; +static int page_idle_fd = -1; + +static int opt_hwpoison; +static int opt_unpoison; + +static const char *hwpoison_debug_fs; +static int hwpoison_inject_fd; +static int hwpoison_forget_fd; + +#define HASH_SHIFT 13 +#define HASH_SIZE (1 << HASH_SHIFT) +#define HASH_MASK (HASH_SIZE - 1) +#define 
HASH_KEY(flags) (flags & HASH_MASK) + +static unsigned long total_pages; +static unsigned long nr_pages[HASH_SIZE]; +static uint64_t page_flags[HASH_SIZE]; + + +/* + * helper functions + */ + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + +#define min_t(type, x, y) ({ \ + type __min1 = (x); \ + type __min2 = (y); \ + __min1 < __min2 ? __min1 : __min2; }) + +#define max_t(type, x, y) ({ \ + type __max1 = (x); \ + type __max2 = (y); \ + __max1 > __max2 ? __max1 : __max2; }) + +static unsigned long pages2mb(unsigned long pages) +{ + return (pages * page_size) >> 20; +} + +static void fatal(const char *x, ...) +{ + va_list ap; + + va_start(ap, x); + vfprintf(stderr, x, ap); + va_end(ap); + exit(EXIT_FAILURE); +} + +static int checked_open(const char *pathname, int flags) +{ + int fd = open(pathname, flags); + + if (fd < 0) { + perror(pathname); + exit(EXIT_FAILURE); + } + + return fd; +} + +/* + * pagemap/kpageflags routines + */ + +static unsigned long do_u64_read(int fd, const char *name, + uint64_t *buf, + unsigned long index, + unsigned long count) +{ + long bytes; + + if (index > ULONG_MAX / 8) + fatal("index overflow: %lu\n", index); + + bytes = pread(fd, buf, count * 8, (off_t)index * 8); + if (bytes < 0) { + perror(name); + exit(EXIT_FAILURE); + } + if (bytes % 8) + fatal("partial read: %lu bytes\n", bytes); + + return bytes / 8; +} + +static unsigned long kpageflags_read(uint64_t *buf, + unsigned long index, + unsigned long pages) +{ + return do_u64_read(kpageflags_fd, opt_kpageflags, buf, index, pages); +} + +static unsigned long kpagecgroup_read(uint64_t *buf, + unsigned long index, + unsigned long pages) +{ + if (kpagecgroup_fd < 0) + return pages; + + return do_u64_read(kpagecgroup_fd, opt_kpageflags, buf, index, pages); +} + +static unsigned long kpagecount_read(uint64_t *buf, + unsigned long index, + unsigned long pages) +{ + return kpagecount_fd < 0 ? 
pages : + do_u64_read(kpagecount_fd, PROC_KPAGECOUNT, + buf, index, pages); +} + +static unsigned long pagemap_read(uint64_t *buf, + unsigned long index, + unsigned long pages) +{ + return do_u64_read(pagemap_fd, "/proc/pid/pagemap", buf, index, pages); +} + +static unsigned long pagemap_pfn(uint64_t val) +{ + unsigned long pfn; + + if (val & PM_PRESENT) + pfn = PM_PFRAME(val); + else + pfn = 0; + + return pfn; +} + +static unsigned long pagemap_swap_offset(uint64_t val) +{ + return val & PM_SWAP ? PM_SWAP_OFFSET(val) : 0; +} + +/* + * page flag names + */ + +static char *page_flag_name(uint64_t flags) +{ + static char buf[65]; + int present; + size_t i, j; + + for (i = 0, j = 0; i < ARRAY_SIZE(page_flag_names); i++) { + present = (flags >> i) & 1; + if (!page_flag_names[i]) { + if (present) + fatal("unknown flag bit %d\n", i); + continue; + } + buf[j++] = present ? page_flag_names[i][0] : '_'; + } + + return buf; +} + +static char *page_flag_longname(uint64_t flags) +{ + static char buf[1024]; + size_t i, n; + + for (i = 0, n = 0; i < ARRAY_SIZE(page_flag_names); i++) { + if (!page_flag_names[i]) + continue; + if ((flags >> i) & 1) + n += snprintf(buf + n, sizeof(buf) - n, "%s,", + page_flag_names[i] + 2); + } + if (n) + n--; + buf[n] = '\0'; + + return buf; +} + + +/* + * page list and summary + */ + +static void show_page_range(unsigned long voffset, unsigned long offset, + unsigned long size, uint64_t flags, + uint64_t cgroup, uint64_t mapcnt) +{ + static uint64_t flags0; + static uint64_t cgroup0; + static uint64_t mapcnt0; + static unsigned long voff; + static unsigned long index; + static unsigned long count; + + if (flags == flags0 && cgroup == cgroup0 && mapcnt == mapcnt0 && + offset == index + count && size && voffset == voff + count) { + count += size; + return; + } + + if (count) { + if (opt_pid) + printf("%lx\t", voff); + if (opt_file) + printf("%lx\t", voff); + if (opt_list_cgroup) + printf("@%llu\t", (unsigned long long)cgroup0); + if 
(opt_list_mapcnt) + printf("%lu\t", mapcnt0); + printf("%lx\t%lx\t%s\n", + index, count, page_flag_name(flags0)); + } + + flags0 = flags; + cgroup0 = cgroup; + mapcnt0 = mapcnt; + index = offset; + voff = voffset; + count = size; +} + +static void flush_page_range(void) +{ + show_page_range(0, 0, 0, 0, 0, 0); +} + +static void show_page(unsigned long voffset, unsigned long offset, + uint64_t flags, uint64_t cgroup, uint64_t mapcnt) +{ + if (opt_pid) + printf("%lx\t", voffset); + if (opt_file) + printf("%lx\t", voffset); + if (opt_list_cgroup) + printf("@%llu\t", (unsigned long long)cgroup); + if (opt_list_mapcnt) + printf("%lu\t", mapcnt); + + printf("%lx\t%s\n", offset, page_flag_name(flags)); +} + +static void show_summary(void) +{ + size_t i; + + printf(" flags\tpage-count MB" + " symbolic-flags\t\t\tlong-symbolic-flags\n"); + + for (i = 0; i < ARRAY_SIZE(nr_pages); i++) { + if (nr_pages[i]) + printf("0x%016llx\t%10lu %8lu %s\t%s\n", + (unsigned long long)page_flags[i], + nr_pages[i], + pages2mb(nr_pages[i]), + page_flag_name(page_flags[i]), + page_flag_longname(page_flags[i])); + } + + printf(" total\t%10lu %8lu\n", + total_pages, pages2mb(total_pages)); +} + + +/* + * page flag filters + */ + +static int bit_mask_ok(uint64_t flags) +{ + int i; + + for (i = 0; i < nr_bit_filters; i++) { + if (opt_bits[i] == KPF_ALL_BITS) { + if ((flags & opt_mask[i]) == 0) + return 0; + } else { + if ((flags & opt_mask[i]) != opt_bits[i]) + return 0; + } + } + + return 1; +} + +static uint64_t expand_overloaded_flags(uint64_t flags, uint64_t pme) +{ + /* Anonymous pages overload PG_mappedtodisk */ + if ((flags & BIT(ANON)) && (flags & BIT(MAPPEDTODISK))) + flags ^= BIT(MAPPEDTODISK) | BIT(ANON_EXCLUSIVE); + + /* SLOB/SLUB overload several page flags */ + if (flags & BIT(SLAB)) { + if (flags & BIT(PRIVATE)) + flags ^= BIT(PRIVATE) | BIT(SLOB_FREE); + if (flags & BIT(ACTIVE)) + flags ^= BIT(ACTIVE) | BIT(SLUB_FROZEN); + if (flags & BIT(ERROR)) + flags ^= BIT(ERROR) | 
BIT(SLUB_DEBUG); + } + + /* PG_reclaim is overloaded as PG_readahead in the read path */ + if ((flags & (BIT(RECLAIM) | BIT(WRITEBACK))) == BIT(RECLAIM)) + flags ^= BIT(RECLAIM) | BIT(READAHEAD); + + if (pme & PM_SOFT_DIRTY) + flags |= BIT(SOFTDIRTY); + if (pme & PM_FILE) + flags |= BIT(FILE); + if (pme & PM_SWAP) + flags |= BIT(SWAP); + if (pme & PM_MMAP_EXCLUSIVE) + flags |= BIT(MMAP_EXCLUSIVE); + + return flags; +} + +static uint64_t well_known_flags(uint64_t flags) +{ + /* hide flags intended only for kernel hacker */ + flags &= ~KPF_HACKERS_BITS; + + /* hide non-hugeTLB compound pages */ + if ((flags & BITS_COMPOUND) && !(flags & BIT(HUGE))) + flags &= ~BITS_COMPOUND; + + return flags; +} + +static uint64_t kpageflags_flags(uint64_t flags, uint64_t pme) +{ + if (opt_raw) + flags = expand_overloaded_flags(flags, pme); + else + flags = well_known_flags(flags); + + return flags; +} + +/* + * page actions + */ + +static void prepare_hwpoison_fd(void) +{ + char buf[MAX_PATH + 1]; + + hwpoison_debug_fs = debugfs__mount(); + if (!hwpoison_debug_fs) { + perror("mount debugfs"); + exit(EXIT_FAILURE); + } + + if (opt_hwpoison && !hwpoison_inject_fd) { + snprintf(buf, MAX_PATH, "%s/hwpoison/corrupt-pfn", + hwpoison_debug_fs); + hwpoison_inject_fd = checked_open(buf, O_WRONLY); + } + + if (opt_unpoison && !hwpoison_forget_fd) { + snprintf(buf, MAX_PATH, "%s/hwpoison/unpoison-pfn", + hwpoison_debug_fs); + hwpoison_forget_fd = checked_open(buf, O_WRONLY); + } +} + +static int hwpoison_page(unsigned long offset) +{ + char buf[100]; + int len; + + len = sprintf(buf, "0x%lx\n", offset); + len = write(hwpoison_inject_fd, buf, len); + if (len < 0) { + perror("hwpoison inject"); + return len; + } + return 0; +} + +static int unpoison_page(unsigned long offset) +{ + char buf[100]; + int len; + + len = sprintf(buf, "0x%lx\n", offset); + len = write(hwpoison_forget_fd, buf, len); + if (len < 0) { + perror("hwpoison forget"); + return len; + } + return 0; +} + +static int 
mark_page_idle(unsigned long offset) +{ + static unsigned long off; + static uint64_t buf; + int len; + + if ((offset / 64 == off / 64) || buf == 0) { + buf |= 1UL << (offset % 64); + off = offset; + return 0; + } + + len = pwrite(page_idle_fd, &buf, 8, 8 * (off / 64)); + if (len < 0) { + perror("mark page idle"); + return len; + } + + buf = 1UL << (offset % 64); + off = offset; + + return 0; +} + +/* + * page frame walker + */ + +static size_t hash_slot(uint64_t flags) +{ + size_t k = HASH_KEY(flags); + size_t i; + + /* Explicitly reserve slot 0 for flags 0: the following logic + * cannot distinguish an unoccupied slot from slot (flags==0). + */ + if (flags == 0) + return 0; + + /* search through the remaining (HASH_SIZE-1) slots */ + for (i = 1; i < ARRAY_SIZE(page_flags); i++, k++) { + if (!k || k >= ARRAY_SIZE(page_flags)) + k = 1; + if (page_flags[k] == 0) { + page_flags[k] = flags; + return k; + } + if (page_flags[k] == flags) + return k; + } + + fatal("hash table full: bump up HASH_SHIFT?\n"); + exit(EXIT_FAILURE); +} + +static void add_page(unsigned long voffset, unsigned long offset, + uint64_t flags, uint64_t cgroup, uint64_t mapcnt, + uint64_t pme) +{ + flags = kpageflags_flags(flags, pme); + + if (!bit_mask_ok(flags)) + return; + + if (opt_cgroup && cgroup != (uint64_t)opt_cgroup) + return; + + if (opt_hwpoison) + hwpoison_page(offset); + if (opt_unpoison) + unpoison_page(offset); + + if (opt_mark_idle) + mark_page_idle(offset); + + if (opt_list == 1) + show_page_range(voffset, offset, 1, flags, cgroup, mapcnt); + else if (opt_list == 2) + show_page(voffset, offset, flags, cgroup, mapcnt); + + nr_pages[hash_slot(flags)]++; + total_pages++; +} + +#define KPAGEFLAGS_BATCH (64 << 10) /* 64k pages */ +static void walk_pfn(unsigned long voffset, + unsigned long index, + unsigned long count, + uint64_t pme) +{ + uint64_t buf[KPAGEFLAGS_BATCH]; + uint64_t cgi[KPAGEFLAGS_BATCH]; + uint64_t cnt[KPAGEFLAGS_BATCH]; + unsigned long batch; + unsigned long pages; + 
unsigned long i; + + /* + * kpagecgroup_read() reads only if kpagecgroup were opened, but + * /proc/kpagecgroup might even not exist, so it's better to fill + * them with zeros here. + */ + if (count == 1) + cgi[0] = 0; + else + memset(cgi, 0, sizeof cgi); + + /* + * kpagecount_read() likewise leaves cnt untouched when kpagecount + * was not opened, yet cnt[i] is still handed to add_page(); zero it + * so page listing never compares or prints uninitialized stack data. + */ + if (count == 1) + cnt[0] = 0; + else + memset(cnt, 0, sizeof cnt); + + while (count) { + batch = min_t(unsigned long, count, KPAGEFLAGS_BATCH); + pages = kpageflags_read(buf, index, batch); + if (pages == 0) + break; + + if (kpagecgroup_read(cgi, index, pages) != pages) + fatal("kpagecgroup returned fewer pages than expected"); + + if (kpagecount_read(cnt, index, pages) != pages) + fatal("kpagecount returned fewer pages than expected"); + + for (i = 0; i < pages; i++) + add_page(voffset + i, index + i, + buf[i], cgi[i], cnt[i], pme); + + index += pages; + count -= pages; + } +} + +static void walk_swap(unsigned long voffset, uint64_t pme) +{ + uint64_t flags = kpageflags_flags(0, pme); + + if (!bit_mask_ok(flags)) + return; + + if (opt_cgroup) + return; + + if (opt_list == 1) + show_page_range(voffset, pagemap_swap_offset(pme), + 1, flags, 0, 0); + else if (opt_list == 2) + show_page(voffset, pagemap_swap_offset(pme), flags, 0, 0); + + nr_pages[hash_slot(flags)]++; + total_pages++; +} + +#define PAGEMAP_BATCH (64 << 10) +static void walk_vma(unsigned long index, unsigned long count) +{ + uint64_t buf[PAGEMAP_BATCH]; + unsigned long batch; + unsigned long pages; + unsigned long pfn; + unsigned long i; + + while (count) { + batch = min_t(unsigned long, count, PAGEMAP_BATCH); + pages = pagemap_read(buf, index, batch); + if (pages == 0) + break; + + for (i = 0; i < pages; i++) { + pfn = pagemap_pfn(buf[i]); + if (pfn) + walk_pfn(index + i, pfn, 1, buf[i]); + if (buf[i] & PM_SWAP) + walk_swap(index + i, buf[i]); + } + + index += pages; + count -= pages; + } +} + +static void walk_task(unsigned long index, unsigned long count) +{ + const unsigned long end = index + count; + unsigned long start; + int i = 0; + + while (index < end) { + + while (pg_end[i] <= index) + if (++i >= nr_vmas) 
+ return; + if (pg_start[i] >= end) + return; + + start = max_t(unsigned long, pg_start[i], index); + index = min_t(unsigned long, pg_end[i], end); + + assert(start < index); + walk_vma(start, index - start); + } +} + +static void add_addr_range(unsigned long offset, unsigned long size) +{ + if (nr_addr_ranges >= MAX_ADDR_RANGES) + fatal("too many addr ranges\n"); + + opt_offset[nr_addr_ranges] = offset; + opt_size[nr_addr_ranges] = min_t(unsigned long, size, ULONG_MAX-offset); + nr_addr_ranges++; +} + +static void walk_addr_ranges(void) +{ + int i; + + kpageflags_fd = checked_open(opt_kpageflags, O_RDONLY); + + if (!nr_addr_ranges) + add_addr_range(0, ULONG_MAX); + + for (i = 0; i < nr_addr_ranges; i++) + if (!opt_pid) + walk_pfn(opt_offset[i], opt_offset[i], opt_size[i], 0); + else + walk_task(opt_offset[i], opt_size[i]); + + if (opt_mark_idle) + mark_page_idle(0); + + close(kpageflags_fd); +} + + +/* + * user interface + */ + +static const char *page_flag_type(uint64_t flag) +{ + if (flag & KPF_HACKERS_BITS) + return "(r)"; + if (flag & KPF_OVERLOADED_BITS) + return "(o)"; + return " "; +} + +static void usage(void) +{ + size_t i, j; + + printf( +"page-types [options]\n" +" -r|--raw Raw mode, for kernel developers\n" +" -d|--describe flags Describe flags\n" +" -a|--addr addr-spec Walk a range of pages\n" +" -b|--bits bits-spec Walk pages with specified bits\n" +" -c|--cgroup path|@inode Walk pages within memory cgroup\n" +" -p|--pid pid Walk process address space\n" +" -f|--file filename Walk file address space\n" +" -i|--mark-idle Mark pages idle\n" +" -l|--list Show page details in ranges\n" +" -L|--list-each Show page details one by one\n" +" -C|--list-cgroup Show cgroup inode for pages\n" +" -M|--list-mapcnt Show page map count\n" +" -N|--no-summary Don't show summary info\n" +" -X|--hwpoison hwpoison pages\n" +" -x|--unpoison unpoison pages\n" +" -F|--kpageflags filename kpageflags file to parse\n" +" -h|--help Show this usage message\n" +"flags:\n" +" 0x10 
bitfield format, e.g.\n" +" anon bit-name, e.g.\n" +" 0x10,anon comma-separated list, e.g.\n" +"addr-spec:\n" +" N one page at offset N (unit: pages)\n" +" N+M pages range from N to N+M-1\n" +" N,M pages range from N to M-1\n" +" N, pages range from N to end\n" +" ,M pages range from 0 to M-1\n" +"bits-spec:\n" +" bit1,bit2 (flags & (bit1|bit2)) != 0\n" +" bit1,bit2=bit1 (flags & (bit1|bit2)) == bit1\n" +" bit1,~bit2 (flags & (bit1|bit2)) == bit1\n" +" =bit1,bit2 flags == (bit1|bit2)\n" +"bit-names:\n" + ); + + for (i = 0, j = 0; i < ARRAY_SIZE(page_flag_names); i++) { + if (!page_flag_names[i]) + continue; + printf("%16s%s", page_flag_names[i] + 2, + page_flag_type(1ULL << i)); + if (++j > 3) { + j = 0; + putchar('\n'); + } + } + printf("\n " + "(r) raw mode bits (o) overloaded bits\n"); +} + +static unsigned long long parse_number(const char *str) +{ + unsigned long long n; + + n = strtoll(str, NULL, 0); + + if (n == 0 && str[0] != '0') + fatal("invalid name or number: %s\n", str); + + return n; +} + +static void parse_pid(const char *str) +{ + FILE *file; + char buf[5000]; + + opt_pid = parse_number(str); + + sprintf(buf, "/proc/%d/pagemap", opt_pid); + pagemap_fd = checked_open(buf, O_RDONLY); + + sprintf(buf, "/proc/%d/maps", opt_pid); + file = fopen(buf, "r"); + if (!file) { + perror(buf); + exit(EXIT_FAILURE); + } + + while (fgets(buf, sizeof(buf), file) != NULL) { + unsigned long vm_start; + unsigned long vm_end; + unsigned long long pgoff; + int major, minor; + char r, w, x, s; + unsigned long ino; + int n; + + n = sscanf(buf, "%lx-%lx %c%c%c%c %llx %x:%x %lu", + &vm_start, + &vm_end, + &r, &w, &x, &s, + &pgoff, + &major, &minor, + &ino); + if (n < 10) { + fprintf(stderr, "unexpected line: %s\n", buf); + continue; + } + pg_start[nr_vmas] = vm_start / page_size; + pg_end[nr_vmas] = vm_end / page_size; + if (++nr_vmas >= MAX_VMAS) { + fprintf(stderr, "too many VMAs\n"); + break; + } + } + fclose(file); +} + +static void show_file(const char *name, const 
struct stat *st) +{ + unsigned long long size = st->st_size; + char atime[64], mtime[64]; + long now = time(NULL); + + printf("%s\tInode: %u\tSize: %llu (%llu pages)\n", + name, (unsigned)st->st_ino, + size, (size + page_size - 1) / page_size); + + strftime(atime, sizeof(atime), "%c", localtime(&st->st_atime)); + strftime(mtime, sizeof(mtime), "%c", localtime(&st->st_mtime)); + + printf("Modify: %s (%ld seconds ago)\nAccess: %s (%ld seconds ago)\n", + mtime, now - st->st_mtime, + atime, now - st->st_atime); +} + +static sigjmp_buf sigbus_jmp; + +static void * volatile sigbus_addr; + +static void sigbus_handler(int sig, siginfo_t *info, void *ucontex) +{ + (void)sig; + (void)ucontex; + sigbus_addr = info ? info->si_addr : NULL; + siglongjmp(sigbus_jmp, 1); +} + +static struct sigaction sigbus_action = { + .sa_sigaction = sigbus_handler, + .sa_flags = SA_SIGINFO, +}; + +/* + * Walk the page cache of @fd for the byte range [@off, @end): map the file + * in batches of at most PAGEMAP_BATCH pages, touch the pages mincore() + * reports as resident, then look each mapped page up through pagemap and + * feed it to add_page(). A SIGBUS while touching pages ends the walk at + * the faulting offset. @name is only used in messages. + */ +static void walk_file_range(const char *name, int fd, + unsigned long off, unsigned long end) +{ + uint8_t vec[PAGEMAP_BATCH]; + uint64_t buf[PAGEMAP_BATCH], flags; + uint64_t cgroup = 0; + uint64_t mapcnt = 0; + unsigned long nr_pages, pfn, i; + ssize_t len; + void *ptr; + int first = 1; + + for (; off < end; off += len) { + nr_pages = (end - off + page_size - 1) / page_size; + if (nr_pages > PAGEMAP_BATCH) + nr_pages = PAGEMAP_BATCH; + len = nr_pages * page_size; + + ptr = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, off); + if (ptr == MAP_FAILED) + fatal("mmap failed: %s", name); + + /* determine cached pages */ + if (mincore(ptr, len, vec)) + fatal("mincore failed: %s", name); + + /* turn off readahead */ + if (madvise(ptr, len, MADV_RANDOM)) + fatal("madvice failed: %s", name); + + if (sigsetjmp(sigbus_jmp, 1)) { + /* + * Clamp the walk to the faulting file offset. The + * parentheses matter: '+' binds tighter than '?:', so + * without them 'off + sigbus_addr' became the condition. + */ + end = off + (sigbus_addr ? 
sigbus_addr - ptr : 0); + fprintf(stderr, "got sigbus at offset %lld: %s\n", + (long long)end, name); + goto got_sigbus; + } + + /* populate ptes */ + for (i = 0; i < nr_pages ; i++) { + if (vec[i] & 1) + (void)*(volatile int *)(ptr + i * page_size); + } +got_sigbus: + + /* turn off harvesting reference bits */ + if (madvise(ptr, len, MADV_SEQUENTIAL)) + fatal("madvice failed: %s", name); + + if (pagemap_read(buf, (unsigned long)ptr / page_size, + nr_pages) != nr_pages) + fatal("cannot read pagemap"); + + munmap(ptr, len); + + for (i = 0; i < nr_pages; i++) { + pfn = pagemap_pfn(buf[i]); + if (!pfn) + continue; + if (!kpageflags_read(&flags, pfn, 1)) + continue; + if (!kpagecgroup_read(&cgroup, pfn, 1)) + fatal("kpagecgroup_read failed"); + if (!kpagecount_read(&mapcnt, pfn, 1)) + fatal("kpagecount_read failed"); + if (first && opt_list) { + first = 0; + flush_page_range(); + } + add_page(off / page_size + i, pfn, + flags, cgroup, mapcnt, buf[i]); + } + } +} + +static void walk_file(const char *name, const struct stat *st) +{ + int i; + int fd; + + fd = checked_open(name, O_RDONLY|O_NOATIME|O_NOFOLLOW); + + if (!nr_addr_ranges) + add_addr_range(0, st->st_size / page_size); + + for (i = 0; i < nr_addr_ranges; i++) + walk_file_range(name, fd, opt_offset[i] * page_size, + (opt_offset[i] + opt_size[i]) * page_size); + + close(fd); +} + +int walk_tree(const char *name, const struct stat *st, int type, struct FTW *f) +{ + (void)f; + switch (type) { + case FTW_F: + if (S_ISREG(st->st_mode)) + walk_file(name, st); + break; + case FTW_DNR: + fprintf(stderr, "cannot read dir: %s\n", name); + break; + } + return 0; +} + +struct stat st; + +static void walk_page_cache(void) +{ + kpageflags_fd = checked_open(opt_kpageflags, O_RDONLY); + pagemap_fd = checked_open("/proc/self/pagemap", O_RDONLY); + sigaction(SIGBUS, &sigbus_action, NULL); + + if (stat(opt_file, &st)) + fatal("stat failed: %s\n", opt_file); + + if (S_ISREG(st.st_mode)) { + walk_file(opt_file, &st); + } else if 
(S_ISDIR(st.st_mode)) { + /* do not follow symlinks and mountpoints */ + if (nftw(opt_file, walk_tree, 64, FTW_MOUNT | FTW_PHYS) < 0) + fatal("nftw failed: %s\n", opt_file); + } else + fatal("unhandled file type: %s\n", opt_file); + + close(kpageflags_fd); + close(pagemap_fd); + signal(SIGBUS, SIG_DFL); +} + +static void parse_file(const char *name) +{ + opt_file = name; +} + +static void parse_cgroup(const char *path) +{ + if (path[0] == '@') { + opt_cgroup = parse_number(path + 1); + return; + } + + struct stat st; + + if (stat(path, &st)) + fatal("stat failed: %s: %m\n", path); + + if (!S_ISDIR(st.st_mode)) + fatal("cgroup supposed to be a directory: %s\n", path); + + opt_cgroup = st.st_ino; +} + +static void parse_addr_range(const char *optarg) +{ + unsigned long offset; + unsigned long size; + char *p; + + p = strchr(optarg, ','); + if (!p) + p = strchr(optarg, '+'); + + if (p == optarg) { + offset = 0; + size = parse_number(p + 1); + } else if (p) { + offset = parse_number(optarg); + if (p[1] == '\0') + size = ULONG_MAX; + else { + size = parse_number(p + 1); + if (*p == ',') { + if (size < offset) + fatal("invalid range: %lu,%lu\n", + offset, size); + size -= offset; + } + } + } else { + offset = parse_number(optarg); + size = 1; + } + + add_addr_range(offset, size); +} + +static void add_bits_filter(uint64_t mask, uint64_t bits) +{ + if (nr_bit_filters >= MAX_BIT_FILTERS) + fatal("too much bit filters\n"); + + opt_mask[nr_bit_filters] = mask; + opt_bits[nr_bit_filters] = bits; + nr_bit_filters++; +} + +static uint64_t parse_flag_name(const char *str, int len) +{ + size_t i; + + if (!*str || !len) + return 0; + + if (len <= 8 && !strncmp(str, "compound", len)) + return BITS_COMPOUND; + + for (i = 0; i < ARRAY_SIZE(page_flag_names); i++) { + if (!page_flag_names[i]) + continue; + if (!strncmp(str, page_flag_names[i] + 2, len)) + return 1ULL << i; + } + + return parse_number(str); +} + +static uint64_t parse_flag_names(const char *str, int all) +{ + const char 
*p = str; + uint64_t flags = 0; + + while (1) { + if (*p == ',' || *p == '=' || *p == '\0') { + if ((*str != '~') || (*str == '~' && all && *++str)) + flags |= parse_flag_name(str, p - str); + if (*p != ',') + break; + str = p + 1; + } + p++; + } + + return flags; +} + +static void parse_bits_mask(const char *optarg) +{ + uint64_t mask; + uint64_t bits; + const char *p; + + p = strchr(optarg, '='); + if (p == optarg) { + mask = KPF_ALL_BITS; + bits = parse_flag_names(p + 1, 0); + } else if (p) { + mask = parse_flag_names(optarg, 0); + bits = parse_flag_names(p + 1, 0); + } else if (strchr(optarg, '~')) { + mask = parse_flag_names(optarg, 1); + bits = parse_flag_names(optarg, 0); + } else { + mask = parse_flag_names(optarg, 0); + bits = KPF_ALL_BITS; + } + + add_bits_filter(mask, bits); +} + +static void parse_kpageflags(const char *name) +{ + opt_kpageflags = name; +} + +static void describe_flags(const char *optarg) +{ + uint64_t flags = parse_flag_names(optarg, 0); + + printf("0x%016llx\t%s\t%s\n", + (unsigned long long)flags, + page_flag_name(flags), + page_flag_longname(flags)); +} + +/* + * Long options. The second field (has_arg) must agree with the short + * option string passed to getopt_long() in main(): 1 for every option + * that is followed by ':' there, 0 otherwise. + */ +static const struct option opts[] = { + { "raw" , 0, NULL, 'r' }, + { "pid" , 1, NULL, 'p' }, + { "file" , 1, NULL, 'f' }, + { "addr" , 1, NULL, 'a' }, + { "bits" , 1, NULL, 'b' }, + { "cgroup" , 1, NULL, 'c' }, + { "describe" , 1, NULL, 'd' }, + { "mark-idle" , 0, NULL, 'i' }, + { "list" , 0, NULL, 'l' }, + { "list-each" , 0, NULL, 'L' }, + { "list-cgroup", 0, NULL, 'C' }, + { "list-mapcnt", 0, NULL, 'M' }, + { "no-summary", 0, NULL, 'N' }, + { "hwpoison" , 0, NULL, 'X' }, + { "unpoison" , 0, NULL, 'x' }, + { "kpageflags", 1, NULL, 'F' }, /* takes a filename, like -F */ + { "help" , 0, NULL, 'h' }, + { NULL , 0, NULL, 0 } +}; + +int main(int argc, char *argv[]) +{ + int c; + + page_size = getpagesize(); + + while ((c = getopt_long(argc, argv, + "rp:f:a:b:d:c:CilLMNXxF:h", + opts, NULL)) != -1) { + switch (c) { + case 'r': + opt_raw = 1; + break; + case 'p': + parse_pid(optarg); + break; + case 'f': + 
parse_file(optarg); + break; + case 'a': + parse_addr_range(optarg); + break; + case 'b': + parse_bits_mask(optarg); + break; + case 'c': + parse_cgroup(optarg); + break; + case 'C': + opt_list_cgroup = 1; + break; + case 'd': + describe_flags(optarg); + exit(0); + case 'i': + opt_mark_idle = 1; + break; + case 'l': + opt_list = 1; + break; + case 'L': + opt_list = 2; + break; + case 'M': + opt_list_mapcnt = 1; + break; + case 'N': + opt_no_summary = 1; + break; + case 'X': + opt_hwpoison = 1; + prepare_hwpoison_fd(); + break; + case 'x': + opt_unpoison = 1; + prepare_hwpoison_fd(); + break; + case 'F': + parse_kpageflags(optarg); + break; + case 'h': + usage(); + exit(0); + default: + usage(); + exit(1); + } + } + + if (!opt_kpageflags) + opt_kpageflags = PROC_KPAGEFLAGS; + + if (opt_cgroup || opt_list_cgroup) + kpagecgroup_fd = checked_open(PROC_KPAGECGROUP, O_RDONLY); + + if (opt_list && opt_list_mapcnt) + kpagecount_fd = checked_open(PROC_KPAGECOUNT, O_RDONLY); + + if (opt_mark_idle) + page_idle_fd = checked_open(SYS_KERNEL_MM_PAGE_IDLE, O_RDWR); + + if (opt_list && opt_pid) + printf("voffset\t"); + if (opt_list && opt_file) + printf("foffset\t"); + if (opt_list && opt_list_cgroup) + printf("cgroup\t"); + if (opt_list && opt_list_mapcnt) + printf("map-cnt\t"); + + if (opt_list == 1) + printf("offset\tlen\tflags\n"); + if (opt_list == 2) + printf("offset\tflags\n"); + + if (opt_file) + walk_page_cache(); + else + walk_addr_ranges(); + + if (opt_list == 1) + flush_page_range(); + + if (opt_no_summary) + return 0; + + if (opt_list) + printf("\n\n"); + + if (opt_file) { + show_file(opt_file, &st); + printf("\n"); + } + + show_summary(); + + if (opt_list_mapcnt) + close(kpagecount_fd); + + if (page_idle_fd >= 0) + close(page_idle_fd); + + return 0; +} diff --git a/tools/mm/page_owner_sort.c b/tools/mm/page_owner_sort.c new file mode 100644 index 000000000000..7c2ac124cdc8 --- /dev/null +++ b/tools/mm/page_owner_sort.c @@ -0,0 +1,897 @@ +// SPDX-License-Identifier: 
GPL-2.0 +/* + * User-space helper to sort the output of /sys/kernel/debug/page_owner + * + * Example use: + * cat /sys/kernel/debug/page_owner > page_owner_full.txt + * ./page_owner_sort page_owner_full.txt sorted_page_owner.txt + * Or sort by total memory: + * ./page_owner_sort -m page_owner_full.txt sorted_page_owner.txt + * + * See Documentation/mm/page_owner.rst +*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define bool int +#define true 1 +#define false 0 +#define TASK_COMM_LEN 16 + +struct block_list { + char *txt; + char *comm; // task command name + char *stacktrace; + __u64 ts_nsec; + __u64 free_ts_nsec; + int len; + int num; + int page_num; + pid_t pid; + pid_t tgid; + int allocator; +}; +enum FILTER_BIT { + FILTER_UNRELEASE = 1<<1, + FILTER_PID = 1<<2, + FILTER_TGID = 1<<3, + FILTER_COMM = 1<<4 +}; +enum CULL_BIT { + CULL_UNRELEASE = 1<<1, + CULL_PID = 1<<2, + CULL_TGID = 1<<3, + CULL_COMM = 1<<4, + CULL_STACKTRACE = 1<<5, + CULL_ALLOCATOR = 1<<6 +}; +enum ALLOCATOR_BIT { + ALLOCATOR_CMA = 1<<1, + ALLOCATOR_SLAB = 1<<2, + ALLOCATOR_VMALLOC = 1<<3, + ALLOCATOR_OTHERS = 1<<4 +}; +enum ARG_TYPE { + ARG_TXT, ARG_COMM, ARG_STACKTRACE, ARG_ALLOC_TS, ARG_FREE_TS, + ARG_CULL_TIME, ARG_PAGE_NUM, ARG_PID, ARG_TGID, ARG_UNKNOWN, ARG_FREE, + ARG_ALLOCATOR +}; +enum SORT_ORDER { + SORT_ASC = 1, + SORT_DESC = -1, +}; +struct filter_condition { + pid_t *pids; + pid_t *tgids; + char **comms; + int pids_size; + int tgids_size; + int comms_size; +}; +struct sort_condition { + int (**cmps)(const void *, const void *); + int *signs; + int size; +}; +static struct filter_condition fc; +static struct sort_condition sc; +static regex_t order_pattern; +static regex_t pid_pattern; +static regex_t tgid_pattern; +static regex_t comm_pattern; +static regex_t ts_nsec_pattern; +static regex_t free_ts_nsec_pattern; +static struct block_list *list; +static int list_size; +static int max_size; +static int cull; 
+static int filter; +static bool debug_on; + +static void set_single_cmp(int (*cmp)(const void *, const void *), int sign); + +int read_block(char *buf, char *ext_buf, int buf_size, FILE *fin) +{ + char *curr = buf, *const buf_end = buf + buf_size; + + while (buf_end - curr > 1 && fgets(curr, buf_end - curr, fin)) { + if (*curr == '\n') { /* empty line */ + return curr - buf; + } + if (!strncmp(curr, "PFN", 3)) { + strcpy(ext_buf, curr); + continue; + } + curr += strlen(curr); + } + + return -1; /* EOF or no space left in buf. */ +} + +static int compare_txt(const void *p1, const void *p2) +{ + const struct block_list *l1 = p1, *l2 = p2; + + return strcmp(l1->txt, l2->txt); +} + +static int compare_stacktrace(const void *p1, const void *p2) +{ + const struct block_list *l1 = p1, *l2 = p2; + + return strcmp(l1->stacktrace, l2->stacktrace); +} + +static int compare_num(const void *p1, const void *p2) +{ + const struct block_list *l1 = p1, *l2 = p2; + + return l1->num - l2->num; +} + +static int compare_page_num(const void *p1, const void *p2) +{ + const struct block_list *l1 = p1, *l2 = p2; + + return l1->page_num - l2->page_num; +} + +static int compare_pid(const void *p1, const void *p2) +{ + const struct block_list *l1 = p1, *l2 = p2; + + return l1->pid - l2->pid; +} + +static int compare_tgid(const void *p1, const void *p2) +{ + const struct block_list *l1 = p1, *l2 = p2; + + return l1->tgid - l2->tgid; +} + +static int compare_allocator(const void *p1, const void *p2) +{ + const struct block_list *l1 = p1, *l2 = p2; + + return l1->allocator - l2->allocator; +} + +static int compare_comm(const void *p1, const void *p2) +{ + const struct block_list *l1 = p1, *l2 = p2; + + return strcmp(l1->comm, l2->comm); +} + +static int compare_ts(const void *p1, const void *p2) +{ + const struct block_list *l1 = p1, *l2 = p2; + + return l1->ts_nsec < l2->ts_nsec ? 
-1 : 1; +} + +static int compare_free_ts(const void *p1, const void *p2) +{ + const struct block_list *l1 = p1, *l2 = p2; + + return l1->free_ts_nsec < l2->free_ts_nsec ? -1 : 1; +} + +static int compare_release(const void *p1, const void *p2) +{ + const struct block_list *l1 = p1, *l2 = p2; + + if (!l1->free_ts_nsec && !l2->free_ts_nsec) + return 0; + if (l1->free_ts_nsec && l2->free_ts_nsec) + return 0; + return l1->free_ts_nsec ? 1 : -1; +} + +static int compare_cull_condition(const void *p1, const void *p2) +{ + if (cull == 0) + return compare_txt(p1, p2); + if ((cull & CULL_STACKTRACE) && compare_stacktrace(p1, p2)) + return compare_stacktrace(p1, p2); + if ((cull & CULL_PID) && compare_pid(p1, p2)) + return compare_pid(p1, p2); + if ((cull & CULL_TGID) && compare_tgid(p1, p2)) + return compare_tgid(p1, p2); + if ((cull & CULL_COMM) && compare_comm(p1, p2)) + return compare_comm(p1, p2); + if ((cull & CULL_UNRELEASE) && compare_release(p1, p2)) + return compare_release(p1, p2); + if ((cull & CULL_ALLOCATOR) && compare_allocator(p1, p2)) + return compare_allocator(p1, p2); + return 0; +} + +static int compare_sort_condition(const void *p1, const void *p2) +{ + int cmp = 0; + + for (int i = 0; i < sc.size; ++i) + if (cmp == 0) + cmp = sc.signs[i] * sc.cmps[i](p1, p2); + return cmp; +} + +static int search_pattern(regex_t *pattern, char *pattern_str, char *buf) +{ + int err, val_len; + regmatch_t pmatch[2]; + + err = regexec(pattern, buf, 2, pmatch, REG_NOTBOL); + if (err != 0 || pmatch[1].rm_so == -1) { + if (debug_on) + fprintf(stderr, "no matching pattern in %s\n", buf); + return -1; + } + val_len = pmatch[1].rm_eo - pmatch[1].rm_so; + + memcpy(pattern_str, buf + pmatch[1].rm_so, val_len); + + return 0; +} + +static bool check_regcomp(regex_t *pattern, const char *regex) +{ + int err; + + err = regcomp(pattern, regex, REG_EXTENDED | REG_NEWLINE); + if (err != 0 || pattern->re_nsub != 1) { + fprintf(stderr, "Invalid pattern %s code %d\n", regex, err); + return 
false; + } + return true; +} + +static char **explode(char sep, const char *str, int *size) +{ + int count = 0, len = strlen(str); + int lastindex = -1, j = 0; + + for (int i = 0; i < len; i++) + if (str[i] == sep) + count++; + char **ret = calloc(++count, sizeof(char *)); + + for (int i = 0; i < len; i++) { + if (str[i] == sep) { + ret[j] = calloc(i - lastindex, sizeof(char)); + memcpy(ret[j++], str + lastindex + 1, i - lastindex - 1); + lastindex = i; + } + } + if (lastindex <= len - 1) { + ret[j] = calloc(len - lastindex, sizeof(char)); + memcpy(ret[j++], str + lastindex + 1, strlen(str) - 1 - lastindex); + } + *size = j; + return ret; +} + +static void free_explode(char **arr, int size) +{ + for (int i = 0; i < size; i++) + free(arr[i]); + free(arr); +} + +# define FIELD_BUFF 25 + +static int get_page_num(char *buf) +{ + int order_val; + char order_str[FIELD_BUFF] = {0}; + char *endptr; + + search_pattern(&order_pattern, order_str, buf); + errno = 0; + order_val = strtol(order_str, &endptr, 10); + if (order_val > 64 || errno != 0 || endptr == order_str || *endptr != '\0') { + if (debug_on) + fprintf(stderr, "wrong order in follow buf:\n%s\n", buf); + return 0; + } + + return 1 << order_val; +} + +static pid_t get_pid(char *buf) +{ + pid_t pid; + char pid_str[FIELD_BUFF] = {0}; + char *endptr; + + search_pattern(&pid_pattern, pid_str, buf); + errno = 0; + pid = strtol(pid_str, &endptr, 10); + if (errno != 0 || endptr == pid_str || *endptr != '\0') { + if (debug_on) + fprintf(stderr, "wrong/invalid pid in follow buf:\n%s\n", buf); + return -1; + } + + return pid; + +} + +static pid_t get_tgid(char *buf) +{ + pid_t tgid; + char tgid_str[FIELD_BUFF] = {0}; + char *endptr; + + search_pattern(&tgid_pattern, tgid_str, buf); + errno = 0; + tgid = strtol(tgid_str, &endptr, 10); + if (errno != 0 || endptr == tgid_str || *endptr != '\0') { + if (debug_on) + fprintf(stderr, "wrong/invalid tgid in follow buf:\n%s\n", buf); + return -1; + } + + return tgid; + +} + +static 
__u64 get_ts_nsec(char *buf) +{ + __u64 ts_nsec; + char ts_nsec_str[FIELD_BUFF] = {0}; + char *endptr; + + search_pattern(&ts_nsec_pattern, ts_nsec_str, buf); + errno = 0; + ts_nsec = strtoull(ts_nsec_str, &endptr, 10); + if (errno != 0 || endptr == ts_nsec_str || *endptr != '\0') { + if (debug_on) + fprintf(stderr, "wrong ts_nsec in follow buf:\n%s\n", buf); + return -1; + } + + return ts_nsec; +} + +static __u64 get_free_ts_nsec(char *buf) +{ + __u64 free_ts_nsec; + char free_ts_nsec_str[FIELD_BUFF] = {0}; + char *endptr; + + search_pattern(&free_ts_nsec_pattern, free_ts_nsec_str, buf); + errno = 0; + free_ts_nsec = strtoull(free_ts_nsec_str, &endptr, 10); + if (errno != 0 || endptr == free_ts_nsec_str || *endptr != '\0') { + if (debug_on) + fprintf(stderr, "wrong free_ts_nsec in follow buf:\n%s\n", buf); + return -1; + } + + return free_ts_nsec; +} + +static char *get_comm(char *buf) +{ + char *comm_str = malloc(TASK_COMM_LEN); + + memset(comm_str, 0, TASK_COMM_LEN); + + search_pattern(&comm_pattern, comm_str, buf); + errno = 0; + if (errno != 0) { + if (debug_on) + fprintf(stderr, "wrong comm in follow buf:\n%s\n", buf); + return NULL; + } + + return comm_str; +} + +static int get_arg_type(const char *arg) +{ + if (!strcmp(arg, "pid") || !strcmp(arg, "p")) + return ARG_PID; + else if (!strcmp(arg, "tgid") || !strcmp(arg, "tg")) + return ARG_TGID; + else if (!strcmp(arg, "name") || !strcmp(arg, "n")) + return ARG_COMM; + else if (!strcmp(arg, "stacktrace") || !strcmp(arg, "st")) + return ARG_STACKTRACE; + else if (!strcmp(arg, "free") || !strcmp(arg, "f")) + return ARG_FREE; + else if (!strcmp(arg, "txt") || !strcmp(arg, "T")) + return ARG_TXT; + else if (!strcmp(arg, "free_ts") || !strcmp(arg, "ft")) + return ARG_FREE_TS; + else if (!strcmp(arg, "alloc_ts") || !strcmp(arg, "at")) + return ARG_ALLOC_TS; + else if (!strcmp(arg, "allocator") || !strcmp(arg, "ator")) + return ARG_ALLOCATOR; + else { + return ARG_UNKNOWN; + } +} + +static int get_allocator(const 
char *buf, const char *migrate_info) +{ + char *tmp, *first_line, *second_line; + int allocator = 0; + + if (strstr(migrate_info, "CMA")) + allocator |= ALLOCATOR_CMA; + if (strstr(migrate_info, "slab")) + allocator |= ALLOCATOR_SLAB; + tmp = strstr(buf, "__vmalloc_node_range"); + if (tmp) { + second_line = tmp; + while (*tmp != '\n') + tmp--; + tmp--; + while (*tmp != '\n') + tmp--; + first_line = ++tmp; + tmp = strstr(tmp, "alloc_pages"); + if (tmp && first_line <= tmp && tmp < second_line) + allocator |= ALLOCATOR_VMALLOC; + } + if (allocator == 0) + allocator = ALLOCATOR_OTHERS; + return allocator; +} + +static bool match_num_list(int num, int *list, int list_size) +{ + for (int i = 0; i < list_size; ++i) + if (list[i] == num) + return true; + return false; +} + +static bool match_str_list(const char *str, char **list, int list_size) +{ + for (int i = 0; i < list_size; ++i) + if (!strcmp(list[i], str)) + return true; + return false; +} + +static bool is_need(char *buf) +{ + __u64 ts_nsec, free_ts_nsec; + + ts_nsec = get_ts_nsec(buf); + free_ts_nsec = get_free_ts_nsec(buf); + + if ((filter & FILTER_UNRELEASE) && free_ts_nsec != 0 && ts_nsec < free_ts_nsec) + return false; + if ((filter & FILTER_PID) && !match_num_list(get_pid(buf), fc.pids, fc.pids_size)) + return false; + if ((filter & FILTER_TGID) && + !match_num_list(get_tgid(buf), fc.tgids, fc.tgids_size)) + return false; + + char *comm = get_comm(buf); + + if ((filter & FILTER_COMM) && + !match_str_list(comm, fc.comms, fc.comms_size)) { + free(comm); + return false; + } + free(comm); + return true; +} + +static bool add_list(char *buf, int len, char *ext_buf) +{ + if (list_size != 0 && + len == list[list_size-1].len && + memcmp(buf, list[list_size-1].txt, len) == 0) { + list[list_size-1].num++; + list[list_size-1].page_num += get_page_num(buf); + return true; + } + if (list_size == max_size) { + fprintf(stderr, "max_size too small??\n"); + return false; + } + if (!is_need(buf)) + return true; + 
list[list_size].pid = get_pid(buf); + list[list_size].tgid = get_tgid(buf); + list[list_size].comm = get_comm(buf); + list[list_size].txt = malloc(len+1); + if (!list[list_size].txt) { + fprintf(stderr, "Out of memory\n"); + return false; + } + memcpy(list[list_size].txt, buf, len); + list[list_size].txt[len] = 0; + list[list_size].len = len; + list[list_size].num = 1; + list[list_size].page_num = get_page_num(buf); + + list[list_size].stacktrace = strchr(list[list_size].txt, '\n') ?: ""; + if (*list[list_size].stacktrace == '\n') + list[list_size].stacktrace++; + list[list_size].ts_nsec = get_ts_nsec(buf); + list[list_size].free_ts_nsec = get_free_ts_nsec(buf); + list[list_size].allocator = get_allocator(buf, ext_buf); + list_size++; + if (list_size % 1000 == 0) { + printf("loaded %d\r", list_size); + fflush(stdout); + } + return true; +} + +static bool parse_cull_args(const char *arg_str) +{ + int size = 0; + char **args = explode(',', arg_str, &size); + + for (int i = 0; i < size; ++i) { + int arg_type = get_arg_type(args[i]); + + if (arg_type == ARG_PID) + cull |= CULL_PID; + else if (arg_type == ARG_TGID) + cull |= CULL_TGID; + else if (arg_type == ARG_COMM) + cull |= CULL_COMM; + else if (arg_type == ARG_STACKTRACE) + cull |= CULL_STACKTRACE; + else if (arg_type == ARG_FREE) + cull |= CULL_UNRELEASE; + else if (arg_type == ARG_ALLOCATOR) + cull |= CULL_ALLOCATOR; + else { + free_explode(args, size); + return false; + } + } + free_explode(args, size); + if (sc.size == 0) + set_single_cmp(compare_num, SORT_DESC); + return true; +} + +static void set_single_cmp(int (*cmp)(const void *, const void *), int sign) +{ + if (sc.signs == NULL || sc.size < 1) + sc.signs = calloc(1, sizeof(int)); + sc.signs[0] = sign; + if (sc.cmps == NULL || sc.size < 1) + sc.cmps = calloc(1, sizeof(int *)); + sc.cmps[0] = cmp; + sc.size = 1; +} + +static bool parse_sort_args(const char *arg_str) +{ + int size = 0; + + if (sc.size != 0) { /* reset sort_condition */ + free(sc.signs); + 
free(sc.cmps); + size = 0; + } + + char **args = explode(',', arg_str, &size); + + sc.signs = calloc(size, sizeof(int)); + sc.cmps = calloc(size, sizeof(int *)); + for (int i = 0; i < size; ++i) { + int offset = 0; + + sc.signs[i] = SORT_ASC; + if (args[i][0] == '-' || args[i][0] == '+') { + if (args[i][0] == '-') + sc.signs[i] = SORT_DESC; + offset = 1; + } + + int arg_type = get_arg_type(args[i]+offset); + + if (arg_type == ARG_PID) + sc.cmps[i] = compare_pid; + else if (arg_type == ARG_TGID) + sc.cmps[i] = compare_tgid; + else if (arg_type == ARG_COMM) + sc.cmps[i] = compare_comm; + else if (arg_type == ARG_STACKTRACE) + sc.cmps[i] = compare_stacktrace; + else if (arg_type == ARG_ALLOC_TS) + sc.cmps[i] = compare_ts; + else if (arg_type == ARG_FREE_TS) + sc.cmps[i] = compare_free_ts; + else if (arg_type == ARG_TXT) + sc.cmps[i] = compare_txt; + else if (arg_type == ARG_ALLOCATOR) + sc.cmps[i] = compare_allocator; + else { + free_explode(args, size); + sc.size = 0; + return false; + } + } + sc.size = size; + free_explode(args, size); + return true; +} + +static int *parse_nums_list(char *arg_str, int *list_size) +{ + int size = 0; + char **args = explode(',', arg_str, &size); + int *list = calloc(size, sizeof(int)); + + errno = 0; + for (int i = 0; i < size; ++i) { + char *endptr = NULL; + + list[i] = strtol(args[i], &endptr, 10); + if (errno != 0 || endptr == args[i] || *endptr != '\0') { + free(list); + return NULL; + } + } + *list_size = size; + free_explode(args, size); + return list; +} + +static void print_allocator(FILE *out, int allocator) +{ + fprintf(out, "allocated by "); + if (allocator & ALLOCATOR_CMA) + fprintf(out, "CMA "); + if (allocator & ALLOCATOR_SLAB) + fprintf(out, "SLAB "); + if (allocator & ALLOCATOR_VMALLOC) + fprintf(out, "VMALLOC "); + if (allocator & ALLOCATOR_OTHERS) + fprintf(out, "OTHERS "); +} + +#define BUF_SIZE (128 * 1024) + +static void usage(void) +{ + printf("Usage: ./page_owner_sort [OPTIONS] \n" + "-m\t\tSort by total 
memory.\n" + "-s\t\tSort by the stack trace.\n" + "-t\t\tSort by times (default).\n" + "-p\t\tSort by pid.\n" + "-P\t\tSort by tgid.\n" + "-n\t\tSort by task command name.\n" + "-a\t\tSort by memory allocate time.\n" + "-r\t\tSort by memory release time.\n" + "-f\t\tFilter out the information of blocks whose memory has been released.\n" + "-d\t\tPrint debug information.\n" + "--pid \tSelect by pid. This selects the information of blocks whose process ID numbers appear in .\n" + "--tgid \tSelect by tgid. This selects the information of blocks whose Thread Group ID numbers appear in .\n" + "--name \n\t\tSelect by command name. This selects the information of blocks whose command name appears in .\n" + "--cull \tCull by user-defined rules. is a single argument in the form of a comma-separated list with some common fields predefined\n" + "--sort \tSpecify sort order as: [+|-]key[,[+|-]key[,...]]\n" + ); +} + +int main(int argc, char **argv) +{ + FILE *fin, *fout; + char *buf, *ext_buf; + int i, count; + struct stat st; + int opt; + struct option longopts[] = { + { "pid", required_argument, NULL, 1 }, + { "tgid", required_argument, NULL, 2 }, + { "name", required_argument, NULL, 3 }, + { "cull", required_argument, NULL, 4 }, + { "sort", required_argument, NULL, 5 }, + { 0, 0, 0, 0}, + }; + + while ((opt = getopt_long(argc, argv, "adfmnprstP", longopts, NULL)) != -1) + switch (opt) { + case 'a': + set_single_cmp(compare_ts, SORT_ASC); + break; + case 'd': + debug_on = true; + break; + case 'f': + filter = filter | FILTER_UNRELEASE; + break; + case 'm': + set_single_cmp(compare_page_num, SORT_DESC); + break; + case 'p': + set_single_cmp(compare_pid, SORT_ASC); + break; + case 'r': + set_single_cmp(compare_free_ts, SORT_ASC); + break; + case 's': + set_single_cmp(compare_stacktrace, SORT_ASC); + break; + case 't': + set_single_cmp(compare_num, SORT_DESC); + break; + case 'P': + set_single_cmp(compare_tgid, SORT_ASC); + break; + case 'n': + set_single_cmp(compare_comm, 
SORT_ASC); + break; + case 1: + filter = filter | FILTER_PID; + fc.pids = parse_nums_list(optarg, &fc.pids_size); + if (fc.pids == NULL) { + fprintf(stderr, "wrong/invalid pid in from the command line:%s\n", + optarg); + exit(1); + } + break; + case 2: + filter = filter | FILTER_TGID; + fc.tgids = parse_nums_list(optarg, &fc.tgids_size); + if (fc.tgids == NULL) { + fprintf(stderr, "wrong/invalid tgid in from the command line:%s\n", + optarg); + exit(1); + } + break; + case 3: + filter = filter | FILTER_COMM; + fc.comms = explode(',', optarg, &fc.comms_size); + break; + case 4: + if (!parse_cull_args(optarg)) { + fprintf(stderr, "wrong argument after --cull option:%s\n", + optarg); + exit(1); + } + break; + case 5: + if (!parse_sort_args(optarg)) { + fprintf(stderr, "wrong argument after --sort option:%s\n", + optarg); + exit(1); + } + break; + default: + usage(); + exit(1); + } + + if (optind >= (argc - 1)) { + usage(); + exit(1); + } + + fin = fopen(argv[optind], "r"); + fout = fopen(argv[optind + 1], "w"); + if (!fin || !fout) { + usage(); + perror("open: "); + exit(1); + } + + if (!check_regcomp(&order_pattern, "order\\s*([0-9]*),")) + goto out_order; + if (!check_regcomp(&pid_pattern, "pid\\s*([0-9]*),")) + goto out_pid; + if (!check_regcomp(&tgid_pattern, "tgid\\s*([0-9]*) ")) + goto out_tgid; + if (!check_regcomp(&comm_pattern, "tgid\\s*[0-9]*\\s*\\((.*)\\),\\s*ts")) + goto out_comm; + if (!check_regcomp(&ts_nsec_pattern, "ts\\s*([0-9]*)\\s*ns,")) + goto out_ts; + if (!check_regcomp(&free_ts_nsec_pattern, "free_ts\\s*([0-9]*)\\s*ns")) + goto out_free_ts; + + fstat(fileno(fin), &st); + max_size = st.st_size / 100; /* hack ... 
*/ + + list = malloc(max_size * sizeof(*list)); + buf = malloc(BUF_SIZE); + ext_buf = malloc(BUF_SIZE); + if (!list || !buf || !ext_buf) { + fprintf(stderr, "Out of memory\n"); + goto out_free; + } + + for ( ; ; ) { + int buf_len = read_block(buf, ext_buf, BUF_SIZE, fin); + + if (buf_len < 0) + break; + if (!add_list(buf, buf_len, ext_buf)) + goto out_free; + } + + printf("loaded %d\n", list_size); + + printf("sorting ....\n"); + + qsort(list, list_size, sizeof(list[0]), compare_cull_condition); + + printf("culling\n"); + + for (i = count = 0; i < list_size; i++) { + if (count == 0 || + compare_cull_condition((void *)(&list[count-1]), (void *)(&list[i])) != 0) { + list[count++] = list[i]; + } else { + list[count-1].num += list[i].num; + list[count-1].page_num += list[i].page_num; + } + } + + qsort(list, count, sizeof(list[0]), compare_sort_condition); + + for (i = 0; i < count; i++) { + if (cull == 0) { + fprintf(fout, "%d times, %d pages, ", list[i].num, list[i].page_num); + print_allocator(fout, list[i].allocator); + fprintf(fout, ":\n%s\n", list[i].txt); + } + else { + fprintf(fout, "%d times, %d pages", + list[i].num, list[i].page_num); + if (cull & CULL_PID || filter & FILTER_PID) + fprintf(fout, ", PID %d", list[i].pid); + if (cull & CULL_TGID || filter & FILTER_TGID) + fprintf(fout, ", TGID %d", list[i].tgid); /* the thread-group id, not the pid */ + if (cull & CULL_COMM || filter & FILTER_COMM) + fprintf(fout, ", task_comm_name: %s", list[i].comm); + if (cull & CULL_ALLOCATOR) { + fprintf(fout, ", "); + print_allocator(fout, list[i].allocator); + } + if (cull & CULL_UNRELEASE) + fprintf(fout, " (%s)", + list[i].free_ts_nsec ? 
"UNRELEASED" : "RELEASED"); + if (cull & CULL_STACKTRACE) + fprintf(fout, ":\n%s", list[i].stacktrace); + fprintf(fout, "\n"); + } + } + +out_free: + if (ext_buf) + free(ext_buf); + if (buf) + free(buf); + if (list) + free(list); +out_free_ts: + regfree(&free_ts_nsec_pattern); +out_ts: + regfree(&ts_nsec_pattern); +out_comm: + regfree(&comm_pattern); +out_tgid: + regfree(&tgid_pattern); +out_pid: + regfree(&pid_pattern); +out_order: + regfree(&order_pattern); + + return 0; +} diff --git a/tools/mm/slabinfo-gnuplot.sh b/tools/mm/slabinfo-gnuplot.sh new file mode 100644 index 000000000000..873a892147e5 --- /dev/null +++ b/tools/mm/slabinfo-gnuplot.sh @@ -0,0 +1,268 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-only + +# Sergey Senozhatsky, 2015 +# sergey.senozhatsky.work@gmail.com +# + + +# This program is intended to plot a `slabinfo -X' stats, collected, +# for example, using the following command: +# while [ 1 ]; do slabinfo -X >> stats; sleep 1; done +# +# Use `slabinfo-gnuplot.sh stats' to pre-process collected records +# and generate graphs (totals, slabs sorted by size, slabs sorted +# by size). +# +# Graphs can be [individually] regenerate with different ranges and +# size (-r %d,%d and -s %d,%d options). +# +# To visually compare N `totals' graphs, do +# slabinfo-gnuplot.sh -t FILE1-totals FILE2-totals ... FILEN-totals +# + +min_slab_name_size=11 +xmin=0 +xmax=0 +width=1500 +height=700 +mode=preprocess + +usage() +{ + echo "Usage: [-s W,H] [-r MIN,MAX] [-t|-l] FILE1 [FILE2 ..]" + echo "FILEs must contain 'slabinfo -X' samples" + echo "-t - plot totals for FILE(s)" + echo "-l - plot slabs stats for FILE(s)" + echo "-s %d,%d - set image width and height" + echo "-r %d,%d - use data samples from a given range" +} + +check_file_exist() +{ + if [ ! 
-f "$1" ]; then + echo "File '$1' does not exist" + exit 1 + fi +} + +do_slabs_plotting() +{ + local file=$1 + local out_file + local range="every ::$xmin" + local xtic="" + local xtic_rotate="norotate" + local lines=2000000 + local wc_lines + + check_file_exist "$file" + + out_file=`basename "$file"` + if [ $xmax -ne 0 ]; then + range="$range::$xmax" + lines=$((xmax-xmin)) + fi + + wc_lines=`cat "$file" | wc -l` + if [ $? -ne 0 ] || [ "$wc_lines" -eq 0 ] ; then + wc_lines=$lines + fi + + if [ "$wc_lines" -lt "$lines" ]; then + lines=$wc_lines + fi + + if [ $((width / lines)) -gt $min_slab_name_size ]; then + xtic=":xtic(1)" + xtic_rotate=90 + fi + +gnuplot -p << EOF +#!/usr/bin/env gnuplot + +set terminal png enhanced size $width,$height large +set output '$out_file.png' +set autoscale xy +set xlabel 'samples' +set ylabel 'bytes' +set style histogram columnstacked title textcolor lt -1 +set style fill solid 0.15 +set xtics rotate $xtic_rotate +set key left above Left title reverse + +plot "$file" $range u 2$xtic title 'SIZE' with boxes,\ + '' $range u 3 title 'LOSS' with boxes +EOF + + if [ $? -eq 0 ]; then + echo "$out_file.png" + fi +} + +do_totals_plotting() +{ + local gnuplot_cmd="" + local range="every ::$xmin" + local file="" + + if [ $xmax -ne 0 ]; then + range="$range::$xmax" + fi + + for i in "${t_files[@]}"; do + check_file_exist "$i" + + file="$file"`basename "$i"` + gnuplot_cmd="$gnuplot_cmd '$i' $range using 1 title\ + '$i Memory usage' with lines," + gnuplot_cmd="$gnuplot_cmd '' $range using 2 title \ + '$i Loss' with lines," + done + +gnuplot -p << EOF +#!/usr/bin/env gnuplot + +set terminal png enhanced size $width,$height large +set autoscale xy +set output '$file.png' +set xlabel 'samples' +set ylabel 'bytes' +set key left above Left title reverse + +plot $gnuplot_cmd +EOF + + if [ $? 
-eq 0 ]; then + echo "$file.png" + fi +} + +do_preprocess() +{ + local out + local lines + local in=$1 + + check_file_exist "$in" + + # use only 'TOP' slab (biggest memory usage or loss) + let lines=3 + out=`basename "$in"`"-slabs-by-loss" + `cat "$in" | grep -A "$lines" 'Slabs sorted by loss' |\ + grep -E -iv '\-\-|Name|Slabs'\ + | awk '{print $1" "$4+$2*$3" "$4}' > "$out"` + if [ $? -eq 0 ]; then + do_slabs_plotting "$out" + fi + + let lines=3 + out=`basename "$in"`"-slabs-by-size" + `cat "$in" | grep -A "$lines" 'Slabs sorted by size' |\ + grep -E -iv '\-\-|Name|Slabs'\ + | awk '{print $1" "$4" "$4-$2*$3}' > "$out"` + if [ $? -eq 0 ]; then + do_slabs_plotting "$out" + fi + + out=`basename "$in"`"-totals" + `cat "$in" | grep "Memory used" |\ + awk '{print $3" "$7}' > "$out"` + if [ $? -eq 0 ]; then + t_files[0]=$out + do_totals_plotting + fi +} + +parse_opts() +{ + local opt + + while getopts "tlr::s::h" opt; do + case $opt in + t) + mode=totals + ;; + l) + mode=slabs + ;; + s) + array=(${OPTARG//,/ }) + width=${array[0]} + height=${array[1]} + ;; + r) + array=(${OPTARG//,/ }) + xmin=${array[0]} + xmax=${array[1]} + ;; + h) + usage + exit 0 + ;; + \?) + echo "Invalid option: -$OPTARG" >&2 + exit 1 + ;; + :) + echo "-$OPTARG requires an argument." >&2 + exit 1 + ;; + esac + done + + return $OPTIND +} + +parse_args() +{ + local idx=0 + local p + + for p in "$@"; do + case $mode in + preprocess) + files[$idx]=$p + idx=$idx+1 + ;; + totals) + t_files[$idx]=$p + idx=$idx+1 + ;; + slabs) + files[$idx]=$p + idx=$idx+1 + ;; + esac + done +} + +parse_opts "$@" +argstart=$? 
+parse_args "${@:$argstart}" + +if [ ${#files[@]} -eq 0 ] && [ ${#t_files[@]} -eq 0 ]; then + usage + exit 1 +fi + +case $mode in + preprocess) + for i in "${files[@]}"; do + do_preprocess "$i" + done + ;; + totals) + do_totals_plotting + ;; + slabs) + for i in "${files[@]}"; do + do_slabs_plotting "$i" + done + ;; + *) + echo "Unknown mode $mode" >&2 + usage + exit 1 + ;; +esac diff --git a/tools/mm/slabinfo.c b/tools/mm/slabinfo.c new file mode 100644 index 000000000000..cfaeaea71042 --- /dev/null +++ b/tools/mm/slabinfo.c @@ -0,0 +1,1544 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Slabinfo: Tool to get reports about slabs + * + * (C) 2007 sgi, Christoph Lameter + * (C) 2011 Linux Foundation, Christoph Lameter + * + * Compile with: + * + * gcc -o slabinfo slabinfo.c + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define MAX_SLABS 500 +#define MAX_ALIASES 500 +#define MAX_NODES 1024 + +struct slabinfo { + char *name; + int alias; + int refs; + int aliases, align, cache_dma, cpu_slabs, destroy_by_rcu; + unsigned int hwcache_align, object_size, objs_per_slab; + unsigned int sanity_checks, slab_size, store_user, trace; + int order, poison, reclaim_account, red_zone; + unsigned long partial, objects, slabs, objects_partial, objects_total; + unsigned long alloc_fastpath, alloc_slowpath; + unsigned long free_fastpath, free_slowpath; + unsigned long free_frozen, free_add_partial, free_remove_partial; + unsigned long alloc_from_partial, alloc_slab, free_slab, alloc_refill; + unsigned long cpuslab_flush, deactivate_full, deactivate_empty; + unsigned long deactivate_to_head, deactivate_to_tail; + unsigned long deactivate_remote_frees, order_fallback; + unsigned long cmpxchg_double_cpu_fail, cmpxchg_double_fail; + unsigned long alloc_node_mismatch, deactivate_bypass; + unsigned long cpu_partial_alloc, cpu_partial_free; + int numa[MAX_NODES]; + int numa_partial[MAX_NODES]; +} slabinfo[MAX_SLABS]; + 
+struct aliasinfo { + char *name; + char *ref; + struct slabinfo *slab; +} aliasinfo[MAX_ALIASES]; + +int slabs; +int actual_slabs; +int aliases; +int alias_targets; +int highest_node; + +char buffer[4096]; + +int show_empty; +int show_report; +int show_alias; +int show_slab; +int skip_zero = 1; +int show_numa; +int show_track; +int show_first_alias; +int validate; +int shrink; +int show_inverted; +int show_single_ref; +int show_totals; +int sort_size; +int sort_active; +int set_debug; +int show_ops; +int sort_partial; +int show_activity; +int output_lines = -1; +int sort_loss; +int extended_totals; +int show_bytes; +int unreclaim_only; + +/* Debug options */ +int sanity; +int redzone; +int poison; +int tracking; +int tracing; + +int page_size; + +regex_t pattern; + +static void fatal(const char *x, ...) +{ + va_list ap; + + va_start(ap, x); + vfprintf(stderr, x, ap); + va_end(ap); + exit(EXIT_FAILURE); +} + +static void usage(void) +{ + printf("slabinfo 4/15/2011. (c) 2007 sgi/(c) 2011 Linux Foundation.\n\n" + "slabinfo [-aABDefhilLnoPrsStTUvXz1] [N=K] [-dafzput] [slab-regexp]\n" + "-a|--aliases Show aliases\n" + "-A|--activity Most active slabs first\n" + "-B|--Bytes Show size in bytes\n" + "-D|--display-active Switch line format to activity\n" + "-e|--empty Show empty slabs\n" + "-f|--first-alias Show first alias\n" + "-h|--help Show usage information\n" + "-i|--inverted Inverted list\n" + "-l|--slabs Show slabs\n" + "-L|--Loss Sort by loss\n" + "-n|--numa Show NUMA information\n" + "-N|--lines=K Show the first K slabs\n" + "-o|--ops Show kmem_cache_ops\n" + "-P|--partial Sort by number of partial slabs\n" + "-r|--report Detailed report on single slabs\n" + "-s|--shrink Shrink slabs\n" + "-S|--Size Sort by size\n" + "-t|--tracking Show alloc/free information\n" + "-T|--Totals Show summary information\n" + "-U|--Unreclaim Show unreclaimable slabs only\n" + "-v|--validate Validate slabs\n" + "-X|--Xtotals Show extended summary information\n" + "-z|--zero Include 
empty slabs\n" + "-1|--1ref Single reference\n" + + "\n" + "-d | --debug Switch off all debug options\n" + "-da | --debug=a Switch on all debug options (--debug=FZPU)\n" + + "\n" + "-d[afzput] | --debug=[afzput]\n" + " f | F Sanity Checks (SLAB_CONSISTENCY_CHECKS)\n" + " z | Z Redzoning\n" + " p | P Poisoning\n" + " u | U Tracking\n" + " t | T Tracing\n" + + "\nSorting options (--Loss, --Size, --Partial) are mutually exclusive\n" + ); +} + +static unsigned long read_obj(const char *name) +{ + FILE *f = fopen(name, "r"); + + if (!f) { + buffer[0] = 0; + if (errno == EACCES) + fatal("%s, Try using superuser\n", strerror(errno)); + } else { + if (!fgets(buffer, sizeof(buffer), f)) + buffer[0] = 0; + fclose(f); + if (buffer[0] && buffer[strlen(buffer) - 1] == '\n') /* fgets() keeps the newline as the last character */ + buffer[strlen(buffer) - 1] = 0; + } + return strlen(buffer); +} + + +/* + * Get the contents of an attribute + */ +static unsigned long get_obj(const char *name) +{ + if (!read_obj(name)) + return 0; + + return atol(buffer); +} + +static unsigned long get_obj_and_str(const char *name, char **x) +{ + unsigned long result = 0; + char *p; + + *x = NULL; + + if (!read_obj(name)) { + x = NULL; + return 0; + } + result = strtoul(buffer, &p, 10); + while (*p == ' ') + p++; + if (*p) + *x = strdup(p); + return result; +} + +static void set_obj(struct slabinfo *s, const char *name, int n) +{ + char x[100]; + FILE *f; + + snprintf(x, 100, "%s/%s", s->name, name); + f = fopen(x, "w"); + if (!f) + fatal("Cannot write to %s\n", x); + + fprintf(f, "%d\n", n); + fclose(f); +} + +static unsigned long read_slab_obj(struct slabinfo *s, const char *name) +{ + char x[100]; + FILE *f; + size_t l; + + snprintf(x, 100, "%s/%s", s->name, name); + f = fopen(x, "r"); + if (!f) { + buffer[0] = 0; + l = 0; + } else { + l = fread(buffer, 1, sizeof(buffer) - 1, f); /* leave room for the terminator written below */ + buffer[l] = 0; + fclose(f); + } + return l; +} + +static unsigned long read_debug_slab_obj(struct slabinfo *s, const char *name) +{ + char x[128]; + FILE *f; + size_t l; + + snprintf(x, 128, 
"/sys/kernel/debug/slab/%s/%s", s->name, name); + f = fopen(x, "r"); + if (!f) { + buffer[0] = 0; + l = 0; + } else { + l = fread(buffer, 1, sizeof(buffer) - 1, f); /* leave room for the terminator written below */ + buffer[l] = 0; + fclose(f); + } + return l; +} + +/* + * Put a size string together + */ +static int store_size(char *buffer, unsigned long value) +{ + unsigned long divisor = 1; + char trailer = 0; + int n; + + if (!show_bytes) { + if (value > 1000000000UL) { + divisor = 100000000UL; + trailer = 'G'; + } else if (value > 1000000UL) { + divisor = 100000UL; + trailer = 'M'; + } else if (value > 1000UL) { + divisor = 100; + trailer = 'K'; + } + } + + value /= divisor; + n = sprintf(buffer, "%ld",value); + if (trailer) { + buffer[n] = trailer; + n++; + buffer[n] = 0; + } + if (divisor != 1) { + memmove(buffer + n - 2, buffer + n - 3, 4); + buffer[n-2] = '.'; + n++; + } + return n; +} + +static void decode_numa_list(int *numa, char *t) +{ + int node; + int nr; + + memset(numa, 0, MAX_NODES * sizeof(int)); + + if (!t) + return; + + while (*t == 'N') { + t++; + node = strtoul(t, &t, 10); + if (*t == '=') { + t++; + nr = strtoul(t, &t, 10); + numa[node] = nr; + if (node > highest_node) + highest_node = node; + } + while (*t == ' ') + t++; + } +} + +static void slab_validate(struct slabinfo *s) +{ + if (strcmp(s->name, "*") == 0) + return; + + set_obj(s, "validate", 1); +} + +static void slab_shrink(struct slabinfo *s) +{ + if (strcmp(s->name, "*") == 0) + return; + + set_obj(s, "shrink", 1); +} + +int line = 0; + +static void first_line(void) +{ + if (show_activity) + printf("Name Objects Alloc Free" + " %%Fast Fallb O CmpX UL\n"); + else + printf("Name Objects Objsize %s " + "Slabs/Part/Cpu O/S O %%Fr %%Ef Flg\n", + sort_loss ? 
" Loss" : "Space"); +} + +/* + * Find the shortest alias of a slab + */ +static struct aliasinfo *find_one_alias(struct slabinfo *find) +{ + struct aliasinfo *a; + struct aliasinfo *best = NULL; + + for(a = aliasinfo;a < aliasinfo + aliases; a++) { + if (a->slab == find && + (!best || strlen(best->name) < strlen(a->name))) { + best = a; + if (strncmp(a->name,"kmall", 5) == 0) + return best; + } + } + return best; +} + +static unsigned long slab_size(struct slabinfo *s) +{ + return s->slabs * (page_size << s->order); +} + +static unsigned long slab_activity(struct slabinfo *s) +{ + return s->alloc_fastpath + s->free_fastpath + + s->alloc_slowpath + s->free_slowpath; +} + +static unsigned long slab_waste(struct slabinfo *s) +{ + return slab_size(s) - s->objects * s->object_size; +} + +static void slab_numa(struct slabinfo *s, int mode) +{ + int node; + + if (strcmp(s->name, "*") == 0) + return; + + if (!highest_node) { + printf("\n%s: No NUMA information available.\n", s->name); + return; + } + + if (skip_zero && !s->slabs) + return; + + if (!line) { + printf("\n%-21s:", mode ? "NUMA nodes" : "Slab"); + for(node = 0; node <= highest_node; node++) + printf(" %4d", node); + printf("\n----------------------"); + for(node = 0; node <= highest_node; node++) + printf("-----"); + printf("\n"); + } + printf("%-21s ", mode ? 
"All slabs" : s->name); + for(node = 0; node <= highest_node; node++) { + char b[20]; + + store_size(b, s->numa[node]); + printf(" %4s", b); + } + printf("\n"); + if (mode) { + printf("%-21s ", "Partial slabs"); + for(node = 0; node <= highest_node; node++) { + char b[20]; + + store_size(b, s->numa_partial[node]); + printf(" %4s", b); + } + printf("\n"); + } + line++; +} + +static void show_tracking(struct slabinfo *s) +{ + printf("\n%s: Kernel object allocation\n", s->name); + printf("-----------------------------------------------------------------------\n"); + if (read_debug_slab_obj(s, "alloc_traces")) + printf("%s", buffer); + else if (read_slab_obj(s, "alloc_calls")) + printf("%s", buffer); + else + printf("No Data\n"); + + printf("\n%s: Kernel object freeing\n", s->name); + printf("------------------------------------------------------------------------\n"); + if (read_debug_slab_obj(s, "free_traces")) + printf("%s", buffer); + else if (read_slab_obj(s, "free_calls")) + printf("%s", buffer); + else + printf("No Data\n"); + +} + +static void ops(struct slabinfo *s) +{ + if (strcmp(s->name, "*") == 0) + return; + + if (read_slab_obj(s, "ops")) { + printf("\n%s: kmem_cache operations\n", s->name); + printf("--------------------------------------------\n"); + printf("%s", buffer); + } else + printf("\n%s has no kmem_cache operations\n", s->name); +} + +static const char *onoff(int x) +{ + if (x) + return "On "; + return "Off"; +} + +static void slab_stats(struct slabinfo *s) +{ + unsigned long total_alloc; + unsigned long total_free; + unsigned long total; + + if (!s->alloc_slab) + return; + + total_alloc = s->alloc_fastpath + s->alloc_slowpath; + total_free = s->free_fastpath + s->free_slowpath; + + if (!total_alloc) + return; + + printf("\n"); + printf("Slab Perf Counter Alloc Free %%Al %%Fr\n"); + printf("--------------------------------------------------\n"); + printf("Fastpath %8lu %8lu %3lu %3lu\n", + s->alloc_fastpath, s->free_fastpath, + 
s->alloc_fastpath * 100 / total_alloc, + total_free ? s->free_fastpath * 100 / total_free : 0); + printf("Slowpath %8lu %8lu %3lu %3lu\n", + total_alloc - s->alloc_fastpath, s->free_slowpath, + (total_alloc - s->alloc_fastpath) * 100 / total_alloc, + total_free ? s->free_slowpath * 100 / total_free : 0); + printf("Page Alloc %8lu %8lu %3lu %3lu\n", + s->alloc_slab, s->free_slab, + s->alloc_slab * 100 / total_alloc, + total_free ? s->free_slab * 100 / total_free : 0); + printf("Add partial %8lu %8lu %3lu %3lu\n", + s->deactivate_to_head + s->deactivate_to_tail, + s->free_add_partial, + (s->deactivate_to_head + s->deactivate_to_tail) * 100 / total_alloc, + total_free ? s->free_add_partial * 100 / total_free : 0); + printf("Remove partial %8lu %8lu %3lu %3lu\n", + s->alloc_from_partial, s->free_remove_partial, + s->alloc_from_partial * 100 / total_alloc, + total_free ? s->free_remove_partial * 100 / total_free : 0); + + printf("Cpu partial list %8lu %8lu %3lu %3lu\n", + s->cpu_partial_alloc, s->cpu_partial_free, + s->cpu_partial_alloc * 100 / total_alloc, + total_free ? s->cpu_partial_free * 100 / total_free : 0); + + printf("RemoteObj/SlabFrozen %8lu %8lu %3lu %3lu\n", + s->deactivate_remote_frees, s->free_frozen, + s->deactivate_remote_frees * 100 / total_alloc, + total_free ? 
s->free_frozen * 100 / total_free : 0); + + printf("Total %8lu %8lu\n\n", total_alloc, total_free); + + if (s->cpuslab_flush) + printf("Flushes %8lu\n", s->cpuslab_flush); + + total = s->deactivate_full + s->deactivate_empty + + s->deactivate_to_head + s->deactivate_to_tail + s->deactivate_bypass; + + if (total) { + printf("\nSlab Deactivation Occurrences %%\n"); + printf("-------------------------------------------------\n"); + printf("Slab full %7lu %3lu%%\n", + s->deactivate_full, (s->deactivate_full * 100) / total); + printf("Slab empty %7lu %3lu%%\n", + s->deactivate_empty, (s->deactivate_empty * 100) / total); + printf("Moved to head of partial list %7lu %3lu%%\n", + s->deactivate_to_head, (s->deactivate_to_head * 100) / total); + printf("Moved to tail of partial list %7lu %3lu%%\n", + s->deactivate_to_tail, (s->deactivate_to_tail * 100) / total); + printf("Deactivation bypass %7lu %3lu%%\n", + s->deactivate_bypass, (s->deactivate_bypass * 100) / total); + printf("Refilled from foreign frees %7lu %3lu%%\n", + s->alloc_refill, (s->alloc_refill * 100) / total); + printf("Node mismatch %7lu %3lu%%\n", + s->alloc_node_mismatch, (s->alloc_node_mismatch * 100) / total); + } + + if (s->cmpxchg_double_fail || s->cmpxchg_double_cpu_fail) { + printf("\nCmpxchg_double Looping\n------------------------\n"); + printf("Locked Cmpxchg Double redos %lu\nUnlocked Cmpxchg Double redos %lu\n", + s->cmpxchg_double_fail, s->cmpxchg_double_cpu_fail); + } +} + +static void report(struct slabinfo *s) +{ + if (strcmp(s->name, "*") == 0) + return; + + printf("\nSlabcache: %-15s Aliases: %2d Order : %2d Objects: %lu\n", + s->name, s->aliases, s->order, s->objects); + if (s->hwcache_align) + printf("** Hardware cacheline aligned\n"); + if (s->cache_dma) + printf("** Memory is allocated in a special DMA zone\n"); + if (s->destroy_by_rcu) + printf("** Slabs are destroyed via RCU\n"); + if (s->reclaim_account) + printf("** Reclaim accounting active\n"); + + printf("\nSizes (bytes) Slabs 
Debug Memory\n"); + printf("------------------------------------------------------------------------\n"); + printf("Object : %7d Total : %7ld Sanity Checks : %s Total: %7ld\n", + s->object_size, s->slabs, onoff(s->sanity_checks), + s->slabs * (page_size << s->order)); + printf("SlabObj: %7d Full : %7ld Redzoning : %s Used : %7ld\n", + s->slab_size, s->slabs - s->partial - s->cpu_slabs, + onoff(s->red_zone), s->objects * s->object_size); + printf("SlabSiz: %7d Partial: %7ld Poisoning : %s Loss : %7ld\n", + page_size << s->order, s->partial, onoff(s->poison), + s->slabs * (page_size << s->order) - s->objects * s->object_size); + printf("Loss : %7d CpuSlab: %7d Tracking : %s Lalig: %7ld\n", + s->slab_size - s->object_size, s->cpu_slabs, onoff(s->store_user), + (s->slab_size - s->object_size) * s->objects); + printf("Align : %7d Objects: %7d Tracing : %s Lpadd: %7ld\n", + s->align, s->objs_per_slab, onoff(s->trace), + ((page_size << s->order) - s->objs_per_slab * s->slab_size) * + s->slabs); + + ops(s); + show_tracking(s); + slab_numa(s, 1); + slab_stats(s); +} + +static void slabcache(struct slabinfo *s) +{ + char size_str[20]; + char dist_str[40]; + char flags[20]; + char *p = flags; + + if (strcmp(s->name, "*") == 0) + return; + + if (unreclaim_only && s->reclaim_account) + return; + + if (actual_slabs == 1) { + report(s); + return; + } + + if (skip_zero && !show_empty && !s->slabs) + return; + + if (show_empty && s->slabs) + return; + + if (sort_loss == 0) + store_size(size_str, slab_size(s)); + else + store_size(size_str, slab_waste(s)); + snprintf(dist_str, 40, "%lu/%lu/%d", s->slabs - s->cpu_slabs, + s->partial, s->cpu_slabs); + + if (!line++) + first_line(); + + if (s->aliases) + *p++ = '*'; + if (s->cache_dma) + *p++ = 'd'; + if (s->hwcache_align) + *p++ = 'A'; + if (s->poison) + *p++ = 'P'; + if (s->reclaim_account) + *p++ = 'a'; + if (s->red_zone) + *p++ = 'Z'; + if (s->sanity_checks) + *p++ = 'F'; + if (s->store_user) + *p++ = 'U'; + if (s->trace) + *p++ = 
'T'; + + *p = 0; + if (show_activity) { + unsigned long total_alloc; + unsigned long total_free; + + total_alloc = s->alloc_fastpath + s->alloc_slowpath; + total_free = s->free_fastpath + s->free_slowpath; + + printf("%-21s %8ld %10ld %10ld %3ld %3ld %5ld %1d %4ld %4ld\n", + s->name, s->objects, + total_alloc, total_free, + total_alloc ? (s->alloc_fastpath * 100 / total_alloc) : 0, + total_free ? (s->free_fastpath * 100 / total_free) : 0, + s->order_fallback, s->order, s->cmpxchg_double_fail, + s->cmpxchg_double_cpu_fail); + } else { + printf("%-21s %8ld %7d %15s %14s %4d %1d %3ld %3ld %s\n", + s->name, s->objects, s->object_size, size_str, dist_str, + s->objs_per_slab, s->order, + s->slabs ? (s->partial * 100) / s->slabs : 100, + s->slabs ? (s->objects * s->object_size * 100) / + (s->slabs * (page_size << s->order)) : 100, + flags); + } +} + +/* + * Analyze debug options. Return false if something is amiss. + */ +static int debug_opt_scan(char *opt) +{ + if (!opt || !opt[0] || strcmp(opt, "-") == 0) + return 1; + + if (strcasecmp(opt, "a") == 0) { + sanity = 1; + poison = 1; + redzone = 1; + tracking = 1; + return 1; + } + + for ( ; *opt; opt++) + switch (*opt) { + case 'F' : case 'f': + if (sanity) + return 0; + sanity = 1; + break; + case 'P' : case 'p': + if (poison) + return 0; + poison = 1; + break; + + case 'Z' : case 'z': + if (redzone) + return 0; + redzone = 1; + break; + + case 'U' : case 'u': + if (tracking) + return 0; + tracking = 1; + break; + + case 'T' : case 't': + if (tracing) + return 0; + tracing = 1; + break; + default: + return 0; + } + return 1; +} + +static int slab_empty(struct slabinfo *s) +{ + if (s->objects > 0) + return 0; + + /* + * We may still have slabs even if there are no objects. Shrinking will + * remove them. 
+ */ + if (s->slabs != 0) + set_obj(s, "shrink", 1); + + return 1; +} + +static void slab_debug(struct slabinfo *s) +{ + if (strcmp(s->name, "*") == 0) + return; + + if (sanity && !s->sanity_checks) { + set_obj(s, "sanity_checks", 1); + } + if (!sanity && s->sanity_checks) { + if (slab_empty(s)) + set_obj(s, "sanity_checks", 0); + else + fprintf(stderr, "%s not empty cannot disable sanity checks\n", s->name); + } + if (redzone && !s->red_zone) { + if (slab_empty(s)) + set_obj(s, "red_zone", 1); + else + fprintf(stderr, "%s not empty cannot enable redzoning\n", s->name); + } + if (!redzone && s->red_zone) { + if (slab_empty(s)) + set_obj(s, "red_zone", 0); + else + fprintf(stderr, "%s not empty cannot disable redzoning\n", s->name); + } + if (poison && !s->poison) { + if (slab_empty(s)) + set_obj(s, "poison", 1); + else + fprintf(stderr, "%s not empty cannot enable poisoning\n", s->name); + } + if (!poison && s->poison) { + if (slab_empty(s)) + set_obj(s, "poison", 0); + else + fprintf(stderr, "%s not empty cannot disable poisoning\n", s->name); + } + if (tracking && !s->store_user) { + if (slab_empty(s)) + set_obj(s, "store_user", 1); + else + fprintf(stderr, "%s not empty cannot enable tracking\n", s->name); + } + if (!tracking && s->store_user) { + if (slab_empty(s)) + set_obj(s, "store_user", 0); + else + fprintf(stderr, "%s not empty cannot disable tracking\n", s->name); + } + if (tracing && !s->trace) { + if (slabs == 1) + set_obj(s, "trace", 1); + else + fprintf(stderr, "%s can only enable trace for one slab at a time\n", s->name); + } + if (!tracing && s->trace) + set_obj(s, "trace", 1); +} + +static void totals(void) +{ + struct slabinfo *s; + + int used_slabs = 0; + char b1[20], b2[20], b3[20], b4[20]; + unsigned long long max = 1ULL << 63; + + /* Object size */ + unsigned long long min_objsize = max, max_objsize = 0, avg_objsize; + + /* Number of partial slabs in a slabcache */ + unsigned long long min_partial = max, max_partial = 0, + avg_partial, 
total_partial = 0; + + /* Number of slabs in a slab cache */ + unsigned long long min_slabs = max, max_slabs = 0, + avg_slabs, total_slabs = 0; + + /* Size of the whole slab */ + unsigned long long min_size = max, max_size = 0, + avg_size, total_size = 0; + + /* Bytes used for object storage in a slab */ + unsigned long long min_used = max, max_used = 0, + avg_used, total_used = 0; + + /* Waste: Bytes used for alignment and padding */ + unsigned long long min_waste = max, max_waste = 0, + avg_waste, total_waste = 0; + /* Number of objects in a slab */ + unsigned long long min_objects = max, max_objects = 0, + avg_objects, total_objects = 0; + /* Waste per object */ + unsigned long long min_objwaste = max, + max_objwaste = 0, avg_objwaste, + total_objwaste = 0; + + /* Memory per object */ + unsigned long long min_memobj = max, + max_memobj = 0, avg_memobj, + total_objsize = 0; + + /* Percentage of partial slabs per slab */ + unsigned long min_ppart = 100, max_ppart = 0, + avg_ppart, total_ppart = 0; + + /* Number of objects in partial slabs */ + unsigned long min_partobj = max, max_partobj = 0, + avg_partobj, total_partobj = 0; + + /* Percentage of partial objects of all objects in a slab */ + unsigned long min_ppartobj = 100, max_ppartobj = 0, + avg_ppartobj, total_ppartobj = 0; + + + for (s = slabinfo; s < slabinfo + slabs; s++) { + unsigned long long size; + unsigned long used; + unsigned long long wasted; + unsigned long long objwaste; + unsigned long percentage_partial_slabs; + unsigned long percentage_partial_objs; + + if (!s->slabs || !s->objects) + continue; + + used_slabs++; + + size = slab_size(s); + used = s->objects * s->object_size; + wasted = size - used; + objwaste = s->slab_size - s->object_size; + + percentage_partial_slabs = s->partial * 100 / s->slabs; + if (percentage_partial_slabs > 100) + percentage_partial_slabs = 100; + + percentage_partial_objs = s->objects_partial * 100 + / s->objects; + + if (percentage_partial_objs > 100) + 
percentage_partial_objs = 100; + + if (s->object_size < min_objsize) + min_objsize = s->object_size; + if (s->partial < min_partial) + min_partial = s->partial; + if (s->slabs < min_slabs) + min_slabs = s->slabs; + if (size < min_size) + min_size = size; + if (wasted < min_waste) + min_waste = wasted; + if (objwaste < min_objwaste) + min_objwaste = objwaste; + if (s->objects < min_objects) + min_objects = s->objects; + if (used < min_used) + min_used = used; + if (s->objects_partial < min_partobj) + min_partobj = s->objects_partial; + if (percentage_partial_slabs < min_ppart) + min_ppart = percentage_partial_slabs; + if (percentage_partial_objs < min_ppartobj) + min_ppartobj = percentage_partial_objs; + if (s->slab_size < min_memobj) + min_memobj = s->slab_size; + + if (s->object_size > max_objsize) + max_objsize = s->object_size; + if (s->partial > max_partial) + max_partial = s->partial; + if (s->slabs > max_slabs) + max_slabs = s->slabs; + if (size > max_size) + max_size = size; + if (wasted > max_waste) + max_waste = wasted; + if (objwaste > max_objwaste) + max_objwaste = objwaste; + if (s->objects > max_objects) + max_objects = s->objects; + if (used > max_used) + max_used = used; + if (s->objects_partial > max_partobj) + max_partobj = s->objects_partial; + if (percentage_partial_slabs > max_ppart) + max_ppart = percentage_partial_slabs; + if (percentage_partial_objs > max_ppartobj) + max_ppartobj = percentage_partial_objs; + if (s->slab_size > max_memobj) + max_memobj = s->slab_size; + + total_partial += s->partial; + total_slabs += s->slabs; + total_size += size; + total_waste += wasted; + + total_objects += s->objects; + total_used += used; + total_partobj += s->objects_partial; + total_ppart += percentage_partial_slabs; + total_ppartobj += percentage_partial_objs; + + total_objwaste += s->objects * objwaste; + total_objsize += s->objects * s->slab_size; + } + + if (!total_objects) { + printf("No objects\n"); + return; + } + if (!used_slabs) { + printf("No 
slabs\n"); + return; + } + + /* Per slab averages */ + avg_partial = total_partial / used_slabs; + avg_slabs = total_slabs / used_slabs; + avg_size = total_size / used_slabs; + avg_waste = total_waste / used_slabs; + + avg_objects = total_objects / used_slabs; + avg_used = total_used / used_slabs; + avg_partobj = total_partobj / used_slabs; + avg_ppart = total_ppart / used_slabs; + avg_ppartobj = total_ppartobj / used_slabs; + + /* Per object object sizes */ + avg_objsize = total_used / total_objects; + avg_objwaste = total_objwaste / total_objects; + avg_partobj = total_partobj * 100 / total_objects; + avg_memobj = total_objsize / total_objects; + + printf("Slabcache Totals\n"); + printf("----------------\n"); + printf("Slabcaches : %15d Aliases : %11d->%-3d Active: %3d\n", + slabs, aliases, alias_targets, used_slabs); + + store_size(b1, total_size);store_size(b2, total_waste); + store_size(b3, total_waste * 100 / total_used); + printf("Memory used: %15s # Loss : %15s MRatio:%6s%%\n", b1, b2, b3); + + store_size(b1, total_objects);store_size(b2, total_partobj); + store_size(b3, total_partobj * 100 / total_objects); + printf("# Objects : %15s # PartObj: %15s ORatio:%6s%%\n", b1, b2, b3); + + printf("\n"); + printf("Per Cache Average " + "Min Max Total\n"); + printf("---------------------------------------" + "-------------------------------------\n"); + + store_size(b1, avg_objects);store_size(b2, min_objects); + store_size(b3, max_objects);store_size(b4, total_objects); + printf("#Objects %15s %15s %15s %15s\n", + b1, b2, b3, b4); + + store_size(b1, avg_slabs);store_size(b2, min_slabs); + store_size(b3, max_slabs);store_size(b4, total_slabs); + printf("#Slabs %15s %15s %15s %15s\n", + b1, b2, b3, b4); + + store_size(b1, avg_partial);store_size(b2, min_partial); + store_size(b3, max_partial);store_size(b4, total_partial); + printf("#PartSlab %15s %15s %15s %15s\n", + b1, b2, b3, b4); + store_size(b1, avg_ppart);store_size(b2, min_ppart); + store_size(b3, 
max_ppart); + store_size(b4, total_partial * 100 / total_slabs); + printf("%%PartSlab%15s%% %15s%% %15s%% %15s%%\n", + b1, b2, b3, b4); + + store_size(b1, avg_partobj);store_size(b2, min_partobj); + store_size(b3, max_partobj); + store_size(b4, total_partobj); + printf("PartObjs %15s %15s %15s %15s\n", + b1, b2, b3, b4); + + store_size(b1, avg_ppartobj);store_size(b2, min_ppartobj); + store_size(b3, max_ppartobj); + store_size(b4, total_partobj * 100 / total_objects); + printf("%% PartObj%15s%% %15s%% %15s%% %15s%%\n", + b1, b2, b3, b4); + + store_size(b1, avg_size);store_size(b2, min_size); + store_size(b3, max_size);store_size(b4, total_size); + printf("Memory %15s %15s %15s %15s\n", + b1, b2, b3, b4); + + store_size(b1, avg_used);store_size(b2, min_used); + store_size(b3, max_used);store_size(b4, total_used); + printf("Used %15s %15s %15s %15s\n", + b1, b2, b3, b4); + + store_size(b1, avg_waste);store_size(b2, min_waste); + store_size(b3, max_waste);store_size(b4, total_waste); + printf("Loss %15s %15s %15s %15s\n", + b1, b2, b3, b4); + + printf("\n"); + printf("Per Object Average " + "Min Max\n"); + printf("---------------------------------------" + "--------------------\n"); + + store_size(b1, avg_memobj);store_size(b2, min_memobj); + store_size(b3, max_memobj); + printf("Memory %15s %15s %15s\n", + b1, b2, b3); + store_size(b1, avg_objsize);store_size(b2, min_objsize); + store_size(b3, max_objsize); + printf("User %15s %15s %15s\n", + b1, b2, b3); + + store_size(b1, avg_objwaste);store_size(b2, min_objwaste); + store_size(b3, max_objwaste); + printf("Loss %15s %15s %15s\n", + b1, b2, b3); +} + +static void sort_slabs(void) +{ + struct slabinfo *s1,*s2; + + for (s1 = slabinfo; s1 < slabinfo + slabs; s1++) { + for (s2 = s1 + 1; s2 < slabinfo + slabs; s2++) { + int result; + + if (sort_size) { + if (slab_size(s1) == slab_size(s2)) + result = strcasecmp(s1->name, s2->name); + else + result = slab_size(s1) < slab_size(s2); + } else if (sort_active) { + if 
(slab_activity(s1) == slab_activity(s2)) + result = strcasecmp(s1->name, s2->name); + else + result = slab_activity(s1) < slab_activity(s2); + } else if (sort_loss) { + if (slab_waste(s1) == slab_waste(s2)) + result = strcasecmp(s1->name, s2->name); + else + result = slab_waste(s1) < slab_waste(s2); + } else if (sort_partial) { + if (s1->partial == s2->partial) + result = strcasecmp(s1->name, s2->name); + else + result = s1->partial < s2->partial; + } else + result = strcasecmp(s1->name, s2->name); + + if (show_inverted) + result = -result; + + if (result > 0) { + struct slabinfo t; + + memcpy(&t, s1, sizeof(struct slabinfo)); + memcpy(s1, s2, sizeof(struct slabinfo)); + memcpy(s2, &t, sizeof(struct slabinfo)); + } + } + } +} + +static void sort_aliases(void) +{ + struct aliasinfo *a1,*a2; + + for (a1 = aliasinfo; a1 < aliasinfo + aliases; a1++) { + for (a2 = a1 + 1; a2 < aliasinfo + aliases; a2++) { + char *n1, *n2; + + n1 = a1->name; + n2 = a2->name; + if (show_alias && !show_inverted) { + n1 = a1->ref; + n2 = a2->ref; + } + if (strcasecmp(n1, n2) > 0) { + struct aliasinfo t; + + memcpy(&t, a1, sizeof(struct aliasinfo)); + memcpy(a1, a2, sizeof(struct aliasinfo)); + memcpy(a2, &t, sizeof(struct aliasinfo)); + } + } + } +} + +static void link_slabs(void) +{ + struct aliasinfo *a; + struct slabinfo *s; + + for (a = aliasinfo; a < aliasinfo + aliases; a++) { + + for (s = slabinfo; s < slabinfo + slabs; s++) + if (strcmp(a->ref, s->name) == 0) { + a->slab = s; + s->refs++; + break; + } + if (s == slabinfo + slabs) + fatal("Unresolved alias %s\n", a->ref); + } +} + +static void alias(void) +{ + struct aliasinfo *a; + char *active = NULL; + + sort_aliases(); + link_slabs(); + + for(a = aliasinfo; a < aliasinfo + aliases; a++) { + + if (!show_single_ref && a->slab->refs == 1) + continue; + + if (!show_inverted) { + if (active) { + if (strcmp(a->slab->name, active) == 0) { + printf(" %s", a->name); + continue; + } + } + printf("\n%-12s <- %s", a->slab->name, a->name); + 
active = a->slab->name; + } + else + printf("%-15s -> %s\n", a->name, a->slab->name); + } + if (active) + printf("\n"); +} + + +static void rename_slabs(void) +{ + struct slabinfo *s; + struct aliasinfo *a; + + for (s = slabinfo; s < slabinfo + slabs; s++) { + if (*s->name != ':') + continue; + + if (s->refs > 1 && !show_first_alias) + continue; + + a = find_one_alias(s); + + if (a) + s->name = a->name; + else { + s->name = "*"; + actual_slabs--; + } + } +} + +static int slab_mismatch(char *slab) +{ + return regexec(&pattern, slab, 0, NULL, 0); +} + +static void read_slab_dir(void) +{ + DIR *dir; + struct dirent *de; + struct slabinfo *slab = slabinfo; + struct aliasinfo *alias = aliasinfo; + char *p; + char *t; + int count; + + if (chdir("/sys/kernel/slab") && chdir("/sys/slab")) + fatal("SYSFS support for SLUB not active\n"); + + dir = opendir("."); + while ((de = readdir(dir))) { + if (de->d_name[0] == '.' || + (de->d_name[0] != ':' && slab_mismatch(de->d_name))) + continue; + switch (de->d_type) { + case DT_LNK: + alias->name = strdup(de->d_name); + count = readlink(de->d_name, buffer, sizeof(buffer)-1); + + if (count < 0) + fatal("Cannot read symlink %s\n", de->d_name); + + buffer[count] = 0; + p = buffer + count; + while (p > buffer && p[-1] != '/') + p--; + alias->ref = strdup(p); + alias++; + break; + case DT_DIR: + if (chdir(de->d_name)) + fatal("Unable to access slab %s\n", slab->name); + slab->name = strdup(de->d_name); + slab->alias = 0; + slab->refs = 0; + slab->aliases = get_obj("aliases"); + slab->align = get_obj("align"); + slab->cache_dma = get_obj("cache_dma"); + slab->cpu_slabs = get_obj("cpu_slabs"); + slab->destroy_by_rcu = get_obj("destroy_by_rcu"); + slab->hwcache_align = get_obj("hwcache_align"); + slab->object_size = get_obj("object_size"); + slab->objects = get_obj("objects"); + slab->objects_partial = get_obj("objects_partial"); + slab->objects_total = get_obj("objects_total"); + slab->objs_per_slab = get_obj("objs_per_slab"); + 
slab->order = get_obj("order"); + slab->partial = get_obj("partial"); + slab->partial = get_obj_and_str("partial", &t); + decode_numa_list(slab->numa_partial, t); + free(t); + slab->poison = get_obj("poison"); + slab->reclaim_account = get_obj("reclaim_account"); + slab->red_zone = get_obj("red_zone"); + slab->sanity_checks = get_obj("sanity_checks"); + slab->slab_size = get_obj("slab_size"); + slab->slabs = get_obj_and_str("slabs", &t); + decode_numa_list(slab->numa, t); + free(t); + slab->store_user = get_obj("store_user"); + slab->trace = get_obj("trace"); + slab->alloc_fastpath = get_obj("alloc_fastpath"); + slab->alloc_slowpath = get_obj("alloc_slowpath"); + slab->free_fastpath = get_obj("free_fastpath"); + slab->free_slowpath = get_obj("free_slowpath"); + slab->free_frozen= get_obj("free_frozen"); + slab->free_add_partial = get_obj("free_add_partial"); + slab->free_remove_partial = get_obj("free_remove_partial"); + slab->alloc_from_partial = get_obj("alloc_from_partial"); + slab->alloc_slab = get_obj("alloc_slab"); + slab->alloc_refill = get_obj("alloc_refill"); + slab->free_slab = get_obj("free_slab"); + slab->cpuslab_flush = get_obj("cpuslab_flush"); + slab->deactivate_full = get_obj("deactivate_full"); + slab->deactivate_empty = get_obj("deactivate_empty"); + slab->deactivate_to_head = get_obj("deactivate_to_head"); + slab->deactivate_to_tail = get_obj("deactivate_to_tail"); + slab->deactivate_remote_frees = get_obj("deactivate_remote_frees"); + slab->order_fallback = get_obj("order_fallback"); + slab->cmpxchg_double_cpu_fail = get_obj("cmpxchg_double_cpu_fail"); + slab->cmpxchg_double_fail = get_obj("cmpxchg_double_fail"); + slab->cpu_partial_alloc = get_obj("cpu_partial_alloc"); + slab->cpu_partial_free = get_obj("cpu_partial_free"); + slab->alloc_node_mismatch = get_obj("alloc_node_mismatch"); + slab->deactivate_bypass = get_obj("deactivate_bypass"); + chdir(".."); + if (slab->name[0] == ':') + alias_targets++; + slab++; + break; + default : + 
fatal("Unknown file type %lx\n", de->d_type);
+		}
+	}
+	closedir(dir);
+	slabs = slab - slabinfo;
+	actual_slabs = slabs;
+	aliases = alias - aliasinfo;
+	if (slabs > MAX_SLABS)
+		fatal("Too many slabs\n");
+	if (aliases > MAX_ALIASES)
+		fatal("Too many aliases\n");
+}
+
+static void output_slabs(void)
+{
+	struct slabinfo *slab;
+	int lines = output_lines;
+
+	for (slab = slabinfo; (slab < slabinfo + slabs) &&
+			lines != 0; slab++) {
+
+		if (slab->alias)
+			continue;
+
+		if (lines != -1)
+			lines--;
+
+		if (show_numa)
+			slab_numa(slab, 0);
+		else if (show_track)
+			show_tracking(slab);
+		else if (validate)
+			slab_validate(slab);
+		else if (shrink)
+			slab_shrink(slab);
+		else if (set_debug)
+			slab_debug(slab);
+		else if (show_ops)
+			ops(slab);
+		else if (show_slab)
+			slabcache(slab);
+		else if (show_report)
+			report(slab);
+	}
+}
+
+static void _xtotals(char *heading, char *underline,
+		     int loss, int size, int partial)
+{
+	printf("%s%s", heading, underline);
+	line = 0;
+	sort_loss = loss;
+	sort_size = size;
+	sort_partial = partial;
+	sort_slabs();
+	output_slabs();
+}
+
+static void xtotals(void)
+{
+	char *heading, *underline;
+
+	totals();
+
+	link_slabs();
+	rename_slabs();
+
+	heading = "\nSlabs sorted by size\n";
+	underline = "--------------------\n";
+	_xtotals(heading, underline, 0, 1, 0);
+
+	heading = "\nSlabs sorted by loss\n";
+	underline = "--------------------\n";
+	_xtotals(heading, underline, 1, 0, 0);
+
+	heading = "\nSlabs sorted by number of partial slabs\n";
+	underline = "---------------------------------------\n";
+	_xtotals(heading, underline, 0, 0, 1);
+
+	printf("\n");
+}
+
+struct option opts[] = {
+	{ "aliases", no_argument, NULL, 'a' },
+	{ "activity", no_argument, NULL, 'A' },
+	{ "Bytes", no_argument, NULL, 'B'},
+	{ "debug", optional_argument, NULL, 'd' },
+	{ "display-activity", no_argument, NULL, 'D' },
+	{ "empty", no_argument, NULL, 'e' },
+	{ "first-alias", no_argument, NULL, 'f' },
+	{ "help", no_argument, NULL, 'h' },
+	{ "inverted", no_argument, NULL, 'i'},
+	{ "slabs", no_argument, NULL, 'l' },
+	{ "Loss", no_argument, NULL, 'L'},
+	{ "numa", no_argument, NULL, 'n' },
+	{ "lines", required_argument, NULL, 'N'},
+	{ "ops", no_argument, NULL, 'o' },
+	{ "partial", no_argument, NULL, 'P'},	/* val must match case 'P' in main() */
+	{ "report", no_argument, NULL, 'r' },
+	{ "shrink", no_argument, NULL, 's' },
+	{ "Size", no_argument, NULL, 'S'},
+	{ "tracking", no_argument, NULL, 't'},
+	{ "Totals", no_argument, NULL, 'T'},
+	{ "Unreclaim", no_argument, NULL, 'U'},
+	{ "validate", no_argument, NULL, 'v' },
+	{ "Xtotals", no_argument, NULL, 'X'},
+	{ "zero", no_argument, NULL, 'z' },
+	{ "1ref", no_argument, NULL, '1'},
+	{ NULL, 0, NULL, 0 }
+};
+
+int main(int argc, char *argv[])
+{
+	int c;
+	int err;
+	char *pattern_source;
+
+	page_size = getpagesize();
+
+	while ((c = getopt_long(argc, argv, "aABd::DefhilLnN:oPrsStTUvXz1",
+						opts, NULL)) != -1)
+		switch (c) {
+		case 'a':
+			show_alias = 1;
+			break;
+		case 'A':
+			sort_active = 1;
+			break;
+		case 'B':
+			show_bytes = 1;
+			break;
+		case 'd':
+			set_debug = 1;
+			if (!debug_opt_scan(optarg))
+				fatal("Invalid debug option '%s'\n", optarg);
+			break;
+		case 'D':
+			show_activity = 1;
+			break;
+		case 'e':
+			show_empty = 1;
+			break;
+		case 'f':
+			show_first_alias = 1;
+			break;
+		case 'h':
+			usage();
+			return 0;
+		case 'i':
+			show_inverted = 1;
+			break;
+		case 'l':
+			show_slab = 1;
+			break;
+		case 'L':
+			sort_loss = 1;
+			break;
+		case 'n':
+			show_numa = 1;
+			break;
+		case 'N':
+			if (optarg) {
+				output_lines = atoi(optarg);
+				if (output_lines < 1)
+					output_lines = 1;
+			}
+			break;
+		case 'o':
+			show_ops = 1;
+			break;
+		case 'r':
+			show_report = 1;
+			break;
+		case 'P':
+			sort_partial = 1;
+			break;
+		case 's':
+			shrink = 1;
+			break;
+		case 'S':
+			sort_size = 1;
+			break;
+		case 't':
+			show_track = 1;
+			break;
+		case 'T':
+			show_totals = 1;
+			break;
+		case 'U':
+			unreclaim_only = 1;
+			break;
+		case 'v':
+			validate = 1;
+			break;
+		case 'X':
+			if (output_lines == -1)
+				output_lines = 1;
+			extended_totals = 1;
+			show_bytes = 1;
+			break;
+		case 'z':
+			skip_zero = 0;
+			break;
+		case '1':
+			show_single_ref = 1;
+			break;
+		default:
+			fatal("%s: Invalid option '%c'\n", argv[0], optopt);
+
+	}
+
+	if (!show_slab && !show_alias && !show_track && !show_report
+		&& !validate && !shrink && !set_debug && !show_ops)
+		show_slab = 1;
+
+	if (argc > optind)
+		pattern_source = argv[optind];
+	else
+		pattern_source = ".*";
+
+	err = regcomp(&pattern, pattern_source, REG_ICASE|REG_NOSUB);
+	if (err)
+		fatal("%s: Invalid pattern '%s' code %d\n",
+			argv[0], pattern_source, err);
+	read_slab_dir();
+	if (show_alias) {
+		alias();
+	} else if (extended_totals) {
+		xtotals();
+	} else if (show_totals) {
+		totals();
+	} else {
+		link_slabs();
+		rename_slabs();
+		sort_slabs();
+		output_slabs();
+	}
+	return 0;
+}
diff --git a/tools/vm/.gitignore b/tools/vm/.gitignore
deleted file mode 100644
index 922879f93fc8..000000000000
--- a/tools/vm/.gitignore
+++ /dev/null
@@ -1,4 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-slabinfo
-page-types
-page_owner_sort
diff --git a/tools/vm/Makefile b/tools/vm/Makefile
deleted file mode 100644
index 9860622cbb15..000000000000
--- a/tools/vm/Makefile
+++ /dev/null
@@ -1,32 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-# Makefile for vm tools
-#
-include ../scripts/Makefile.include
-
-TARGETS=page-types slabinfo page_owner_sort
-
-LIB_DIR = ../lib/api
-LIBS = $(LIB_DIR)/libapi.a
-
-CFLAGS = -Wall -Wextra -I../lib/
-LDFLAGS = $(LIBS)
-
-all: $(TARGETS)
-
-$(TARGETS): $(LIBS)
-
-$(LIBS):
-	make -C $(LIB_DIR)
-
-%: %.c
-	$(CC) $(CFLAGS) -o $@ $< $(LDFLAGS)
-
-clean:
-	$(RM) page-types slabinfo page_owner_sort
-	make -C $(LIB_DIR) clean
-
-sbindir ?= /usr/sbin
-
-install: all
-	install -d $(DESTDIR)$(sbindir)
-	install -m 755 -p $(TARGETS) $(DESTDIR)$(sbindir)
diff --git a/tools/vm/page-types.c b/tools/vm/page-types.c
deleted file mode 100644
index 381dcc00cb62..000000000000
--- a/tools/vm/page-types.c
+++ /dev/null
@@ -1,1396 +0,0 @@
-// 
SPDX-License-Identifier: GPL-2.0-only -/* - * page-types: Tool for querying page flags - * - * Copyright (C) 2009 Intel corporation - * - * Authors: Wu Fengguang - */ - -#define _FILE_OFFSET_BITS 64 -#define _GNU_SOURCE -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "../../include/uapi/linux/magic.h" -#include "../../include/uapi/linux/kernel-page-flags.h" -#include - -#ifndef MAX_PATH -# define MAX_PATH 256 -#endif - -#ifndef STR -# define _STR(x) #x -# define STR(x) _STR(x) -#endif - -/* - * pagemap kernel ABI bits - */ - -#define PM_ENTRY_BYTES 8 -#define PM_PFRAME_BITS 55 -#define PM_PFRAME_MASK ((1LL << PM_PFRAME_BITS) - 1) -#define PM_PFRAME(x) ((x) & PM_PFRAME_MASK) -#define MAX_SWAPFILES_SHIFT 5 -#define PM_SWAP_OFFSET(x) (((x) & PM_PFRAME_MASK) >> MAX_SWAPFILES_SHIFT) -#define PM_SOFT_DIRTY (1ULL << 55) -#define PM_MMAP_EXCLUSIVE (1ULL << 56) -#define PM_FILE (1ULL << 61) -#define PM_SWAP (1ULL << 62) -#define PM_PRESENT (1ULL << 63) - -/* - * kernel page flags - */ - -#define KPF_BYTES 8 -#define PROC_KPAGEFLAGS "/proc/kpageflags" -#define PROC_KPAGECOUNT "/proc/kpagecount" -#define PROC_KPAGECGROUP "/proc/kpagecgroup" - -#define SYS_KERNEL_MM_PAGE_IDLE "/sys/kernel/mm/page_idle/bitmap" - -/* [32-] kernel hacking assistances */ -#define KPF_RESERVED 32 -#define KPF_MLOCKED 33 -#define KPF_MAPPEDTODISK 34 -#define KPF_PRIVATE 35 -#define KPF_PRIVATE_2 36 -#define KPF_OWNER_PRIVATE 37 -#define KPF_ARCH 38 -#define KPF_UNCACHED 39 -#define KPF_SOFTDIRTY 40 -#define KPF_ARCH_2 41 - -/* [47-] take some arbitrary free slots for expanding overloaded flags - * not part of kernel API - */ -#define KPF_ANON_EXCLUSIVE 47 -#define KPF_READAHEAD 48 -#define KPF_SLOB_FREE 49 -#define KPF_SLUB_FROZEN 50 -#define KPF_SLUB_DEBUG 51 -#define KPF_FILE 61 -#define KPF_SWAP 62 -#define KPF_MMAP_EXCLUSIVE 63 - -#define 
KPF_ALL_BITS ((uint64_t)~0ULL) -#define KPF_HACKERS_BITS (0xffffULL << 32) -#define KPF_OVERLOADED_BITS (0xffffULL << 48) -#define BIT(name) (1ULL << KPF_##name) -#define BITS_COMPOUND (BIT(COMPOUND_HEAD) | BIT(COMPOUND_TAIL)) - -static const char * const page_flag_names[] = { - [KPF_LOCKED] = "L:locked", - [KPF_ERROR] = "E:error", - [KPF_REFERENCED] = "R:referenced", - [KPF_UPTODATE] = "U:uptodate", - [KPF_DIRTY] = "D:dirty", - [KPF_LRU] = "l:lru", - [KPF_ACTIVE] = "A:active", - [KPF_SLAB] = "S:slab", - [KPF_WRITEBACK] = "W:writeback", - [KPF_RECLAIM] = "I:reclaim", - [KPF_BUDDY] = "B:buddy", - - [KPF_MMAP] = "M:mmap", - [KPF_ANON] = "a:anonymous", - [KPF_SWAPCACHE] = "s:swapcache", - [KPF_SWAPBACKED] = "b:swapbacked", - [KPF_COMPOUND_HEAD] = "H:compound_head", - [KPF_COMPOUND_TAIL] = "T:compound_tail", - [KPF_HUGE] = "G:huge", - [KPF_UNEVICTABLE] = "u:unevictable", - [KPF_HWPOISON] = "X:hwpoison", - [KPF_NOPAGE] = "n:nopage", - [KPF_KSM] = "x:ksm", - [KPF_THP] = "t:thp", - [KPF_OFFLINE] = "o:offline", - [KPF_PGTABLE] = "g:pgtable", - [KPF_ZERO_PAGE] = "z:zero_page", - [KPF_IDLE] = "i:idle_page", - - [KPF_RESERVED] = "r:reserved", - [KPF_MLOCKED] = "m:mlocked", - [KPF_MAPPEDTODISK] = "d:mappedtodisk", - [KPF_PRIVATE] = "P:private", - [KPF_PRIVATE_2] = "p:private_2", - [KPF_OWNER_PRIVATE] = "O:owner_private", - [KPF_ARCH] = "h:arch", - [KPF_UNCACHED] = "c:uncached", - [KPF_SOFTDIRTY] = "f:softdirty", - [KPF_ARCH_2] = "H:arch_2", - - [KPF_ANON_EXCLUSIVE] = "d:anon_exclusive", - [KPF_READAHEAD] = "I:readahead", - [KPF_SLOB_FREE] = "P:slob_free", - [KPF_SLUB_FROZEN] = "A:slub_frozen", - [KPF_SLUB_DEBUG] = "E:slub_debug", - - [KPF_FILE] = "F:file", - [KPF_SWAP] = "w:swap", - [KPF_MMAP_EXCLUSIVE] = "1:mmap_exclusive", -}; - - -/* - * data structures - */ - -static int opt_raw; /* for kernel developers */ -static int opt_list; /* list pages (in ranges) */ -static int opt_mark_idle; /* set accessed bit */ -static int opt_no_summary; /* don't show summary */ -static pid_t 
opt_pid; /* process to walk */ -const char *opt_file; /* file or directory path */ -static uint64_t opt_cgroup; /* cgroup inode */ -static int opt_list_cgroup;/* list page cgroup */ -static int opt_list_mapcnt;/* list page map count */ -static const char *opt_kpageflags;/* kpageflags file to parse */ - -#define MAX_ADDR_RANGES 1024 -static int nr_addr_ranges; -static unsigned long opt_offset[MAX_ADDR_RANGES]; -static unsigned long opt_size[MAX_ADDR_RANGES]; - -#define MAX_VMAS 10240 -static int nr_vmas; -static unsigned long pg_start[MAX_VMAS]; -static unsigned long pg_end[MAX_VMAS]; - -#define MAX_BIT_FILTERS 64 -static int nr_bit_filters; -static uint64_t opt_mask[MAX_BIT_FILTERS]; -static uint64_t opt_bits[MAX_BIT_FILTERS]; - -static int page_size; - -static int pagemap_fd; -static int kpageflags_fd; -static int kpagecount_fd = -1; -static int kpagecgroup_fd = -1; -static int page_idle_fd = -1; - -static int opt_hwpoison; -static int opt_unpoison; - -static const char *hwpoison_debug_fs; -static int hwpoison_inject_fd; -static int hwpoison_forget_fd; - -#define HASH_SHIFT 13 -#define HASH_SIZE (1 << HASH_SHIFT) -#define HASH_MASK (HASH_SIZE - 1) -#define HASH_KEY(flags) (flags & HASH_MASK) - -static unsigned long total_pages; -static unsigned long nr_pages[HASH_SIZE]; -static uint64_t page_flags[HASH_SIZE]; - - -/* - * helper functions - */ - -#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) - -#define min_t(type, x, y) ({ \ - type __min1 = (x); \ - type __min2 = (y); \ - __min1 < __min2 ? __min1 : __min2; }) - -#define max_t(type, x, y) ({ \ - type __max1 = (x); \ - type __max2 = (y); \ - __max1 > __max2 ? __max1 : __max2; }) - -static unsigned long pages2mb(unsigned long pages) -{ - return (pages * page_size) >> 20; -} - -static void fatal(const char *x, ...) 
-{ - va_list ap; - - va_start(ap, x); - vfprintf(stderr, x, ap); - va_end(ap); - exit(EXIT_FAILURE); -} - -static int checked_open(const char *pathname, int flags) -{ - int fd = open(pathname, flags); - - if (fd < 0) { - perror(pathname); - exit(EXIT_FAILURE); - } - - return fd; -} - -/* - * pagemap/kpageflags routines - */ - -static unsigned long do_u64_read(int fd, const char *name, - uint64_t *buf, - unsigned long index, - unsigned long count) -{ - long bytes; - - if (index > ULONG_MAX / 8) - fatal("index overflow: %lu\n", index); - - bytes = pread(fd, buf, count * 8, (off_t)index * 8); - if (bytes < 0) { - perror(name); - exit(EXIT_FAILURE); - } - if (bytes % 8) - fatal("partial read: %lu bytes\n", bytes); - - return bytes / 8; -} - -static unsigned long kpageflags_read(uint64_t *buf, - unsigned long index, - unsigned long pages) -{ - return do_u64_read(kpageflags_fd, opt_kpageflags, buf, index, pages); -} - -static unsigned long kpagecgroup_read(uint64_t *buf, - unsigned long index, - unsigned long pages) -{ - if (kpagecgroup_fd < 0) - return pages; - - return do_u64_read(kpagecgroup_fd, opt_kpageflags, buf, index, pages); -} - -static unsigned long kpagecount_read(uint64_t *buf, - unsigned long index, - unsigned long pages) -{ - return kpagecount_fd < 0 ? pages : - do_u64_read(kpagecount_fd, PROC_KPAGECOUNT, - buf, index, pages); -} - -static unsigned long pagemap_read(uint64_t *buf, - unsigned long index, - unsigned long pages) -{ - return do_u64_read(pagemap_fd, "/proc/pid/pagemap", buf, index, pages); -} - -static unsigned long pagemap_pfn(uint64_t val) -{ - unsigned long pfn; - - if (val & PM_PRESENT) - pfn = PM_PFRAME(val); - else - pfn = 0; - - return pfn; -} - -static unsigned long pagemap_swap_offset(uint64_t val) -{ - return val & PM_SWAP ? 
PM_SWAP_OFFSET(val) : 0; -} - -/* - * page flag names - */ - -static char *page_flag_name(uint64_t flags) -{ - static char buf[65]; - int present; - size_t i, j; - - for (i = 0, j = 0; i < ARRAY_SIZE(page_flag_names); i++) { - present = (flags >> i) & 1; - if (!page_flag_names[i]) { - if (present) - fatal("unknown flag bit %d\n", i); - continue; - } - buf[j++] = present ? page_flag_names[i][0] : '_'; - } - - return buf; -} - -static char *page_flag_longname(uint64_t flags) -{ - static char buf[1024]; - size_t i, n; - - for (i = 0, n = 0; i < ARRAY_SIZE(page_flag_names); i++) { - if (!page_flag_names[i]) - continue; - if ((flags >> i) & 1) - n += snprintf(buf + n, sizeof(buf) - n, "%s,", - page_flag_names[i] + 2); - } - if (n) - n--; - buf[n] = '\0'; - - return buf; -} - - -/* - * page list and summary - */ - -static void show_page_range(unsigned long voffset, unsigned long offset, - unsigned long size, uint64_t flags, - uint64_t cgroup, uint64_t mapcnt) -{ - static uint64_t flags0; - static uint64_t cgroup0; - static uint64_t mapcnt0; - static unsigned long voff; - static unsigned long index; - static unsigned long count; - - if (flags == flags0 && cgroup == cgroup0 && mapcnt == mapcnt0 && - offset == index + count && size && voffset == voff + count) { - count += size; - return; - } - - if (count) { - if (opt_pid) - printf("%lx\t", voff); - if (opt_file) - printf("%lx\t", voff); - if (opt_list_cgroup) - printf("@%llu\t", (unsigned long long)cgroup0); - if (opt_list_mapcnt) - printf("%lu\t", mapcnt0); - printf("%lx\t%lx\t%s\n", - index, count, page_flag_name(flags0)); - } - - flags0 = flags; - cgroup0 = cgroup; - mapcnt0 = mapcnt; - index = offset; - voff = voffset; - count = size; -} - -static void flush_page_range(void) -{ - show_page_range(0, 0, 0, 0, 0, 0); -} - -static void show_page(unsigned long voffset, unsigned long offset, - uint64_t flags, uint64_t cgroup, uint64_t mapcnt) -{ - if (opt_pid) - printf("%lx\t", voffset); - if (opt_file) - printf("%lx\t", 
voffset); - if (opt_list_cgroup) - printf("@%llu\t", (unsigned long long)cgroup); - if (opt_list_mapcnt) - printf("%lu\t", mapcnt); - - printf("%lx\t%s\n", offset, page_flag_name(flags)); -} - -static void show_summary(void) -{ - size_t i; - - printf(" flags\tpage-count MB" - " symbolic-flags\t\t\tlong-symbolic-flags\n"); - - for (i = 0; i < ARRAY_SIZE(nr_pages); i++) { - if (nr_pages[i]) - printf("0x%016llx\t%10lu %8lu %s\t%s\n", - (unsigned long long)page_flags[i], - nr_pages[i], - pages2mb(nr_pages[i]), - page_flag_name(page_flags[i]), - page_flag_longname(page_flags[i])); - } - - printf(" total\t%10lu %8lu\n", - total_pages, pages2mb(total_pages)); -} - - -/* - * page flag filters - */ - -static int bit_mask_ok(uint64_t flags) -{ - int i; - - for (i = 0; i < nr_bit_filters; i++) { - if (opt_bits[i] == KPF_ALL_BITS) { - if ((flags & opt_mask[i]) == 0) - return 0; - } else { - if ((flags & opt_mask[i]) != opt_bits[i]) - return 0; - } - } - - return 1; -} - -static uint64_t expand_overloaded_flags(uint64_t flags, uint64_t pme) -{ - /* Anonymous pages overload PG_mappedtodisk */ - if ((flags & BIT(ANON)) && (flags & BIT(MAPPEDTODISK))) - flags ^= BIT(MAPPEDTODISK) | BIT(ANON_EXCLUSIVE); - - /* SLOB/SLUB overload several page flags */ - if (flags & BIT(SLAB)) { - if (flags & BIT(PRIVATE)) - flags ^= BIT(PRIVATE) | BIT(SLOB_FREE); - if (flags & BIT(ACTIVE)) - flags ^= BIT(ACTIVE) | BIT(SLUB_FROZEN); - if (flags & BIT(ERROR)) - flags ^= BIT(ERROR) | BIT(SLUB_DEBUG); - } - - /* PG_reclaim is overloaded as PG_readahead in the read path */ - if ((flags & (BIT(RECLAIM) | BIT(WRITEBACK))) == BIT(RECLAIM)) - flags ^= BIT(RECLAIM) | BIT(READAHEAD); - - if (pme & PM_SOFT_DIRTY) - flags |= BIT(SOFTDIRTY); - if (pme & PM_FILE) - flags |= BIT(FILE); - if (pme & PM_SWAP) - flags |= BIT(SWAP); - if (pme & PM_MMAP_EXCLUSIVE) - flags |= BIT(MMAP_EXCLUSIVE); - - return flags; -} - -static uint64_t well_known_flags(uint64_t flags) -{ - /* hide flags intended only for kernel hacker */ 
- flags &= ~KPF_HACKERS_BITS; - - /* hide non-hugeTLB compound pages */ - if ((flags & BITS_COMPOUND) && !(flags & BIT(HUGE))) - flags &= ~BITS_COMPOUND; - - return flags; -} - -static uint64_t kpageflags_flags(uint64_t flags, uint64_t pme) -{ - if (opt_raw) - flags = expand_overloaded_flags(flags, pme); - else - flags = well_known_flags(flags); - - return flags; -} - -/* - * page actions - */ - -static void prepare_hwpoison_fd(void) -{ - char buf[MAX_PATH + 1]; - - hwpoison_debug_fs = debugfs__mount(); - if (!hwpoison_debug_fs) { - perror("mount debugfs"); - exit(EXIT_FAILURE); - } - - if (opt_hwpoison && !hwpoison_inject_fd) { - snprintf(buf, MAX_PATH, "%s/hwpoison/corrupt-pfn", - hwpoison_debug_fs); - hwpoison_inject_fd = checked_open(buf, O_WRONLY); - } - - if (opt_unpoison && !hwpoison_forget_fd) { - snprintf(buf, MAX_PATH, "%s/hwpoison/unpoison-pfn", - hwpoison_debug_fs); - hwpoison_forget_fd = checked_open(buf, O_WRONLY); - } -} - -static int hwpoison_page(unsigned long offset) -{ - char buf[100]; - int len; - - len = sprintf(buf, "0x%lx\n", offset); - len = write(hwpoison_inject_fd, buf, len); - if (len < 0) { - perror("hwpoison inject"); - return len; - } - return 0; -} - -static int unpoison_page(unsigned long offset) -{ - char buf[100]; - int len; - - len = sprintf(buf, "0x%lx\n", offset); - len = write(hwpoison_forget_fd, buf, len); - if (len < 0) { - perror("hwpoison forget"); - return len; - } - return 0; -} - -static int mark_page_idle(unsigned long offset) -{ - static unsigned long off; - static uint64_t buf; - int len; - - if ((offset / 64 == off / 64) || buf == 0) { - buf |= 1UL << (offset % 64); - off = offset; - return 0; - } - - len = pwrite(page_idle_fd, &buf, 8, 8 * (off / 64)); - if (len < 0) { - perror("mark page idle"); - return len; - } - - buf = 1UL << (offset % 64); - off = offset; - - return 0; -} - -/* - * page frame walker - */ - -static size_t hash_slot(uint64_t flags) -{ - size_t k = HASH_KEY(flags); - size_t i; - - /* Explicitly 
reserve slot 0 for flags 0: the following logic - * cannot distinguish an unoccupied slot from slot (flags==0). - */ - if (flags == 0) - return 0; - - /* search through the remaining (HASH_SIZE-1) slots */ - for (i = 1; i < ARRAY_SIZE(page_flags); i++, k++) { - if (!k || k >= ARRAY_SIZE(page_flags)) - k = 1; - if (page_flags[k] == 0) { - page_flags[k] = flags; - return k; - } - if (page_flags[k] == flags) - return k; - } - - fatal("hash table full: bump up HASH_SHIFT?\n"); - exit(EXIT_FAILURE); -} - -static void add_page(unsigned long voffset, unsigned long offset, - uint64_t flags, uint64_t cgroup, uint64_t mapcnt, - uint64_t pme) -{ - flags = kpageflags_flags(flags, pme); - - if (!bit_mask_ok(flags)) - return; - - if (opt_cgroup && cgroup != (uint64_t)opt_cgroup) - return; - - if (opt_hwpoison) - hwpoison_page(offset); - if (opt_unpoison) - unpoison_page(offset); - - if (opt_mark_idle) - mark_page_idle(offset); - - if (opt_list == 1) - show_page_range(voffset, offset, 1, flags, cgroup, mapcnt); - else if (opt_list == 2) - show_page(voffset, offset, flags, cgroup, mapcnt); - - nr_pages[hash_slot(flags)]++; - total_pages++; -} - -#define KPAGEFLAGS_BATCH (64 << 10) /* 64k pages */ -static void walk_pfn(unsigned long voffset, - unsigned long index, - unsigned long count, - uint64_t pme) -{ - uint64_t buf[KPAGEFLAGS_BATCH]; - uint64_t cgi[KPAGEFLAGS_BATCH]; - uint64_t cnt[KPAGEFLAGS_BATCH]; - unsigned long batch; - unsigned long pages; - unsigned long i; - - /* - * kpagecgroup_read() reads only if kpagecgroup were opened, but - * /proc/kpagecgroup might even not exist, so it's better to fill - * them with zeros here. 
- */ - if (count == 1) - cgi[0] = 0; - else - memset(cgi, 0, sizeof cgi); - - while (count) { - batch = min_t(unsigned long, count, KPAGEFLAGS_BATCH); - pages = kpageflags_read(buf, index, batch); - if (pages == 0) - break; - - if (kpagecgroup_read(cgi, index, pages) != pages) - fatal("kpagecgroup returned fewer pages than expected"); - - if (kpagecount_read(cnt, index, pages) != pages) - fatal("kpagecount returned fewer pages than expected"); - - for (i = 0; i < pages; i++) - add_page(voffset + i, index + i, - buf[i], cgi[i], cnt[i], pme); - - index += pages; - count -= pages; - } -} - -static void walk_swap(unsigned long voffset, uint64_t pme) -{ - uint64_t flags = kpageflags_flags(0, pme); - - if (!bit_mask_ok(flags)) - return; - - if (opt_cgroup) - return; - - if (opt_list == 1) - show_page_range(voffset, pagemap_swap_offset(pme), - 1, flags, 0, 0); - else if (opt_list == 2) - show_page(voffset, pagemap_swap_offset(pme), flags, 0, 0); - - nr_pages[hash_slot(flags)]++; - total_pages++; -} - -#define PAGEMAP_BATCH (64 << 10) -static void walk_vma(unsigned long index, unsigned long count) -{ - uint64_t buf[PAGEMAP_BATCH]; - unsigned long batch; - unsigned long pages; - unsigned long pfn; - unsigned long i; - - while (count) { - batch = min_t(unsigned long, count, PAGEMAP_BATCH); - pages = pagemap_read(buf, index, batch); - if (pages == 0) - break; - - for (i = 0; i < pages; i++) { - pfn = pagemap_pfn(buf[i]); - if (pfn) - walk_pfn(index + i, pfn, 1, buf[i]); - if (buf[i] & PM_SWAP) - walk_swap(index + i, buf[i]); - } - - index += pages; - count -= pages; - } -} - -static void walk_task(unsigned long index, unsigned long count) -{ - const unsigned long end = index + count; - unsigned long start; - int i = 0; - - while (index < end) { - - while (pg_end[i] <= index) - if (++i >= nr_vmas) - return; - if (pg_start[i] >= end) - return; - - start = max_t(unsigned long, pg_start[i], index); - index = min_t(unsigned long, pg_end[i], end); - - assert(start < index); - 
walk_vma(start, index - start); - } -} - -static void add_addr_range(unsigned long offset, unsigned long size) -{ - if (nr_addr_ranges >= MAX_ADDR_RANGES) - fatal("too many addr ranges\n"); - - opt_offset[nr_addr_ranges] = offset; - opt_size[nr_addr_ranges] = min_t(unsigned long, size, ULONG_MAX-offset); - nr_addr_ranges++; -} - -static void walk_addr_ranges(void) -{ - int i; - - kpageflags_fd = checked_open(opt_kpageflags, O_RDONLY); - - if (!nr_addr_ranges) - add_addr_range(0, ULONG_MAX); - - for (i = 0; i < nr_addr_ranges; i++) - if (!opt_pid) - walk_pfn(opt_offset[i], opt_offset[i], opt_size[i], 0); - else - walk_task(opt_offset[i], opt_size[i]); - - if (opt_mark_idle) - mark_page_idle(0); - - close(kpageflags_fd); -} - - -/* - * user interface - */ - -static const char *page_flag_type(uint64_t flag) -{ - if (flag & KPF_HACKERS_BITS) - return "(r)"; - if (flag & KPF_OVERLOADED_BITS) - return "(o)"; - return " "; -} - -static void usage(void) -{ - size_t i, j; - - printf( -"page-types [options]\n" -" -r|--raw Raw mode, for kernel developers\n" -" -d|--describe flags Describe flags\n" -" -a|--addr addr-spec Walk a range of pages\n" -" -b|--bits bits-spec Walk pages with specified bits\n" -" -c|--cgroup path|@inode Walk pages within memory cgroup\n" -" -p|--pid pid Walk process address space\n" -" -f|--file filename Walk file address space\n" -" -i|--mark-idle Mark pages idle\n" -" -l|--list Show page details in ranges\n" -" -L|--list-each Show page details one by one\n" -" -C|--list-cgroup Show cgroup inode for pages\n" -" -M|--list-mapcnt Show page map count\n" -" -N|--no-summary Don't show summary info\n" -" -X|--hwpoison hwpoison pages\n" -" -x|--unpoison unpoison pages\n" -" -F|--kpageflags filename kpageflags file to parse\n" -" -h|--help Show this usage message\n" -"flags:\n" -" 0x10 bitfield format, e.g.\n" -" anon bit-name, e.g.\n" -" 0x10,anon comma-separated list, e.g.\n" -"addr-spec:\n" -" N one page at offset N (unit: pages)\n" -" N+M pages range from 
N to N+M-1\n" -" N,M pages range from N to M-1\n" -" N, pages range from N to end\n" -" ,M pages range from 0 to M-1\n" -"bits-spec:\n" -" bit1,bit2 (flags & (bit1|bit2)) != 0\n" -" bit1,bit2=bit1 (flags & (bit1|bit2)) == bit1\n" -" bit1,~bit2 (flags & (bit1|bit2)) == bit1\n" -" =bit1,bit2 flags == (bit1|bit2)\n" -"bit-names:\n" - ); - - for (i = 0, j = 0; i < ARRAY_SIZE(page_flag_names); i++) { - if (!page_flag_names[i]) - continue; - printf("%16s%s", page_flag_names[i] + 2, - page_flag_type(1ULL << i)); - if (++j > 3) { - j = 0; - putchar('\n'); - } - } - printf("\n " - "(r) raw mode bits (o) overloaded bits\n"); -} - -static unsigned long long parse_number(const char *str) -{ - unsigned long long n; - - n = strtoll(str, NULL, 0); - - if (n == 0 && str[0] != '0') - fatal("invalid name or number: %s\n", str); - - return n; -} - -static void parse_pid(const char *str) -{ - FILE *file; - char buf[5000]; - - opt_pid = parse_number(str); - - sprintf(buf, "/proc/%d/pagemap", opt_pid); - pagemap_fd = checked_open(buf, O_RDONLY); - - sprintf(buf, "/proc/%d/maps", opt_pid); - file = fopen(buf, "r"); - if (!file) { - perror(buf); - exit(EXIT_FAILURE); - } - - while (fgets(buf, sizeof(buf), file) != NULL) { - unsigned long vm_start; - unsigned long vm_end; - unsigned long long pgoff; - int major, minor; - char r, w, x, s; - unsigned long ino; - int n; - - n = sscanf(buf, "%lx-%lx %c%c%c%c %llx %x:%x %lu", - &vm_start, - &vm_end, - &r, &w, &x, &s, - &pgoff, - &major, &minor, - &ino); - if (n < 10) { - fprintf(stderr, "unexpected line: %s\n", buf); - continue; - } - pg_start[nr_vmas] = vm_start / page_size; - pg_end[nr_vmas] = vm_end / page_size; - if (++nr_vmas >= MAX_VMAS) { - fprintf(stderr, "too many VMAs\n"); - break; - } - } - fclose(file); -} - -static void show_file(const char *name, const struct stat *st) -{ - unsigned long long size = st->st_size; - char atime[64], mtime[64]; - long now = time(NULL); - - printf("%s\tInode: %u\tSize: %llu (%llu pages)\n", - name, 
(unsigned)st->st_ino, - size, (size + page_size - 1) / page_size); - - strftime(atime, sizeof(atime), "%c", localtime(&st->st_atime)); - strftime(mtime, sizeof(mtime), "%c", localtime(&st->st_mtime)); - - printf("Modify: %s (%ld seconds ago)\nAccess: %s (%ld seconds ago)\n", - mtime, now - st->st_mtime, - atime, now - st->st_atime); -} - -static sigjmp_buf sigbus_jmp; - -static void * volatile sigbus_addr; - -static void sigbus_handler(int sig, siginfo_t *info, void *ucontex) -{ - (void)sig; - (void)ucontex; - sigbus_addr = info ? info->si_addr : NULL; - siglongjmp(sigbus_jmp, 1); -} - -static struct sigaction sigbus_action = { - .sa_sigaction = sigbus_handler, - .sa_flags = SA_SIGINFO, -}; - -static void walk_file_range(const char *name, int fd, - unsigned long off, unsigned long end) -{ - uint8_t vec[PAGEMAP_BATCH]; - uint64_t buf[PAGEMAP_BATCH], flags; - uint64_t cgroup = 0; - uint64_t mapcnt = 0; - unsigned long nr_pages, pfn, i; - ssize_t len; - void *ptr; - int first = 1; - - for (; off < end; off += len) { - nr_pages = (end - off + page_size - 1) / page_size; - if (nr_pages > PAGEMAP_BATCH) - nr_pages = PAGEMAP_BATCH; - len = nr_pages * page_size; - - ptr = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, off); - if (ptr == MAP_FAILED) - fatal("mmap failed: %s", name); - - /* determine cached pages */ - if (mincore(ptr, len, vec)) - fatal("mincore failed: %s", name); - - /* turn off readahead */ - if (madvise(ptr, len, MADV_RANDOM)) - fatal("madvice failed: %s", name); - - if (sigsetjmp(sigbus_jmp, 1)) { - end = off + sigbus_addr ? 
sigbus_addr - ptr : 0; - fprintf(stderr, "got sigbus at offset %lld: %s\n", - (long long)end, name); - goto got_sigbus; - } - - /* populate ptes */ - for (i = 0; i < nr_pages ; i++) { - if (vec[i] & 1) - (void)*(volatile int *)(ptr + i * page_size); - } -got_sigbus: - - /* turn off harvesting reference bits */ - if (madvise(ptr, len, MADV_SEQUENTIAL)) - fatal("madvice failed: %s", name); - - if (pagemap_read(buf, (unsigned long)ptr / page_size, - nr_pages) != nr_pages) - fatal("cannot read pagemap"); - - munmap(ptr, len); - - for (i = 0; i < nr_pages; i++) { - pfn = pagemap_pfn(buf[i]); - if (!pfn) - continue; - if (!kpageflags_read(&flags, pfn, 1)) - continue; - if (!kpagecgroup_read(&cgroup, pfn, 1)) - fatal("kpagecgroup_read failed"); - if (!kpagecount_read(&mapcnt, pfn, 1)) - fatal("kpagecount_read failed"); - if (first && opt_list) { - first = 0; - flush_page_range(); - } - add_page(off / page_size + i, pfn, - flags, cgroup, mapcnt, buf[i]); - } - } -} - -static void walk_file(const char *name, const struct stat *st) -{ - int i; - int fd; - - fd = checked_open(name, O_RDONLY|O_NOATIME|O_NOFOLLOW); - - if (!nr_addr_ranges) - add_addr_range(0, st->st_size / page_size); - - for (i = 0; i < nr_addr_ranges; i++) - walk_file_range(name, fd, opt_offset[i] * page_size, - (opt_offset[i] + opt_size[i]) * page_size); - - close(fd); -} - -int walk_tree(const char *name, const struct stat *st, int type, struct FTW *f) -{ - (void)f; - switch (type) { - case FTW_F: - if (S_ISREG(st->st_mode)) - walk_file(name, st); - break; - case FTW_DNR: - fprintf(stderr, "cannot read dir: %s\n", name); - break; - } - return 0; -} - -struct stat st; - -static void walk_page_cache(void) -{ - kpageflags_fd = checked_open(opt_kpageflags, O_RDONLY); - pagemap_fd = checked_open("/proc/self/pagemap", O_RDONLY); - sigaction(SIGBUS, &sigbus_action, NULL); - - if (stat(opt_file, &st)) - fatal("stat failed: %s\n", opt_file); - - if (S_ISREG(st.st_mode)) { - walk_file(opt_file, &st); - } else if 
(S_ISDIR(st.st_mode)) { - /* do not follow symlinks and mountpoints */ - if (nftw(opt_file, walk_tree, 64, FTW_MOUNT | FTW_PHYS) < 0) - fatal("nftw failed: %s\n", opt_file); - } else - fatal("unhandled file type: %s\n", opt_file); - - close(kpageflags_fd); - close(pagemap_fd); - signal(SIGBUS, SIG_DFL); -} - -static void parse_file(const char *name) -{ - opt_file = name; -} - -static void parse_cgroup(const char *path) -{ - if (path[0] == '@') { - opt_cgroup = parse_number(path + 1); - return; - } - - struct stat st; - - if (stat(path, &st)) - fatal("stat failed: %s: %m\n", path); - - if (!S_ISDIR(st.st_mode)) - fatal("cgroup supposed to be a directory: %s\n", path); - - opt_cgroup = st.st_ino; -} - -static void parse_addr_range(const char *optarg) -{ - unsigned long offset; - unsigned long size; - char *p; - - p = strchr(optarg, ','); - if (!p) - p = strchr(optarg, '+'); - - if (p == optarg) { - offset = 0; - size = parse_number(p + 1); - } else if (p) { - offset = parse_number(optarg); - if (p[1] == '\0') - size = ULONG_MAX; - else { - size = parse_number(p + 1); - if (*p == ',') { - if (size < offset) - fatal("invalid range: %lu,%lu\n", - offset, size); - size -= offset; - } - } - } else { - offset = parse_number(optarg); - size = 1; - } - - add_addr_range(offset, size); -} - -static void add_bits_filter(uint64_t mask, uint64_t bits) -{ - if (nr_bit_filters >= MAX_BIT_FILTERS) - fatal("too much bit filters\n"); - - opt_mask[nr_bit_filters] = mask; - opt_bits[nr_bit_filters] = bits; - nr_bit_filters++; -} - -static uint64_t parse_flag_name(const char *str, int len) -{ - size_t i; - - if (!*str || !len) - return 0; - - if (len <= 8 && !strncmp(str, "compound", len)) - return BITS_COMPOUND; - - for (i = 0; i < ARRAY_SIZE(page_flag_names); i++) { - if (!page_flag_names[i]) - continue; - if (!strncmp(str, page_flag_names[i] + 2, len)) - return 1ULL << i; - } - - return parse_number(str); -} - -static uint64_t parse_flag_names(const char *str, int all) -{ - const char 
*p = str; - uint64_t flags = 0; - - while (1) { - if (*p == ',' || *p == '=' || *p == '\0') { - if ((*str != '~') || (*str == '~' && all && *++str)) - flags |= parse_flag_name(str, p - str); - if (*p != ',') - break; - str = p + 1; - } - p++; - } - - return flags; -} - -static void parse_bits_mask(const char *optarg) -{ - uint64_t mask; - uint64_t bits; - const char *p; - - p = strchr(optarg, '='); - if (p == optarg) { - mask = KPF_ALL_BITS; - bits = parse_flag_names(p + 1, 0); - } else if (p) { - mask = parse_flag_names(optarg, 0); - bits = parse_flag_names(p + 1, 0); - } else if (strchr(optarg, '~')) { - mask = parse_flag_names(optarg, 1); - bits = parse_flag_names(optarg, 0); - } else { - mask = parse_flag_names(optarg, 0); - bits = KPF_ALL_BITS; - } - - add_bits_filter(mask, bits); -} - -static void parse_kpageflags(const char *name) -{ - opt_kpageflags = name; -} - -static void describe_flags(const char *optarg) -{ - uint64_t flags = parse_flag_names(optarg, 0); - - printf("0x%016llx\t%s\t%s\n", - (unsigned long long)flags, - page_flag_name(flags), - page_flag_longname(flags)); -} - -static const struct option opts[] = { - { "raw" , 0, NULL, 'r' }, - { "pid" , 1, NULL, 'p' }, - { "file" , 1, NULL, 'f' }, - { "addr" , 1, NULL, 'a' }, - { "bits" , 1, NULL, 'b' }, - { "cgroup" , 1, NULL, 'c' }, - { "describe" , 1, NULL, 'd' }, - { "mark-idle" , 0, NULL, 'i' }, - { "list" , 0, NULL, 'l' }, - { "list-each" , 0, NULL, 'L' }, - { "list-cgroup", 0, NULL, 'C' }, - { "list-mapcnt", 0, NULL, 'M' }, - { "no-summary", 0, NULL, 'N' }, - { "hwpoison" , 0, NULL, 'X' }, - { "unpoison" , 0, NULL, 'x' }, - { "kpageflags", 0, NULL, 'F' }, - { "help" , 0, NULL, 'h' }, - { NULL , 0, NULL, 0 } -}; - -int main(int argc, char *argv[]) -{ - int c; - - page_size = getpagesize(); - - while ((c = getopt_long(argc, argv, - "rp:f:a:b:d:c:CilLMNXxF:h", - opts, NULL)) != -1) { - switch (c) { - case 'r': - opt_raw = 1; - break; - case 'p': - parse_pid(optarg); - break; - case 'f': - 
parse_file(optarg); - break; - case 'a': - parse_addr_range(optarg); - break; - case 'b': - parse_bits_mask(optarg); - break; - case 'c': - parse_cgroup(optarg); - break; - case 'C': - opt_list_cgroup = 1; - break; - case 'd': - describe_flags(optarg); - exit(0); - case 'i': - opt_mark_idle = 1; - break; - case 'l': - opt_list = 1; - break; - case 'L': - opt_list = 2; - break; - case 'M': - opt_list_mapcnt = 1; - break; - case 'N': - opt_no_summary = 1; - break; - case 'X': - opt_hwpoison = 1; - prepare_hwpoison_fd(); - break; - case 'x': - opt_unpoison = 1; - prepare_hwpoison_fd(); - break; - case 'F': - parse_kpageflags(optarg); - break; - case 'h': - usage(); - exit(0); - default: - usage(); - exit(1); - } - } - - if (!opt_kpageflags) - opt_kpageflags = PROC_KPAGEFLAGS; - - if (opt_cgroup || opt_list_cgroup) - kpagecgroup_fd = checked_open(PROC_KPAGECGROUP, O_RDONLY); - - if (opt_list && opt_list_mapcnt) - kpagecount_fd = checked_open(PROC_KPAGECOUNT, O_RDONLY); - - if (opt_mark_idle) - page_idle_fd = checked_open(SYS_KERNEL_MM_PAGE_IDLE, O_RDWR); - - if (opt_list && opt_pid) - printf("voffset\t"); - if (opt_list && opt_file) - printf("foffset\t"); - if (opt_list && opt_list_cgroup) - printf("cgroup\t"); - if (opt_list && opt_list_mapcnt) - printf("map-cnt\t"); - - if (opt_list == 1) - printf("offset\tlen\tflags\n"); - if (opt_list == 2) - printf("offset\tflags\n"); - - if (opt_file) - walk_page_cache(); - else - walk_addr_ranges(); - - if (opt_list == 1) - flush_page_range(); - - if (opt_no_summary) - return 0; - - if (opt_list) - printf("\n\n"); - - if (opt_file) { - show_file(opt_file, &st); - printf("\n"); - } - - show_summary(); - - if (opt_list_mapcnt) - close(kpagecount_fd); - - if (page_idle_fd >= 0) - close(page_idle_fd); - - return 0; -} diff --git a/tools/vm/page_owner_sort.c b/tools/vm/page_owner_sort.c deleted file mode 100644 index 7c2ac124cdc8..000000000000 --- a/tools/vm/page_owner_sort.c +++ /dev/null @@ -1,897 +0,0 @@ -// 
SPDX-License-Identifier: GPL-2.0 -/* - * User-space helper to sort the output of /sys/kernel/debug/page_owner - * - * Example use: - * cat /sys/kernel/debug/page_owner > page_owner_full.txt - * ./page_owner_sort page_owner_full.txt sorted_page_owner.txt - * Or sort by total memory: - * ./page_owner_sort -m page_owner_full.txt sorted_page_owner.txt - * - * See Documentation/mm/page_owner.rst -*/ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define bool int -#define true 1 -#define false 0 -#define TASK_COMM_LEN 16 - -struct block_list { - char *txt; - char *comm; // task command name - char *stacktrace; - __u64 ts_nsec; - __u64 free_ts_nsec; - int len; - int num; - int page_num; - pid_t pid; - pid_t tgid; - int allocator; -}; -enum FILTER_BIT { - FILTER_UNRELEASE = 1<<1, - FILTER_PID = 1<<2, - FILTER_TGID = 1<<3, - FILTER_COMM = 1<<4 -}; -enum CULL_BIT { - CULL_UNRELEASE = 1<<1, - CULL_PID = 1<<2, - CULL_TGID = 1<<3, - CULL_COMM = 1<<4, - CULL_STACKTRACE = 1<<5, - CULL_ALLOCATOR = 1<<6 -}; -enum ALLOCATOR_BIT { - ALLOCATOR_CMA = 1<<1, - ALLOCATOR_SLAB = 1<<2, - ALLOCATOR_VMALLOC = 1<<3, - ALLOCATOR_OTHERS = 1<<4 -}; -enum ARG_TYPE { - ARG_TXT, ARG_COMM, ARG_STACKTRACE, ARG_ALLOC_TS, ARG_FREE_TS, - ARG_CULL_TIME, ARG_PAGE_NUM, ARG_PID, ARG_TGID, ARG_UNKNOWN, ARG_FREE, - ARG_ALLOCATOR -}; -enum SORT_ORDER { - SORT_ASC = 1, - SORT_DESC = -1, -}; -struct filter_condition { - pid_t *pids; - pid_t *tgids; - char **comms; - int pids_size; - int tgids_size; - int comms_size; -}; -struct sort_condition { - int (**cmps)(const void *, const void *); - int *signs; - int size; -}; -static struct filter_condition fc; -static struct sort_condition sc; -static regex_t order_pattern; -static regex_t pid_pattern; -static regex_t tgid_pattern; -static regex_t comm_pattern; -static regex_t ts_nsec_pattern; -static regex_t free_ts_nsec_pattern; -static struct block_list *list; -static int list_size; -static int 
max_size; -static int cull; -static int filter; -static bool debug_on; - -static void set_single_cmp(int (*cmp)(const void *, const void *), int sign); - -int read_block(char *buf, char *ext_buf, int buf_size, FILE *fin) -{ - char *curr = buf, *const buf_end = buf + buf_size; - - while (buf_end - curr > 1 && fgets(curr, buf_end - curr, fin)) { - if (*curr == '\n') { /* empty line */ - return curr - buf; - } - if (!strncmp(curr, "PFN", 3)) { - strcpy(ext_buf, curr); - continue; - } - curr += strlen(curr); - } - - return -1; /* EOF or no space left in buf. */ -} - -static int compare_txt(const void *p1, const void *p2) -{ - const struct block_list *l1 = p1, *l2 = p2; - - return strcmp(l1->txt, l2->txt); -} - -static int compare_stacktrace(const void *p1, const void *p2) -{ - const struct block_list *l1 = p1, *l2 = p2; - - return strcmp(l1->stacktrace, l2->stacktrace); -} - -static int compare_num(const void *p1, const void *p2) -{ - const struct block_list *l1 = p1, *l2 = p2; - - return l1->num - l2->num; -} - -static int compare_page_num(const void *p1, const void *p2) -{ - const struct block_list *l1 = p1, *l2 = p2; - - return l1->page_num - l2->page_num; -} - -static int compare_pid(const void *p1, const void *p2) -{ - const struct block_list *l1 = p1, *l2 = p2; - - return l1->pid - l2->pid; -} - -static int compare_tgid(const void *p1, const void *p2) -{ - const struct block_list *l1 = p1, *l2 = p2; - - return l1->tgid - l2->tgid; -} - -static int compare_allocator(const void *p1, const void *p2) -{ - const struct block_list *l1 = p1, *l2 = p2; - - return l1->allocator - l2->allocator; -} - -static int compare_comm(const void *p1, const void *p2) -{ - const struct block_list *l1 = p1, *l2 = p2; - - return strcmp(l1->comm, l2->comm); -} - -static int compare_ts(const void *p1, const void *p2) -{ - const struct block_list *l1 = p1, *l2 = p2; - - return l1->ts_nsec < l2->ts_nsec ? 
-1 : 1; -} - -static int compare_free_ts(const void *p1, const void *p2) -{ - const struct block_list *l1 = p1, *l2 = p2; - - return l1->free_ts_nsec < l2->free_ts_nsec ? -1 : 1; -} - -static int compare_release(const void *p1, const void *p2) -{ - const struct block_list *l1 = p1, *l2 = p2; - - if (!l1->free_ts_nsec && !l2->free_ts_nsec) - return 0; - if (l1->free_ts_nsec && l2->free_ts_nsec) - return 0; - return l1->free_ts_nsec ? 1 : -1; -} - -static int compare_cull_condition(const void *p1, const void *p2) -{ - if (cull == 0) - return compare_txt(p1, p2); - if ((cull & CULL_STACKTRACE) && compare_stacktrace(p1, p2)) - return compare_stacktrace(p1, p2); - if ((cull & CULL_PID) && compare_pid(p1, p2)) - return compare_pid(p1, p2); - if ((cull & CULL_TGID) && compare_tgid(p1, p2)) - return compare_tgid(p1, p2); - if ((cull & CULL_COMM) && compare_comm(p1, p2)) - return compare_comm(p1, p2); - if ((cull & CULL_UNRELEASE) && compare_release(p1, p2)) - return compare_release(p1, p2); - if ((cull & CULL_ALLOCATOR) && compare_allocator(p1, p2)) - return compare_allocator(p1, p2); - return 0; -} - -static int compare_sort_condition(const void *p1, const void *p2) -{ - int cmp = 0; - - for (int i = 0; i < sc.size; ++i) - if (cmp == 0) - cmp = sc.signs[i] * sc.cmps[i](p1, p2); - return cmp; -} - -static int search_pattern(regex_t *pattern, char *pattern_str, char *buf) -{ - int err, val_len; - regmatch_t pmatch[2]; - - err = regexec(pattern, buf, 2, pmatch, REG_NOTBOL); - if (err != 0 || pmatch[1].rm_so == -1) { - if (debug_on) - fprintf(stderr, "no matching pattern in %s\n", buf); - return -1; - } - val_len = pmatch[1].rm_eo - pmatch[1].rm_so; - - memcpy(pattern_str, buf + pmatch[1].rm_so, val_len); - - return 0; -} - -static bool check_regcomp(regex_t *pattern, const char *regex) -{ - int err; - - err = regcomp(pattern, regex, REG_EXTENDED | REG_NEWLINE); - if (err != 0 || pattern->re_nsub != 1) { - fprintf(stderr, "Invalid pattern %s code %d\n", regex, err); - return 
false; - } - return true; -} - -static char **explode(char sep, const char *str, int *size) -{ - int count = 0, len = strlen(str); - int lastindex = -1, j = 0; - - for (int i = 0; i < len; i++) - if (str[i] == sep) - count++; - char **ret = calloc(++count, sizeof(char *)); - - for (int i = 0; i < len; i++) { - if (str[i] == sep) { - ret[j] = calloc(i - lastindex, sizeof(char)); - memcpy(ret[j++], str + lastindex + 1, i - lastindex - 1); - lastindex = i; - } - } - if (lastindex <= len - 1) { - ret[j] = calloc(len - lastindex, sizeof(char)); - memcpy(ret[j++], str + lastindex + 1, strlen(str) - 1 - lastindex); - } - *size = j; - return ret; -} - -static void free_explode(char **arr, int size) -{ - for (int i = 0; i < size; i++) - free(arr[i]); - free(arr); -} - -# define FIELD_BUFF 25 - -static int get_page_num(char *buf) -{ - int order_val; - char order_str[FIELD_BUFF] = {0}; - char *endptr; - - search_pattern(&order_pattern, order_str, buf); - errno = 0; - order_val = strtol(order_str, &endptr, 10); - if (order_val > 64 || errno != 0 || endptr == order_str || *endptr != '\0') { - if (debug_on) - fprintf(stderr, "wrong order in follow buf:\n%s\n", buf); - return 0; - } - - return 1 << order_val; -} - -static pid_t get_pid(char *buf) -{ - pid_t pid; - char pid_str[FIELD_BUFF] = {0}; - char *endptr; - - search_pattern(&pid_pattern, pid_str, buf); - errno = 0; - pid = strtol(pid_str, &endptr, 10); - if (errno != 0 || endptr == pid_str || *endptr != '\0') { - if (debug_on) - fprintf(stderr, "wrong/invalid pid in follow buf:\n%s\n", buf); - return -1; - } - - return pid; - -} - -static pid_t get_tgid(char *buf) -{ - pid_t tgid; - char tgid_str[FIELD_BUFF] = {0}; - char *endptr; - - search_pattern(&tgid_pattern, tgid_str, buf); - errno = 0; - tgid = strtol(tgid_str, &endptr, 10); - if (errno != 0 || endptr == tgid_str || *endptr != '\0') { - if (debug_on) - fprintf(stderr, "wrong/invalid tgid in follow buf:\n%s\n", buf); - return -1; - } - - return tgid; - -} - -static 
__u64 get_ts_nsec(char *buf) -{ - __u64 ts_nsec; - char ts_nsec_str[FIELD_BUFF] = {0}; - char *endptr; - - search_pattern(&ts_nsec_pattern, ts_nsec_str, buf); - errno = 0; - ts_nsec = strtoull(ts_nsec_str, &endptr, 10); - if (errno != 0 || endptr == ts_nsec_str || *endptr != '\0') { - if (debug_on) - fprintf(stderr, "wrong ts_nsec in follow buf:\n%s\n", buf); - return -1; - } - - return ts_nsec; -} - -static __u64 get_free_ts_nsec(char *buf) -{ - __u64 free_ts_nsec; - char free_ts_nsec_str[FIELD_BUFF] = {0}; - char *endptr; - - search_pattern(&free_ts_nsec_pattern, free_ts_nsec_str, buf); - errno = 0; - free_ts_nsec = strtoull(free_ts_nsec_str, &endptr, 10); - if (errno != 0 || endptr == free_ts_nsec_str || *endptr != '\0') { - if (debug_on) - fprintf(stderr, "wrong free_ts_nsec in follow buf:\n%s\n", buf); - return -1; - } - - return free_ts_nsec; -} - -static char *get_comm(char *buf) -{ - char *comm_str = malloc(TASK_COMM_LEN); - - memset(comm_str, 0, TASK_COMM_LEN); - - search_pattern(&comm_pattern, comm_str, buf); - errno = 0; - if (errno != 0) { - if (debug_on) - fprintf(stderr, "wrong comm in follow buf:\n%s\n", buf); - return NULL; - } - - return comm_str; -} - -static int get_arg_type(const char *arg) -{ - if (!strcmp(arg, "pid") || !strcmp(arg, "p")) - return ARG_PID; - else if (!strcmp(arg, "tgid") || !strcmp(arg, "tg")) - return ARG_TGID; - else if (!strcmp(arg, "name") || !strcmp(arg, "n")) - return ARG_COMM; - else if (!strcmp(arg, "stacktrace") || !strcmp(arg, "st")) - return ARG_STACKTRACE; - else if (!strcmp(arg, "free") || !strcmp(arg, "f")) - return ARG_FREE; - else if (!strcmp(arg, "txt") || !strcmp(arg, "T")) - return ARG_TXT; - else if (!strcmp(arg, "free_ts") || !strcmp(arg, "ft")) - return ARG_FREE_TS; - else if (!strcmp(arg, "alloc_ts") || !strcmp(arg, "at")) - return ARG_ALLOC_TS; - else if (!strcmp(arg, "allocator") || !strcmp(arg, "ator")) - return ARG_ALLOCATOR; - else { - return ARG_UNKNOWN; - } -} - -static int get_allocator(const 
char *buf, const char *migrate_info) -{ - char *tmp, *first_line, *second_line; - int allocator = 0; - - if (strstr(migrate_info, "CMA")) - allocator |= ALLOCATOR_CMA; - if (strstr(migrate_info, "slab")) - allocator |= ALLOCATOR_SLAB; - tmp = strstr(buf, "__vmalloc_node_range"); - if (tmp) { - second_line = tmp; - while (*tmp != '\n') - tmp--; - tmp--; - while (*tmp != '\n') - tmp--; - first_line = ++tmp; - tmp = strstr(tmp, "alloc_pages"); - if (tmp && first_line <= tmp && tmp < second_line) - allocator |= ALLOCATOR_VMALLOC; - } - if (allocator == 0) - allocator = ALLOCATOR_OTHERS; - return allocator; -} - -static bool match_num_list(int num, int *list, int list_size) -{ - for (int i = 0; i < list_size; ++i) - if (list[i] == num) - return true; - return false; -} - -static bool match_str_list(const char *str, char **list, int list_size) -{ - for (int i = 0; i < list_size; ++i) - if (!strcmp(list[i], str)) - return true; - return false; -} - -static bool is_need(char *buf) -{ - __u64 ts_nsec, free_ts_nsec; - - ts_nsec = get_ts_nsec(buf); - free_ts_nsec = get_free_ts_nsec(buf); - - if ((filter & FILTER_UNRELEASE) && free_ts_nsec != 0 && ts_nsec < free_ts_nsec) - return false; - if ((filter & FILTER_PID) && !match_num_list(get_pid(buf), fc.pids, fc.pids_size)) - return false; - if ((filter & FILTER_TGID) && - !match_num_list(get_tgid(buf), fc.tgids, fc.tgids_size)) - return false; - - char *comm = get_comm(buf); - - if ((filter & FILTER_COMM) && - !match_str_list(comm, fc.comms, fc.comms_size)) { - free(comm); - return false; - } - free(comm); - return true; -} - -static bool add_list(char *buf, int len, char *ext_buf) -{ - if (list_size != 0 && - len == list[list_size-1].len && - memcmp(buf, list[list_size-1].txt, len) == 0) { - list[list_size-1].num++; - list[list_size-1].page_num += get_page_num(buf); - return true; - } - if (list_size == max_size) { - fprintf(stderr, "max_size too small??\n"); - return false; - } - if (!is_need(buf)) - return true; - 
list[list_size].pid = get_pid(buf); - list[list_size].tgid = get_tgid(buf); - list[list_size].comm = get_comm(buf); - list[list_size].txt = malloc(len+1); - if (!list[list_size].txt) { - fprintf(stderr, "Out of memory\n"); - return false; - } - memcpy(list[list_size].txt, buf, len); - list[list_size].txt[len] = 0; - list[list_size].len = len; - list[list_size].num = 1; - list[list_size].page_num = get_page_num(buf); - - list[list_size].stacktrace = strchr(list[list_size].txt, '\n') ?: ""; - if (*list[list_size].stacktrace == '\n') - list[list_size].stacktrace++; - list[list_size].ts_nsec = get_ts_nsec(buf); - list[list_size].free_ts_nsec = get_free_ts_nsec(buf); - list[list_size].allocator = get_allocator(buf, ext_buf); - list_size++; - if (list_size % 1000 == 0) { - printf("loaded %d\r", list_size); - fflush(stdout); - } - return true; -} - -static bool parse_cull_args(const char *arg_str) -{ - int size = 0; - char **args = explode(',', arg_str, &size); - - for (int i = 0; i < size; ++i) { - int arg_type = get_arg_type(args[i]); - - if (arg_type == ARG_PID) - cull |= CULL_PID; - else if (arg_type == ARG_TGID) - cull |= CULL_TGID; - else if (arg_type == ARG_COMM) - cull |= CULL_COMM; - else if (arg_type == ARG_STACKTRACE) - cull |= CULL_STACKTRACE; - else if (arg_type == ARG_FREE) - cull |= CULL_UNRELEASE; - else if (arg_type == ARG_ALLOCATOR) - cull |= CULL_ALLOCATOR; - else { - free_explode(args, size); - return false; - } - } - free_explode(args, size); - if (sc.size == 0) - set_single_cmp(compare_num, SORT_DESC); - return true; -} - -static void set_single_cmp(int (*cmp)(const void *, const void *), int sign) -{ - if (sc.signs == NULL || sc.size < 1) - sc.signs = calloc(1, sizeof(int)); - sc.signs[0] = sign; - if (sc.cmps == NULL || sc.size < 1) - sc.cmps = calloc(1, sizeof(int *)); - sc.cmps[0] = cmp; - sc.size = 1; -} - -static bool parse_sort_args(const char *arg_str) -{ - int size = 0; - - if (sc.size != 0) { /* reset sort_condition */ - free(sc.signs); - 
free(sc.cmps); - size = 0; - } - - char **args = explode(',', arg_str, &size); - - sc.signs = calloc(size, sizeof(int)); - sc.cmps = calloc(size, sizeof(int *)); - for (int i = 0; i < size; ++i) { - int offset = 0; - - sc.signs[i] = SORT_ASC; - if (args[i][0] == '-' || args[i][0] == '+') { - if (args[i][0] == '-') - sc.signs[i] = SORT_DESC; - offset = 1; - } - - int arg_type = get_arg_type(args[i]+offset); - - if (arg_type == ARG_PID) - sc.cmps[i] = compare_pid; - else if (arg_type == ARG_TGID) - sc.cmps[i] = compare_tgid; - else if (arg_type == ARG_COMM) - sc.cmps[i] = compare_comm; - else if (arg_type == ARG_STACKTRACE) - sc.cmps[i] = compare_stacktrace; - else if (arg_type == ARG_ALLOC_TS) - sc.cmps[i] = compare_ts; - else if (arg_type == ARG_FREE_TS) - sc.cmps[i] = compare_free_ts; - else if (arg_type == ARG_TXT) - sc.cmps[i] = compare_txt; - else if (arg_type == ARG_ALLOCATOR) - sc.cmps[i] = compare_allocator; - else { - free_explode(args, size); - sc.size = 0; - return false; - } - } - sc.size = size; - free_explode(args, size); - return true; -} - -static int *parse_nums_list(char *arg_str, int *list_size) -{ - int size = 0; - char **args = explode(',', arg_str, &size); - int *list = calloc(size, sizeof(int)); - - errno = 0; - for (int i = 0; i < size; ++i) { - char *endptr = NULL; - - list[i] = strtol(args[i], &endptr, 10); - if (errno != 0 || endptr == args[i] || *endptr != '\0') { - free(list); - return NULL; - } - } - *list_size = size; - free_explode(args, size); - return list; -} - -static void print_allocator(FILE *out, int allocator) -{ - fprintf(out, "allocated by "); - if (allocator & ALLOCATOR_CMA) - fprintf(out, "CMA "); - if (allocator & ALLOCATOR_SLAB) - fprintf(out, "SLAB "); - if (allocator & ALLOCATOR_VMALLOC) - fprintf(out, "VMALLOC "); - if (allocator & ALLOCATOR_OTHERS) - fprintf(out, "OTHERS "); -} - -#define BUF_SIZE (128 * 1024) - -static void usage(void) -{ - printf("Usage: ./page_owner_sort [OPTIONS] \n" - "-m\t\tSort by total 
memory.\n" - "-s\t\tSort by the stack trace.\n" - "-t\t\tSort by times (default).\n" - "-p\t\tSort by pid.\n" - "-P\t\tSort by tgid.\n" - "-n\t\tSort by task command name.\n" - "-a\t\tSort by memory allocate time.\n" - "-r\t\tSort by memory release time.\n" - "-f\t\tFilter out the information of blocks whose memory has been released.\n" - "-d\t\tPrint debug information.\n" - "--pid \tSelect by pid. This selects the information of blocks whose process ID numbers appear in .\n" - "--tgid \tSelect by tgid. This selects the information of blocks whose Thread Group ID numbers appear in .\n" - "--name \n\t\tSelect by command name. This selects the information of blocks whose command name appears in .\n" - "--cull \tCull by user-defined rules. is a single argument in the form of a comma-separated list with some common fields predefined\n" - "--sort \tSpecify sort order as: [+|-]key[,[+|-]key[,...]]\n" - ); -} - -int main(int argc, char **argv) -{ - FILE *fin, *fout; - char *buf, *ext_buf; - int i, count; - struct stat st; - int opt; - struct option longopts[] = { - { "pid", required_argument, NULL, 1 }, - { "tgid", required_argument, NULL, 2 }, - { "name", required_argument, NULL, 3 }, - { "cull", required_argument, NULL, 4 }, - { "sort", required_argument, NULL, 5 }, - { 0, 0, 0, 0}, - }; - - while ((opt = getopt_long(argc, argv, "adfmnprstP", longopts, NULL)) != -1) - switch (opt) { - case 'a': - set_single_cmp(compare_ts, SORT_ASC); - break; - case 'd': - debug_on = true; - break; - case 'f': - filter = filter | FILTER_UNRELEASE; - break; - case 'm': - set_single_cmp(compare_page_num, SORT_DESC); - break; - case 'p': - set_single_cmp(compare_pid, SORT_ASC); - break; - case 'r': - set_single_cmp(compare_free_ts, SORT_ASC); - break; - case 's': - set_single_cmp(compare_stacktrace, SORT_ASC); - break; - case 't': - set_single_cmp(compare_num, SORT_DESC); - break; - case 'P': - set_single_cmp(compare_tgid, SORT_ASC); - break; - case 'n': - set_single_cmp(compare_comm, 
SORT_ASC); - break; - case 1: - filter = filter | FILTER_PID; - fc.pids = parse_nums_list(optarg, &fc.pids_size); - if (fc.pids == NULL) { - fprintf(stderr, "wrong/invalid pid in from the command line:%s\n", - optarg); - exit(1); - } - break; - case 2: - filter = filter | FILTER_TGID; - fc.tgids = parse_nums_list(optarg, &fc.tgids_size); - if (fc.tgids == NULL) { - fprintf(stderr, "wrong/invalid tgid in from the command line:%s\n", - optarg); - exit(1); - } - break; - case 3: - filter = filter | FILTER_COMM; - fc.comms = explode(',', optarg, &fc.comms_size); - break; - case 4: - if (!parse_cull_args(optarg)) { - fprintf(stderr, "wrong argument after --cull option:%s\n", - optarg); - exit(1); - } - break; - case 5: - if (!parse_sort_args(optarg)) { - fprintf(stderr, "wrong argument after --sort option:%s\n", - optarg); - exit(1); - } - break; - default: - usage(); - exit(1); - } - - if (optind >= (argc - 1)) { - usage(); - exit(1); - } - - fin = fopen(argv[optind], "r"); - fout = fopen(argv[optind + 1], "w"); - if (!fin || !fout) { - usage(); - perror("open: "); - exit(1); - } - - if (!check_regcomp(&order_pattern, "order\\s*([0-9]*),")) - goto out_order; - if (!check_regcomp(&pid_pattern, "pid\\s*([0-9]*),")) - goto out_pid; - if (!check_regcomp(&tgid_pattern, "tgid\\s*([0-9]*) ")) - goto out_tgid; - if (!check_regcomp(&comm_pattern, "tgid\\s*[0-9]*\\s*\\((.*)\\),\\s*ts")) - goto out_comm; - if (!check_regcomp(&ts_nsec_pattern, "ts\\s*([0-9]*)\\s*ns,")) - goto out_ts; - if (!check_regcomp(&free_ts_nsec_pattern, "free_ts\\s*([0-9]*)\\s*ns")) - goto out_free_ts; - - fstat(fileno(fin), &st); - max_size = st.st_size / 100; /* hack ... 
*/ - - list = malloc(max_size * sizeof(*list)); - buf = malloc(BUF_SIZE); - ext_buf = malloc(BUF_SIZE); - if (!list || !buf || !ext_buf) { - fprintf(stderr, "Out of memory\n"); - goto out_free; - } - - for ( ; ; ) { - int buf_len = read_block(buf, ext_buf, BUF_SIZE, fin); - - if (buf_len < 0) - break; - if (!add_list(buf, buf_len, ext_buf)) - goto out_free; - } - - printf("loaded %d\n", list_size); - - printf("sorting ....\n"); - - qsort(list, list_size, sizeof(list[0]), compare_cull_condition); - - printf("culling\n"); - - for (i = count = 0; i < list_size; i++) { - if (count == 0 || - compare_cull_condition((void *)(&list[count-1]), (void *)(&list[i])) != 0) { - list[count++] = list[i]; - } else { - list[count-1].num += list[i].num; - list[count-1].page_num += list[i].page_num; - } - } - - qsort(list, count, sizeof(list[0]), compare_sort_condition); - - for (i = 0; i < count; i++) { - if (cull == 0) { - fprintf(fout, "%d times, %d pages, ", list[i].num, list[i].page_num); - print_allocator(fout, list[i].allocator); - fprintf(fout, ":\n%s\n", list[i].txt); - } - else { - fprintf(fout, "%d times, %d pages", - list[i].num, list[i].page_num); - if (cull & CULL_PID || filter & FILTER_PID) - fprintf(fout, ", PID %d", list[i].pid); - if (cull & CULL_TGID || filter & FILTER_TGID) - fprintf(fout, ", TGID %d", list[i].pid); - if (cull & CULL_COMM || filter & FILTER_COMM) - fprintf(fout, ", task_comm_name: %s", list[i].comm); - if (cull & CULL_ALLOCATOR) { - fprintf(fout, ", "); - print_allocator(fout, list[i].allocator); - } - if (cull & CULL_UNRELEASE) - fprintf(fout, " (%s)", - list[i].free_ts_nsec ? 
"UNRELEASED" : "RELEASED"); - if (cull & CULL_STACKTRACE) - fprintf(fout, ":\n%s", list[i].stacktrace); - fprintf(fout, "\n"); - } - } - -out_free: - if (ext_buf) - free(ext_buf); - if (buf) - free(buf); - if (list) - free(list); -out_free_ts: - regfree(&free_ts_nsec_pattern); -out_ts: - regfree(&ts_nsec_pattern); -out_comm: - regfree(&comm_pattern); -out_tgid: - regfree(&tgid_pattern); -out_pid: - regfree(&pid_pattern); -out_order: - regfree(&order_pattern); - - return 0; -} diff --git a/tools/vm/slabinfo-gnuplot.sh b/tools/vm/slabinfo-gnuplot.sh deleted file mode 100644 index 873a892147e5..000000000000 --- a/tools/vm/slabinfo-gnuplot.sh +++ /dev/null @@ -1,268 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0-only - -# Sergey Senozhatsky, 2015 -# sergey.senozhatsky.work@gmail.com -# - - -# This program is intended to plot a `slabinfo -X' stats, collected, -# for example, using the following command: -# while [ 1 ]; do slabinfo -X >> stats; sleep 1; done -# -# Use `slabinfo-gnuplot.sh stats' to pre-process collected records -# and generate graphs (totals, slabs sorted by size, slabs sorted -# by size). -# -# Graphs can be [individually] regenerate with different ranges and -# size (-r %d,%d and -s %d,%d options). -# -# To visually compare N `totals' graphs, do -# slabinfo-gnuplot.sh -t FILE1-totals FILE2-totals ... FILEN-totals -# - -min_slab_name_size=11 -xmin=0 -xmax=0 -width=1500 -height=700 -mode=preprocess - -usage() -{ - echo "Usage: [-s W,H] [-r MIN,MAX] [-t|-l] FILE1 [FILE2 ..]" - echo "FILEs must contain 'slabinfo -X' samples" - echo "-t - plot totals for FILE(s)" - echo "-l - plot slabs stats for FILE(s)" - echo "-s %d,%d - set image width and height" - echo "-r %d,%d - use data samples from a given range" -} - -check_file_exist() -{ - if [ ! 
-f "$1" ]; then - echo "File '$1' does not exist" - exit 1 - fi -} - -do_slabs_plotting() -{ - local file=$1 - local out_file - local range="every ::$xmin" - local xtic="" - local xtic_rotate="norotate" - local lines=2000000 - local wc_lines - - check_file_exist "$file" - - out_file=`basename "$file"` - if [ $xmax -ne 0 ]; then - range="$range::$xmax" - lines=$((xmax-xmin)) - fi - - wc_lines=`cat "$file" | wc -l` - if [ $? -ne 0 ] || [ "$wc_lines" -eq 0 ] ; then - wc_lines=$lines - fi - - if [ "$wc_lines" -lt "$lines" ]; then - lines=$wc_lines - fi - - if [ $((width / lines)) -gt $min_slab_name_size ]; then - xtic=":xtic(1)" - xtic_rotate=90 - fi - -gnuplot -p << EOF -#!/usr/bin/env gnuplot - -set terminal png enhanced size $width,$height large -set output '$out_file.png' -set autoscale xy -set xlabel 'samples' -set ylabel 'bytes' -set style histogram columnstacked title textcolor lt -1 -set style fill solid 0.15 -set xtics rotate $xtic_rotate -set key left above Left title reverse - -plot "$file" $range u 2$xtic title 'SIZE' with boxes,\ - '' $range u 3 title 'LOSS' with boxes -EOF - - if [ $? -eq 0 ]; then - echo "$out_file.png" - fi -} - -do_totals_plotting() -{ - local gnuplot_cmd="" - local range="every ::$xmin" - local file="" - - if [ $xmax -ne 0 ]; then - range="$range::$xmax" - fi - - for i in "${t_files[@]}"; do - check_file_exist "$i" - - file="$file"`basename "$i"` - gnuplot_cmd="$gnuplot_cmd '$i' $range using 1 title\ - '$i Memory usage' with lines," - gnuplot_cmd="$gnuplot_cmd '' $range using 2 title \ - '$i Loss' with lines," - done - -gnuplot -p << EOF -#!/usr/bin/env gnuplot - -set terminal png enhanced size $width,$height large -set autoscale xy -set output '$file.png' -set xlabel 'samples' -set ylabel 'bytes' -set key left above Left title reverse - -plot $gnuplot_cmd -EOF - - if [ $? 
-eq 0 ]; then - echo "$file.png" - fi -} - -do_preprocess() -{ - local out - local lines - local in=$1 - - check_file_exist "$in" - - # use only 'TOP' slab (biggest memory usage or loss) - let lines=3 - out=`basename "$in"`"-slabs-by-loss" - `cat "$in" | grep -A "$lines" 'Slabs sorted by loss' |\ - grep -E -iv '\-\-|Name|Slabs'\ - | awk '{print $1" "$4+$2*$3" "$4}' > "$out"` - if [ $? -eq 0 ]; then - do_slabs_plotting "$out" - fi - - let lines=3 - out=`basename "$in"`"-slabs-by-size" - `cat "$in" | grep -A "$lines" 'Slabs sorted by size' |\ - grep -E -iv '\-\-|Name|Slabs'\ - | awk '{print $1" "$4" "$4-$2*$3}' > "$out"` - if [ $? -eq 0 ]; then - do_slabs_plotting "$out" - fi - - out=`basename "$in"`"-totals" - `cat "$in" | grep "Memory used" |\ - awk '{print $3" "$7}' > "$out"` - if [ $? -eq 0 ]; then - t_files[0]=$out - do_totals_plotting - fi -} - -parse_opts() -{ - local opt - - while getopts "tlr::s::h" opt; do - case $opt in - t) - mode=totals - ;; - l) - mode=slabs - ;; - s) - array=(${OPTARG//,/ }) - width=${array[0]} - height=${array[1]} - ;; - r) - array=(${OPTARG//,/ }) - xmin=${array[0]} - xmax=${array[1]} - ;; - h) - usage - exit 0 - ;; - \?) - echo "Invalid option: -$OPTARG" >&2 - exit 1 - ;; - :) - echo "-$OPTARG requires an argument." >&2 - exit 1 - ;; - esac - done - - return $OPTIND -} - -parse_args() -{ - local idx=0 - local p - - for p in "$@"; do - case $mode in - preprocess) - files[$idx]=$p - idx=$idx+1 - ;; - totals) - t_files[$idx]=$p - idx=$idx+1 - ;; - slabs) - files[$idx]=$p - idx=$idx+1 - ;; - esac - done -} - -parse_opts "$@" -argstart=$? 
-parse_args "${@:$argstart}" - -if [ ${#files[@]} -eq 0 ] && [ ${#t_files[@]} -eq 0 ]; then - usage - exit 1 -fi - -case $mode in - preprocess) - for i in "${files[@]}"; do - do_preprocess "$i" - done - ;; - totals) - do_totals_plotting - ;; - slabs) - for i in "${files[@]}"; do - do_slabs_plotting "$i" - done - ;; - *) - echo "Unknown mode $mode" >&2 - usage - exit 1 - ;; -esac diff --git a/tools/vm/slabinfo.c b/tools/vm/slabinfo.c deleted file mode 100644 index cfaeaea71042..000000000000 --- a/tools/vm/slabinfo.c +++ /dev/null @@ -1,1544 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Slabinfo: Tool to get reports about slabs - * - * (C) 2007 sgi, Christoph Lameter - * (C) 2011 Linux Foundation, Christoph Lameter - * - * Compile with: - * - * gcc -o slabinfo slabinfo.c - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define MAX_SLABS 500 -#define MAX_ALIASES 500 -#define MAX_NODES 1024 - -struct slabinfo { - char *name; - int alias; - int refs; - int aliases, align, cache_dma, cpu_slabs, destroy_by_rcu; - unsigned int hwcache_align, object_size, objs_per_slab; - unsigned int sanity_checks, slab_size, store_user, trace; - int order, poison, reclaim_account, red_zone; - unsigned long partial, objects, slabs, objects_partial, objects_total; - unsigned long alloc_fastpath, alloc_slowpath; - unsigned long free_fastpath, free_slowpath; - unsigned long free_frozen, free_add_partial, free_remove_partial; - unsigned long alloc_from_partial, alloc_slab, free_slab, alloc_refill; - unsigned long cpuslab_flush, deactivate_full, deactivate_empty; - unsigned long deactivate_to_head, deactivate_to_tail; - unsigned long deactivate_remote_frees, order_fallback; - unsigned long cmpxchg_double_cpu_fail, cmpxchg_double_fail; - unsigned long alloc_node_mismatch, deactivate_bypass; - unsigned long cpu_partial_alloc, cpu_partial_free; - int numa[MAX_NODES]; - int numa_partial[MAX_NODES]; -} slabinfo[MAX_SLABS]; - 
-struct aliasinfo { - char *name; - char *ref; - struct slabinfo *slab; -} aliasinfo[MAX_ALIASES]; - -int slabs; -int actual_slabs; -int aliases; -int alias_targets; -int highest_node; - -char buffer[4096]; - -int show_empty; -int show_report; -int show_alias; -int show_slab; -int skip_zero = 1; -int show_numa; -int show_track; -int show_first_alias; -int validate; -int shrink; -int show_inverted; -int show_single_ref; -int show_totals; -int sort_size; -int sort_active; -int set_debug; -int show_ops; -int sort_partial; -int show_activity; -int output_lines = -1; -int sort_loss; -int extended_totals; -int show_bytes; -int unreclaim_only; - -/* Debug options */ -int sanity; -int redzone; -int poison; -int tracking; -int tracing; - -int page_size; - -regex_t pattern; - -static void fatal(const char *x, ...) -{ - va_list ap; - - va_start(ap, x); - vfprintf(stderr, x, ap); - va_end(ap); - exit(EXIT_FAILURE); -} - -static void usage(void) -{ - printf("slabinfo 4/15/2011. (c) 2007 sgi/(c) 2011 Linux Foundation.\n\n" - "slabinfo [-aABDefhilLnoPrsStTUvXz1] [N=K] [-dafzput] [slab-regexp]\n" - "-a|--aliases Show aliases\n" - "-A|--activity Most active slabs first\n" - "-B|--Bytes Show size in bytes\n" - "-D|--display-active Switch line format to activity\n" - "-e|--empty Show empty slabs\n" - "-f|--first-alias Show first alias\n" - "-h|--help Show usage information\n" - "-i|--inverted Inverted list\n" - "-l|--slabs Show slabs\n" - "-L|--Loss Sort by loss\n" - "-n|--numa Show NUMA information\n" - "-N|--lines=K Show the first K slabs\n" - "-o|--ops Show kmem_cache_ops\n" - "-P|--partial Sort by number of partial slabs\n" - "-r|--report Detailed report on single slabs\n" - "-s|--shrink Shrink slabs\n" - "-S|--Size Sort by size\n" - "-t|--tracking Show alloc/free information\n" - "-T|--Totals Show summary information\n" - "-U|--Unreclaim Show unreclaimable slabs only\n" - "-v|--validate Validate slabs\n" - "-X|--Xtotals Show extended summary information\n" - "-z|--zero Include 
empty slabs\n" - "-1|--1ref Single reference\n" - - "\n" - "-d | --debug Switch off all debug options\n" - "-da | --debug=a Switch on all debug options (--debug=FZPU)\n" - - "\n" - "-d[afzput] | --debug=[afzput]\n" - " f | F Sanity Checks (SLAB_CONSISTENCY_CHECKS)\n" - " z | Z Redzoning\n" - " p | P Poisoning\n" - " u | U Tracking\n" - " t | T Tracing\n" - - "\nSorting options (--Loss, --Size, --Partial) are mutually exclusive\n" - ); -} - -static unsigned long read_obj(const char *name) -{ - FILE *f = fopen(name, "r"); - - if (!f) { - buffer[0] = 0; - if (errno == EACCES) - fatal("%s, Try using superuser\n", strerror(errno)); - } else { - if (!fgets(buffer, sizeof(buffer), f)) - buffer[0] = 0; - fclose(f); - if (buffer[strlen(buffer)] == '\n') - buffer[strlen(buffer)] = 0; - } - return strlen(buffer); -} - - -/* - * Get the contents of an attribute - */ -static unsigned long get_obj(const char *name) -{ - if (!read_obj(name)) - return 0; - - return atol(buffer); -} - -static unsigned long get_obj_and_str(const char *name, char **x) -{ - unsigned long result = 0; - char *p; - - *x = NULL; - - if (!read_obj(name)) { - x = NULL; - return 0; - } - result = strtoul(buffer, &p, 10); - while (*p == ' ') - p++; - if (*p) - *x = strdup(p); - return result; -} - -static void set_obj(struct slabinfo *s, const char *name, int n) -{ - char x[100]; - FILE *f; - - snprintf(x, 100, "%s/%s", s->name, name); - f = fopen(x, "w"); - if (!f) - fatal("Cannot write to %s\n", x); - - fprintf(f, "%d\n", n); - fclose(f); -} - -static unsigned long read_slab_obj(struct slabinfo *s, const char *name) -{ - char x[100]; - FILE *f; - size_t l; - - snprintf(x, 100, "%s/%s", s->name, name); - f = fopen(x, "r"); - if (!f) { - buffer[0] = 0; - l = 0; - } else { - l = fread(buffer, 1, sizeof(buffer), f); - buffer[l] = 0; - fclose(f); - } - return l; -} - -static unsigned long read_debug_slab_obj(struct slabinfo *s, const char *name) -{ - char x[128]; - FILE *f; - size_t l; - - snprintf(x, 128, 
"/sys/kernel/debug/slab/%s/%s", s->name, name); - f = fopen(x, "r"); - if (!f) { - buffer[0] = 0; - l = 0; - } else { - l = fread(buffer, 1, sizeof(buffer), f); - buffer[l] = 0; - fclose(f); - } - return l; -} - -/* - * Put a size string together - */ -static int store_size(char *buffer, unsigned long value) -{ - unsigned long divisor = 1; - char trailer = 0; - int n; - - if (!show_bytes) { - if (value > 1000000000UL) { - divisor = 100000000UL; - trailer = 'G'; - } else if (value > 1000000UL) { - divisor = 100000UL; - trailer = 'M'; - } else if (value > 1000UL) { - divisor = 100; - trailer = 'K'; - } - } - - value /= divisor; - n = sprintf(buffer, "%ld",value); - if (trailer) { - buffer[n] = trailer; - n++; - buffer[n] = 0; - } - if (divisor != 1) { - memmove(buffer + n - 2, buffer + n - 3, 4); - buffer[n-2] = '.'; - n++; - } - return n; -} - -static void decode_numa_list(int *numa, char *t) -{ - int node; - int nr; - - memset(numa, 0, MAX_NODES * sizeof(int)); - - if (!t) - return; - - while (*t == 'N') { - t++; - node = strtoul(t, &t, 10); - if (*t == '=') { - t++; - nr = strtoul(t, &t, 10); - numa[node] = nr; - if (node > highest_node) - highest_node = node; - } - while (*t == ' ') - t++; - } -} - -static void slab_validate(struct slabinfo *s) -{ - if (strcmp(s->name, "*") == 0) - return; - - set_obj(s, "validate", 1); -} - -static void slab_shrink(struct slabinfo *s) -{ - if (strcmp(s->name, "*") == 0) - return; - - set_obj(s, "shrink", 1); -} - -int line = 0; - -static void first_line(void) -{ - if (show_activity) - printf("Name Objects Alloc Free" - " %%Fast Fallb O CmpX UL\n"); - else - printf("Name Objects Objsize %s " - "Slabs/Part/Cpu O/S O %%Fr %%Ef Flg\n", - sort_loss ? 
" Loss" : "Space"); -} - -/* - * Find the shortest alias of a slab - */ -static struct aliasinfo *find_one_alias(struct slabinfo *find) -{ - struct aliasinfo *a; - struct aliasinfo *best = NULL; - - for(a = aliasinfo;a < aliasinfo + aliases; a++) { - if (a->slab == find && - (!best || strlen(best->name) < strlen(a->name))) { - best = a; - if (strncmp(a->name,"kmall", 5) == 0) - return best; - } - } - return best; -} - -static unsigned long slab_size(struct slabinfo *s) -{ - return s->slabs * (page_size << s->order); -} - -static unsigned long slab_activity(struct slabinfo *s) -{ - return s->alloc_fastpath + s->free_fastpath + - s->alloc_slowpath + s->free_slowpath; -} - -static unsigned long slab_waste(struct slabinfo *s) -{ - return slab_size(s) - s->objects * s->object_size; -} - -static void slab_numa(struct slabinfo *s, int mode) -{ - int node; - - if (strcmp(s->name, "*") == 0) - return; - - if (!highest_node) { - printf("\n%s: No NUMA information available.\n", s->name); - return; - } - - if (skip_zero && !s->slabs) - return; - - if (!line) { - printf("\n%-21s:", mode ? "NUMA nodes" : "Slab"); - for(node = 0; node <= highest_node; node++) - printf(" %4d", node); - printf("\n----------------------"); - for(node = 0; node <= highest_node; node++) - printf("-----"); - printf("\n"); - } - printf("%-21s ", mode ? 
"All slabs" : s->name); - for(node = 0; node <= highest_node; node++) { - char b[20]; - - store_size(b, s->numa[node]); - printf(" %4s", b); - } - printf("\n"); - if (mode) { - printf("%-21s ", "Partial slabs"); - for(node = 0; node <= highest_node; node++) { - char b[20]; - - store_size(b, s->numa_partial[node]); - printf(" %4s", b); - } - printf("\n"); - } - line++; -} - -static void show_tracking(struct slabinfo *s) -{ - printf("\n%s: Kernel object allocation\n", s->name); - printf("-----------------------------------------------------------------------\n"); - if (read_debug_slab_obj(s, "alloc_traces")) - printf("%s", buffer); - else if (read_slab_obj(s, "alloc_calls")) - printf("%s", buffer); - else - printf("No Data\n"); - - printf("\n%s: Kernel object freeing\n", s->name); - printf("------------------------------------------------------------------------\n"); - if (read_debug_slab_obj(s, "free_traces")) - printf("%s", buffer); - else if (read_slab_obj(s, "free_calls")) - printf("%s", buffer); - else - printf("No Data\n"); - -} - -static void ops(struct slabinfo *s) -{ - if (strcmp(s->name, "*") == 0) - return; - - if (read_slab_obj(s, "ops")) { - printf("\n%s: kmem_cache operations\n", s->name); - printf("--------------------------------------------\n"); - printf("%s", buffer); - } else - printf("\n%s has no kmem_cache operations\n", s->name); -} - -static const char *onoff(int x) -{ - if (x) - return "On "; - return "Off"; -} - -static void slab_stats(struct slabinfo *s) -{ - unsigned long total_alloc; - unsigned long total_free; - unsigned long total; - - if (!s->alloc_slab) - return; - - total_alloc = s->alloc_fastpath + s->alloc_slowpath; - total_free = s->free_fastpath + s->free_slowpath; - - if (!total_alloc) - return; - - printf("\n"); - printf("Slab Perf Counter Alloc Free %%Al %%Fr\n"); - printf("--------------------------------------------------\n"); - printf("Fastpath %8lu %8lu %3lu %3lu\n", - s->alloc_fastpath, s->free_fastpath, - 
s->alloc_fastpath * 100 / total_alloc, - total_free ? s->free_fastpath * 100 / total_free : 0); - printf("Slowpath %8lu %8lu %3lu %3lu\n", - total_alloc - s->alloc_fastpath, s->free_slowpath, - (total_alloc - s->alloc_fastpath) * 100 / total_alloc, - total_free ? s->free_slowpath * 100 / total_free : 0); - printf("Page Alloc %8lu %8lu %3lu %3lu\n", - s->alloc_slab, s->free_slab, - s->alloc_slab * 100 / total_alloc, - total_free ? s->free_slab * 100 / total_free : 0); - printf("Add partial %8lu %8lu %3lu %3lu\n", - s->deactivate_to_head + s->deactivate_to_tail, - s->free_add_partial, - (s->deactivate_to_head + s->deactivate_to_tail) * 100 / total_alloc, - total_free ? s->free_add_partial * 100 / total_free : 0); - printf("Remove partial %8lu %8lu %3lu %3lu\n", - s->alloc_from_partial, s->free_remove_partial, - s->alloc_from_partial * 100 / total_alloc, - total_free ? s->free_remove_partial * 100 / total_free : 0); - - printf("Cpu partial list %8lu %8lu %3lu %3lu\n", - s->cpu_partial_alloc, s->cpu_partial_free, - s->cpu_partial_alloc * 100 / total_alloc, - total_free ? s->cpu_partial_free * 100 / total_free : 0); - - printf("RemoteObj/SlabFrozen %8lu %8lu %3lu %3lu\n", - s->deactivate_remote_frees, s->free_frozen, - s->deactivate_remote_frees * 100 / total_alloc, - total_free ? 
s->free_frozen * 100 / total_free : 0); - - printf("Total %8lu %8lu\n\n", total_alloc, total_free); - - if (s->cpuslab_flush) - printf("Flushes %8lu\n", s->cpuslab_flush); - - total = s->deactivate_full + s->deactivate_empty + - s->deactivate_to_head + s->deactivate_to_tail + s->deactivate_bypass; - - if (total) { - printf("\nSlab Deactivation Occurrences %%\n"); - printf("-------------------------------------------------\n"); - printf("Slab full %7lu %3lu%%\n", - s->deactivate_full, (s->deactivate_full * 100) / total); - printf("Slab empty %7lu %3lu%%\n", - s->deactivate_empty, (s->deactivate_empty * 100) / total); - printf("Moved to head of partial list %7lu %3lu%%\n", - s->deactivate_to_head, (s->deactivate_to_head * 100) / total); - printf("Moved to tail of partial list %7lu %3lu%%\n", - s->deactivate_to_tail, (s->deactivate_to_tail * 100) / total); - printf("Deactivation bypass %7lu %3lu%%\n", - s->deactivate_bypass, (s->deactivate_bypass * 100) / total); - printf("Refilled from foreign frees %7lu %3lu%%\n", - s->alloc_refill, (s->alloc_refill * 100) / total); - printf("Node mismatch %7lu %3lu%%\n", - s->alloc_node_mismatch, (s->alloc_node_mismatch * 100) / total); - } - - if (s->cmpxchg_double_fail || s->cmpxchg_double_cpu_fail) { - printf("\nCmpxchg_double Looping\n------------------------\n"); - printf("Locked Cmpxchg Double redos %lu\nUnlocked Cmpxchg Double redos %lu\n", - s->cmpxchg_double_fail, s->cmpxchg_double_cpu_fail); - } -} - -static void report(struct slabinfo *s) -{ - if (strcmp(s->name, "*") == 0) - return; - - printf("\nSlabcache: %-15s Aliases: %2d Order : %2d Objects: %lu\n", - s->name, s->aliases, s->order, s->objects); - if (s->hwcache_align) - printf("** Hardware cacheline aligned\n"); - if (s->cache_dma) - printf("** Memory is allocated in a special DMA zone\n"); - if (s->destroy_by_rcu) - printf("** Slabs are destroyed via RCU\n"); - if (s->reclaim_account) - printf("** Reclaim accounting active\n"); - - printf("\nSizes (bytes) Slabs 
Debug Memory\n"); - printf("------------------------------------------------------------------------\n"); - printf("Object : %7d Total : %7ld Sanity Checks : %s Total: %7ld\n", - s->object_size, s->slabs, onoff(s->sanity_checks), - s->slabs * (page_size << s->order)); - printf("SlabObj: %7d Full : %7ld Redzoning : %s Used : %7ld\n", - s->slab_size, s->slabs - s->partial - s->cpu_slabs, - onoff(s->red_zone), s->objects * s->object_size); - printf("SlabSiz: %7d Partial: %7ld Poisoning : %s Loss : %7ld\n", - page_size << s->order, s->partial, onoff(s->poison), - s->slabs * (page_size << s->order) - s->objects * s->object_size); - printf("Loss : %7d CpuSlab: %7d Tracking : %s Lalig: %7ld\n", - s->slab_size - s->object_size, s->cpu_slabs, onoff(s->store_user), - (s->slab_size - s->object_size) * s->objects); - printf("Align : %7d Objects: %7d Tracing : %s Lpadd: %7ld\n", - s->align, s->objs_per_slab, onoff(s->trace), - ((page_size << s->order) - s->objs_per_slab * s->slab_size) * - s->slabs); - - ops(s); - show_tracking(s); - slab_numa(s, 1); - slab_stats(s); -} - -static void slabcache(struct slabinfo *s) -{ - char size_str[20]; - char dist_str[40]; - char flags[20]; - char *p = flags; - - if (strcmp(s->name, "*") == 0) - return; - - if (unreclaim_only && s->reclaim_account) - return; - - if (actual_slabs == 1) { - report(s); - return; - } - - if (skip_zero && !show_empty && !s->slabs) - return; - - if (show_empty && s->slabs) - return; - - if (sort_loss == 0) - store_size(size_str, slab_size(s)); - else - store_size(size_str, slab_waste(s)); - snprintf(dist_str, 40, "%lu/%lu/%d", s->slabs - s->cpu_slabs, - s->partial, s->cpu_slabs); - - if (!line++) - first_line(); - - if (s->aliases) - *p++ = '*'; - if (s->cache_dma) - *p++ = 'd'; - if (s->hwcache_align) - *p++ = 'A'; - if (s->poison) - *p++ = 'P'; - if (s->reclaim_account) - *p++ = 'a'; - if (s->red_zone) - *p++ = 'Z'; - if (s->sanity_checks) - *p++ = 'F'; - if (s->store_user) - *p++ = 'U'; - if (s->trace) - *p++ = 
'T'; - - *p = 0; - if (show_activity) { - unsigned long total_alloc; - unsigned long total_free; - - total_alloc = s->alloc_fastpath + s->alloc_slowpath; - total_free = s->free_fastpath + s->free_slowpath; - - printf("%-21s %8ld %10ld %10ld %3ld %3ld %5ld %1d %4ld %4ld\n", - s->name, s->objects, - total_alloc, total_free, - total_alloc ? (s->alloc_fastpath * 100 / total_alloc) : 0, - total_free ? (s->free_fastpath * 100 / total_free) : 0, - s->order_fallback, s->order, s->cmpxchg_double_fail, - s->cmpxchg_double_cpu_fail); - } else { - printf("%-21s %8ld %7d %15s %14s %4d %1d %3ld %3ld %s\n", - s->name, s->objects, s->object_size, size_str, dist_str, - s->objs_per_slab, s->order, - s->slabs ? (s->partial * 100) / s->slabs : 100, - s->slabs ? (s->objects * s->object_size * 100) / - (s->slabs * (page_size << s->order)) : 100, - flags); - } -} - -/* - * Analyze debug options. Return false if something is amiss. - */ -static int debug_opt_scan(char *opt) -{ - if (!opt || !opt[0] || strcmp(opt, "-") == 0) - return 1; - - if (strcasecmp(opt, "a") == 0) { - sanity = 1; - poison = 1; - redzone = 1; - tracking = 1; - return 1; - } - - for ( ; *opt; opt++) - switch (*opt) { - case 'F' : case 'f': - if (sanity) - return 0; - sanity = 1; - break; - case 'P' : case 'p': - if (poison) - return 0; - poison = 1; - break; - - case 'Z' : case 'z': - if (redzone) - return 0; - redzone = 1; - break; - - case 'U' : case 'u': - if (tracking) - return 0; - tracking = 1; - break; - - case 'T' : case 't': - if (tracing) - return 0; - tracing = 1; - break; - default: - return 0; - } - return 1; -} - -static int slab_empty(struct slabinfo *s) -{ - if (s->objects > 0) - return 0; - - /* - * We may still have slabs even if there are no objects. Shrinking will - * remove them. 
- */ - if (s->slabs != 0) - set_obj(s, "shrink", 1); - - return 1; -} - -static void slab_debug(struct slabinfo *s) -{ - if (strcmp(s->name, "*") == 0) - return; - - if (sanity && !s->sanity_checks) { - set_obj(s, "sanity_checks", 1); - } - if (!sanity && s->sanity_checks) { - if (slab_empty(s)) - set_obj(s, "sanity_checks", 0); - else - fprintf(stderr, "%s not empty cannot disable sanity checks\n", s->name); - } - if (redzone && !s->red_zone) { - if (slab_empty(s)) - set_obj(s, "red_zone", 1); - else - fprintf(stderr, "%s not empty cannot enable redzoning\n", s->name); - } - if (!redzone && s->red_zone) { - if (slab_empty(s)) - set_obj(s, "red_zone", 0); - else - fprintf(stderr, "%s not empty cannot disable redzoning\n", s->name); - } - if (poison && !s->poison) { - if (slab_empty(s)) - set_obj(s, "poison", 1); - else - fprintf(stderr, "%s not empty cannot enable poisoning\n", s->name); - } - if (!poison && s->poison) { - if (slab_empty(s)) - set_obj(s, "poison", 0); - else - fprintf(stderr, "%s not empty cannot disable poisoning\n", s->name); - } - if (tracking && !s->store_user) { - if (slab_empty(s)) - set_obj(s, "store_user", 1); - else - fprintf(stderr, "%s not empty cannot enable tracking\n", s->name); - } - if (!tracking && s->store_user) { - if (slab_empty(s)) - set_obj(s, "store_user", 0); - else - fprintf(stderr, "%s not empty cannot disable tracking\n", s->name); - } - if (tracing && !s->trace) { - if (slabs == 1) - set_obj(s, "trace", 1); - else - fprintf(stderr, "%s can only enable trace for one slab at a time\n", s->name); - } - if (!tracing && s->trace) - set_obj(s, "trace", 1); -} - -static void totals(void) -{ - struct slabinfo *s; - - int used_slabs = 0; - char b1[20], b2[20], b3[20], b4[20]; - unsigned long long max = 1ULL << 63; - - /* Object size */ - unsigned long long min_objsize = max, max_objsize = 0, avg_objsize; - - /* Number of partial slabs in a slabcache */ - unsigned long long min_partial = max, max_partial = 0, - avg_partial, 
total_partial = 0; - - /* Number of slabs in a slab cache */ - unsigned long long min_slabs = max, max_slabs = 0, - avg_slabs, total_slabs = 0; - - /* Size of the whole slab */ - unsigned long long min_size = max, max_size = 0, - avg_size, total_size = 0; - - /* Bytes used for object storage in a slab */ - unsigned long long min_used = max, max_used = 0, - avg_used, total_used = 0; - - /* Waste: Bytes used for alignment and padding */ - unsigned long long min_waste = max, max_waste = 0, - avg_waste, total_waste = 0; - /* Number of objects in a slab */ - unsigned long long min_objects = max, max_objects = 0, - avg_objects, total_objects = 0; - /* Waste per object */ - unsigned long long min_objwaste = max, - max_objwaste = 0, avg_objwaste, - total_objwaste = 0; - - /* Memory per object */ - unsigned long long min_memobj = max, - max_memobj = 0, avg_memobj, - total_objsize = 0; - - /* Percentage of partial slabs per slab */ - unsigned long min_ppart = 100, max_ppart = 0, - avg_ppart, total_ppart = 0; - - /* Number of objects in partial slabs */ - unsigned long min_partobj = max, max_partobj = 0, - avg_partobj, total_partobj = 0; - - /* Percentage of partial objects of all objects in a slab */ - unsigned long min_ppartobj = 100, max_ppartobj = 0, - avg_ppartobj, total_ppartobj = 0; - - - for (s = slabinfo; s < slabinfo + slabs; s++) { - unsigned long long size; - unsigned long used; - unsigned long long wasted; - unsigned long long objwaste; - unsigned long percentage_partial_slabs; - unsigned long percentage_partial_objs; - - if (!s->slabs || !s->objects) - continue; - - used_slabs++; - - size = slab_size(s); - used = s->objects * s->object_size; - wasted = size - used; - objwaste = s->slab_size - s->object_size; - - percentage_partial_slabs = s->partial * 100 / s->slabs; - if (percentage_partial_slabs > 100) - percentage_partial_slabs = 100; - - percentage_partial_objs = s->objects_partial * 100 - / s->objects; - - if (percentage_partial_objs > 100) - 
percentage_partial_objs = 100; - - if (s->object_size < min_objsize) - min_objsize = s->object_size; - if (s->partial < min_partial) - min_partial = s->partial; - if (s->slabs < min_slabs) - min_slabs = s->slabs; - if (size < min_size) - min_size = size; - if (wasted < min_waste) - min_waste = wasted; - if (objwaste < min_objwaste) - min_objwaste = objwaste; - if (s->objects < min_objects) - min_objects = s->objects; - if (used < min_used) - min_used = used; - if (s->objects_partial < min_partobj) - min_partobj = s->objects_partial; - if (percentage_partial_slabs < min_ppart) - min_ppart = percentage_partial_slabs; - if (percentage_partial_objs < min_ppartobj) - min_ppartobj = percentage_partial_objs; - if (s->slab_size < min_memobj) - min_memobj = s->slab_size; - - if (s->object_size > max_objsize) - max_objsize = s->object_size; - if (s->partial > max_partial) - max_partial = s->partial; - if (s->slabs > max_slabs) - max_slabs = s->slabs; - if (size > max_size) - max_size = size; - if (wasted > max_waste) - max_waste = wasted; - if (objwaste > max_objwaste) - max_objwaste = objwaste; - if (s->objects > max_objects) - max_objects = s->objects; - if (used > max_used) - max_used = used; - if (s->objects_partial > max_partobj) - max_partobj = s->objects_partial; - if (percentage_partial_slabs > max_ppart) - max_ppart = percentage_partial_slabs; - if (percentage_partial_objs > max_ppartobj) - max_ppartobj = percentage_partial_objs; - if (s->slab_size > max_memobj) - max_memobj = s->slab_size; - - total_partial += s->partial; - total_slabs += s->slabs; - total_size += size; - total_waste += wasted; - - total_objects += s->objects; - total_used += used; - total_partobj += s->objects_partial; - total_ppart += percentage_partial_slabs; - total_ppartobj += percentage_partial_objs; - - total_objwaste += s->objects * objwaste; - total_objsize += s->objects * s->slab_size; - } - - if (!total_objects) { - printf("No objects\n"); - return; - } - if (!used_slabs) { - printf("No 
slabs\n"); - return; - } - - /* Per slab averages */ - avg_partial = total_partial / used_slabs; - avg_slabs = total_slabs / used_slabs; - avg_size = total_size / used_slabs; - avg_waste = total_waste / used_slabs; - - avg_objects = total_objects / used_slabs; - avg_used = total_used / used_slabs; - avg_partobj = total_partobj / used_slabs; - avg_ppart = total_ppart / used_slabs; - avg_ppartobj = total_ppartobj / used_slabs; - - /* Per object object sizes */ - avg_objsize = total_used / total_objects; - avg_objwaste = total_objwaste / total_objects; - avg_partobj = total_partobj * 100 / total_objects; - avg_memobj = total_objsize / total_objects; - - printf("Slabcache Totals\n"); - printf("----------------\n"); - printf("Slabcaches : %15d Aliases : %11d->%-3d Active: %3d\n", - slabs, aliases, alias_targets, used_slabs); - - store_size(b1, total_size);store_size(b2, total_waste); - store_size(b3, total_waste * 100 / total_used); - printf("Memory used: %15s # Loss : %15s MRatio:%6s%%\n", b1, b2, b3); - - store_size(b1, total_objects);store_size(b2, total_partobj); - store_size(b3, total_partobj * 100 / total_objects); - printf("# Objects : %15s # PartObj: %15s ORatio:%6s%%\n", b1, b2, b3); - - printf("\n"); - printf("Per Cache Average " - "Min Max Total\n"); - printf("---------------------------------------" - "-------------------------------------\n"); - - store_size(b1, avg_objects);store_size(b2, min_objects); - store_size(b3, max_objects);store_size(b4, total_objects); - printf("#Objects %15s %15s %15s %15s\n", - b1, b2, b3, b4); - - store_size(b1, avg_slabs);store_size(b2, min_slabs); - store_size(b3, max_slabs);store_size(b4, total_slabs); - printf("#Slabs %15s %15s %15s %15s\n", - b1, b2, b3, b4); - - store_size(b1, avg_partial);store_size(b2, min_partial); - store_size(b3, max_partial);store_size(b4, total_partial); - printf("#PartSlab %15s %15s %15s %15s\n", - b1, b2, b3, b4); - store_size(b1, avg_ppart);store_size(b2, min_ppart); - store_size(b3, 
max_ppart); - store_size(b4, total_partial * 100 / total_slabs); - printf("%%PartSlab%15s%% %15s%% %15s%% %15s%%\n", - b1, b2, b3, b4); - - store_size(b1, avg_partobj);store_size(b2, min_partobj); - store_size(b3, max_partobj); - store_size(b4, total_partobj); - printf("PartObjs %15s %15s %15s %15s\n", - b1, b2, b3, b4); - - store_size(b1, avg_ppartobj);store_size(b2, min_ppartobj); - store_size(b3, max_ppartobj); - store_size(b4, total_partobj * 100 / total_objects); - printf("%% PartObj%15s%% %15s%% %15s%% %15s%%\n", - b1, b2, b3, b4); - - store_size(b1, avg_size);store_size(b2, min_size); - store_size(b3, max_size);store_size(b4, total_size); - printf("Memory %15s %15s %15s %15s\n", - b1, b2, b3, b4); - - store_size(b1, avg_used);store_size(b2, min_used); - store_size(b3, max_used);store_size(b4, total_used); - printf("Used %15s %15s %15s %15s\n", - b1, b2, b3, b4); - - store_size(b1, avg_waste);store_size(b2, min_waste); - store_size(b3, max_waste);store_size(b4, total_waste); - printf("Loss %15s %15s %15s %15s\n", - b1, b2, b3, b4); - - printf("\n"); - printf("Per Object Average " - "Min Max\n"); - printf("---------------------------------------" - "--------------------\n"); - - store_size(b1, avg_memobj);store_size(b2, min_memobj); - store_size(b3, max_memobj); - printf("Memory %15s %15s %15s\n", - b1, b2, b3); - store_size(b1, avg_objsize);store_size(b2, min_objsize); - store_size(b3, max_objsize); - printf("User %15s %15s %15s\n", - b1, b2, b3); - - store_size(b1, avg_objwaste);store_size(b2, min_objwaste); - store_size(b3, max_objwaste); - printf("Loss %15s %15s %15s\n", - b1, b2, b3); -} - -static void sort_slabs(void) -{ - struct slabinfo *s1,*s2; - - for (s1 = slabinfo; s1 < slabinfo + slabs; s1++) { - for (s2 = s1 + 1; s2 < slabinfo + slabs; s2++) { - int result; - - if (sort_size) { - if (slab_size(s1) == slab_size(s2)) - result = strcasecmp(s1->name, s2->name); - else - result = slab_size(s1) < slab_size(s2); - } else if (sort_active) { - if 
(slab_activity(s1) == slab_activity(s2)) - result = strcasecmp(s1->name, s2->name); - else - result = slab_activity(s1) < slab_activity(s2); - } else if (sort_loss) { - if (slab_waste(s1) == slab_waste(s2)) - result = strcasecmp(s1->name, s2->name); - else - result = slab_waste(s1) < slab_waste(s2); - } else if (sort_partial) { - if (s1->partial == s2->partial) - result = strcasecmp(s1->name, s2->name); - else - result = s1->partial < s2->partial; - } else - result = strcasecmp(s1->name, s2->name); - - if (show_inverted) - result = -result; - - if (result > 0) { - struct slabinfo t; - - memcpy(&t, s1, sizeof(struct slabinfo)); - memcpy(s1, s2, sizeof(struct slabinfo)); - memcpy(s2, &t, sizeof(struct slabinfo)); - } - } - } -} - -static void sort_aliases(void) -{ - struct aliasinfo *a1,*a2; - - for (a1 = aliasinfo; a1 < aliasinfo + aliases; a1++) { - for (a2 = a1 + 1; a2 < aliasinfo + aliases; a2++) { - char *n1, *n2; - - n1 = a1->name; - n2 = a2->name; - if (show_alias && !show_inverted) { - n1 = a1->ref; - n2 = a2->ref; - } - if (strcasecmp(n1, n2) > 0) { - struct aliasinfo t; - - memcpy(&t, a1, sizeof(struct aliasinfo)); - memcpy(a1, a2, sizeof(struct aliasinfo)); - memcpy(a2, &t, sizeof(struct aliasinfo)); - } - } - } -} - -static void link_slabs(void) -{ - struct aliasinfo *a; - struct slabinfo *s; - - for (a = aliasinfo; a < aliasinfo + aliases; a++) { - - for (s = slabinfo; s < slabinfo + slabs; s++) - if (strcmp(a->ref, s->name) == 0) { - a->slab = s; - s->refs++; - break; - } - if (s == slabinfo + slabs) - fatal("Unresolved alias %s\n", a->ref); - } -} - -static void alias(void) -{ - struct aliasinfo *a; - char *active = NULL; - - sort_aliases(); - link_slabs(); - - for(a = aliasinfo; a < aliasinfo + aliases; a++) { - - if (!show_single_ref && a->slab->refs == 1) - continue; - - if (!show_inverted) { - if (active) { - if (strcmp(a->slab->name, active) == 0) { - printf(" %s", a->name); - continue; - } - } - printf("\n%-12s <- %s", a->slab->name, a->name); - 
active = a->slab->name; - } - else - printf("%-15s -> %s\n", a->name, a->slab->name); - } - if (active) - printf("\n"); -} - - -static void rename_slabs(void) -{ - struct slabinfo *s; - struct aliasinfo *a; - - for (s = slabinfo; s < slabinfo + slabs; s++) { - if (*s->name != ':') - continue; - - if (s->refs > 1 && !show_first_alias) - continue; - - a = find_one_alias(s); - - if (a) - s->name = a->name; - else { - s->name = "*"; - actual_slabs--; - } - } -} - -static int slab_mismatch(char *slab) -{ - return regexec(&pattern, slab, 0, NULL, 0); -} - -static void read_slab_dir(void) -{ - DIR *dir; - struct dirent *de; - struct slabinfo *slab = slabinfo; - struct aliasinfo *alias = aliasinfo; - char *p; - char *t; - int count; - - if (chdir("/sys/kernel/slab") && chdir("/sys/slab")) - fatal("SYSFS support for SLUB not active\n"); - - dir = opendir("."); - while ((de = readdir(dir))) { - if (de->d_name[0] == '.' || - (de->d_name[0] != ':' && slab_mismatch(de->d_name))) - continue; - switch (de->d_type) { - case DT_LNK: - alias->name = strdup(de->d_name); - count = readlink(de->d_name, buffer, sizeof(buffer)-1); - - if (count < 0) - fatal("Cannot read symlink %s\n", de->d_name); - - buffer[count] = 0; - p = buffer + count; - while (p > buffer && p[-1] != '/') - p--; - alias->ref = strdup(p); - alias++; - break; - case DT_DIR: - if (chdir(de->d_name)) - fatal("Unable to access slab %s\n", slab->name); - slab->name = strdup(de->d_name); - slab->alias = 0; - slab->refs = 0; - slab->aliases = get_obj("aliases"); - slab->align = get_obj("align"); - slab->cache_dma = get_obj("cache_dma"); - slab->cpu_slabs = get_obj("cpu_slabs"); - slab->destroy_by_rcu = get_obj("destroy_by_rcu"); - slab->hwcache_align = get_obj("hwcache_align"); - slab->object_size = get_obj("object_size"); - slab->objects = get_obj("objects"); - slab->objects_partial = get_obj("objects_partial"); - slab->objects_total = get_obj("objects_total"); - slab->objs_per_slab = get_obj("objs_per_slab"); - 
slab->order = get_obj("order"); - slab->partial = get_obj("partial"); - slab->partial = get_obj_and_str("partial", &t); - decode_numa_list(slab->numa_partial, t); - free(t); - slab->poison = get_obj("poison"); - slab->reclaim_account = get_obj("reclaim_account"); - slab->red_zone = get_obj("red_zone"); - slab->sanity_checks = get_obj("sanity_checks"); - slab->slab_size = get_obj("slab_size"); - slab->slabs = get_obj_and_str("slabs", &t); - decode_numa_list(slab->numa, t); - free(t); - slab->store_user = get_obj("store_user"); - slab->trace = get_obj("trace"); - slab->alloc_fastpath = get_obj("alloc_fastpath"); - slab->alloc_slowpath = get_obj("alloc_slowpath"); - slab->free_fastpath = get_obj("free_fastpath"); - slab->free_slowpath = get_obj("free_slowpath"); - slab->free_frozen= get_obj("free_frozen"); - slab->free_add_partial = get_obj("free_add_partial"); - slab->free_remove_partial = get_obj("free_remove_partial"); - slab->alloc_from_partial = get_obj("alloc_from_partial"); - slab->alloc_slab = get_obj("alloc_slab"); - slab->alloc_refill = get_obj("alloc_refill"); - slab->free_slab = get_obj("free_slab"); - slab->cpuslab_flush = get_obj("cpuslab_flush"); - slab->deactivate_full = get_obj("deactivate_full"); - slab->deactivate_empty = get_obj("deactivate_empty"); - slab->deactivate_to_head = get_obj("deactivate_to_head"); - slab->deactivate_to_tail = get_obj("deactivate_to_tail"); - slab->deactivate_remote_frees = get_obj("deactivate_remote_frees"); - slab->order_fallback = get_obj("order_fallback"); - slab->cmpxchg_double_cpu_fail = get_obj("cmpxchg_double_cpu_fail"); - slab->cmpxchg_double_fail = get_obj("cmpxchg_double_fail"); - slab->cpu_partial_alloc = get_obj("cpu_partial_alloc"); - slab->cpu_partial_free = get_obj("cpu_partial_free"); - slab->alloc_node_mismatch = get_obj("alloc_node_mismatch"); - slab->deactivate_bypass = get_obj("deactivate_bypass"); - chdir(".."); - if (slab->name[0] == ':') - alias_targets++; - slab++; - break; - default : - 
fatal("Unknown file type %lx\n", de->d_type); - } - } - closedir(dir); - slabs = slab - slabinfo; - actual_slabs = slabs; - aliases = alias - aliasinfo; - if (slabs > MAX_SLABS) - fatal("Too many slabs\n"); - if (aliases > MAX_ALIASES) - fatal("Too many aliases\n"); -} - -static void output_slabs(void) -{ - struct slabinfo *slab; - int lines = output_lines; - - for (slab = slabinfo; (slab < slabinfo + slabs) && - lines != 0; slab++) { - - if (slab->alias) - continue; - - if (lines != -1) - lines--; - - if (show_numa) - slab_numa(slab, 0); - else if (show_track) - show_tracking(slab); - else if (validate) - slab_validate(slab); - else if (shrink) - slab_shrink(slab); - else if (set_debug) - slab_debug(slab); - else if (show_ops) - ops(slab); - else if (show_slab) - slabcache(slab); - else if (show_report) - report(slab); - } -} - -static void _xtotals(char *heading, char *underline, - int loss, int size, int partial) -{ - printf("%s%s", heading, underline); - line = 0; - sort_loss = loss; - sort_size = size; - sort_partial = partial; - sort_slabs(); - output_slabs(); -} - -static void xtotals(void) -{ - char *heading, *underline; - - totals(); - - link_slabs(); - rename_slabs(); - - heading = "\nSlabs sorted by size\n"; - underline = "--------------------\n"; - _xtotals(heading, underline, 0, 1, 0); - - heading = "\nSlabs sorted by loss\n"; - underline = "--------------------\n"; - _xtotals(heading, underline, 1, 0, 0); - - heading = "\nSlabs sorted by number of partial slabs\n"; - underline = "---------------------------------------\n"; - _xtotals(heading, underline, 0, 0, 1); - - printf("\n"); -} - -struct option opts[] = { - { "aliases", no_argument, NULL, 'a' }, - { "activity", no_argument, NULL, 'A' }, - { "Bytes", no_argument, NULL, 'B'}, - { "debug", optional_argument, NULL, 'd' }, - { "display-activity", no_argument, NULL, 'D' }, - { "empty", no_argument, NULL, 'e' }, - { "first-alias", no_argument, NULL, 'f' }, - { "help", no_argument, NULL, 'h' }, - { 
"inverted", no_argument, NULL, 'i'}, - { "slabs", no_argument, NULL, 'l' }, - { "Loss", no_argument, NULL, 'L'}, - { "numa", no_argument, NULL, 'n' }, - { "lines", required_argument, NULL, 'N'}, - { "ops", no_argument, NULL, 'o' }, - { "partial", no_argument, NULL, 'p'}, - { "report", no_argument, NULL, 'r' }, - { "shrink", no_argument, NULL, 's' }, - { "Size", no_argument, NULL, 'S'}, - { "tracking", no_argument, NULL, 't'}, - { "Totals", no_argument, NULL, 'T'}, - { "Unreclaim", no_argument, NULL, 'U'}, - { "validate", no_argument, NULL, 'v' }, - { "Xtotals", no_argument, NULL, 'X'}, - { "zero", no_argument, NULL, 'z' }, - { "1ref", no_argument, NULL, '1'}, - { NULL, 0, NULL, 0 } -}; - -int main(int argc, char *argv[]) -{ - int c; - int err; - char *pattern_source; - - page_size = getpagesize(); - - while ((c = getopt_long(argc, argv, "aABd::DefhilLnN:oPrsStTUvXz1", - opts, NULL)) != -1) - switch (c) { - case 'a': - show_alias = 1; - break; - case 'A': - sort_active = 1; - break; - case 'B': - show_bytes = 1; - break; - case 'd': - set_debug = 1; - if (!debug_opt_scan(optarg)) - fatal("Invalid debug option '%s'\n", optarg); - break; - case 'D': - show_activity = 1; - break; - case 'e': - show_empty = 1; - break; - case 'f': - show_first_alias = 1; - break; - case 'h': - usage(); - return 0; - case 'i': - show_inverted = 1; - break; - case 'l': - show_slab = 1; - break; - case 'L': - sort_loss = 1; - break; - case 'n': - show_numa = 1; - break; - case 'N': - if (optarg) { - output_lines = atoi(optarg); - if (output_lines < 1) - output_lines = 1; - } - break; - case 'o': - show_ops = 1; - break; - case 'r': - show_report = 1; - break; - case 'P': - sort_partial = 1; - break; - case 's': - shrink = 1; - break; - case 'S': - sort_size = 1; - break; - case 't': - show_track = 1; - break; - case 'T': - show_totals = 1; - break; - case 'U': - unreclaim_only = 1; - break; - case 'v': - validate = 1; - break; - case 'X': - if (output_lines == -1) - output_lines = 1; - 
extended_totals = 1; - show_bytes = 1; - break; - case 'z': - skip_zero = 0; - break; - case '1': - show_single_ref = 1; - break; - default: - fatal("%s: Invalid option '%c'\n", argv[0], optopt); - - } - - if (!show_slab && !show_alias && !show_track && !show_report - && !validate && !shrink && !set_debug && !show_ops) - show_slab = 1; - - if (argc > optind) - pattern_source = argv[optind]; - else - pattern_source = ".*"; - - err = regcomp(&pattern, pattern_source, REG_ICASE|REG_NOSUB); - if (err) - fatal("%s: Invalid pattern '%s' code %d\n", - argv[0], pattern_source, err); - read_slab_dir(); - if (show_alias) { - alias(); - } else if (extended_totals) { - xtotals(); - } else if (show_totals) { - totals(); - } else { - link_slabs(); - rename_slabs(); - sort_slabs(); - output_slabs(); - } - return 0; -} -- cgit v1.2.3-58-ga151