From b1d29ba82cf2bc784f4c963ddd6a2cf29e229b33 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Fri, 26 Oct 2018 15:06:08 -0700 Subject: delayacct: track delays from thrashing cache pages Delay accounting already measures the time a task spends in direct reclaim and waiting for swapin, but in low memory situations tasks spend can spend a significant amount of their time waiting on thrashing page cache. This isn't tracked right now. To know the full impact of memory contention on an individual task, measure the delay when waiting for a recently evicted active cache page to read back into memory. Also update tools/accounting/getdelays.c: [hannes@computer accounting]$ sudo ./getdelays -d -p 1 print delayacct stats ON PID 1 CPU count real total virtual total delay total delay average 50318 745000000 847346785 400533713 0.008ms IO count delay total delay average 435 122601218 0ms SWAP count delay total delay average 0 0 0ms RECLAIM count delay total delay average 0 0 0ms THRASHING count delay total delay average 19 12621439 0ms Link: http://lkml.kernel.org/r/20180828172258.3185-4-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Acked-by: Peter Zijlstra (Intel) Tested-by: Daniel Drake Tested-by: Suren Baghdasaryan Cc: Christopher Lameter Cc: Ingo Molnar Cc: Johannes Weiner Cc: Mike Galbraith Cc: Peter Enderborg Cc: Randy Dunlap Cc: Shakeel Butt Cc: Tejun Heo Cc: Vinayak Menon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/accounting/getdelays.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/accounting/getdelays.c b/tools/accounting/getdelays.c index 9f420d98b5fb..8cb504d30384 100644 --- a/tools/accounting/getdelays.c +++ b/tools/accounting/getdelays.c @@ -203,6 +203,8 @@ static void print_delayacct(struct taskstats *t) "SWAP %15s%15s%15s\n" " %15llu%15llu%15llums\n" "RECLAIM %12s%15s%15s\n" + " %15llu%15llu%15llums\n" + "THRASHING%12s%15s%15s\n" " %15llu%15llu%15llums\n", "count", "real total", "virtual total", "delay total", "delay average", @@ -222,7 +224,11 @@ static void print_delayacct(struct taskstats *t) "count", "delay total", "delay average", (unsigned long long)t->freepages_count, (unsigned long long)t->freepages_delay_total, - average_ms(t->freepages_delay_total, t->freepages_count)); + average_ms(t->freepages_delay_total, t->freepages_count), + "count", "delay total", "delay average", + (unsigned long long)t->thrashing_count, + (unsigned long long)t->thrashing_delay_total, + average_ms(t->thrashing_delay_total, t->thrashing_count)); } static void task_context_switch_counts(struct taskstats *t) -- cgit v1.2.3-58-ga151 From 439de0d7443789c688428429874b8e27f693c00e Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Fri, 26 Oct 2018 15:09:09 -0700 Subject: userfaultfd: selftest: cleanup help messages Firstly, the help in the comment region is obsolete, now we support three parameters. Since at it, change it and move it into the help message of the program. Also, the help messages dumped here and there is obsolete too. Use a single usage() helper. Link: http://lkml.kernel.org/r/20180930074259.18229-2-peterx@redhat.com Signed-off-by: Peter Xu Acked-by: Mike Rapoport Cc: Shuah Khan Cc: Mike Kravetz Cc: Jerome Glisse Cc: Zi Yan Cc: "Kirill A . Shutemov" Cc: Shaohua Li Cc: Andrea Arcangeli Cc: "Dr . David Alan Gilbert" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/vm/userfaultfd.c | 46 +++++++++++++++++++------------- 1 file changed, 28 insertions(+), 18 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index 7b8171e3128a..5ff3a4f9173e 100644 --- a/tools/testing/selftests/vm/userfaultfd.c +++ b/tools/testing/selftests/vm/userfaultfd.c @@ -34,18 +34,6 @@ * per-CPU threads 1 by triggering userfaults inside * pthread_mutex_lock will also verify the atomicity of the memory * transfer (UFFDIO_COPY). - * - * The program takes two parameters: the amounts of physical memory in - * megabytes (MiB) of the area and the number of bounces to execute. - * - * # 100MiB 99999 bounces - * ./userfaultfd 100 99999 - * - * # 1GiB 99 bounces - * ./userfaultfd 1000 99 - * - * # 10MiB-~6GiB 999 bounces, continue forever unless an error triggers - * while ./userfaultfd $[RANDOM % 6000 + 10] 999; do true; done */ #define _GNU_SOURCE @@ -115,6 +103,30 @@ pthread_attr_t attr; ~(unsigned long)(sizeof(unsigned long long) \ - 1))) +const char *examples = + "# Run anonymous memory test on 100MiB region with 99999 bounces:\n" + "./userfaultfd anon 100 99999\n\n" + "# Run share memory test on 1GiB region with 99 bounces:\n" + "./userfaultfd shmem 1000 99\n\n" + "# Run hugetlb memory test on 256MiB region with 50 bounces (using /dev/hugepages/hugefile):\n" + "./userfaultfd hugetlb 256 50 /dev/hugepages/hugefile\n\n" + "# Run the same hugetlb test but using shmem:\n" + "./userfaultfd hugetlb_shared 256 50 /dev/hugepages/hugefile\n\n" + "# 10MiB-~6GiB 999 bounces anonymous test, " + "continue forever unless an error triggers\n" + "while ./userfaultfd anon $[RANDOM % 6000 + 10] 999; do true; done\n\n"; + +static void usage(void) +{ + fprintf(stderr, "\nUsage: ./userfaultfd " + "[hugetlbfs_file]\n\n"); + fprintf(stderr, "Supported : anon, hugetlb, " + "hugetlb_shared, shmem\n\n"); + fprintf(stderr, "Examples:\n\n"); + fprintf(stderr, examples); + exit(1); +} + static int anon_release_pages(char *rel_area) { int ret = 0; @@ -1272,8 +1284,7 @@ static void sigalrm(int sig) int main(int argc, char **argv) { if (argc < 4) - fprintf(stderr, "Usage: [hugetlbfs_file]\n"), - exit(1); + usage(); if (signal(SIGALRM, sigalrm) == SIG_ERR) fprintf(stderr, "failed to arm SIGALRM"), exit(1); @@ -1286,20 +1297,19 @@ int main(int argc, char **argv) nr_cpus; if (!nr_pages_per_cpu) { fprintf(stderr, "invalid MiB\n"); - fprintf(stderr, "Usage: \n"), exit(1); + usage(); } bounces = atoi(argv[3]); if (bounces <= 0) { fprintf(stderr, "invalid bounces\n"); - fprintf(stderr, "Usage: \n"), exit(1); + usage(); } nr_pages = nr_pages_per_cpu * nr_cpus; if (test_type == TEST_HUGETLB) { if (argc < 5) - fprintf(stderr, "Usage: hugetlb \n"), - exit(1); + usage(); huge_fd = open(argv[4], O_CREAT | O_RDWR, 0755); if (huge_fd < 0) { fprintf(stderr, "Open of %s failed", argv[3]); -- cgit v1.2.3-58-ga151 From 04d877319e2aa3895cc7998adb32de0967d3927b Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Fri, 26 Oct 2018 15:09:13 -0700 Subject: userfaultfd: selftest: generalize read and poll We do very similar things in read and poll modes, but we're copying the codes around. Share the codes properly on reading the message and handling the page fault to make the code cleaner. Meanwhile this solves previous mismatch of behaviors between the two modes on that the old code: - did not check EAGAIN case in read() mode - ignored BOUNCE_VERIFY check in read() mode Link: http://lkml.kernel.org/r/20180930074259.18229-3-peterx@redhat.com Signed-off-by: Peter Xu Acked-by: Mike Rapoport Cc: Shuah Khan Cc: Mike Kravetz Cc: Jerome Glisse Cc: Zi Yan Cc: "Kirill A . Shutemov" Cc: Shaohua Li Cc: Andrea Arcangeli Cc: "Dr . David Alan Gilbert" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/vm/userfaultfd.c | 77 ++++++++++++++++++-------------- 1 file changed, 43 insertions(+), 34 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index 5ff3a4f9173e..7a8c6937cc67 100644 --- a/tools/testing/selftests/vm/userfaultfd.c +++ b/tools/testing/selftests/vm/userfaultfd.c @@ -451,6 +451,43 @@ static int copy_page(int ufd, unsigned long offset) return __copy_page(ufd, offset, false); } +static int uffd_read_msg(int ufd, struct uffd_msg *msg) +{ + int ret = read(uffd, msg, sizeof(*msg)); + + if (ret != sizeof(*msg)) { + if (ret < 0) { + if (errno == EAGAIN) + return 1; + else + perror("blocking read error"), exit(1); + } else { + fprintf(stderr, "short read\n"), exit(1); + } + } + + return 0; +} + +/* Return 1 if page fault handled by us; otherwise 0 */ +static int uffd_handle_page_fault(struct uffd_msg *msg) +{ + unsigned long offset; + + if (msg->event != UFFD_EVENT_PAGEFAULT) + fprintf(stderr, "unexpected msg event %u\n", + msg->event), exit(1); + + if (bounces & BOUNCE_VERIFY && + msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE) + fprintf(stderr, "unexpected write fault\n"), exit(1); + + offset = (char *)(unsigned long)msg->arg.pagefault.address - area_dst; + offset &= ~(page_size-1); + + return copy_page(uffd, offset); +} + static void *uffd_poll_thread(void *arg) { unsigned long cpu = (unsigned long) arg; @@ -458,7 +495,6 @@ static void *uffd_poll_thread(void *arg) struct uffd_msg msg; struct uffdio_register uffd_reg; int ret; - unsigned long offset; char tmp_chr; unsigned long userfaults = 0; @@ -482,25 +518,15 @@ static void *uffd_poll_thread(void *arg) if (!(pollfd[0].revents & POLLIN)) fprintf(stderr, "pollfd[0].revents %d\n", pollfd[0].revents), exit(1); - ret = read(uffd, &msg, sizeof(msg)); - if (ret < 0) { - if (errno == EAGAIN) - continue; - perror("nonblocking read error"), exit(1); - } + if (uffd_read_msg(uffd, &msg)) + continue; switch (msg.event) { default: fprintf(stderr, "unexpected msg event %u\n", msg.event), exit(1); break; case UFFD_EVENT_PAGEFAULT: - if (msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE) - fprintf(stderr, "unexpected write fault\n"), exit(1); - offset = (char *)(unsigned long)msg.arg.pagefault.address - - area_dst; - offset &= ~(page_size-1); - if (copy_page(uffd, offset)) - userfaults++; + userfaults += uffd_handle_page_fault(&msg); break; case UFFD_EVENT_FORK: close(uffd); @@ -528,8 +554,6 @@ static void *uffd_read_thread(void *arg) { unsigned long *this_cpu_userfaults; struct uffd_msg msg; - unsigned long offset; - int ret; this_cpu_userfaults = (unsigned long *) arg; *this_cpu_userfaults = 0; @@ -538,24 +562,9 @@ static void *uffd_read_thread(void *arg) /* from here cancellation is ok */ for (;;) { - ret = read(uffd, &msg, sizeof(msg)); - if (ret != sizeof(msg)) { - if (ret < 0) - perror("blocking read error"), exit(1); - else - fprintf(stderr, "short read\n"), exit(1); - } - if (msg.event != UFFD_EVENT_PAGEFAULT) - fprintf(stderr, "unexpected msg event %u\n", - msg.event), exit(1); - if (bounces & BOUNCE_VERIFY && - msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE) - fprintf(stderr, "unexpected write fault\n"), exit(1); - offset = (char *)(unsigned long)msg.arg.pagefault.address - - area_dst; - offset &= ~(page_size-1); - if (copy_page(uffd, offset)) - (*this_cpu_userfaults)++; + if (uffd_read_msg(uffd, &msg)) + continue; + (*this_cpu_userfaults) += uffd_handle_page_fault(&msg); } return (void *)NULL; } -- cgit v1.2.3-58-ga151 From 7eaa8c969efa77127de9a05856eef9e5d22cf487 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Fri, 26 Oct 2018 15:09:17 -0700 Subject: userfaultfd: selftest: recycle lock threads first Now we recycle the uffd servicing threads earlier than the lock threads. It might happen that when the lock thread is still blocked at a pthread mutex lock while the servicing thread has already quitted for the cpu so the lock thread will be blocked forever and hang the test program. To fix the possible race, recycle the lock threads first. This never happens with current missing-only tests, but when I start to run the write-protection tests (the feature is not yet posted upstream) it happens every time of the run possibly because in that new test we'll need to service two page faults for each lock operation. Link: http://lkml.kernel.org/r/20180930074259.18229-4-peterx@redhat.com Signed-off-by: Peter Xu Acked-by: Mike Rapoport Cc: Shuah Khan Cc: Mike Kravetz Cc: Jerome Glisse Cc: Zi Yan Cc: "Kirill A . Shutemov" Cc: Shaohua Li Cc: Andrea Arcangeli Cc: "Dr . David Alan Gilbert" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/vm/userfaultfd.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index 7a8c6937cc67..5d1db824f73a 100644 --- a/tools/testing/selftests/vm/userfaultfd.c +++ b/tools/testing/selftests/vm/userfaultfd.c @@ -626,6 +626,12 @@ static int stress(unsigned long *userfaults) if (uffd_test_ops->release_pages(area_src)) return 1; + + finished = 1; + for (cpu = 0; cpu < nr_cpus; cpu++) + if (pthread_join(locking_threads[cpu], NULL)) + return 1; + for (cpu = 0; cpu < nr_cpus; cpu++) { char c; if (bounces & BOUNCE_POLL) { @@ -643,11 +649,6 @@ static int stress(unsigned long *userfaults) } } - finished = 1; - for (cpu = 0; cpu < nr_cpus; cpu++) - if (pthread_join(locking_threads[cpu], NULL)) - return 1; - return 0; } -- cgit v1.2.3-58-ga151 From 26db3d09d9e1963c9db77cb275bd2d36e56ef57a Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 26 Oct 2018 15:09:52 -0700 Subject: mm/gup_benchmark.c: time put_page() We'd like to measure time to unpin user pages, so this adds a second benchmark timer on put_page, separate from get_page. Adding the field breaks this ioctl ABI, but should be okay since this an in-tree kernel selftest. [akpm@linux-foundation.org: add expansion to struct gup_benchmark for future use] Link: http://lkml.kernel.org/r/20181010195605.10689-1-keith.busch@intel.com Signed-off-by: Keith Busch Acked-by: Kirill A. Shutemov Reviewed-by: Andrew Morton Cc: Dave Hansen Cc: Dan Williams Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/gup_benchmark.c | 9 +++++++-- tools/testing/selftests/vm/gup_benchmark.c | 6 ++++-- 2 files changed, 11 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/mm/gup_benchmark.c b/mm/gup_benchmark.c index 7405c9d89d65..e6aff102373b 100644 --- a/mm/gup_benchmark.c +++ b/mm/gup_benchmark.c @@ -8,11 +8,13 @@ #define GUP_FAST_BENCHMARK _IOWR('g', 1, struct gup_benchmark) struct gup_benchmark { - __u64 delta_usec; + __u64 get_delta_usec; + __u64 put_delta_usec; __u64 addr; __u64 size; __u32 nr_pages_per_call; __u32 flags; + __u64 expansion[10]; /* For future use */ }; static int __gup_benchmark_ioctl(unsigned int cmd, @@ -48,14 +50,17 @@ static int __gup_benchmark_ioctl(unsigned int cmd, } end_time = ktime_get(); - gup->delta_usec = ktime_us_delta(end_time, start_time); + gup->get_delta_usec = ktime_us_delta(end_time, start_time); gup->size = addr - gup->addr; + start_time = ktime_get(); for (i = 0; i < nr_pages; i++) { if (!pages[i]) break; put_page(pages[i]); } + end_time = ktime_get(); + gup->put_delta_usec = ktime_us_delta(end_time, start_time); kvfree(pages); return 0; diff --git a/tools/testing/selftests/vm/gup_benchmark.c b/tools/testing/selftests/vm/gup_benchmark.c index 36df55132036..bdcb97acd0ac 100644 --- a/tools/testing/selftests/vm/gup_benchmark.c +++ b/tools/testing/selftests/vm/gup_benchmark.c @@ -17,7 +17,8 @@ #define GUP_FAST_BENCHMARK _IOWR('g', 1, struct gup_benchmark) struct gup_benchmark { - __u64 delta_usec; + __u64 get_delta_usec; + __u64 put_delta_usec; __u64 addr; __u64 size; __u32 nr_pages_per_call; @@ -81,7 +82,8 @@ int main(int argc, char **argv) if (ioctl(fd, GUP_FAST_BENCHMARK, &gup)) perror("ioctl"), exit(1); - printf("Time: %lld us", gup.delta_usec); + printf("Time: get:%lld put:%lld us", gup.get_delta_usec, + gup.put_delta_usec); if (gup.size != size) printf(", truncated (size: %lld)", gup.size); printf("\n"); -- cgit v1.2.3-58-ga151 From 714a3a1ebafe6e23af55a5c694c308f4044a7e00 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 26 Oct 2018 15:09:56 -0700 Subject: mm/gup_benchmark.c: add additional pinning methods Provide new gup benchmark ioctl commands to run different user page pinning methods, get_user_pages_longterm() and get_user_pages(), in addition to the existing get_user_pages_fast(). Link: http://lkml.kernel.org/r/20181010195605.10689-2-keith.busch@intel.com Signed-off-by: Keith Busch Acked-by: Kirill A. Shutemov Reviewed-by: Andrew Morton Cc: Dave Hansen Cc: Dan Williams Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/gup_benchmark.c | 28 ++++++++++++++++++++++++++-- tools/testing/selftests/vm/gup_benchmark.c | 13 +++++++++++-- 2 files changed, 37 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/mm/gup_benchmark.c b/mm/gup_benchmark.c index e6aff102373b..debf11388a60 100644 --- a/mm/gup_benchmark.c +++ b/mm/gup_benchmark.c @@ -6,6 +6,8 @@ #include #define GUP_FAST_BENCHMARK _IOWR('g', 1, struct gup_benchmark) +#define GUP_LONGTERM_BENCHMARK _IOWR('g', 2, struct gup_benchmark) +#define GUP_BENCHMARK _IOWR('g', 3, struct gup_benchmark) struct gup_benchmark { __u64 get_delta_usec; @@ -43,7 +45,23 @@ static int __gup_benchmark_ioctl(unsigned int cmd, nr = (next - addr) / PAGE_SIZE; } - nr = get_user_pages_fast(addr, nr, gup->flags & 1, pages + i); + switch (cmd) { + case GUP_FAST_BENCHMARK: + nr = get_user_pages_fast(addr, nr, gup->flags & 1, + pages + i); + break; + case GUP_LONGTERM_BENCHMARK: + nr = get_user_pages_longterm(addr, nr, gup->flags & 1, + pages + i, NULL); + break; + case GUP_BENCHMARK: + nr = get_user_pages(addr, nr, gup->flags & 1, pages + i, + NULL); + break; + default: + return -1; + } + if (nr <= 0) break; i += nr; @@ -72,8 +90,14 @@ static long gup_benchmark_ioctl(struct file *filep, unsigned int cmd, struct gup_benchmark gup; int ret; - if (cmd != GUP_FAST_BENCHMARK) + switch (cmd) { + case GUP_FAST_BENCHMARK: + case GUP_LONGTERM_BENCHMARK: + case GUP_BENCHMARK: + break; + default: return -EINVAL; + } if (copy_from_user(&gup, (void __user *)arg, sizeof(gup))) return -EFAULT; diff --git a/tools/testing/selftests/vm/gup_benchmark.c b/tools/testing/selftests/vm/gup_benchmark.c index bdcb97acd0ac..c2f785ded9b9 100644 --- a/tools/testing/selftests/vm/gup_benchmark.c +++ b/tools/testing/selftests/vm/gup_benchmark.c @@ -15,6 +15,8 @@ #define PAGE_SIZE sysconf(_SC_PAGESIZE) #define GUP_FAST_BENCHMARK _IOWR('g', 1, struct gup_benchmark) +#define GUP_LONGTERM_BENCHMARK _IOWR('g', 2, struct gup_benchmark) +#define GUP_BENCHMARK _IOWR('g', 3, struct gup_benchmark) struct gup_benchmark { __u64 get_delta_usec; @@ -30,9 +32,10 @@ int main(int argc, char **argv) struct gup_benchmark gup; unsigned long size = 128 * MB; int i, fd, opt, nr_pages = 1, thp = -1, repeats = 1, write = 0; + int cmd = GUP_FAST_BENCHMARK; char *p; - while ((opt = getopt(argc, argv, "m:r:n:tT")) != -1) { + while ((opt = getopt(argc, argv, "m:r:n:tTLU")) != -1) { switch (opt) { case 'm': size = atoi(optarg) * MB; @@ -49,6 +52,12 @@ int main(int argc, char **argv) case 'T': thp = 0; break; + case 'L': + cmd = GUP_LONGTERM_BENCHMARK; + break; + case 'U': + cmd = GUP_BENCHMARK; + break; case 'w': write = 1; default: @@ -79,7 +88,7 @@ int main(int argc, char **argv) for (i = 0; i < repeats; i++) { gup.size = size; - if (ioctl(fd, GUP_FAST_BENCHMARK, &gup)) + if (ioctl(fd, cmd, &gup)) perror("ioctl"), exit(1); printf("Time: get:%lld put:%lld us", gup.get_delta_usec, -- cgit v1.2.3-58-ga151 From 319e0bec1aecb36c5ac6d23812af487ff2c8f47f Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 26 Oct 2018 15:09:59 -0700 Subject: tools/testing/selftests/vm/gup_benchmark.c: fix 'write' flag usage If the '-w' parameter was provided, the benchmark would exit due to a mssing 'break'. Link: http://lkml.kernel.org/r/20181010195605.10689-3-keith.busch@intel.com Signed-off-by: Keith Busch Acked-by: Kirill A. Shutemov Reviewed-by: Andrew Morton Cc: Dave Hansen Cc: Dan Williams Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/vm/gup_benchmark.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/testing/selftests/vm/gup_benchmark.c b/tools/testing/selftests/vm/gup_benchmark.c index c2f785ded9b9..b2082df8beb4 100644 --- a/tools/testing/selftests/vm/gup_benchmark.c +++ b/tools/testing/selftests/vm/gup_benchmark.c @@ -60,6 +60,7 @@ int main(int argc, char **argv) break; case 'w': write = 1; + break; default: return -1; } -- cgit v1.2.3-58-ga151 From aeb85ed4f41a8c0f5c4606d69f5da75e2348d984 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 26 Oct 2018 15:10:02 -0700 Subject: tools/testing/selftests/vm/gup_benchmark.c: allow user specified file Allow a user to specify a file to map by adding a new option, '-f', providing a means to test various file backings. If not specified, the benchmark will use a private mapping of /dev/zero, which produces an anonymous mapping as before. [akpm@linux-foundation.org: avoid using comma operator] Link: http://lkml.kernel.org/r/20181010195605.10689-4-keith.busch@intel.com Signed-off-by: Keith Busch Reviewed-by: Andrew Morton Cc: Kirill Shutemov Cc: Dave Hansen Cc: Dan Williams Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/vm/gup_benchmark.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/vm/gup_benchmark.c b/tools/testing/selftests/vm/gup_benchmark.c index b2082df8beb4..43a2d60e0e93 100644 --- a/tools/testing/selftests/vm/gup_benchmark.c +++ b/tools/testing/selftests/vm/gup_benchmark.c @@ -31,11 +31,12 @@ int main(int argc, char **argv) { struct gup_benchmark gup; unsigned long size = 128 * MB; - int i, fd, opt, nr_pages = 1, thp = -1, repeats = 1, write = 0; + int i, fd, filed, opt, nr_pages = 1, thp = -1, repeats = 1, write = 0; int cmd = GUP_FAST_BENCHMARK; + char *file = "/dev/zero"; char *p; - while ((opt = getopt(argc, argv, "m:r:n:tTLU")) != -1) { + while ((opt = getopt(argc, argv, "m:r:n:f:tTLU")) != -1) { switch (opt) { case 'm': size = atoi(optarg) * MB; @@ -61,11 +62,20 @@ int main(int argc, char **argv) case 'w': write = 1; break; + case 'f': + file = optarg; + break; default: return -1; } } + filed = open(file, O_RDWR|O_CREAT); + if (filed < 0) { + perror("open"); + exit(filed); + } + gup.nr_pages_per_call = nr_pages; gup.flags = write; @@ -73,8 +83,7 @@ int main(int argc, char **argv) if (fd == -1) perror("open"), exit(1); - p = mmap(NULL, size, PROT_READ | PROT_WRITE, - MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + p = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE, filed, 0); if (p == MAP_FAILED) perror("mmap"), exit(1); gup.addr = (unsigned long)p; -- cgit v1.2.3-58-ga151 From 0dd8666afb99c71d124e0c2abf1ad7d934a242a0 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 26 Oct 2018 15:10:08 -0700 Subject: tools/testing/selftests/vm/gup_benchmark.c: add MAP_SHARED option Add a new benchmark option, -S, to request MAP_SHARED. This can be used to compare with MAP_PRIVATE, or for files that require this option, like dax. Link: http://lkml.kernel.org/r/20181010195605.10689-5-keith.busch@intel.com Signed-off-by: Keith Busch Reviewed-by: Andrew Morton Cc: Kirill Shutemov Cc: Dave Hansen Cc: Dan Williams Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/vm/gup_benchmark.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/vm/gup_benchmark.c b/tools/testing/selftests/vm/gup_benchmark.c index 43a2d60e0e93..5c8e4cb1441a 100644 --- a/tools/testing/selftests/vm/gup_benchmark.c +++ b/tools/testing/selftests/vm/gup_benchmark.c @@ -32,11 +32,11 @@ int main(int argc, char **argv) struct gup_benchmark gup; unsigned long size = 128 * MB; int i, fd, filed, opt, nr_pages = 1, thp = -1, repeats = 1, write = 0; - int cmd = GUP_FAST_BENCHMARK; + int cmd = GUP_FAST_BENCHMARK, flags = MAP_PRIVATE; char *file = "/dev/zero"; char *p; - while ((opt = getopt(argc, argv, "m:r:n:f:tTLU")) != -1) { + while ((opt = getopt(argc, argv, "m:r:n:f:tTLUS")) != -1) { switch (opt) { case 'm': size = atoi(optarg) * MB; @@ -65,6 +65,10 @@ int main(int argc, char **argv) case 'f': file = optarg; break; + case 'S': + flags &= ~MAP_PRIVATE; + flags |= MAP_SHARED; + break; default: return -1; } @@ -83,7 +87,7 @@ int main(int argc, char **argv) if (fd == -1) perror("open"), exit(1); - p = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE, filed, 0); + p = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, filed, 0); if (p == MAP_FAILED) perror("mmap"), exit(1); gup.addr = (unsigned long)p; -- cgit v1.2.3-58-ga151 From 3821b76c3cdb5f2c5ef1b082de79829e8ff50a7d Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 26 Oct 2018 15:10:12 -0700 Subject: tools/testing/selftests/vm/gup_benchmark.c: add MAP_HUGETLB option Add a new option '-H' to the gup benchmark to help understand how hugetlb mapping pages compare with the default. Link: http://lkml.kernel.org/r/20181010195605.10689-6-keith.busch@intel.com Signed-off-by: Keith Busch Reviewed-by: Andrew Morton Cc: Kirill Shutemov Cc: Dave Hansen Cc: Dan Williams Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/vm/gup_benchmark.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/vm/gup_benchmark.c b/tools/testing/selftests/vm/gup_benchmark.c index 5c8e4cb1441a..880b96fc80d4 100644 --- a/tools/testing/selftests/vm/gup_benchmark.c +++ b/tools/testing/selftests/vm/gup_benchmark.c @@ -36,7 +36,7 @@ int main(int argc, char **argv) char *file = "/dev/zero"; char *p; - while ((opt = getopt(argc, argv, "m:r:n:f:tTLUS")) != -1) { + while ((opt = getopt(argc, argv, "m:r:n:f:tTLUSH")) != -1) { switch (opt) { case 'm': size = atoi(optarg) * MB; @@ -69,6 +69,9 @@ int main(int argc, char **argv) flags &= ~MAP_PRIVATE; flags |= MAP_SHARED; break; + case 'H': + flags |= MAP_HUGETLB; + break; default: return -1; } -- cgit v1.2.3-58-ga151 From 91cbacc34512e37c9bb89125ca4b224ca6459245 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 26 Oct 2018 15:10:48 -0700 Subject: tools/testing/selftests/vm/map_fixed_noreplace.c: add test for MAP_FIXED_NOREPLACE Add a test for MAP_FIXED_NOREPLACE, based on some code originally by Jann Horn. This would have caught the overlap bug reported by Daniel Micay. I originally suggested to Michal that we create MAP_FIXED_NOREPLACE, but instead of writing a selftest I spent my time bike-shedding whether it should be called MAP_FIXED_SAFE/NOCLOBBER/WEAK/NEW .. mea culpa. Link: http://lkml.kernel.org/r/20181013133929.28653-1-mpe@ellerman.id.au Signed-off-by: Michael Ellerman Reviewed-by: Kees Cook Reviewed-by: Khalid Aziz Acked-by: Michal Hocko Cc: Jann Horn Cc: Andrea Arcangeli Cc: Florian Weimer Cc: John Hubbard Cc: Matthew Wilcox Cc: Abdul Haleem Cc: Joel Stanley Cc: Jason Evans Cc: David Goldblatt Cc: Daniel Micay Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/vm/.gitignore | 1 + tools/testing/selftests/vm/Makefile | 1 + tools/testing/selftests/vm/map_fixed_noreplace.c | 206 +++++++++++++++++++++++ 3 files changed, 208 insertions(+) create mode 100644 tools/testing/selftests/vm/map_fixed_noreplace.c (limited to 'tools') diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore index af5ff83f6d7f..31b3c98b6d34 100644 --- a/tools/testing/selftests/vm/.gitignore +++ b/tools/testing/selftests/vm/.gitignore @@ -13,3 +13,4 @@ mlock-random-test virtual_address_range gup_benchmark va_128TBswitch +map_fixed_noreplace diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index e94b7b14bcb2..6e67e726e5a5 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -12,6 +12,7 @@ TEST_GEN_FILES += gup_benchmark TEST_GEN_FILES += hugepage-mmap TEST_GEN_FILES += hugepage-shm TEST_GEN_FILES += map_hugetlb +TEST_GEN_FILES += map_fixed_noreplace TEST_GEN_FILES += map_populate TEST_GEN_FILES += mlock-random-test TEST_GEN_FILES += mlock2-tests diff --git a/tools/testing/selftests/vm/map_fixed_noreplace.c b/tools/testing/selftests/vm/map_fixed_noreplace.c new file mode 100644 index 000000000000..d91bde511268 --- /dev/null +++ b/tools/testing/selftests/vm/map_fixed_noreplace.c @@ -0,0 +1,206 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Test that MAP_FIXED_NOREPLACE works. + * + * Copyright 2018, Jann Horn + * Copyright 2018, Michael Ellerman, IBM Corporation. + */ + +#include +#include +#include +#include +#include + +#ifndef MAP_FIXED_NOREPLACE +#define MAP_FIXED_NOREPLACE 0x100000 +#endif + +#define BASE_ADDRESS (256ul * 1024 * 1024) + + +static void dump_maps(void) +{ + char cmd[32]; + + snprintf(cmd, sizeof(cmd), "cat /proc/%d/maps", getpid()); + system(cmd); +} + +int main(void) +{ + unsigned long flags, addr, size, page_size; + char *p; + + page_size = sysconf(_SC_PAGE_SIZE); + + flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE; + + // Check we can map all the areas we need below + errno = 0; + addr = BASE_ADDRESS; + size = 5 * page_size; + p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0); + + printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + + if (p == MAP_FAILED) { + dump_maps(); + printf("Error: couldn't map the space we need for the test\n"); + return 1; + } + + errno = 0; + if (munmap((void *)addr, 5 * page_size) != 0) { + dump_maps(); + printf("Error: munmap failed!?\n"); + return 1; + } + printf("unmap() successful\n"); + + errno = 0; + addr = BASE_ADDRESS + page_size; + size = 3 * page_size; + p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0); + printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + + if (p == MAP_FAILED) { + dump_maps(); + printf("Error: first mmap() failed unexpectedly\n"); + return 1; + } + + /* + * Exact same mapping again: + * base | free | new + * +1 | mapped | new + * +2 | mapped | new + * +3 | mapped | new + * +4 | free | new + */ + errno = 0; + addr = BASE_ADDRESS; + size = 5 * page_size; + p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0); + printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + + if (p != MAP_FAILED) { + dump_maps(); + printf("Error:1: mmap() succeeded when it shouldn't have\n"); + return 1; + } + + /* + * Second mapping contained within first: + * + * base | free | + * +1 | mapped | + * +2 | mapped | new + * +3 | mapped | + * +4 | free | + */ + errno = 0; + addr = BASE_ADDRESS + (2 * page_size); + size = page_size; + p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0); + printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + + if (p != MAP_FAILED) { + dump_maps(); + printf("Error:2: mmap() succeeded when it shouldn't have\n"); + return 1; + } + + /* + * Overlap end of existing mapping: + * base | free | + * +1 | mapped | + * +2 | mapped | + * +3 | mapped | new + * +4 | free | new + */ + errno = 0; + addr = BASE_ADDRESS + (3 * page_size); + size = 2 * page_size; + p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0); + printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + + if (p != MAP_FAILED) { + dump_maps(); + printf("Error:3: mmap() succeeded when it shouldn't have\n"); + return 1; + } + + /* + * Overlap start of existing mapping: + * base | free | new + * +1 | mapped | new + * +2 | mapped | + * +3 | mapped | + * +4 | free | + */ + errno = 0; + addr = BASE_ADDRESS; + size = 2 * page_size; + p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0); + printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + + if (p != MAP_FAILED) { + dump_maps(); + printf("Error:4: mmap() succeeded when it shouldn't have\n"); + return 1; + } + + /* + * Adjacent to start of existing mapping: + * base | free | new + * +1 | mapped | + * +2 | mapped | + * +3 | mapped | + * +4 | free | + */ + errno = 0; + addr = BASE_ADDRESS; + size = page_size; + p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0); + printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + + if (p == MAP_FAILED) { + dump_maps(); + printf("Error:5: mmap() failed when it shouldn't have\n"); + return 1; + } + + /* + * Adjacent to end of existing mapping: + * base | free | + * +1 | mapped | + * +2 | mapped | + * +3 | mapped | + * +4 | free | new + */ + errno = 0; + addr = BASE_ADDRESS + (4 * page_size); + size = page_size; + p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0); + printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + + if (p == MAP_FAILED) { + dump_maps(); + printf("Error:6: mmap() failed when it shouldn't have\n"); + return 1; + } + + addr = BASE_ADDRESS; + size = 5 * page_size; + if (munmap((void *)addr, size) != 0) { + dump_maps(); + printf("Error: munmap failed!?\n"); + return 1; + } + printf("unmap() successful\n"); + + printf("OK\n"); + return 0; +} -- cgit v1.2.3-58-ga151