From 7ee36a14f06cc937f6b2c2932c2e48f590970581 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@techsingularity.net>
Date: Thu, 28 Jul 2016 15:47:17 -0700
Subject: mm, vmscan: Update all zone LRU sizes before updating memcg

Minchan Kim reported hitting the following warning on a 32-bit system,
although it can affect 64-bit systems as well.

  WARNING: CPU: 4 PID: 1322 at mm/memcontrol.c:998 mem_cgroup_update_lru_size+0x103/0x110
  mem_cgroup_update_lru_size(f44b4000, 1, -7): zid 1 lru_size 1 but empty
  Modules linked in:
  CPU: 4 PID: 1322 Comm: cp Not tainted 4.7.0-rc4-mm1+ #143
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
  Call Trace:
   dump_stack+0x76/0xaf
   __warn+0xea/0x110
   ? mem_cgroup_update_lru_size+0x103/0x110
   warn_slowpath_fmt+0x3b/0x40
   mem_cgroup_update_lru_size+0x103/0x110
   isolate_lru_pages.isra.61+0x2e2/0x360
   shrink_active_list+0xac/0x2a0
   ? __delay+0xe/0x10
   shrink_node_memcg+0x53c/0x7a0
   shrink_node+0xab/0x2a0
   do_try_to_free_pages+0xc6/0x390
   try_to_free_pages+0x245/0x590

LRU list contents and counts are updated separately: counts are updated
before pages are added to the LRU and after pages are removed.  The
warning above comes from a check in mem_cgroup_update_lru_size that
ensures a list with an accounted size of zero is really empty.

The problem is that node-lru needs to account for highmem pages
separately if CONFIG_HIGHMEM is set.  One impact of the implementation
is that the sizes are updated in multiple passes when pages are
isolated from multiple zones, and this happens whether HIGHMEM is set
or not.  When multiple zones are isolated, the debugging check in memcg
can be tripped between passes.  This patch forces all of the zone
counts to be updated before the memcg function is called.

Link: http://lkml.kernel.org/r/1468588165-12461-6-git-send-email-mgorman@techsingularity.net
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Tested-by: Minchan Kim <minchan@kernel.org>
Reported-by: Minchan Kim <minchan@kernel.org>
Acked-by: Minchan Kim <minchan@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/vmscan.c | 40 +++++++++++++++++++++++++++++++++-------
 1 file changed, 33 insertions(+), 7 deletions(-)

diff --git a/mm/vmscan.c b/mm/vmscan.c
index 5625eccc0140..b3f5b359280d 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1350,6 +1350,38 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode)
 	return ret;
 }
 
+
+/*
+ * Update LRU sizes after isolating pages. The LRU size updates must
+ * be complete before mem_cgroup_update_lru_size due to a sanity check.
+ */
+static __always_inline void update_lru_sizes(struct lruvec *lruvec,
+			enum lru_list lru, unsigned long *nr_zone_taken,
+			unsigned long nr_taken)
+{
+#ifdef CONFIG_HIGHMEM
+	int zid;
+
+	/*
+	 * Highmem has separate accounting for highmem pages so each zone
+	 * is updated separately.
+	 */
+	for (zid = 0; zid < MAX_NR_ZONES; zid++) {
+		if (!nr_zone_taken[zid])
+			continue;
+
+		__update_lru_size(lruvec, lru, zid, -nr_zone_taken[zid]);
+	}
+#else
+	/* Zone ID does not matter on !HIGHMEM */
+	__update_lru_size(lruvec, lru, 0, -nr_taken);
+#endif
+
+#ifdef CONFIG_MEMCG
+	mem_cgroup_update_lru_size(lruvec, lru, -nr_taken);
+#endif
+}
+
 /*
  * zone_lru_lock is heavily contended.  Some of the functions that
  * shrink the lists perform better by taking out a batch of pages
@@ -1436,13 +1468,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 	*nr_scanned = scan;
 	trace_mm_vmscan_lru_isolate(sc->reclaim_idx, sc->order, nr_to_scan, scan,
 				    nr_taken, mode, is_file_lru(lru));
-	for (scan = 0; scan < MAX_NR_ZONES; scan++) {
-		nr_pages = nr_zone_taken[scan];
-		if (!nr_pages)
-			continue;
-
-		update_lru_size(lruvec, lru, scan, -nr_pages);
-	}
+	update_lru_sizes(lruvec, lru, nr_zone_taken, nr_taken);
 
 	return nr_taken;
 }
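
Note: the ordering the patch enforces can be illustrated with a small
userspace model.  This is a hypothetical sketch, not kernel code; the
names (zone_size, memcg_size, list_len, memcg_update) are invented
stand-ins for the per-zone counters, the memcg-side count, and the
sanity check in mem_cgroup_update_lru_size.  Updating the memcg count
once per zone lets the check observe an intermediate state (non-zero
size, empty list); completing every zone update first, as
update_lru_sizes does, means the check only ever sees the final,
consistent state.

/*
 * Hypothetical userspace model of the accounting problem fixed above.
 * All names here are invented for illustration; none of this is kernel API.
 */
#include <stdio.h>

#define NR_ZONES 2

static long zone_size[NR_ZONES];	/* per-zone LRU size counters */
static long memcg_size;			/* memcg's view of the whole LRU */
static long list_len;			/* pages actually on the LRU list */

/* Stand-in for the sanity check in mem_cgroup_update_lru_size(). */
static void memcg_update(long nr)
{
	memcg_size += nr;
	if (memcg_size && !list_len)
		printf("WARNING: lru_size %ld but empty\n", memcg_size);
}

static void reset(void)
{
	zone_size[0] = 3;
	zone_size[1] = 4;
	memcg_size = 7;
	list_len = 0;	/* pages were already unlinked at isolation time */
}

int main(void)
{
	long nr_zone_taken[NR_ZONES] = { 3, 4 };
	long nr_taken = 7;
	int zid;

	/*
	 * Old scheme: one pass per zone, with the memcg count (and its
	 * check) updated on every pass.  After the first pass the memcg
	 * size is still 4 while the list is already empty -> warning.
	 */
	reset();
	for (zid = 0; zid < NR_ZONES; zid++) {
		zone_size[zid] -= nr_zone_taken[zid];
		memcg_update(-nr_zone_taken[zid]);
	}

	/*
	 * New scheme (as in update_lru_sizes): complete every per-zone
	 * update first, then tell memcg about the whole batch once.
	 */
	reset();
	for (zid = 0; zid < NR_ZONES; zid++)
		zone_size[zid] -= nr_zone_taken[zid];
	memcg_update(-nr_taken);

	return 0;
}

Compiled and run, the first loop prints the warning once and the second
stays silent, mirroring why the warning only fired when a single
isolation pass spanned more than one zone.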