Diffstat (limited to 'mm/vmscan.c')
-rw-r--r--  mm/vmscan.c  164
1 file changed, 62 insertions(+), 102 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 142cb61f4822..c4a2f4512fca 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -191,7 +191,7 @@ static bool sane_reclaim(struct scan_control *sc)
}
#endif
-static unsigned long zone_reclaimable_pages(struct zone *zone)
+unsigned long zone_reclaimable_pages(struct zone *zone)
{
unsigned long nr;
@@ -633,7 +633,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
*
* Reversing the order of the tests ensures such a situation cannot
* escape unnoticed. The smp_rmb is needed to ensure the page->flags
- * load is not satisfied before that of page->_count.
+ * load is not satisfied before that of page->_refcount.
*
* Note that if SetPageDirty is always performed via set_page_dirty,
* and thus under tree_lock, then this ordering is not required.
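The ordering requirement described in that comment can be pictured with a small userspace analogy (not kernel code, and not part of this patch): load the reference count first, then the flag, with a read barrier in between so a weakly ordered CPU cannot satisfy the flag load early.

/*
 * Userspace analogy of the rule above: the refcount load must be
 * satisfied before the dirty-flag load, hence the fence (the
 * smp_rmb() the kernel comment refers to).
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct fake_page {
	atomic_int refcount;	/* stands in for page->_refcount */
	atomic_bool dirty;	/* stands in for PG_dirty in page->flags */
};

static bool looks_freeable(struct fake_page *p)
{
	/* 1. Check that nobody else holds a reference ... */
	if (atomic_load_explicit(&p->refcount, memory_order_relaxed) != 2)
		return false;
	/* 2. ... then the read barrier ... */
	atomic_thread_fence(memory_order_acquire);
	/* 3. ... and only then inspect the dirty flag. */
	return !atomic_load_explicit(&p->dirty, memory_order_relaxed);
}

int main(void)
{
	struct fake_page p;

	atomic_init(&p.refcount, 2);
	atomic_init(&p.dirty, false);
	printf("freeable: %d\n", looks_freeable(&p));
	return 0;
}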
@@ -1374,7 +1374,6 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
for (scan = 0; scan < nr_to_scan && nr_taken < nr_to_scan &&
!list_empty(src); scan++) {
struct page *page;
- int nr_pages;
page = lru_to_page(src);
prefetchw_prev_lru_page(page, src, flags);
@@ -1383,10 +1382,8 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
switch (__isolate_lru_page(page, mode)) {
case 0:
- nr_pages = hpage_nr_pages(page);
- mem_cgroup_update_lru_size(lruvec, lru, -nr_pages);
+ nr_taken += hpage_nr_pages(page);
list_move(&page->lru, dst);
- nr_taken += nr_pages;
break;
case -EBUSY:
@@ -1602,8 +1599,9 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
nr_taken = isolate_lru_pages(nr_to_scan, lruvec, &page_list,
&nr_scanned, sc, isolate_mode, lru);
- __mod_zone_page_state(zone, NR_LRU_BASE + lru, -nr_taken);
+ update_lru_size(lruvec, lru, -nr_taken);
__mod_zone_page_state(zone, NR_ISOLATED_ANON + file, nr_taken);
+ reclaim_stat->recent_scanned[file] += nr_taken;
if (global_reclaim(sc)) {
__mod_zone_page_state(zone, NR_PAGES_SCANNED, nr_scanned);
@@ -1624,8 +1622,6 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
spin_lock_irq(&zone->lru_lock);
- reclaim_stat->recent_scanned[file] += nr_taken;
-
if (global_reclaim(sc)) {
if (current_is_kswapd())
__count_zone_vm_events(PGSTEAL_KSWAPD, zone,
@@ -1720,7 +1716,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
* It is safe to rely on PG_active against the non-LRU pages in here because
* nobody will play with that bit on a non-LRU page.
*
- * The downside is that we have to touch page->_count against each page.
+ * The downside is that we have to touch page->_refcount against each page.
* But we had to alter page->flags anyway.
*/
@@ -1742,7 +1738,7 @@ static void move_active_pages_to_lru(struct lruvec *lruvec,
SetPageLRU(page);
nr_pages = hpage_nr_pages(page);
- mem_cgroup_update_lru_size(lruvec, lru, nr_pages);
+ update_lru_size(lruvec, lru, nr_pages);
list_move(&page->lru, &lruvec->lists[lru]);
pgmoved += nr_pages;
@@ -1760,7 +1756,7 @@ static void move_active_pages_to_lru(struct lruvec *lruvec,
list_add(&page->lru, pages_to_free);
}
}
- __mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved);
+
if (!is_active_lru(lru))
__count_vm_events(PGDEACTIVATE, pgmoved);
}
@@ -1794,14 +1790,15 @@ static void shrink_active_list(unsigned long nr_to_scan,
nr_taken = isolate_lru_pages(nr_to_scan, lruvec, &l_hold,
&nr_scanned, sc, isolate_mode, lru);
- if (global_reclaim(sc))
- __mod_zone_page_state(zone, NR_PAGES_SCANNED, nr_scanned);
+ update_lru_size(lruvec, lru, -nr_taken);
+ __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, nr_taken);
reclaim_stat->recent_scanned[file] += nr_taken;
+ if (global_reclaim(sc))
+ __mod_zone_page_state(zone, NR_PAGES_SCANNED, nr_scanned);
__count_zone_vm_events(PGREFILL, zone, nr_scanned);
- __mod_zone_page_state(zone, NR_LRU_BASE + lru, -nr_taken);
- __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, nr_taken);
+
spin_unlock_irq(&zone->lru_lock);
while (!list_empty(&l_hold)) {
@@ -1865,83 +1862,63 @@ static void shrink_active_list(unsigned long nr_to_scan,
free_hot_cold_page_list(&l_hold, true);
}
-#ifdef CONFIG_SWAP
-static bool inactive_anon_is_low_global(struct zone *zone)
-{
- unsigned long active, inactive;
-
- active = zone_page_state(zone, NR_ACTIVE_ANON);
- inactive = zone_page_state(zone, NR_INACTIVE_ANON);
-
- return inactive * zone->inactive_ratio < active;
-}
-
-/**
- * inactive_anon_is_low - check if anonymous pages need to be deactivated
- * @lruvec: LRU vector to check
+/*
+ * The inactive anon list should be small enough that the VM never has
+ * to do too much work.
*
- * Returns true if the zone does not have enough inactive anon pages,
- * meaning some active anon pages need to be deactivated.
- */
-static bool inactive_anon_is_low(struct lruvec *lruvec)
-{
- /*
- * If we don't have swap space, anonymous page deactivation
- * is pointless.
- */
- if (!total_swap_pages)
- return false;
-
- if (!mem_cgroup_disabled())
- return mem_cgroup_inactive_anon_is_low(lruvec);
-
- return inactive_anon_is_low_global(lruvec_zone(lruvec));
-}
-#else
-static inline bool inactive_anon_is_low(struct lruvec *lruvec)
-{
- return false;
-}
-#endif
-
-/**
- * inactive_file_is_low - check if file pages need to be deactivated
- * @lruvec: LRU vector to check
+ * The inactive file list should be small enough to leave most memory
+ * to the established workingset on the scan-resistant active list,
+ * but large enough to avoid thrashing the aggregate readahead window.
*
- * When the system is doing streaming IO, memory pressure here
- * ensures that active file pages get deactivated, until more
- * than half of the file pages are on the inactive list.
+ * Both inactive lists should also be large enough that each inactive
+ * page has a chance to be referenced again before it is reclaimed.
*
- * Once we get to that situation, protect the system's working
- * set from being evicted by disabling active file page aging.
+ * The inactive_ratio is the target ratio of ACTIVE to INACTIVE pages
+ * on this LRU, maintained by the pageout code. A zone->inactive_ratio
+ * of 3 means 3:1 or 25% of the pages are kept on the inactive list.
*
- * This uses a different ratio than the anonymous pages, because
- * the page cache uses a use-once replacement algorithm.
+ * total target max
+ * memory ratio inactive
+ * -------------------------------------
+ * 10MB 1 5MB
+ * 100MB 1 50MB
+ * 1GB 3 250MB
+ * 10GB 10 0.9GB
+ * 100GB 31 3GB
+ * 1TB 101 10GB
+ * 10TB 320 32GB
*/
-static bool inactive_file_is_low(struct lruvec *lruvec)
+static bool inactive_list_is_low(struct lruvec *lruvec, bool file)
{
+ unsigned long inactive_ratio;
unsigned long inactive;
unsigned long active;
+ unsigned long gb;
- inactive = lruvec_lru_size(lruvec, LRU_INACTIVE_FILE);
- active = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE);
+ /*
+ * If we don't have swap space, anonymous page deactivation
+ * is pointless.
+ */
+ if (!file && !total_swap_pages)
+ return false;
- return active > inactive;
-}
+ inactive = lruvec_lru_size(lruvec, file * LRU_FILE);
+ active = lruvec_lru_size(lruvec, file * LRU_FILE + LRU_ACTIVE);
-static bool inactive_list_is_low(struct lruvec *lruvec, enum lru_list lru)
-{
- if (is_file_lru(lru))
- return inactive_file_is_low(lruvec);
+ gb = (inactive + active) >> (30 - PAGE_SHIFT);
+ if (gb)
+ inactive_ratio = int_sqrt(10 * gb);
else
- return inactive_anon_is_low(lruvec);
+ inactive_ratio = 1;
+
+ return inactive * inactive_ratio < active;
}
static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
struct lruvec *lruvec, struct scan_control *sc)
{
if (is_active_lru(lru)) {
- if (inactive_list_is_low(lruvec, lru))
+ if (inactive_list_is_low(lruvec, is_file_lru(lru)))
shrink_active_list(nr_to_scan, lruvec, sc, lru);
return 0;
}
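The table in the comment above follows directly from the ratio the new inactive_list_is_low() computes: int_sqrt(10 * gb) with a floor of 1, which caps the inactive share at 1/(ratio + 1) of the combined list. A standalone userspace sketch (not part of the patch; the kernel's int_sqrt() is approximated with sqrt(), and the sizes are just the ones from the table) reproduces those numbers:

#include <math.h>
#include <stdio.h>

/* Integer square root, standing in for the kernel's int_sqrt(). */
static unsigned long int_sqrt_approx(unsigned long x)
{
	return (unsigned long)sqrt((double)x);
}

int main(void)
{
	/* Total memory sizes from the table above, in megabytes. */
	unsigned long sizes_mb[] = {
		10, 100, 1024, 10240, 102400, 1048576, 10485760
	};

	for (unsigned i = 0; i < sizeof(sizes_mb) / sizeof(sizes_mb[0]); i++) {
		unsigned long gb = sizes_mb[i] >> 10;		/* MB -> GB */
		unsigned long ratio = gb ? int_sqrt_approx(10 * gb) : 1;

		/* With ratio:1 active:inactive, at most 1/(ratio+1) is inactive. */
		printf("%9luMB  ratio %3lu  max inactive %luMB\n",
		       sizes_mb[i], ratio, sizes_mb[i] / (ratio + 1));
	}
	return 0;
}

The inactive share thus falls off roughly with the square root of memory size, rather than staying fixed at half of the list as the removed inactive_file_is_low() check (active > inactive) implied.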
@@ -2062,7 +2039,7 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg,
* lruvec even if it has plenty of old anonymous pages unless the
* system is under heavy pressure.
*/
- if (!inactive_file_is_low(lruvec) &&
+ if (!inactive_list_is_low(lruvec, true) &&
lruvec_lru_size(lruvec, LRU_INACTIVE_FILE) >> sc->priority) {
scan_balance = SCAN_FILE;
goto out;
@@ -2304,7 +2281,7 @@ static void shrink_zone_memcg(struct zone *zone, struct mem_cgroup *memcg,
* Even if we did not try to evict anon pages at all, we want to
* rebalance the anon lru active/inactive ratio.
*/
- if (inactive_anon_is_low(lruvec))
+ if (inactive_list_is_low(lruvec, false))
shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
sc, LRU_ACTIVE_ANON);
@@ -2482,7 +2459,7 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc,
* Returns true if compaction should go ahead for a high-order request, or
* the high-order allocation would succeed without compaction.
*/
-static inline bool compaction_ready(struct zone *zone, int order)
+static inline bool compaction_ready(struct zone *zone, int order, int classzone_idx)
{
unsigned long balance_gap, watermark;
bool watermark_ok;
@@ -2496,7 +2473,7 @@ static inline bool compaction_ready(struct zone *zone, int order)
balance_gap = min(low_wmark_pages(zone), DIV_ROUND_UP(
zone->managed_pages, KSWAPD_ZONE_BALANCE_GAP_RATIO));
watermark = high_wmark_pages(zone) + balance_gap + (2UL << order);
- watermark_ok = zone_watermark_ok_safe(zone, 0, watermark, 0);
+ watermark_ok = zone_watermark_ok_safe(zone, 0, watermark, classzone_idx);
/*
* If compaction is deferred, reclaim up to a point where
@@ -2509,7 +2486,7 @@ static inline bool compaction_ready(struct zone *zone, int order)
* If compaction is not ready to start and allocation is not likely
* to succeed without it, then keep reclaiming.
*/
- if (compaction_suitable(zone, order, 0, 0) == COMPACT_SKIPPED)
+ if (compaction_suitable(zone, order, 0, classzone_idx) == COMPACT_SKIPPED)
return false;
return watermark_ok;
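To see roughly how much headroom compaction_ready() asks for before it lets reclaim stop in favour of compaction, here is a back-of-the-envelope calculation as standalone code (not part of the patch; the zone numbers are made up, and KSWAPD_ZONE_BALANCE_GAP_RATIO is assumed to be 100, its usual value in this file):

#include <stdio.h>

/* Assumed constant; 100 is its usual value in mm/vmscan.c of this era. */
#define KSWAPD_ZONE_BALANCE_GAP_RATIO 100
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
#define min(a, b) ((a) < (b) ? (a) : (b))

int main(void)
{
	/* Made-up zone: ~4GB of 4KB pages and round-number watermarks. */
	unsigned long managed_pages = 1048576;
	unsigned long low_wmark = 8192;
	unsigned long high_wmark = 12288;
	int order = 9;			/* a THP-sized request */

	unsigned long balance_gap = min(low_wmark,
			DIV_ROUND_UP(managed_pages, KSWAPD_ZONE_BALANCE_GAP_RATIO));
	unsigned long watermark = high_wmark + balance_gap + (2UL << order);

	/* 12288 + 8192 + 1024 = 21504 pages, about 84MB with 4KB pages. */
	printf("reclaim target before compaction: %lu pages\n", watermark);
	return 0;
}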
@@ -2530,10 +2507,8 @@ static inline bool compaction_ready(struct zone *zone, int order)
*
* If a zone is deemed to be full of pinned pages then just give it a light
* scan then give up on it.
- *
- * Returns true if a zone was reclaimable.
*/
-static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
+static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
{
struct zoneref *z;
struct zone *zone;
@@ -2541,7 +2516,6 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
unsigned long nr_soft_scanned;
gfp_t orig_mask;
enum zone_type requested_highidx = gfp_zone(sc->gfp_mask);
- bool reclaimable = false;
/*
* If the number of buffer_heads in the machine exceeds the maximum
@@ -2589,7 +2563,7 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
if (IS_ENABLED(CONFIG_COMPACTION) &&
sc->order > PAGE_ALLOC_COSTLY_ORDER &&
zonelist_zone_idx(z) <= requested_highidx &&
- compaction_ready(zone, sc->order)) {
+ compaction_ready(zone, sc->order, requested_highidx)) {
sc->compaction_ready = true;
continue;
}
@@ -2606,17 +2580,10 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
&nr_soft_scanned);
sc->nr_reclaimed += nr_soft_reclaimed;
sc->nr_scanned += nr_soft_scanned;
- if (nr_soft_reclaimed)
- reclaimable = true;
/* need some check for avoid more shrink_zone() */
}
- if (shrink_zone(zone, sc, zone_idx(zone) == classzone_idx))
- reclaimable = true;
-
- if (global_reclaim(sc) &&
- !reclaimable && zone_reclaimable(zone))
- reclaimable = true;
+ shrink_zone(zone, sc, zone_idx(zone) == classzone_idx);
}
/*
@@ -2624,8 +2591,6 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
* promoted it to __GFP_HIGHMEM.
*/
sc->gfp_mask = orig_mask;
-
- return reclaimable;
}
/*
@@ -2650,7 +2615,6 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
int initial_priority = sc->priority;
unsigned long total_scanned = 0;
unsigned long writeback_threshold;
- bool zones_reclaimable;
retry:
delayacct_freepages_start();
@@ -2661,7 +2625,7 @@ retry:
vmpressure_prio(sc->gfp_mask, sc->target_mem_cgroup,
sc->priority);
sc->nr_scanned = 0;
- zones_reclaimable = shrink_zones(zonelist, sc);
+ shrink_zones(zonelist, sc);
total_scanned += sc->nr_scanned;
if (sc->nr_reclaimed >= sc->nr_to_reclaim)
@@ -2708,10 +2672,6 @@ retry:
goto retry;
}
- /* Any of the zones still reclaimable? Don't OOM. */
- if (zones_reclaimable)
- return 1;
-
return 0;
}
@@ -2965,7 +2925,7 @@ static void age_active_anon(struct zone *zone, struct scan_control *sc)
do {
struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg);
- if (inactive_anon_is_low(lruvec))
+ if (inactive_list_is_low(lruvec, false))
shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
sc, LRU_ACTIVE_ANON);