-rw-r--r--  include/linux/mmzone.h   21
-rw-r--r--  include/linux/swap.h      2
-rw-r--r--  mm/memcontrol.c          18
-rw-r--r--  mm/swap.c                19
-rw-r--r--  mm/vmscan.c              44
5 files changed, 45 insertions, 59 deletions
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 2f79ff4477ba..e57248ccb63d 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -242,19 +242,6 @@ static inline bool is_active_lru(enum lru_list lru)
return (lru == LRU_ACTIVE_ANON || lru == LRU_ACTIVE_FILE);
}
-struct zone_reclaim_stat {
- /*
- * The pageout code in vmscan.c keeps track of how many of the
- * mem/swap backed and file backed pages are referenced.
- * The higher the rotated/scanned ratio, the more valuable
- * that cache is.
- *
- * The anon LRU stats live in [0], file LRU stats in [1]
- */
- unsigned long recent_rotated[2];
- unsigned long recent_scanned[2];
-};
-
enum lruvec_flags {
LRUVEC_CONGESTED, /* lruvec has many dirty pages
* backed by a congested BDI
@@ -263,7 +250,13 @@ enum lruvec_flags {
struct lruvec {
struct list_head lists[NR_LRU_LISTS];
- struct zone_reclaim_stat reclaim_stat;
+ /*
+ * These track the cost of reclaiming one LRU - file or anon -
+ * over the other. As the observed cost of reclaiming one LRU
+ * increases, the reclaim scan balance tips toward the other.
+ */
+ unsigned long anon_cost;
+ unsigned long file_cost;
/* Evictions & activations on the inactive file list */
atomic_long_t inactive_age;
/* Refaults at the time of last reclaim cycle */
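
For orientation: the four recent_scanned/recent_rotated counters removed above collapse into two running sums, one per LRU type. A minimal userspace sketch of that bookkeeping (struct and helper names here are illustrative stand-ins, not the kernel API):

#include <stdbool.h>
#include <stdio.h>

/* Userspace stand-in for the two new fields in struct lruvec. */
struct lruvec_costs {
        unsigned long anon_cost;
        unsigned long file_cost;
};

/* Charge reclaim cost against one LRU type, mirroring lru_note_cost(). */
static void note_cost(struct lruvec_costs *c, bool file, unsigned long nr_pages)
{
        if (file)
                c->file_cost += nr_pages;
        else
                c->anon_cost += nr_pages;
}

int main(void)
{
        struct lruvec_costs c = { 0, 0 };

        note_cost(&c, false, 64);       /* hypothetical: 64 anon pages rotated */
        note_cost(&c, true, 16);        /* hypothetical: 16 file pages rotated */
        printf("anon_cost=%lu file_cost=%lu\n", c.anon_cost, c.file_cost);
        return 0;
}

The kernel-side helper that does this for real, lru_note_cost(), is added in mm/swap.c below.
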
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 217bc8e13feb..ce3c55747006 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -334,6 +334,8 @@ extern unsigned long nr_free_pagecache_pages(void);
/* linux/mm/swap.c */
+extern void lru_note_cost(struct lruvec *lruvec, bool file,
+ unsigned int nr_pages);
extern void lru_cache_add(struct page *);
extern void lru_add_page_tail(struct page *page, struct page *page_tail,
struct lruvec *lruvec, struct list_head *head);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index ee6fe8bb0bff..5381afb23d58 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3853,23 +3853,17 @@ static int memcg_stat_show(struct seq_file *m, void *v)
{
pg_data_t *pgdat;
struct mem_cgroup_per_node *mz;
- struct zone_reclaim_stat *rstat;
- unsigned long recent_rotated[2] = {0, 0};
- unsigned long recent_scanned[2] = {0, 0};
+ unsigned long anon_cost = 0;
+ unsigned long file_cost = 0;
for_each_online_pgdat(pgdat) {
mz = mem_cgroup_nodeinfo(memcg, pgdat->node_id);
- rstat = &mz->lruvec.reclaim_stat;
- recent_rotated[0] += rstat->recent_rotated[0];
- recent_rotated[1] += rstat->recent_rotated[1];
- recent_scanned[0] += rstat->recent_scanned[0];
- recent_scanned[1] += rstat->recent_scanned[1];
+ anon_cost += mz->lruvec.anon_cost;
+ file_cost += mz->lruvec.file_cost;
}
- seq_printf(m, "recent_rotated_anon %lu\n", recent_rotated[0]);
- seq_printf(m, "recent_rotated_file %lu\n", recent_rotated[1]);
- seq_printf(m, "recent_scanned_anon %lu\n", recent_scanned[0]);
- seq_printf(m, "recent_scanned_file %lu\n", recent_scanned[1]);
+ seq_printf(m, "anon_cost %lu\n", anon_cost);
+ seq_printf(m, "file_cost %lu\n", file_cost);
}
#endif
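
memcg_stat_show() now sums the per-node cost counters and prints two lines in place of the four recent_* ones. A rough sketch of that aggregation, with a plain array standing in for the per-node lruvecs (all values made up):

#include <stdio.h>

struct lruvec_costs {
        unsigned long anon_cost;
        unsigned long file_cost;
};

#define NR_NODES 2      /* hypothetical two-node system */

int main(void)
{
        /* made-up per-node costs for one memcg */
        struct lruvec_costs nodes[NR_NODES] = { { 300, 120 }, { 50, 80 } };
        unsigned long anon_cost = 0, file_cost = 0;
        int n;

        for (n = 0; n < NR_NODES; n++) {
                anon_cost += nodes[n].anon_cost;
                file_cost += nodes[n].file_cost;
        }

        /* the two lines that replace the four recent_* entries in memory.stat */
        printf("anon_cost %lu\n", anon_cost);
        printf("file_cost %lu\n", file_cost);
        return 0;
}
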
diff --git a/mm/swap.c b/mm/swap.c
index 116b609c25c1..fedeb925dbfe 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -278,15 +278,12 @@ void rotate_reclaimable_page(struct page *page)
}
}
-static void update_page_reclaim_stat(struct lruvec *lruvec,
- int file, int rotated,
- unsigned int nr_pages)
+void lru_note_cost(struct lruvec *lruvec, bool file, unsigned int nr_pages)
{
- struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
-
- reclaim_stat->recent_scanned[file] += nr_pages;
- if (rotated)
- reclaim_stat->recent_rotated[file] += nr_pages;
+ if (file)
+ lruvec->file_cost += nr_pages;
+ else
+ lruvec->anon_cost += nr_pages;
}
static void __activate_page(struct page *page, struct lruvec *lruvec,
@@ -541,7 +538,7 @@ static void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec,
if (active)
__count_vm_event(PGDEACTIVATE);
- update_page_reclaim_stat(lruvec, file, 0, hpage_nr_pages(page));
+ lru_note_cost(lruvec, !file, hpage_nr_pages(page));
}
static void lru_deactivate_fn(struct page *page, struct lruvec *lruvec,
@@ -557,7 +554,7 @@ static void lru_deactivate_fn(struct page *page, struct lruvec *lruvec,
add_page_to_lru_list(page, lruvec, lru);
__count_vm_events(PGDEACTIVATE, hpage_nr_pages(page));
- update_page_reclaim_stat(lruvec, file, 0, hpage_nr_pages(page));
+ lru_note_cost(lruvec, !file, hpage_nr_pages(page));
}
}
@@ -582,7 +579,7 @@ static void lru_lazyfree_fn(struct page *page, struct lruvec *lruvec,
__count_vm_events(PGLAZYFREE, hpage_nr_pages(page));
count_memcg_page_event(page, PGLAZYFREE);
- update_page_reclaim_stat(lruvec, 1, 0, hpage_nr_pages(page));
+ lru_note_cost(lruvec, 0, hpage_nr_pages(page));
}
}
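
One subtlety at the deactivate and lazyfree call sites above: the cost is charged to the opposite LRU type (note the !file). Under the rewritten get_scan_count() arithmetic further down, raising one list's cost shifts relative scan pressure toward the other list, so charging anon_cost when a file page is deactivated pushes the balance toward file, the same direction the old recent_scanned[file] bump produced. A small sketch of those call sites, reusing an illustrative copy of the helper (not kernel code):

#include <stdbool.h>
#include <stdio.h>

struct lruvec_costs {
        unsigned long anon_cost;
        unsigned long file_cost;
};

/* Same accounting as lru_note_cost() in the hunk above. */
static void note_cost(struct lruvec_costs *c, bool file, unsigned long nr)
{
        if (file)
                c->file_cost += nr;
        else
                c->anon_cost += nr;
}

int main(void)
{
        struct lruvec_costs c = { 0, 0 };
        bool page_is_file = true;       /* hypothetical: deactivating a file page */

        /* lru_deactivate_file_fn()/lru_deactivate_fn(): charge the other list */
        note_cost(&c, !page_is_file, 1);

        /* lru_lazyfree_fn(): passes file=0, so the charge lands on anon_cost */
        note_cost(&c, false, 1);

        printf("anon_cost=%lu file_cost=%lu\n", c.anon_cost, c.file_cost);
        return 0;
}
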
diff --git a/mm/vmscan.c b/mm/vmscan.c
index a5a7a8d0764c..c5b2a68f4ef6 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1916,7 +1916,6 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
bool file = is_file_lru(lru);
enum vm_event_item item;
struct pglist_data *pgdat = lruvec_pgdat(lruvec);
- struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
bool stalled = false;
while (unlikely(too_many_isolated(pgdat, file, sc))) {
@@ -1940,7 +1939,6 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
&nr_scanned, sc, lru);
__mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, nr_taken);
- reclaim_stat->recent_scanned[file] += nr_taken;
item = current_is_kswapd() ? PGSCAN_KSWAPD : PGSCAN_DIRECT;
if (!cgroup_reclaim(sc))
__count_vm_events(item, nr_scanned);
@@ -1960,8 +1958,12 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
move_pages_to_lru(lruvec, &page_list);
__mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, -nr_taken);
- reclaim_stat->recent_rotated[0] += stat.nr_activate[0];
- reclaim_stat->recent_rotated[1] += stat.nr_activate[1];
+ /*
+ * Rotating pages costs CPU without actually
+ * progressing toward the reclaim goal.
+ */
+ lru_note_cost(lruvec, 0, stat.nr_activate[0]);
+ lru_note_cost(lruvec, 1, stat.nr_activate[1]);
item = current_is_kswapd() ? PGSTEAL_KSWAPD : PGSTEAL_DIRECT;
if (!cgroup_reclaim(sc))
__count_vm_events(item, nr_reclaimed);
@@ -2013,7 +2015,6 @@ static void shrink_active_list(unsigned long nr_to_scan,
LIST_HEAD(l_active);
LIST_HEAD(l_inactive);
struct page *page;
- struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
unsigned nr_deactivate, nr_activate;
unsigned nr_rotated = 0;
int file = is_file_lru(lru);
@@ -2027,7 +2028,6 @@ static void shrink_active_list(unsigned long nr_to_scan,
&nr_scanned, sc, lru);
__mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, nr_taken);
- reclaim_stat->recent_scanned[file] += nr_taken;
__count_vm_events(PGREFILL, nr_scanned);
__count_memcg_events(lruvec_memcg(lruvec), PGREFILL, nr_scanned);
@@ -2085,7 +2085,7 @@ static void shrink_active_list(unsigned long nr_to_scan,
* helps balance scan pressure between file and anonymous pages in
* get_scan_count.
*/
- reclaim_stat->recent_rotated[file] += nr_rotated;
+ lru_note_cost(lruvec, file, nr_rotated);
nr_activate = move_pages_to_lru(lruvec, &l_active);
nr_deactivate = move_pages_to_lru(lruvec, &l_inactive);
@@ -2242,13 +2242,13 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
{
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
int swappiness = mem_cgroup_swappiness(memcg);
- struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
u64 fraction[2];
u64 denominator = 0; /* gcc */
struct pglist_data *pgdat = lruvec_pgdat(lruvec);
unsigned long anon_prio, file_prio;
enum scan_balance scan_balance;
unsigned long anon, file;
+ unsigned long totalcost;
unsigned long ap, fp;
enum lru_list lru;
@@ -2324,26 +2324,26 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
lruvec_lru_size(lruvec, LRU_INACTIVE_FILE, MAX_NR_ZONES);
spin_lock_irq(&pgdat->lru_lock);
- if (unlikely(reclaim_stat->recent_scanned[0] > anon / 4)) {
- reclaim_stat->recent_scanned[0] /= 2;
- reclaim_stat->recent_rotated[0] /= 2;
- }
-
- if (unlikely(reclaim_stat->recent_scanned[1] > file / 4)) {
- reclaim_stat->recent_scanned[1] /= 2;
- reclaim_stat->recent_rotated[1] /= 2;
+ totalcost = lruvec->anon_cost + lruvec->file_cost;
+ if (unlikely(totalcost > (anon + file) / 4)) {
+ lruvec->anon_cost /= 2;
+ lruvec->file_cost /= 2;
+ totalcost /= 2;
}
/*
* The amount of pressure on anon vs file pages is inversely
- * proportional to the fraction of recently scanned pages on
- * each list that were recently referenced and in active use.
+ * proportional to the assumed cost of reclaiming each list,
+ * as determined by the share of pages that are likely going
+ * to refault or rotate on each list (recently referenced),
+ * times the relative IO cost of bringing back a swapped out
+ * anonymous page vs reloading a filesystem page (swappiness).
*/
- ap = anon_prio * (reclaim_stat->recent_scanned[0] + 1);
- ap /= reclaim_stat->recent_rotated[0] + 1;
+ ap = anon_prio * (totalcost + 1);
+ ap /= lruvec->anon_cost + 1;
- fp = file_prio * (reclaim_stat->recent_scanned[1] + 1);
- fp /= reclaim_stat->recent_rotated[1] + 1;
+ fp = file_prio * (totalcost + 1);
+ fp /= lruvec->file_cost + 1;
spin_unlock_irq(&pgdat->lru_lock);
fraction[0] = ap;
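
The effect of the rewritten balance computation is easiest to see with numbers. Below is a standalone sketch of the new arithmetic, with made-up counter values and the usual anon_prio = swappiness, file_prio = 200 - swappiness weights that get_scan_count() assigns earlier (not shown in this hunk):

#include <stdio.h>

/*
 * Standalone model of the new scan-balance arithmetic in get_scan_count().
 * All numbers are hypothetical; anon/file are the LRU list sizes in pages.
 */
int main(void)
{
        unsigned long anon_cost = 3000, file_cost = 500;     /* observed costs */
        unsigned long anon = 100000, file = 200000;          /* list sizes */
        unsigned long anon_prio = 60, file_prio = 200 - 60;  /* swappiness 60 */
        unsigned long totalcost, ap, fp;

        totalcost = anon_cost + file_cost;
        if (totalcost > (anon + file) / 4) {
                /* age the counters so old history fades, as in the patch */
                anon_cost /= 2;
                file_cost /= 2;
                totalcost /= 2;
        }

        /* pressure is inversely proportional to each list's observed cost */
        ap = anon_prio * (totalcost + 1) / (anon_cost + 1);
        fp = file_prio * (totalcost + 1) / (file_cost + 1);

        /* anon is scanned in proportion ap/(ap+fp), file in fp/(ap+fp) */
        printf("ap=%lu fp=%lu -> anon share = %lu%%\n",
               ap, fp, 100 * ap / (ap + fp));
        return 0;
}

With anon rotations dominating the observed cost, most of the scan pressure lands on the file list, which is the tipping behavior described by the new comment in mmzone.h.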