summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/sysctl/vm.txt38
-rw-r--r--mm/vmscan.c9
2 files changed, 37 insertions, 10 deletions
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index 44518c023949..4bca2a3d9174 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -127,17 +127,39 @@ the high water marks for each per cpu page list.
zone_reclaim_mode:
-This is set during bootup to 1 if it is determined that pages from
-remote zones will cause a significant performance reduction. The
+Zone_reclaim_mode allows someone to set more or less aggressive approaches to
+reclaim memory when a zone runs out of memory. If it is set to zero then no
+zone reclaim occurs. Allocations will be satisfied from other zones / nodes
+in the system.
+
+This value is the bitwise OR of the following flags:
+
+1 = Zone reclaim on
+2 = Zone reclaim writes dirty pages out
+4 = Zone reclaim swaps pages
+
+zone_reclaim_mode is set during bootup to 1 if it is determined that pages
+from remote zones will cause a measurable performance reduction. The
page allocator will then reclaim easily reusable pages (those page
-cache pages that are currently not used) before going off node.
+cache pages that are currently not used) before allocating off node pages.
+
+It may be beneficial to switch off zone reclaim if the system is
+used for a file server and all of memory should be used for caching files
+from disk. In that case the caching effect is more important than
+data locality.
+
+Allowing zone reclaim to write out pages stops processes that are
+writing large amounts of data from dirtying pages on other nodes. Zone
+reclaim will write out dirty pages if a zone fills up and so effectively
+throttle the process. This may decrease the performance of a single process
+since it cannot use all of system memory to buffer the outgoing writes
+anymore but it preserves the memory on other nodes so that the performance
+of other processes running on other nodes will not be affected.
-The user can override this setting. It may be beneficial to switch
-off zone reclaim if the system is used for a file server and all
-of memory should be used for caching files from disk.
+Allowing regular swap effectively restricts allocations to the local
+node unless explicitly overridden by memory policies or cpuset
+configurations.
-It may be beneficial to switch this on if one wants to do zone
-reclaim regardless of the numa distances in the system.
================================================================
zone_reclaim_interval:
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 8760a4abfa1f..9e2ef3624d77 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1592,6 +1592,11 @@ module_init(kswapd_init)
*/
int zone_reclaim_mode __read_mostly;
+#define RECLAIM_OFF 0
+#define RECLAIM_ZONE (1<<0) /* Run shrink_cache on the zone */
+#define RECLAIM_WRITE (1<<1) /* Writeout pages during reclaim */
+#define RECLAIM_SWAP (1<<2) /* Swap pages out during reclaim */
+
/*
* Minimum time between zone reclaim scans
*/
@@ -1630,8 +1635,8 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
if (!cpus_empty(mask) && node_id != numa_node_id())
return 0;
- sc.may_writepage = 0;
- sc.may_swap = 0;
+ sc.may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE);
+ sc.may_swap = !!(zone_reclaim_mode & RECLAIM_SWAP);
sc.nr_scanned = 0;
sc.nr_reclaimed = 0;
sc.priority = ZONE_RECLAIM_PRIORITY + 1;