summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--mm/internal.h13
-rw-r--r--mm/page_alloc.c108
2 files changed, 105 insertions, 16 deletions
diff --git a/mm/internal.h b/mm/internal.h
index 6a57811ae47d..2ffe0cd64c70 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -490,10 +490,15 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone,
#define ALLOC_OOM ALLOC_NO_WATERMARKS
#endif
-#define ALLOC_HARDER 0x10 /* try to alloc harder */
-#define ALLOC_HIGH 0x20 /* __GFP_HIGH set */
-#define ALLOC_CPUSET 0x40 /* check for correct cpuset */
-#define ALLOC_CMA 0x80 /* allow allocations from CMA areas */
+#define ALLOC_HARDER 0x10 /* try to alloc harder */
+#define ALLOC_HIGH 0x20 /* __GFP_HIGH set */
+#define ALLOC_CPUSET 0x40 /* check for correct cpuset */
+#define ALLOC_CMA 0x80 /* allow allocations from CMA areas */
+#ifdef CONFIG_ZONE_DMA32
+#define ALLOC_NOFRAGMENT 0x100 /* avoid mixing pageblock types */
+#else
+#define ALLOC_NOFRAGMENT 0x0
+#endif
enum ttu_flags;
struct tlbflush_unmap_batch;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 298b449a03c7..251b8a0c9c5d 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2375,20 +2375,30 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac,
* condition simpler.
*/
static __always_inline bool
-__rmqueue_fallback(struct zone *zone, int order, int start_migratetype)
+__rmqueue_fallback(struct zone *zone, int order, int start_migratetype,
+ unsigned int alloc_flags)
{
struct free_area *area;
int current_order;
+ int min_order = order;
struct page *page;
int fallback_mt;
bool can_steal;
/*
+ * Do not steal pages from freelists belonging to other pageblocks
+ * i.e. orders < pageblock_order. If there are no local zones free,
+ * the zonelists will be reiterated without ALLOC_NOFRAGMENT.
+ */
+ if (alloc_flags & ALLOC_NOFRAGMENT)
+ min_order = pageblock_order;
+
+ /*
* Find the largest available free page in the other list. This roughly
* approximates finding the pageblock with the most free pages, which
* would be too costly to do exactly.
*/
- for (current_order = MAX_ORDER - 1; current_order >= order;
+ for (current_order = MAX_ORDER - 1; current_order >= min_order;
--current_order) {
area = &(zone->free_area[current_order]);
fallback_mt = find_suitable_fallback(area, current_order,
@@ -2447,7 +2457,8 @@ do_steal:
* Call me with the zone->lock already held.
*/
static __always_inline struct page *
-__rmqueue(struct zone *zone, unsigned int order, int migratetype)
+__rmqueue(struct zone *zone, unsigned int order, int migratetype,
+ unsigned int alloc_flags)
{
struct page *page;
@@ -2457,7 +2468,8 @@ retry:
if (migratetype == MIGRATE_MOVABLE)
page = __rmqueue_cma_fallback(zone, order);
- if (!page && __rmqueue_fallback(zone, order, migratetype))
+ if (!page && __rmqueue_fallback(zone, order, migratetype,
+ alloc_flags))
goto retry;
}
@@ -2472,13 +2484,14 @@ retry:
*/
static int rmqueue_bulk(struct zone *zone, unsigned int order,
unsigned long count, struct list_head *list,
- int migratetype)
+ int migratetype, unsigned int alloc_flags)
{
int i, alloced = 0;
spin_lock(&zone->lock);
for (i = 0; i < count; ++i) {
- struct page *page = __rmqueue(zone, order, migratetype);
+ struct page *page = __rmqueue(zone, order, migratetype,
+ alloc_flags);
if (unlikely(page == NULL))
break;
@@ -2934,6 +2947,7 @@ static inline void zone_statistics(struct zone *preferred_zone, struct zone *z)
/* Remove page from the per-cpu list, caller must protect the list */
static struct page *__rmqueue_pcplist(struct zone *zone, int migratetype,
+ unsigned int alloc_flags,
struct per_cpu_pages *pcp,
struct list_head *list)
{
@@ -2943,7 +2957,7 @@ static struct page *__rmqueue_pcplist(struct zone *zone, int migratetype,
if (list_empty(list)) {
pcp->count += rmqueue_bulk(zone, 0,
pcp->batch, list,
- migratetype);
+ migratetype, alloc_flags);
if (unlikely(list_empty(list)))
return NULL;
}
@@ -2959,7 +2973,8 @@ static struct page *__rmqueue_pcplist(struct zone *zone, int migratetype,
/* Lock and remove page from the per-cpu list */
static struct page *rmqueue_pcplist(struct zone *preferred_zone,
struct zone *zone, unsigned int order,
- gfp_t gfp_flags, int migratetype)
+ gfp_t gfp_flags, int migratetype,
+ unsigned int alloc_flags)
{
struct per_cpu_pages *pcp;
struct list_head *list;
@@ -2969,7 +2984,7 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
local_irq_save(flags);
pcp = &this_cpu_ptr(zone->pageset)->pcp;
list = &pcp->lists[migratetype];
- page = __rmqueue_pcplist(zone, migratetype, pcp, list);
+ page = __rmqueue_pcplist(zone, migratetype, alloc_flags, pcp, list);
if (page) {
__count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
zone_statistics(preferred_zone, zone);
@@ -2992,7 +3007,7 @@ struct page *rmqueue(struct zone *preferred_zone,
if (likely(order == 0)) {
page = rmqueue_pcplist(preferred_zone, zone, order,
- gfp_flags, migratetype);
+ gfp_flags, migratetype, alloc_flags);
goto out;
}
@@ -3011,7 +3026,7 @@ struct page *rmqueue(struct zone *preferred_zone,
trace_mm_page_alloc_zone_locked(page, order, migratetype);
}
if (!page)
- page = __rmqueue(zone, order, migratetype);
+ page = __rmqueue(zone, order, migratetype, alloc_flags);
} while (page && check_new_pages(page, order));
spin_unlock(&zone->lock);
if (!page)
@@ -3253,6 +3268,40 @@ static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone)
}
#endif /* CONFIG_NUMA */
+#ifdef CONFIG_ZONE_DMA32
+/*
+ * The restriction on ZONE_DMA32 as being a suitable zone to use to avoid
+ * fragmentation is subtle. If the preferred zone was HIGHMEM then
+ * premature use of a lower zone may cause lowmem pressure problems that
+ * are worse than fragmentation. If the next zone is ZONE_DMA then it is
+ * probably too small. It only makes sense to spread allocations to avoid
+ * fragmentation between the Normal and DMA32 zones.
+ */
+static inline unsigned int
+alloc_flags_nofragment(struct zone *zone)
+{
+ if (zone_idx(zone) != ZONE_NORMAL)
+ return 0;
+
+ /*
+ * If ZONE_DMA32 exists, assume it is the one after ZONE_NORMAL and
+ * the pointer is within zone->zone_pgdat->node_zones[]. Also assume
+ * on UMA that if Normal is populated then so is DMA32.
+ */
+ BUILD_BUG_ON(ZONE_NORMAL - ZONE_DMA32 != 1);
+ if (nr_online_nodes > 1 && !populated_zone(--zone))
+ return 0;
+
+ return ALLOC_NOFRAGMENT;
+}
+#else
+static inline unsigned int
+alloc_flags_nofragment(struct zone *zone)
+{
+ return 0;
+}
+#endif
+
/*
* get_page_from_freelist goes through the zonelist trying to allocate
* a page.
@@ -3261,14 +3310,18 @@ static struct page *
get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
const struct alloc_context *ac)
{
- struct zoneref *z = ac->preferred_zoneref;
+ struct zoneref *z;
struct zone *zone;
struct pglist_data *last_pgdat_dirty_limit = NULL;
+ bool no_fallback;
+retry:
/*
* Scan zonelist, looking for a zone with enough free.
* See also __cpuset_node_allowed() comment in kernel/cpuset.c.
*/
+ no_fallback = alloc_flags & ALLOC_NOFRAGMENT;
+ z = ac->preferred_zoneref;
for_next_zone_zonelist_nodemask(zone, z, ac->zonelist, ac->high_zoneidx,
ac->nodemask) {
struct page *page;
@@ -3307,6 +3360,22 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
}
}
+ if (no_fallback && nr_online_nodes > 1 &&
+ zone != ac->preferred_zoneref->zone) {
+ int local_nid;
+
+ /*
+ * If moving to a remote node, retry but allow
+ * fragmenting fallbacks. Locality is more important
+ * than fragmentation avoidance.
+ */
+ local_nid = zone_to_nid(ac->preferred_zoneref->zone);
+ if (zone_to_nid(zone) != local_nid) {
+ alloc_flags &= ~ALLOC_NOFRAGMENT;
+ goto retry;
+ }
+ }
+
mark = zone->watermark[alloc_flags & ALLOC_WMARK_MASK];
if (!zone_watermark_fast(zone, order, mark,
ac_classzone_idx(ac), alloc_flags)) {
@@ -3374,6 +3443,15 @@ try_this_zone:
}
}
+ /*
+ * It's possible on a UMA machine to get through all zones that are
+ * fragmented. If avoiding fragmentation, reset and try again.
+ */
+ if (no_fallback) {
+ alloc_flags &= ~ALLOC_NOFRAGMENT;
+ goto retry;
+ }
+
return NULL;
}
@@ -4369,6 +4447,12 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid,
finalise_ac(gfp_mask, &ac);
+ /*
+ * Forbid the first pass from falling back to types that fragment
+ * memory until all local zones are considered.
+ */
+ alloc_flags |= alloc_flags_nofragment(ac.preferred_zoneref->zone);
+
/* First allocation attempt */
page = get_page_from_freelist(alloc_mask, order, alloc_flags, &ac);
if (likely(page))