aboutsummaryrefslogtreecommitdiffstats
path: root/mm/page_alloc.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--mm/page_alloc.c130
1 files changed, 54 insertions, 76 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 178bf9c2a2cb..2c25de46c58f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1091,14 +1091,10 @@ static void free_pcppages_bulk(struct zone *zone, int count,
{
int migratetype = 0;
int batch_free = 0;
- unsigned long nr_scanned, flags;
bool isolated_pageblocks;
- spin_lock_irqsave(&zone->lock, flags);
+ spin_lock(&zone->lock);
isolated_pageblocks = has_isolate_pageblock(zone);
- nr_scanned = node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED);
- if (nr_scanned)
- __mod_node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED, -nr_scanned);
while (count) {
struct page *page;
@@ -1143,7 +1139,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
trace_mm_page_pcpu_drain(page, 0, mt);
} while (--count && --batch_free && !list_empty(list));
}
- spin_unlock_irqrestore(&zone->lock, flags);
+ spin_unlock(&zone->lock);
}
static void free_one_page(struct zone *zone,
@@ -1151,19 +1147,13 @@ static void free_one_page(struct zone *zone,
unsigned int order,
int migratetype)
{
- unsigned long nr_scanned, flags;
- spin_lock_irqsave(&zone->lock, flags);
- __count_vm_events(PGFREE, 1 << order);
- nr_scanned = node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED);
- if (nr_scanned)
- __mod_node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED, -nr_scanned);
-
+ spin_lock(&zone->lock);
if (unlikely(has_isolate_pageblock(zone) ||
is_migrate_isolate(migratetype))) {
migratetype = get_pfnblock_migratetype(page, pfn);
}
__free_one_page(page, pfn, zone, order, migratetype);
- spin_unlock_irqrestore(&zone->lock, flags);
+ spin_unlock(&zone->lock);
}
static void __meminit __init_single_page(struct page *page, unsigned long pfn,
@@ -1241,6 +1231,7 @@ void __meminit reserve_bootmem_region(phys_addr_t start, phys_addr_t end)
static void __free_pages_ok(struct page *page, unsigned int order)
{
+ unsigned long flags;
int migratetype;
unsigned long pfn = page_to_pfn(page);
@@ -1248,7 +1239,10 @@ static void __free_pages_ok(struct page *page, unsigned int order)
return;
migratetype = get_pfnblock_migratetype(page, pfn);
+ local_irq_save(flags);
+ __count_vm_events(PGFREE, 1 << order);
free_one_page(page_zone(page), page, pfn, order, migratetype);
+ local_irq_restore(flags);
}
static void __init __free_pages_boot_core(struct page *page, unsigned int order)
@@ -1696,10 +1690,10 @@ static inline int check_new_page(struct page *page)
return 1;
}
-static inline bool free_pages_prezeroed(bool poisoned)
+static inline bool free_pages_prezeroed(void)
{
return IS_ENABLED(CONFIG_PAGE_POISONING_ZERO) &&
- page_poisoning_enabled() && poisoned;
+ page_poisoning_enabled();
}
#ifdef CONFIG_DEBUG_VM
@@ -1753,17 +1747,10 @@ static void prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags
unsigned int alloc_flags)
{
int i;
- bool poisoned = true;
-
- for (i = 0; i < (1 << order); i++) {
- struct page *p = page + i;
- if (poisoned)
- poisoned &= page_is_poisoned(p);
- }
post_alloc_hook(page, order, gfp_flags);
- if (!free_pages_prezeroed(poisoned) && (gfp_flags & __GFP_ZERO))
+ if (!free_pages_prezeroed() && (gfp_flags & __GFP_ZERO))
for (i = 0; i < (1 << order); i++)
clear_highpage(page + i);
@@ -2043,8 +2030,8 @@ static void reserve_highatomic_pageblock(struct page *page, struct zone *zone,
/* Yoink! */
mt = get_pageblock_migratetype(page);
- if (mt != MIGRATE_HIGHATOMIC &&
- !is_migrate_isolate(mt) && !is_migrate_cma(mt)) {
+ if (!is_migrate_highatomic(mt) && !is_migrate_isolate(mt)
+ && !is_migrate_cma(mt)) {
zone->nr_reserved_highatomic += pageblock_nr_pages;
set_pageblock_migratetype(page, MIGRATE_HIGHATOMIC);
move_freepages_block(zone, page, MIGRATE_HIGHATOMIC);
@@ -2101,8 +2088,7 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac,
* from highatomic to ac->migratetype. So we should
* adjust the count once.
*/
- if (get_pageblock_migratetype(page) ==
- MIGRATE_HIGHATOMIC) {
+ if (is_migrate_highatomic_page(page)) {
/*
* It should never happen but changes to
* locking could inadvertently allow a per-cpu
@@ -2159,8 +2145,7 @@ __rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype)
page = list_first_entry(&area->free_list[fallback_mt],
struct page, lru);
- if (can_steal &&
- get_pageblock_migratetype(page) != MIGRATE_HIGHATOMIC)
+ if (can_steal && !is_migrate_highatomic_page(page))
steal_suitable_fallback(zone, page, start_migratetype);
/* Remove the page from the freelists */
@@ -2220,9 +2205,8 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
int migratetype, bool cold)
{
int i, alloced = 0;
- unsigned long flags;
- spin_lock_irqsave(&zone->lock, flags);
+ spin_lock(&zone->lock);
for (i = 0; i < count; ++i) {
struct page *page = __rmqueue(zone, order, migratetype);
if (unlikely(page == NULL))
@@ -2258,7 +2242,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
* pages added to the pcp list.
*/
__mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order));
- spin_unlock_irqrestore(&zone->lock, flags);
+ spin_unlock(&zone->lock);
return alloced;
}
@@ -2374,6 +2358,13 @@ void drain_all_pages(struct zone *zone)
*/
static cpumask_t cpus_with_pcps;
+ /*
+ * Make sure nobody triggers this path before mm_percpu_wq is fully
+ * initialized.
+ */
+ if (WARN_ON_ONCE(!mm_percpu_wq))
+ return;
+
/* Workqueues cannot recurse */
if (current->flags & PF_WQ_WORKER)
return;
@@ -2423,7 +2414,7 @@ void drain_all_pages(struct zone *zone)
for_each_cpu(cpu, &cpus_with_pcps) {
struct work_struct *work = per_cpu_ptr(&pcpu_drain, cpu);
INIT_WORK(work, drain_local_pages_wq);
- schedule_work_on(cpu, work);
+ queue_work_on(cpu, mm_percpu_wq, work);
}
for_each_cpu(cpu, &cpus_with_pcps)
flush_work(per_cpu_ptr(&pcpu_drain, cpu));
@@ -2479,25 +2470,22 @@ void free_hot_cold_page(struct page *page, bool cold)
{
struct zone *zone = page_zone(page);
struct per_cpu_pages *pcp;
+ unsigned long flags;
unsigned long pfn = page_to_pfn(page);
int migratetype;
- if (in_interrupt()) {
- __free_pages_ok(page, 0);
- return;
- }
-
if (!free_pcp_prepare(page))
return;
migratetype = get_pfnblock_migratetype(page, pfn);
set_pcppage_migratetype(page, migratetype);
- preempt_disable();
+ local_irq_save(flags);
+ __count_vm_event(PGFREE);
/*
* We only track unmovable, reclaimable and movable on pcp lists.
* Free ISOLATE pages back to the allocator because they are being
- * offlined but treat RESERVE as movable pages so we can get those
+ * offlined but treat HIGHATOMIC as movable pages so we can get those
* areas back if necessary. Otherwise, we may have to free
* excessively into the page allocator
*/
@@ -2509,7 +2497,6 @@ void free_hot_cold_page(struct page *page, bool cold)
migratetype = MIGRATE_MOVABLE;
}
- __count_vm_event(PGFREE);
pcp = &this_cpu_ptr(zone->pageset)->pcp;
if (!cold)
list_add(&page->lru, &pcp->lists[migratetype]);
@@ -2523,7 +2510,7 @@ void free_hot_cold_page(struct page *page, bool cold)
}
out:
- preempt_enable();
+ local_irq_restore(flags);
}
/*
@@ -2608,7 +2595,7 @@ int __isolate_free_page(struct page *page, unsigned int order)
for (; page < endpage; page += pageblock_nr_pages) {
int mt = get_pageblock_migratetype(page);
if (!is_migrate_isolate(mt) && !is_migrate_cma(mt)
- && mt != MIGRATE_HIGHATOMIC)
+ && !is_migrate_highatomic(mt))
set_pageblock_migratetype(page,
MIGRATE_MOVABLE);
}
@@ -2648,8 +2635,6 @@ static struct page *__rmqueue_pcplist(struct zone *zone, int migratetype,
{
struct page *page;
- VM_BUG_ON(in_interrupt());
-
do {
if (list_empty(list)) {
pcp->count += rmqueue_bulk(zone, 0,
@@ -2680,8 +2665,9 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
struct list_head *list;
bool cold = ((gfp_flags & __GFP_COLD) != 0);
struct page *page;
+ unsigned long flags;
- preempt_disable();
+ local_irq_save(flags);
pcp = &this_cpu_ptr(zone->pageset)->pcp;
list = &pcp->lists[migratetype];
page = __rmqueue_pcplist(zone, migratetype, cold, pcp, list);
@@ -2689,7 +2675,7 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
__count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
zone_statistics(preferred_zone, zone);
}
- preempt_enable();
+ local_irq_restore(flags);
return page;
}
@@ -2705,7 +2691,7 @@ struct page *rmqueue(struct zone *preferred_zone,
unsigned long flags;
struct page *page;
- if (likely(order == 0) && !in_interrupt()) {
+ if (likely(order == 0)) {
page = rmqueue_pcplist(preferred_zone, zone, order,
gfp_flags, migratetype);
goto out;
@@ -3107,8 +3093,7 @@ void warn_alloc(gfp_t gfp_mask, nodemask_t *nodemask, const char *fmt, ...)
static DEFINE_RATELIMIT_STATE(nopage_rs, DEFAULT_RATELIMIT_INTERVAL,
DEFAULT_RATELIMIT_BURST);
- if ((gfp_mask & __GFP_NOWARN) || !__ratelimit(&nopage_rs) ||
- debug_guardpage_minorder() > 0)
+ if ((gfp_mask & __GFP_NOWARN) || !__ratelimit(&nopage_rs))
return;
pr_warn("%s: ", current->comm);
@@ -3519,19 +3504,12 @@ bool gfp_pfmemalloc_allowed(gfp_t gfp_mask)
}
/*
- * Maximum number of reclaim retries without any progress before OOM killer
- * is consider as the only way to move forward.
- */
-#define MAX_RECLAIM_RETRIES 16
-
-/*
* Checks whether it makes sense to retry the reclaim to make a forward progress
* for the given allocation request.
- * The reclaim feedback represented by did_some_progress (any progress during
- * the last reclaim round) and no_progress_loops (number of reclaim rounds without
- * any progress in a row) is considered as well as the reclaimable pages on the
- * applicable zone list (with a backoff mechanism which is a function of
- * no_progress_loops).
+ *
+ * We give up when we either have tried MAX_RECLAIM_RETRIES in a row
+ * without success, or when we couldn't even meet the watermark if we
+ * reclaimed all remaining pages on the LRU lists.
*
* Returns true if a retry is viable or false to enter the oom path.
*/
@@ -3576,13 +3554,11 @@ should_reclaim_retry(gfp_t gfp_mask, unsigned order,
bool wmark;
available = reclaimable = zone_reclaimable_pages(zone);
- available -= DIV_ROUND_UP((*no_progress_loops) * available,
- MAX_RECLAIM_RETRIES);
available += zone_page_state_snapshot(zone, NR_FREE_PAGES);
/*
- * Would the allocation succeed if we reclaimed the whole
- * available?
+ * Would the allocation succeed if we reclaimed all
+ * reclaimable pages?
*/
wmark = __zone_watermark_ok(zone, order, min_wmark,
ac_classzone_idx(ac), alloc_flags, available);
@@ -3768,7 +3744,7 @@ retry:
/* Make sure we know about allocations which stall for too long */
if (time_after(jiffies, alloc_start + stall_timeout)) {
- warn_alloc(gfp_mask, ac->nodemask,
+ warn_alloc(gfp_mask & ~__GFP_NOWARN, ac->nodemask,
"page allocation stalls for %ums, order:%u",
jiffies_to_msecs(jiffies-alloc_start), order);
stall_timeout += 10 * HZ;
@@ -3968,10 +3944,12 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
goto out;
/*
- * Runtime PM, block IO and its error handling path can deadlock
- * because I/O on the device might not complete.
+ * Apply scoped allocation constraints. This is mainly about GFP_NOFS
+ * resp. GFP_NOIO which has to be inherited for all allocation requests
+ * from a particular context which has been marked by
+ * memalloc_no{fs,io}_{save,restore}.
*/
- alloc_mask = memalloc_noio_flags(gfp_mask);
+ alloc_mask = current_gfp_context(gfp_mask);
ac.spread_dirty_pages = false;
/*
@@ -4244,7 +4222,8 @@ EXPORT_SYMBOL(free_pages_exact);
* nr_free_zone_pages() counts the number of counts pages which are beyond the
* high watermark within all zones at or below a given zone index. For each
* zone, the number of pages is calculated as:
- * managed_pages - high_pages
+ *
+ * nr_free_zone_pages = managed_pages - high_pages
*/
static unsigned long nr_free_zone_pages(int offset)
{
@@ -4506,7 +4485,6 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
#endif
" writeback_tmp:%lukB"
" unstable:%lukB"
- " pages_scanned:%lu"
" all_unreclaimable? %s"
"\n",
pgdat->node_id,
@@ -4520,17 +4498,17 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
K(node_page_state(pgdat, NR_FILE_MAPPED)),
K(node_page_state(pgdat, NR_FILE_DIRTY)),
K(node_page_state(pgdat, NR_WRITEBACK)),
+ K(node_page_state(pgdat, NR_SHMEM)),
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
K(node_page_state(pgdat, NR_SHMEM_THPS) * HPAGE_PMD_NR),
K(node_page_state(pgdat, NR_SHMEM_PMDMAPPED)
* HPAGE_PMD_NR),
K(node_page_state(pgdat, NR_ANON_THPS) * HPAGE_PMD_NR),
#endif
- K(node_page_state(pgdat, NR_SHMEM)),
K(node_page_state(pgdat, NR_WRITEBACK_TEMP)),
K(node_page_state(pgdat, NR_UNSTABLE_NFS)),
- node_page_state(pgdat, NR_PAGES_SCANNED),
- !pgdat_reclaimable(pgdat) ? "yes" : "no");
+ pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES ?
+ "yes" : "no");
}
for_each_populated_zone(zone) {
@@ -7425,7 +7403,7 @@ int alloc_contig_range(unsigned long start, unsigned long end,
.zone = page_zone(pfn_to_page(start)),
.mode = MIGRATE_SYNC,
.ignore_skip_hint = true,
- .gfp_mask = memalloc_noio_flags(gfp_mask),
+ .gfp_mask = current_gfp_context(gfp_mask),
};
INIT_LIST_HEAD(&cc.migratepages);