Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r-- | mm/page_alloc.c | 130
1 file changed, 54 insertions, 76 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 178bf9c2a2cb..2c25de46c58f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1091,14 +1091,10 @@ static void free_pcppages_bulk(struct zone *zone, int count,
 {
         int migratetype = 0;
         int batch_free = 0;
-        unsigned long nr_scanned, flags;
         bool isolated_pageblocks;

-        spin_lock_irqsave(&zone->lock, flags);
+        spin_lock(&zone->lock);
         isolated_pageblocks = has_isolate_pageblock(zone);
-        nr_scanned = node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED);
-        if (nr_scanned)
-                __mod_node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED, -nr_scanned);

         while (count) {
                 struct page *page;
@@ -1143,7 +1139,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
                         trace_mm_page_pcpu_drain(page, 0, mt);
                 } while (--count && --batch_free && !list_empty(list));
         }
-        spin_unlock_irqrestore(&zone->lock, flags);
+        spin_unlock(&zone->lock);
 }

 static void free_one_page(struct zone *zone,
@@ -1151,19 +1147,13 @@ static void free_one_page(struct zone *zone,
                                 unsigned int order,
                                 int migratetype)
 {
-        unsigned long nr_scanned, flags;
-        spin_lock_irqsave(&zone->lock, flags);
-        __count_vm_events(PGFREE, 1 << order);
-        nr_scanned = node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED);
-        if (nr_scanned)
-                __mod_node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED, -nr_scanned);
-
+        spin_lock(&zone->lock);
         if (unlikely(has_isolate_pageblock(zone) ||
                 is_migrate_isolate(migratetype))) {
                 migratetype = get_pfnblock_migratetype(page, pfn);
         }
         __free_one_page(page, pfn, zone, order, migratetype);
-        spin_unlock_irqrestore(&zone->lock, flags);
+        spin_unlock(&zone->lock);
 }

 static void __meminit __init_single_page(struct page *page, unsigned long pfn,
@@ -1241,6 +1231,7 @@ void __meminit reserve_bootmem_region(phys_addr_t start, phys_addr_t end)

 static void __free_pages_ok(struct page *page, unsigned int order)
 {
+        unsigned long flags;
         int migratetype;
         unsigned long pfn = page_to_pfn(page);

@@ -1248,7 +1239,10 @@ static void __free_pages_ok(struct page *page, unsigned int order)
                 return;

         migratetype = get_pfnblock_migratetype(page, pfn);
+        local_irq_save(flags);
+        __count_vm_events(PGFREE, 1 << order);
         free_one_page(page_zone(page), page, pfn, order, migratetype);
+        local_irq_restore(flags);
 }

 static void __init __free_pages_boot_core(struct page *page, unsigned int order)
@@ -1696,10 +1690,10 @@ static inline int check_new_page(struct page *page)
         return 1;
 }

-static inline bool free_pages_prezeroed(bool poisoned)
+static inline bool free_pages_prezeroed(void)
 {
         return IS_ENABLED(CONFIG_PAGE_POISONING_ZERO) &&
-                page_poisoning_enabled() && poisoned;
+                page_poisoning_enabled();
 }

 #ifdef CONFIG_DEBUG_VM
@@ -1753,17 +1747,10 @@ static void prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags
                                                         unsigned int alloc_flags)
 {
         int i;
-        bool poisoned = true;
-
-        for (i = 0; i < (1 << order); i++) {
-                struct page *p = page + i;
-                if (poisoned)
-                        poisoned &= page_is_poisoned(p);
-        }

         post_alloc_hook(page, order, gfp_flags);

-        if (!free_pages_prezeroed(poisoned) && (gfp_flags & __GFP_ZERO))
+        if (!free_pages_prezeroed() && (gfp_flags & __GFP_ZERO))
                 for (i = 0; i < (1 << order); i++)
                         clear_highpage(page + i);

@@ -2043,8 +2030,8 @@ static void reserve_highatomic_pageblock(struct page *page, struct zone *zone,

         /* Yoink! */
         mt = get_pageblock_migratetype(page);
-        if (mt != MIGRATE_HIGHATOMIC &&
-                        !is_migrate_isolate(mt) && !is_migrate_cma(mt)) {
+        if (!is_migrate_highatomic(mt) && !is_migrate_isolate(mt)
+            && !is_migrate_cma(mt)) {
                 zone->nr_reserved_highatomic += pageblock_nr_pages;
                 set_pageblock_migratetype(page, MIGRATE_HIGHATOMIC);
                 move_freepages_block(zone, page, MIGRATE_HIGHATOMIC);
@@ -2101,8 +2088,7 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac,
                          * from highatomic to ac->migratetype. So we should
                          * adjust the count once.
                          */
-                        if (get_pageblock_migratetype(page) ==
-                                                        MIGRATE_HIGHATOMIC) {
+                        if (is_migrate_highatomic_page(page)) {
                                 /*
                                  * It should never happen but changes to
                                  * locking could inadvertently allow a per-cpu
@@ -2159,8 +2145,7 @@ __rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype)

                 page = list_first_entry(&area->free_list[fallback_mt],
                                                 struct page, lru);
-                if (can_steal &&
-                        get_pageblock_migratetype(page) != MIGRATE_HIGHATOMIC)
+                if (can_steal && !is_migrate_highatomic_page(page))
                         steal_suitable_fallback(zone, page, start_migratetype);

                 /* Remove the page from the freelists */
@@ -2220,9 +2205,8 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
                         int migratetype, bool cold)
 {
         int i, alloced = 0;
-        unsigned long flags;

-        spin_lock_irqsave(&zone->lock, flags);
+        spin_lock(&zone->lock);
         for (i = 0; i < count; ++i) {
                 struct page *page = __rmqueue(zone, order, migratetype);
                 if (unlikely(page == NULL)) break;
@@ -2258,7 +2242,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
          * pages added to the pcp list.
          */
         __mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order));
-        spin_unlock_irqrestore(&zone->lock, flags);
+        spin_unlock(&zone->lock);
         return alloced;
 }

@@ -2374,6 +2358,13 @@ void drain_all_pages(struct zone *zone)
          */
         static cpumask_t cpus_with_pcps;

+        /*
+         * Make sure nobody triggers this path before mm_percpu_wq is fully
+         * initialized.
+         */
+        if (WARN_ON_ONCE(!mm_percpu_wq))
+                return;
+
         /* Workqueues cannot recurse */
         if (current->flags & PF_WQ_WORKER)
                 return;
@@ -2423,7 +2414,7 @@ void drain_all_pages(struct zone *zone)
         for_each_cpu(cpu, &cpus_with_pcps) {
                 struct work_struct *work = per_cpu_ptr(&pcpu_drain, cpu);
                 INIT_WORK(work, drain_local_pages_wq);
-                schedule_work_on(cpu, work);
+                queue_work_on(cpu, mm_percpu_wq, work);
         }
         for_each_cpu(cpu, &cpus_with_pcps)
                 flush_work(per_cpu_ptr(&pcpu_drain, cpu));
@@ -2479,25 +2470,22 @@ void free_hot_cold_page(struct page *page, bool cold)
 {
         struct zone *zone = page_zone(page);
         struct per_cpu_pages *pcp;
+        unsigned long flags;
         unsigned long pfn = page_to_pfn(page);
         int migratetype;

-        if (in_interrupt()) {
-                __free_pages_ok(page, 0);
-                return;
-        }
-
         if (!free_pcp_prepare(page))
                 return;

         migratetype = get_pfnblock_migratetype(page, pfn);
         set_pcppage_migratetype(page, migratetype);
-        preempt_disable();
+        local_irq_save(flags);
+        __count_vm_event(PGFREE);

         /*
          * We only track unmovable, reclaimable and movable on pcp lists.
          * Free ISOLATE pages back to the allocator because they are being
-         * offlined but treat RESERVE as movable pages so we can get those
+         * offlined but treat HIGHATOMIC as movable pages so we can get those
          * areas back if necessary. Otherwise, we may have to free
          * excessively into the page allocator
          */
@@ -2509,7 +2497,6 @@ void free_hot_cold_page(struct page *page, bool cold)
                 migratetype = MIGRATE_MOVABLE;
         }

-        __count_vm_event(PGFREE);
         pcp = &this_cpu_ptr(zone->pageset)->pcp;
         if (!cold)
                 list_add(&page->lru, &pcp->lists[migratetype]);
@@ -2523,7 +2510,7 @@ void free_hot_cold_page(struct page *page, bool cold)
         }

 out:
-        preempt_enable();
+        local_irq_restore(flags);
 }

 /*
@@ -2608,7 +2595,7 @@ int __isolate_free_page(struct page *page, unsigned int order)
                 for (; page < endpage; page += pageblock_nr_pages) {
                         int mt = get_pageblock_migratetype(page);
                         if (!is_migrate_isolate(mt) && !is_migrate_cma(mt)
-                                && mt != MIGRATE_HIGHATOMIC)
+                            && !is_migrate_highatomic(mt))
                                 set_pageblock_migratetype(page,
                                                           MIGRATE_MOVABLE);
                 }
@@ -2648,8 +2635,6 @@ static struct page *__rmqueue_pcplist(struct zone *zone, int migratetype,
 {
         struct page *page;

-        VM_BUG_ON(in_interrupt());
-
         do {
                 if (list_empty(list)) {
                         pcp->count += rmqueue_bulk(zone, 0,
@@ -2680,8 +2665,9 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
         struct list_head *list;
         bool cold = ((gfp_flags & __GFP_COLD) != 0);
         struct page *page;
+        unsigned long flags;

-        preempt_disable();
+        local_irq_save(flags);
         pcp = &this_cpu_ptr(zone->pageset)->pcp;
         list = &pcp->lists[migratetype];
         page = __rmqueue_pcplist(zone, migratetype, cold, pcp, list);
@@ -2689,7 +2675,7 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
                 __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
                 zone_statistics(preferred_zone, zone);
         }
-        preempt_enable();
+        local_irq_restore(flags);
         return page;
 }

@@ -2705,7 +2691,7 @@ struct page *rmqueue(struct zone *preferred_zone,
         unsigned long flags;
         struct page *page;

-        if (likely(order == 0) && !in_interrupt()) {
+        if (likely(order == 0)) {
                 page = rmqueue_pcplist(preferred_zone, zone, order, gfp_flags, migratetype);
                 goto out;
         }
@@ -3107,8 +3093,7 @@ void warn_alloc(gfp_t gfp_mask, nodemask_t *nodemask, const char *fmt, ...)
         static DEFINE_RATELIMIT_STATE(nopage_rs, DEFAULT_RATELIMIT_INTERVAL,
                                       DEFAULT_RATELIMIT_BURST);

-        if ((gfp_mask & __GFP_NOWARN) || !__ratelimit(&nopage_rs) ||
-                        debug_guardpage_minorder() > 0)
+        if ((gfp_mask & __GFP_NOWARN) || !__ratelimit(&nopage_rs))
                 return;

         pr_warn("%s: ", current->comm);
@@ -3519,19 +3504,12 @@ bool gfp_pfmemalloc_allowed(gfp_t gfp_mask)
 }

 /*
- * Maximum number of reclaim retries without any progress before OOM killer
- * is consider as the only way to move forward.
- */
-#define MAX_RECLAIM_RETRIES 16
-
-/*
  * Checks whether it makes sense to retry the reclaim to make a forward progress
  * for the given allocation request.
- * The reclaim feedback represented by did_some_progress (any progress during
- * the last reclaim round) and no_progress_loops (number of reclaim rounds without
- * any progress in a row) is considered as well as the reclaimable pages on the
- * applicable zone list (with a backoff mechanism which is a function of
- * no_progress_loops).
+ *
+ * We give up when we either have tried MAX_RECLAIM_RETRIES in a row
+ * without success, or when we couldn't even meet the watermark if we
+ * reclaimed all remaining pages on the LRU lists.
  *
  * Returns true if a retry is viable or false to enter the oom path.
  */
@@ -3576,13 +3554,11 @@ should_reclaim_retry(gfp_t gfp_mask, unsigned order,
                 bool wmark;

                 available = reclaimable = zone_reclaimable_pages(zone);
-                available -= DIV_ROUND_UP((*no_progress_loops) * available,
-                                          MAX_RECLAIM_RETRIES);
                 available += zone_page_state_snapshot(zone, NR_FREE_PAGES);

                 /*
-                 * Would the allocation succeed if we reclaimed the whole
-                 * available?
+                 * Would the allocation succeed if we reclaimed all
+                 * reclaimable pages?
                  */
                 wmark = __zone_watermark_ok(zone, order, min_wmark,
                                 ac_classzone_idx(ac), alloc_flags, available);
@@ -3768,7 +3744,7 @@ retry:

         /* Make sure we know about allocations which stall for too long */
         if (time_after(jiffies, alloc_start + stall_timeout)) {
-                warn_alloc(gfp_mask, ac->nodemask,
+                warn_alloc(gfp_mask & ~__GFP_NOWARN, ac->nodemask,
                         "page allocation stalls for %ums, order:%u",
                         jiffies_to_msecs(jiffies-alloc_start), order);
                 stall_timeout += 10 * HZ;
@@ -3968,10 +3944,12 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
                 goto out;

         /*
-         * Runtime PM, block IO and its error handling path can deadlock
-         * because I/O on the device might not complete.
+         * Apply scoped allocation constraints. This is mainly about GFP_NOFS
+         * resp. GFP_NOIO which has to be inherited for all allocation requests
+         * from a particular context which has been marked by
+         * memalloc_no{fs,io}_{save,restore}.
          */
-        alloc_mask = memalloc_noio_flags(gfp_mask);
+        alloc_mask = current_gfp_context(gfp_mask);
         ac.spread_dirty_pages = false;

         /*
@@ -4244,7 +4222,8 @@ EXPORT_SYMBOL(free_pages_exact);
  * nr_free_zone_pages() counts the number of counts pages which are beyond the
  * high watermark within all zones at or below a given zone index. For each
  * zone, the number of pages is calculated as:
- *     managed_pages - high_pages
+ *
+ *     nr_free_zone_pages = managed_pages - high_pages
  */
 static unsigned long nr_free_zone_pages(int offset)
 {
@@ -4506,7 +4485,6 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
 #endif
                         " writeback_tmp:%lukB"
                         " unstable:%lukB"
-                        " pages_scanned:%lu"
                         " all_unreclaimable? %s"
                         "\n",
                         pgdat->node_id,
@@ -4520,17 +4498,17 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
                         K(node_page_state(pgdat, NR_FILE_MAPPED)),
                         K(node_page_state(pgdat, NR_FILE_DIRTY)),
                         K(node_page_state(pgdat, NR_WRITEBACK)),
+                        K(node_page_state(pgdat, NR_SHMEM)),
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
                         K(node_page_state(pgdat, NR_SHMEM_THPS) * HPAGE_PMD_NR),
                         K(node_page_state(pgdat, NR_SHMEM_PMDMAPPED)
                                         * HPAGE_PMD_NR),
                         K(node_page_state(pgdat, NR_ANON_THPS) * HPAGE_PMD_NR),
 #endif
-                        K(node_page_state(pgdat, NR_SHMEM)),
                         K(node_page_state(pgdat, NR_WRITEBACK_TEMP)),
                         K(node_page_state(pgdat, NR_UNSTABLE_NFS)),
-                        node_page_state(pgdat, NR_PAGES_SCANNED),
-                        !pgdat_reclaimable(pgdat) ? "yes" : "no");
+                        pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES ?
+                                "yes" : "no");
         }

         for_each_populated_zone(zone) {
@@ -7425,7 +7403,7 @@ int alloc_contig_range(unsigned long start, unsigned long end,
                 .zone = page_zone(pfn_to_page(start)),
                 .mode = MIGRATE_SYNC,
                 .ignore_skip_hint = true,
-                .gfp_mask = memalloc_noio_flags(gfp_mask),
+                .gfp_mask = current_gfp_context(gfp_mask),
         };
         INIT_LIST_HEAD(&cc.migratepages);
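
A note on the recurring locking change above: interrupt masking moves out of the zone-lock helpers and into their callers. free_pcppages_bulk(), free_one_page() and rmqueue_bulk() now take a plain spin_lock(&zone->lock), while __free_pages_ok(), free_hot_cold_page() and rmqueue_pcplist() bracket the whole operation, including the PGFREE/PGALLOC accounting, with local_irq_save()/local_irq_restore() instead of preempt_disable()/preempt_enable(). Because the per-cpu lists are IRQ-safe again, the in_interrupt() special cases and the VM_BUG_ON(in_interrupt()) can go. Below is a minimal, userspace-compilable sketch of that calling convention; the stub types and functions only stand in for the kernel primitives (the real local_irq_save() is a macro taking flags by name, not a function taking a pointer), so treat it as an illustration, not kernel code.

#include <stdio.h>

/* Stubs standing in for kernel primitives -- illustrative only. */
typedef struct { int locked; } spinlock_t;
struct zone { spinlock_t lock; };

static int irqs_enabled = 1;

static void local_irq_save(unsigned long *flags)  /* kernel: macro, by name */
{
        *flags = (unsigned long)irqs_enabled;
        irqs_enabled = 0;
}

static void local_irq_restore(unsigned long flags)
{
        irqs_enabled = (int)flags;
}

static void spin_lock(spinlock_t *l)   { l->locked = 1; }
static void spin_unlock(spinlock_t *l) { l->locked = 0; }

/* After this diff: the helper takes a plain spin_lock() and relies on the
 * caller having already disabled interrupts (it used spin_lock_irqsave()
 * before). */
static void free_one_page(struct zone *zone)
{
        spin_lock(&zone->lock);
        /* ... __free_one_page() buddy-merge work would happen here ... */
        spin_unlock(&zone->lock);
}

/* The caller owns the interrupt state for the whole operation, so the
 * PGFREE accounting and the zone-lock section share one IRQ-off window
 * instead of nesting a second save/restore inside. */
static void __free_pages_ok(struct zone *zone)
{
        unsigned long flags;

        local_irq_save(&flags);
        /* __count_vm_events(PGFREE, 1 << order) is accounted here */
        free_one_page(zone);
        local_irq_restore(flags);
}

int main(void)
{
        struct zone z = { { 0 } };

        __free_pages_ok(&z);
        printf("irqs enabled again: %d\n", irqs_enabled);
        return 0;
}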
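
Similarly, the should_reclaim_retry() hunks replace the gradual backoff, which discounted available by no_progress_loops/MAX_RECLAIM_RETRIES each round, with the simpler rule stated in the new comment: retry until MAX_RECLAIM_RETRIES rounds pass without progress, or stop early once the watermark could not be met even if every reclaimable page were freed. A hedged, self-contained sketch of that decision follows; the helper below is a simplified stand-in for __zone_watermark_ok(), and all parameter names are illustrative rather than the kernel's.

#include <stdbool.h>

#define MAX_RECLAIM_RETRIES 16  /* moved out of page_alloc.c by this diff */

/* Simplified stand-in for __zone_watermark_ok(): could the request be
 * satisfied while still keeping min_wmark pages free? */
static bool watermark_would_be_met(unsigned long available,
                                   unsigned long min_wmark,
                                   unsigned long request)
{
        return available > min_wmark + request;
}

static bool should_reclaim_retry(unsigned long reclaimable,
                                 unsigned long free,
                                 unsigned long min_wmark,
                                 unsigned long request,
                                 int no_progress_loops)
{
        /* Give up after MAX_RECLAIM_RETRIES rounds without progress... */
        if (no_progress_loops > MAX_RECLAIM_RETRIES)
                return false;

        /* ...or when even reclaiming everything could not meet the mark. */
        return watermark_would_be_met(reclaimable + free, min_wmark, request);
}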