From 1e8537baacd59e96bbe5f8d3d32feafd11f509fe Mon Sep 17 00:00:00 2001 From: Xishi Qiu Date: Mon, 8 Oct 2012 16:31:51 -0700 Subject: memory-hotplug: build zonelists when offlining pages online_pages() does build_all_zonelists() and zone_pcp_update(), I think offline_pages() should do it too. When the zone has no memory to allocate, remove it from other nodes' zonelists. zone_batchsize() depends on zone's present pages, if zone's present pages are changed, zone's pcp should be updated. Signed-off-by: Xishi Qiu Cc: Yasuaki Ishimatsu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory_hotplug.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'mm/memory_hotplug.c') diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 6a5b90d0cfd7..b35016156c19 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -970,8 +970,13 @@ repeat: init_per_zone_wmark_min(); - if (!populated_zone(zone)) + if (!populated_zone(zone)) { zone_pcp_reset(zone); + mutex_lock(&zonelists_mutex); + build_all_zonelists(NULL, NULL); + mutex_unlock(&zonelists_mutex); + } else + zone_pcp_update(zone); if (!node_present_pages(node)) { node_clear_state(node, N_HIGH_MEMORY); -- cgit v1.2.3 From 74c08f982674cfd5dfeb2702d631db9bcdabf788 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Mon, 8 Oct 2012 16:32:54 -0700 Subject: memory-hotplug: don't replace lowmem pages with highmem The changelog for commit 6a6dccba2fdc ("mm: cma: don't replace lowmem pages with highmem") mentioned that lowmem pages can be replaced by highmem pages during CMA migration. 6a6dccba2fdc fixed that issue. Quote from that changelog: : The filesystem layer expects pages in the block device's mapping to not : be in highmem (the mapping's gfp mask is set in bdget()), but CMA can : currently replace lowmem pages with highmem pages, leading to crashes in : filesystem code such as the one below: : : Unable to handle kernel NULL pointer dereference at virtual address 00000400 : pgd = c0c98000 : [00000400] *pgd=00c91831, *pte=00000000, *ppte=00000000 : Internal error: Oops: 817 [#1] PREEMPT SMP ARM : CPU: 0 Not tainted (3.5.0-rc5+ #80) : PC is at __memzero+0x24/0x80 : ... : Process fsstress (pid: 323, stack limit = 0xc0cbc2f0) : Backtrace: : [] (ext4_getblk+0x0/0x180) from [] (ext4_bread+0x1c/0x98) : [] (ext4_bread+0x0/0x98) from [] (ext4_mkdir+0x160/0x3bc) : r4:c15337f0 : [] (ext4_mkdir+0x0/0x3bc) from [] (vfs_mkdir+0x8c/0x98) : [] (vfs_mkdir+0x0/0x98) from [] (sys_mkdirat+0x74/0xac) : r6:00000000 r5:c152eb40 r4:000001ff r3:c14b43f0 : [] (sys_mkdirat+0x0/0xac) from [] (sys_mkdir+0x20/0x24) : r6:beccdcf0 r5:00074000 r4:beccdbbc : [] (sys_mkdir+0x0/0x24) from [] (ret_fast_syscall+0x0/0x30) Memory-hotplug has same problem as CMA has so the same fix can be applied to memory-hotplug as well. Fix it by reusing. Signed-off-by: Minchan Kim Cc: Kamezawa Hiroyuki Reviewed-by: Yasuaki Ishimatsu Acked-by: Michal Nazarewicz Cc: Marek Szyprowski Cc: Wen Congyang Acked-by: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory_hotplug.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) (limited to 'mm/memory_hotplug.c') diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index b35016156c19..f9ac0955e10a 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -756,13 +756,6 @@ static unsigned long scan_lru_pages(unsigned long start, unsigned long end) return 0; } -static struct page * -hotremove_migrate_alloc(struct page *page, unsigned long private, int **x) -{ - /* This should be improooooved!! */ - return alloc_page(GFP_HIGHUSER_MOVABLE); -} - #define NR_OFFLINE_AT_ONCE_PAGES (256) static int do_migrate_range(unsigned long start_pfn, unsigned long end_pfn) @@ -813,8 +806,12 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn) putback_lru_pages(&source); goto out; } - /* this function returns # of failed pages */ - ret = migrate_pages(&source, hotremove_migrate_alloc, 0, + + /* + * alloc_migrate_target should be improooooved!! + * migrate_pages returns # of failed pages. + */ + ret = migrate_pages(&source, alloc_migrate_target, 0, true, MIGRATE_SYNC); if (ret) putback_lru_pages(&source); -- cgit v1.2.3 From 7f1290f2f2a4d2c3f1b7ce8e87256e052ca23125 Mon Sep 17 00:00:00 2001 From: Jianguo Wu Date: Mon, 8 Oct 2012 16:33:06 -0700 Subject: mm: fix-up zone present pages I think zone->present_pages indicates pages that buddy system can management, it should be: zone->present_pages = spanned pages - absent pages - bootmem pages, but is now: zone->present_pages = spanned pages - absent pages - memmap pages. spanned pages: total size, including holes. absent pages: holes. bootmem pages: pages used in system boot, managed by bootmem allocator. memmap pages: pages used by page structs. This may cause zone->present_pages less than it should be. For example, numa node 1 has ZONE_NORMAL and ZONE_MOVABLE, it's memmap and other bootmem will be allocated from ZONE_MOVABLE, so ZONE_NORMAL's present_pages should be spanned pages - absent pages, but now it also minus memmap pages(free_area_init_core), which are actually allocated from ZONE_MOVABLE. When offlining all memory of a zone, this will cause zone->present_pages less than 0, because present_pages is unsigned long type, it is actually a very large integer, it indirectly caused zone->watermark[WMARK_MIN] becomes a large integer(setup_per_zone_wmarks()), than cause totalreserve_pages become a large integer(calculate_totalreserve_pages()), and finally cause memory allocating failure when fork process(__vm_enough_memory()). [root@localhost ~]# dmesg -bash: fork: Cannot allocate memory I think the bug described in http://marc.info/?l=linux-mm&m=134502182714186&w=2 is also caused by wrong zone present pages. This patch intends to fix-up zone->present_pages when memory are freed to buddy system on x86_64 and IA64 platforms. Signed-off-by: Jianguo Wu Signed-off-by: Jiang Liu Reported-by: Petr Tesarik Tested-by: Petr Tesarik Cc: "Luck, Tony" Cc: Mel Gorman Cc: Yinghai Lu Cc: Minchan Kim Cc: Johannes Weiner Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory_hotplug.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'mm/memory_hotplug.c') diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index f9ac0955e10a..ce690a911f1b 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -106,6 +106,7 @@ static void get_page_bootmem(unsigned long info, struct page *page, void __ref put_page_bootmem(struct page *page) { unsigned long type; + struct zone *zone; type = (unsigned long) page->lru.next; BUG_ON(type < MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE || @@ -116,6 +117,12 @@ void __ref put_page_bootmem(struct page *page) set_page_private(page, 0); INIT_LIST_HEAD(&page->lru); __free_pages_bootmem(page, 0); + + zone = page_zone(page); + zone_span_writelock(zone); + zone->present_pages++; + zone_span_writeunlock(zone); + totalram_pages++; } } -- cgit v1.2.3 From a16cee10c7ab994546ed98d9abfd4de74050124a Mon Sep 17 00:00:00 2001 From: Wen Congyang Date: Mon, 8 Oct 2012 16:33:58 -0700 Subject: memory-hotplug: preparation to notify memory block's state at memory hot remove remove_memory() is called in two cases: 1. echo offline >/sys/devices/system/memory/memoryXX/state 2. hot remove a memory device In the 1st case, the memory block's state is changed and the notification that memory block's state changed is sent to userland after calling remove_memory(). So user can notice memory block is changed. But in the 2nd case, the memory block's state is not changed and the notification is not also sent to userspcae even if calling remove_memory(). So user cannot notice memory block is changed. For adding the notification at memory hot remove, the patch just prepare as follows: 1st case uses offline_pages() for offlining memory. 2nd case uses remove_memory() for offlining memory and changing memory block's state and notifing the information. The patch does not implement notification to remove_memory(). Signed-off-by: Wen Congyang Signed-off-by: Yasuaki Ishimatsu Cc: David Rientjes Cc: Jiang Liu Cc: Len Brown Cc: Christoph Lameter Cc: Minchan Kim Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory_hotplug.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'mm/memory_hotplug.c') diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index ce690a911f1b..dfc0a6134c7c 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -874,7 +874,7 @@ check_pages_isolated(unsigned long start_pfn, unsigned long end_pfn) return offlined; } -static int __ref offline_pages(unsigned long start_pfn, +static int __ref __offline_pages(unsigned long start_pfn, unsigned long end_pfn, unsigned long timeout) { unsigned long pfn, nr_pages, expire; @@ -1007,15 +1007,24 @@ out: return ret; } +int offline_pages(unsigned long start_pfn, unsigned long nr_pages) +{ + return __offline_pages(start_pfn, start_pfn + nr_pages, 120 * HZ); +} + int remove_memory(u64 start, u64 size) { unsigned long start_pfn, end_pfn; start_pfn = PFN_DOWN(start); end_pfn = start_pfn + PFN_DOWN(size); - return offline_pages(start_pfn, end_pfn, 120 * HZ); + return __offline_pages(start_pfn, end_pfn, 120 * HZ); } #else +int offline_pages(unsigned long start_pfn, unsigned long nr_pages) +{ + return -EINVAL; +} int remove_memory(u64 start, u64 size) { return -EINVAL; -- cgit v1.2.3 From e90bdb7f52f94204c78fb40b0804645defdebd71 Mon Sep 17 00:00:00 2001 From: Wen Congyang Date: Mon, 8 Oct 2012 16:34:01 -0700 Subject: memory-hotplug: update memory block's state and notify userspace remove_memory() will be called when hot removing a memory device. But even if offlining memory, we cannot notice it. So the patch updates the memory block's state and sends notification to userspace. Additionally, the memory device may contain more than one memory block. If the memory block has been offlined, __offline_pages() will fail. So we should try to offline one memory block at a time. Thus remove_memory() also check each memory block's state. So there is no need to check the memory block's state before calling remove_memory(). Signed-off-by: Wen Congyang Signed-off-by: Yasuaki Ishimatsu Cc: David Rientjes Cc: Jiang Liu Cc: Len Brown Cc: Christoph Lameter Cc: Minchan Kim Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory_hotplug.c | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) (limited to 'mm/memory_hotplug.c') diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index dfc0a6134c7c..7d0797475a47 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1014,11 +1014,42 @@ int offline_pages(unsigned long start_pfn, unsigned long nr_pages) int remove_memory(u64 start, u64 size) { + struct memory_block *mem = NULL; + struct mem_section *section; unsigned long start_pfn, end_pfn; + unsigned long pfn, section_nr; + int ret; start_pfn = PFN_DOWN(start); end_pfn = start_pfn + PFN_DOWN(size); - return __offline_pages(start_pfn, end_pfn, 120 * HZ); + + for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) { + section_nr = pfn_to_section_nr(pfn); + if (!present_section_nr(section_nr)) + continue; + + section = __nr_to_section(section_nr); + /* same memblock? */ + if (mem) + if ((section_nr >= mem->start_section_nr) && + (section_nr <= mem->end_section_nr)) + continue; + + mem = find_memory_block_hinted(section, mem); + if (!mem) + continue; + + ret = offline_memory_block(mem); + if (ret) { + kobject_put(&mem->dev.kobj); + return ret; + } + } + + if (mem) + kobject_put(&mem->dev.kobj); + + return 0; } #else int offline_pages(unsigned long start_pfn, unsigned long nr_pages) -- cgit v1.2.3 From d760afd4d2570653891f94e13b848e97150dc5a6 Mon Sep 17 00:00:00 2001 From: Yasuaki Ishimatsu Date: Mon, 8 Oct 2012 16:34:14 -0700 Subject: memory-hotplug: suppress "Trying to free nonexistent resource " warning When our x86 box calls __remove_pages(), release_mem_region() shows many warnings. And x86 box cannot unregister iomem_resource. "Trying to free nonexistent resource " release_mem_region() has been changed to be called in each PAGES_PER_SECTION by commit de7f0cba9678 ("memory hotplug: release memory regions in PAGES_PER_SECTION chunks"). Because powerpc registers iomem_resource in each PAGES_PER_SECTION chunk. But when I hot add memory on x86 box, iomem_resource is register in each _CRS not PAGES_PER_SECTION chunk. So x86 box unregisters iomem_resource. The patch fixes the problem. Signed-off-by: Yasuaki Ishimatsu Cc: David Rientjes Cc: Jiang Liu Cc: Len Brown Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Christoph Lameter Cc: Minchan Kim Cc: Andrew Morton Cc: KOSAKI Motohiro Cc: Wen Congyang Cc: Dave Hansen Cc: Nathan Fontenot Cc: Badari Pulavarty Cc: Yasunori Goto Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory_hotplug.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'mm/memory_hotplug.c') diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 7d0797475a47..56b758ae57d2 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -369,11 +369,11 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn, BUG_ON(phys_start_pfn & ~PAGE_SECTION_MASK); BUG_ON(nr_pages % PAGES_PER_SECTION); + release_mem_region(phys_start_pfn << PAGE_SHIFT, nr_pages * PAGE_SIZE); + sections_to_remove = nr_pages / PAGES_PER_SECTION; for (i = 0; i < sections_to_remove; i++) { unsigned long pfn = phys_start_pfn + i*PAGES_PER_SECTION; - release_mem_region(pfn << PAGE_SHIFT, - PAGES_PER_SECTION << PAGE_SHIFT); ret = __remove_section(zone, __pfn_to_section(pfn)); if (ret) break; -- cgit v1.2.3