aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
authorrogersb11 <brettrogers11@gmail.com>2015-11-10 11:19:31 -0600
committerrogersb11 <brettrogers11@gmail.com>2015-11-10 14:01:25 -0500
commitb4d16f70c34ecb908c8a61e58ea51fecdd4a4b10 (patch)
tree973e9aeb97bb13497d17a43e6f3c070381bc5b87 /mm
parentd177fbc2f0c263b06c18bda2eb46200a31bcbebd (diff)
parent5dba9ddd98cbc7ad319d687887981a0ea0062c75 (diff)
downloadkernel_samsung_smdk4412-b4d16f70c34ecb908c8a61e58ea51fecdd4a4b10.tar.gz
kernel_samsung_smdk4412-b4d16f70c34ecb908c8a61e58ea51fecdd4a4b10.tar.bz2
kernel_samsung_smdk4412-b4d16f70c34ecb908c8a61e58ea51fecdd4a4b10.zip
Merge remote-tracking branch 'korg/linux-3.0.y' into cm-13.0
Conflicts: crypto/algapi.c drivers/gpu/drm/i915/i915_debugfs.c drivers/gpu/drm/i915/intel_display.c drivers/video/fbmem.c include/linux/nls.h kernel/cgroup.c kernel/signal.c kernel/timeconst.pl net/ipv4/ping.c Change-Id: I1f532925d1743df74d66bcdd6fc92f05c72ee0dd
Diffstat (limited to 'mm')
-rw-r--r--mm/fadvise.c18
-rw-r--r--mm/huge_memory.c9
-rw-r--r--mm/hugetlb.c39
-rw-r--r--mm/memcontrol.c8
-rw-r--r--mm/memory.c47
-rw-r--r--mm/memory_hotplug.c15
-rw-r--r--mm/migrate.c25
-rw-r--r--mm/mmap.c2
-rw-r--r--mm/mmu_notifier.c121
-rw-r--r--mm/nommu.c12
-rw-r--r--mm/page_alloc.c19
-rw-r--r--mm/shmem.c10
-rw-r--r--mm/swap_state.c18
13 files changed, 263 insertions, 80 deletions
diff --git a/mm/fadvise.c b/mm/fadvise.c
index 8d723c9e8b7..35b2bb089a1 100644
--- a/mm/fadvise.c
+++ b/mm/fadvise.c
@@ -17,6 +17,7 @@
#include <linux/fadvise.h>
#include <linux/writeback.h>
#include <linux/syscalls.h>
+#include <linux/swap.h>
#include <asm/unistd.h>
@@ -123,9 +124,22 @@ SYSCALL_DEFINE(fadvise64_64)(int fd, loff_t offset, loff_t len, int advice)
start_index = (offset+(PAGE_CACHE_SIZE-1)) >> PAGE_CACHE_SHIFT;
end_index = (endbyte >> PAGE_CACHE_SHIFT);
- if (end_index >= start_index)
- invalidate_mapping_pages(mapping, start_index,
+ if (end_index >= start_index) {
+ unsigned long count = invalidate_mapping_pages(mapping,
+ start_index, end_index);
+
+ /*
+ * If fewer pages were invalidated than expected then
+ * it is possible that some of the pages were on
+ * a per-cpu pagevec for a remote CPU. Drain all
+ * pagevecs and try again.
+ */
+ if (count < (end_index - start_index + 1)) {
+ lru_add_drain_all();
+ invalidate_mapping_pages(mapping, start_index,
end_index);
+ }
+ }
break;
default:
ret = -EINVAL;
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index a9ab45ec7d5..78f71869c4c 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1838,6 +1838,8 @@ static void collapse_huge_page(struct mm_struct *mm,
goto out;
vma = find_vma(mm, address);
+ if (!vma)
+ goto out;
hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
hend = vma->vm_end & HPAGE_PMD_MASK;
if (address < hstart || address + HPAGE_PMD_SIZE > hend)
@@ -1893,7 +1895,12 @@ static void collapse_huge_page(struct mm_struct *mm,
pte_unmap(pte);
spin_lock(&mm->page_table_lock);
BUG_ON(!pmd_none(*pmd));
- set_pmd_at(mm, address, pmd, _pmd);
+ /*
+ * We can only use set_pmd_at when establishing
+ * hugepmds and never for establishing regular pmds that
+ * points to regular pagetables. Use pmd_populate for that
+ */
+ pmd_populate(mm, pmd, pmd_pgtable(_pmd));
spin_unlock(&mm->page_table_lock);
anon_vma_unlock(vma->anon_vma);
goto out;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 037f077b986..6fdad258877 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -602,6 +602,23 @@ int PageHuge(struct page *page)
EXPORT_SYMBOL_GPL(PageHuge);
+pgoff_t __basepage_index(struct page *page)
+{
+ struct page *page_head = compound_head(page);
+ pgoff_t index = page_index(page_head);
+ unsigned long compound_idx;
+
+ if (!PageHuge(page_head))
+ return page_index(page);
+
+ if (compound_order(page_head) >= MAX_ORDER)
+ compound_idx = page_to_pfn(page) - page_to_pfn(page_head);
+ else
+ compound_idx = page - page_head;
+
+ return (index << compound_order(page_head)) + compound_idx;
+}
+
static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid)
{
struct page *page;
@@ -2006,8 +2023,12 @@ int hugetlb_report_node_meminfo(int nid, char *buf)
/* Return the number pages of memory we physically have, in PAGE_SIZE units. */
unsigned long hugetlb_total_pages(void)
{
- struct hstate *h = &default_hstate;
- return h->nr_huge_pages * pages_per_huge_page(h);
+ struct hstate *h;
+ unsigned long nr_total_pages = 0;
+
+ for_each_hstate(h)
+ nr_total_pages += h->nr_huge_pages * pages_per_huge_page(h);
+ return nr_total_pages;
}
static int hugetlb_acct_memory(struct hstate *h, long delta)
@@ -2658,7 +2679,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
if (ptep) {
entry = huge_ptep_get(ptep);
if (unlikely(is_hugetlb_entry_migration(entry))) {
- migration_entry_wait(mm, (pmd_t *)ptep, address);
+ migration_entry_wait_huge(mm, ptep);
return 0;
} else if (unlikely(is_hugetlb_entry_hwpoisoned(entry)))
return VM_FAULT_HWPOISON_LARGE |
@@ -2796,7 +2817,17 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
break;
}
- if (absent ||
+ /*
+ * We need call hugetlb_fault for both hugepages under migration
+ * (in which case hugetlb_fault waits for the migration,) and
+ * hwpoisoned hugepages (in which case we need to prevent the
+ * caller from accessing to them.) In order to do this, we use
+ * here is_swap_pte instead of is_hugetlb_entry_migration and
+ * is_hugetlb_entry_hwpoisoned. This is because it simply covers
+ * both cases, and because we can't follow correct pages
+ * directly from any kind of swap entries.
+ */
+ if (absent || is_swap_pte(huge_ptep_get(pte)) ||
((flags & FOLL_WRITE) && !pte_write(huge_ptep_get(pte)))) {
int ret;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 57cdf5ad692..d7b51d5bc0b 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4433,7 +4433,13 @@ static int compare_thresholds(const void *a, const void *b)
const struct mem_cgroup_threshold *_a = a;
const struct mem_cgroup_threshold *_b = b;
- return _a->threshold - _b->threshold;
+ if (_a->threshold > _b->threshold)
+ return 1;
+
+ if (_a->threshold < _b->threshold)
+ return -1;
+
+ return 0;
}
static int mem_cgroup_oom_notify_cb(struct mem_cgroup *mem)
diff --git a/mm/memory.c b/mm/memory.c
index 6204ce20090..5331e67685d 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2374,6 +2374,53 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
}
EXPORT_SYMBOL(remap_pfn_range);
+/**
+ * vm_iomap_memory - remap memory to userspace
+ * @vma: user vma to map to
+ * @start: start of area
+ * @len: size of area
+ *
+ * This is a simplified io_remap_pfn_range() for common driver use. The
+ * driver just needs to give us the physical memory range to be mapped,
+ * we'll figure out the rest from the vma information.
+ *
+ * NOTE! Some drivers might want to tweak vma->vm_page_prot first to get
+ * whatever write-combining details or similar.
+ */
+int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len)
+{
+ unsigned long vm_len, pfn, pages;
+
+ /* Check that the physical memory area passed in looks valid */
+ if (start + len < start)
+ return -EINVAL;
+ /*
+ * You *really* shouldn't map things that aren't page-aligned,
+ * but we've historically allowed it because IO memory might
+ * just have smaller alignment.
+ */
+ len += start & ~PAGE_MASK;
+ pfn = start >> PAGE_SHIFT;
+ pages = (len + ~PAGE_MASK) >> PAGE_SHIFT;
+ if (pfn + pages < pfn)
+ return -EINVAL;
+
+ /* We start the mapping 'vm_pgoff' pages into the area */
+ if (vma->vm_pgoff > pages)
+ return -EINVAL;
+ pfn += vma->vm_pgoff;
+ pages -= vma->vm_pgoff;
+
+ /* Can we fit all of the mapping? */
+ vm_len = vma->vm_end - vma->vm_start;
+ if (vm_len >> PAGE_SHIFT > pages)
+ return -EINVAL;
+
+ /* Ok, let it rip */
+ return io_remap_pfn_range(vma, vma->vm_start, pfn, vm_len, vma->vm_page_prot);
+}
+EXPORT_SYMBOL(vm_iomap_memory);
+
static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
unsigned long addr, unsigned long end,
pte_fn_t fn, void *data)
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 9fcd1b92f94..94463bcfbe7 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -453,19 +453,20 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages)
zone->present_pages += onlined_pages;
zone->zone_pgdat->node_present_pages += onlined_pages;
- if (need_zonelists_rebuild)
- build_all_zonelists(zone);
- else
- zone_pcp_update(zone);
+ if (onlined_pages) {
+ node_set_state(zone_to_nid(zone), N_HIGH_MEMORY);
+ if (need_zonelists_rebuild)
+ build_all_zonelists(zone);
+ else
+ zone_pcp_update(zone);
+ }
mutex_unlock(&zonelists_mutex);
init_per_zone_wmark_min();
- if (onlined_pages) {
+ if (onlined_pages)
kswapd_run(zone_to_nid(zone));
- node_set_state(zone_to_nid(zone), N_HIGH_MEMORY);
- }
vm_total_pages = nr_free_pagecache_pages();
diff --git a/mm/migrate.c b/mm/migrate.c
index 480714b6f3f..3e315a7e4c8 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -147,7 +147,7 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
if (PageHuge(new))
pte = pte_mkhuge(pte);
#endif
- flush_cache_page(vma, addr, pte_pfn(pte));
+ flush_dcache_page(new);
set_pte_at(mm, addr, ptep, pte);
if (PageHuge(new)) {
@@ -184,15 +184,14 @@ static void remove_migration_ptes(struct page *old, struct page *new)
*
* This function is called from do_swap_page().
*/
-void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
- unsigned long address)
+static void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep,
+ spinlock_t *ptl)
{
- pte_t *ptep, pte;
- spinlock_t *ptl;
+ pte_t pte;
swp_entry_t entry;
struct page *page;
- ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
+ spin_lock(ptl);
pte = *ptep;
if (!is_swap_pte(pte))
goto out;
@@ -220,6 +219,20 @@ out:
pte_unmap_unlock(ptep, ptl);
}
+void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
+ unsigned long address)
+{
+ spinlock_t *ptl = pte_lockptr(mm, pmd);
+ pte_t *ptep = pte_offset_map(pmd, address);
+ __migration_entry_wait(mm, ptep, ptl);
+}
+
+void migration_entry_wait_huge(struct mm_struct *mm, pte_t *pte)
+{
+ spinlock_t *ptl = &(mm)->page_table_lock;
+ __migration_entry_wait(mm, pte, ptl);
+}
+
#ifdef CONFIG_BLOCK
/* Returns true if all buffers are successfully locked */
static bool buffer_migrate_lock_buffers(struct buffer_head *head,
diff --git a/mm/mmap.c b/mm/mmap.c
index d49736ff8a8..d1cf520e511 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1581,7 +1581,7 @@ struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
if (mm) {
/* Check the cache first. */
/* (Cache hit rate is typically around 35%.) */
- vma = mm->mmap_cache;
+ vma = ACCESS_ONCE(mm->mmap_cache);
if (!(vma && vma->vm_end > addr && vma->vm_start <= addr)) {
struct rb_node * rb_node;
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index 71c78115c45..2b52947e48f 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -14,10 +14,14 @@
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/err.h>
+#include <linux/srcu.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/slab.h>
+/* global SRCU for all MMs */
+static struct srcu_struct srcu;
+
/*
* This function can't run concurrently against mmu_notifier_register
* because mm->mm_users > 0 during mmu_notifier_register and exit_mmap
@@ -25,32 +29,32 @@
* in parallel despite there being no task using this mm any more,
* through the vmas outside of the exit_mmap context, such as with
* vmtruncate. This serializes against mmu_notifier_unregister with
- * the mmu_notifier_mm->lock in addition to RCU and it serializes
- * against the other mmu notifiers with RCU. struct mmu_notifier_mm
+ * the mmu_notifier_mm->lock in addition to SRCU and it serializes
+ * against the other mmu notifiers with SRCU. struct mmu_notifier_mm
* can't go away from under us as exit_mmap holds an mm_count pin
* itself.
*/
void __mmu_notifier_release(struct mm_struct *mm)
{
struct mmu_notifier *mn;
- struct hlist_node *n;
+ struct hlist_node *node;
+ int id;
/*
- * RCU here will block mmu_notifier_unregister until
+ * SRCU here will block mmu_notifier_unregister until
* ->release returns.
*/
- rcu_read_lock();
- hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist)
+ id = srcu_read_lock(&srcu);
+ hlist_for_each_entry_rcu(mn, node, &mm->mmu_notifier_mm->list, hlist)
/*
- * if ->release runs before mmu_notifier_unregister it
- * must be handled as it's the only way for the driver
- * to flush all existing sptes and stop the driver
- * from establishing any more sptes before all the
- * pages in the mm are freed.
+ * If ->release runs before mmu_notifier_unregister it must be
+ * handled, as it's the only way for the driver to flush all
+ * existing sptes and stop the driver from establishing any more
+ * sptes before all the pages in the mm are freed.
*/
if (mn->ops->release)
mn->ops->release(mn, mm);
- rcu_read_unlock();
+ srcu_read_unlock(&srcu, id);
spin_lock(&mm->mmu_notifier_mm->lock);
while (unlikely(!hlist_empty(&mm->mmu_notifier_mm->list))) {
@@ -59,24 +63,24 @@ void __mmu_notifier_release(struct mm_struct *mm)
hlist);
/*
* We arrived before mmu_notifier_unregister so
- * mmu_notifier_unregister will do nothing other than
- * to wait ->release to finish and
- * mmu_notifier_unregister to return.
+ * mmu_notifier_unregister will do nothing other than to wait
+ * for ->release to finish and for mmu_notifier_unregister to
+ * return.
*/
hlist_del_init_rcu(&mn->hlist);
}
spin_unlock(&mm->mmu_notifier_mm->lock);
/*
- * synchronize_rcu here prevents mmu_notifier_release to
- * return to exit_mmap (which would proceed freeing all pages
- * in the mm) until the ->release method returns, if it was
- * invoked by mmu_notifier_unregister.
+ * synchronize_srcu here prevents mmu_notifier_release from returning to
+ * exit_mmap (which would proceed with freeing all pages in the mm)
+ * until the ->release method returns, if it was invoked by
+ * mmu_notifier_unregister.
*
- * The mmu_notifier_mm can't go away from under us because one
- * mm_count is hold by exit_mmap.
+ * The mmu_notifier_mm can't go away from under us because one mm_count
+ * is held by exit_mmap.
*/
- synchronize_rcu();
+ synchronize_srcu(&srcu);
}
/*
@@ -89,14 +93,14 @@ int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
{
struct mmu_notifier *mn;
struct hlist_node *n;
- int young = 0;
+ int young = 0, id;
- rcu_read_lock();
+ id = srcu_read_lock(&srcu);
hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
if (mn->ops->clear_flush_young)
young |= mn->ops->clear_flush_young(mn, mm, address);
}
- rcu_read_unlock();
+ srcu_read_unlock(&srcu, id);
return young;
}
@@ -106,9 +110,9 @@ int __mmu_notifier_test_young(struct mm_struct *mm,
{
struct mmu_notifier *mn;
struct hlist_node *n;
- int young = 0;
+ int young = 0, id;
- rcu_read_lock();
+ id = srcu_read_lock(&srcu);
hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
if (mn->ops->test_young) {
young = mn->ops->test_young(mn, mm, address);
@@ -116,7 +120,7 @@ int __mmu_notifier_test_young(struct mm_struct *mm,
break;
}
}
- rcu_read_unlock();
+ srcu_read_unlock(&srcu, id);
return young;
}
@@ -126,8 +130,9 @@ void __mmu_notifier_change_pte(struct mm_struct *mm, unsigned long address,
{
struct mmu_notifier *mn;
struct hlist_node *n;
+ int id;
- rcu_read_lock();
+ id = srcu_read_lock(&srcu);
hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
if (mn->ops->change_pte)
mn->ops->change_pte(mn, mm, address, pte);
@@ -138,7 +143,7 @@ void __mmu_notifier_change_pte(struct mm_struct *mm, unsigned long address,
else if (mn->ops->invalidate_page)
mn->ops->invalidate_page(mn, mm, address);
}
- rcu_read_unlock();
+ srcu_read_unlock(&srcu, id);
}
void __mmu_notifier_invalidate_page(struct mm_struct *mm,
@@ -146,13 +151,14 @@ void __mmu_notifier_invalidate_page(struct mm_struct *mm,
{
struct mmu_notifier *mn;
struct hlist_node *n;
+ int id;
- rcu_read_lock();
+ id = srcu_read_lock(&srcu);
hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
if (mn->ops->invalidate_page)
mn->ops->invalidate_page(mn, mm, address);
}
- rcu_read_unlock();
+ srcu_read_unlock(&srcu, id);
}
void __mmu_notifier_invalidate_range_start(struct mm_struct *mm,
@@ -160,13 +166,14 @@ void __mmu_notifier_invalidate_range_start(struct mm_struct *mm,
{
struct mmu_notifier *mn;
struct hlist_node *n;
+ int id;
- rcu_read_lock();
+ id = srcu_read_lock(&srcu);
hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
if (mn->ops->invalidate_range_start)
mn->ops->invalidate_range_start(mn, mm, start, end);
}
- rcu_read_unlock();
+ srcu_read_unlock(&srcu, id);
}
void __mmu_notifier_invalidate_range_end(struct mm_struct *mm,
@@ -174,13 +181,14 @@ void __mmu_notifier_invalidate_range_end(struct mm_struct *mm,
{
struct mmu_notifier *mn;
struct hlist_node *n;
+ int id;
- rcu_read_lock();
+ id = srcu_read_lock(&srcu);
hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
if (mn->ops->invalidate_range_end)
mn->ops->invalidate_range_end(mn, mm, start, end);
}
- rcu_read_unlock();
+ srcu_read_unlock(&srcu, id);
}
static int do_mmu_notifier_register(struct mmu_notifier *mn,
@@ -192,6 +200,12 @@ static int do_mmu_notifier_register(struct mmu_notifier *mn,
BUG_ON(atomic_read(&mm->mm_users) <= 0);
+ /*
+ * Verify that mmu_notifier_init() already run and the global srcu is
+ * initialized.
+ */
+ BUG_ON(!srcu.per_cpu_ref);
+
ret = -ENOMEM;
mmu_notifier_mm = kmalloc(sizeof(struct mmu_notifier_mm), GFP_KERNEL);
if (unlikely(!mmu_notifier_mm))
@@ -274,8 +288,8 @@ void __mmu_notifier_mm_destroy(struct mm_struct *mm)
/*
* This releases the mm_count pin automatically and frees the mm
* structure if it was the last user of it. It serializes against
- * running mmu notifiers with RCU and against mmu_notifier_unregister
- * with the unregister lock + RCU. All sptes must be dropped before
+ * running mmu notifiers with SRCU and against mmu_notifier_unregister
+ * with the unregister lock + SRCU. All sptes must be dropped before
* calling mmu_notifier_unregister. ->release or any other notifier
* method may be invoked concurrently with mmu_notifier_unregister,
* and only after mmu_notifier_unregister returned we're guaranteed
@@ -287,33 +301,44 @@ void mmu_notifier_unregister(struct mmu_notifier *mn, struct mm_struct *mm)
if (!hlist_unhashed(&mn->hlist)) {
/*
- * RCU here will force exit_mmap to wait ->release to finish
- * before freeing the pages.
+ * SRCU here will force exit_mmap to wait for ->release to
+ * finish before freeing the pages.
*/
- rcu_read_lock();
+ int id;
+ id = srcu_read_lock(&srcu);
/*
- * exit_mmap will block in mmu_notifier_release to
- * guarantee ->release is called before freeing the
- * pages.
+ * exit_mmap will block in mmu_notifier_release to guarantee
+ * that ->release is called before freeing the pages.
*/
if (mn->ops->release)
mn->ops->release(mn, mm);
- rcu_read_unlock();
+ srcu_read_unlock(&srcu, id);
spin_lock(&mm->mmu_notifier_mm->lock);
- hlist_del_rcu(&mn->hlist);
+ /*
+ * Can not use list_del_rcu() since __mmu_notifier_release
+ * can delete it before we hold the lock.
+ */
+ hlist_del_init_rcu(&mn->hlist);
spin_unlock(&mm->mmu_notifier_mm->lock);
}
/*
- * Wait any running method to finish, of course including
+ * Wait for any running method to finish, of course including
* ->release if it was run by mmu_notifier_relase instead of us.
*/
- synchronize_rcu();
+ synchronize_srcu(&srcu);
BUG_ON(atomic_read(&mm->mm_count) <= 0);
mmdrop(mm);
}
EXPORT_SYMBOL_GPL(mmu_notifier_unregister);
+
+static int __init mmu_notifier_init(void)
+{
+ return init_srcu_struct(&srcu);
+}
+
+module_init(mmu_notifier_init);
diff --git a/mm/nommu.c b/mm/nommu.c
index 5ff9b35883e..92429242a19 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -808,7 +808,7 @@ struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
struct vm_area_struct *vma;
/* check the cache first */
- vma = mm->mmap_cache;
+ vma = ACCESS_ONCE(mm->mmap_cache);
if (vma && vma->vm_start <= addr && vma->vm_end > addr)
return vma;
@@ -1826,6 +1826,16 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
}
EXPORT_SYMBOL(remap_pfn_range);
+int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len)
+{
+ unsigned long pfn = start >> PAGE_SHIFT;
+ unsigned long vm_len = vma->vm_end - vma->vm_start;
+
+ pfn += vma->vm_pgoff;
+ return io_remap_pfn_range(vma, vma->vm_start, pfn, vm_len, vma->vm_page_prot);
+}
+EXPORT_SYMBOL(vm_iomap_memory);
+
int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
unsigned long pgoff)
{
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d1a7b6d35dc..9450436fe1f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4379,10 +4379,11 @@ static void __meminit calculate_node_totalpages(struct pglist_data *pgdat,
* round what is now in bits to nearest long in bits, then return it in
* bytes.
*/
-static unsigned long __init usemap_size(unsigned long zonesize)
+static unsigned long __init usemap_size(unsigned long zone_start_pfn, unsigned long zonesize)
{
unsigned long usemapsize;
+ zonesize += zone_start_pfn & (pageblock_nr_pages-1);
usemapsize = roundup(zonesize, pageblock_nr_pages);
usemapsize = usemapsize >> pageblock_order;
usemapsize *= NR_PAGEBLOCK_BITS;
@@ -4392,17 +4393,19 @@ static unsigned long __init usemap_size(unsigned long zonesize)
}
static void __init setup_usemap(struct pglist_data *pgdat,
- struct zone *zone, unsigned long zonesize)
+ struct zone *zone,
+ unsigned long zone_start_pfn,
+ unsigned long zonesize)
{
- unsigned long usemapsize = usemap_size(zonesize);
+ unsigned long usemapsize = usemap_size(zone_start_pfn, zonesize);
zone->pageblock_flags = NULL;
if (usemapsize)
zone->pageblock_flags = alloc_bootmem_node_nopanic(pgdat,
usemapsize);
}
#else
-static inline void setup_usemap(struct pglist_data *pgdat,
- struct zone *zone, unsigned long zonesize) {}
+static inline void setup_usemap(struct pglist_data *pgdat, struct zone *zone,
+ unsigned long zone_start_pfn, unsigned long zonesize) {}
#endif /* CONFIG_SPARSEMEM */
#ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
@@ -4530,7 +4533,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
continue;
set_pageblock_order(pageblock_default_order());
- setup_usemap(pgdat, zone, size);
+ setup_usemap(pgdat, zone, zone_start_pfn, size);
ret = init_currently_empty_zone(zone, zone_start_pfn,
size, MEMMAP_EARLY);
BUG_ON(ret);
@@ -5850,6 +5853,10 @@ __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
zone->free_area[order].nr_free--;
__mod_zone_page_state(zone, NR_FREE_PAGES,
- (1UL << order));
+#ifdef CONFIG_HIGHMEM
+ if (PageHighMem(page))
+ totalhigh_pages -= 1 << order;
+#endif
for (i = 0; i < (1 << order); i++)
SetPageReserved((page+i));
pfn += (1 << order);
diff --git a/mm/shmem.c b/mm/shmem.c
index 818947be0d9..a858b67b426 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2514,6 +2514,7 @@ static int shmem_remount_fs(struct super_block *sb, int *flags, char *data)
unsigned long inodes;
int error = -EINVAL;
+ config.mpol = NULL;
if (shmem_parse_options(data, &config, true))
return error;
@@ -2539,8 +2540,13 @@ static int shmem_remount_fs(struct super_block *sb, int *flags, char *data)
sbinfo->max_inodes = config.max_inodes;
sbinfo->free_inodes = config.max_inodes - inodes;
- mpol_put(sbinfo->mpol);
- sbinfo->mpol = config.mpol; /* transfers initial ref */
+ /*
+ * Preserve previous mempolicy unless mpol remount option was specified.
+ */
+ if (config.mpol) {
+ mpol_put(sbinfo->mpol);
+ sbinfo->mpol = config.mpol; /* transfers initial ref */
+ }
out:
spin_unlock(&sbinfo->stat_lock);
return error;
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 10e9198778c..43b957e5754 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -315,8 +315,24 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
* Swap entry may have been freed since our caller observed it.
*/
err = swapcache_prepare(entry);
- if (err == -EEXIST) { /* seems racy */
+ if (err == -EEXIST) {
radix_tree_preload_end();
+ /*
+ * We might race against get_swap_page() and stumble
+ * across a SWAP_HAS_CACHE swap_map entry whose page
+ * has not been brought into the swapcache yet, while
+ * the other end is scheduled away waiting on discard
+ * I/O completion at scan_swap_map().
+ *
+ * In order to avoid turning this transitory state
+ * into a permanent loop around this -EEXIST case
+ * if !CONFIG_PREEMPT and the I/O completion happens
+ * to be waiting on the CPU waitqueue where we are now
+ * busy looping, we just conditionally invoke the
+ * scheduler here, if there are some more important
+ * tasks to run.
+ */
+ cond_resched();
continue;
}
if (err) { /* swp entry is obsolete ? */