aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorColin Cross <ccross@android.com>2015-10-05 16:13:04 +0200
committerSimon Shields <keepcalm444@gmail.com>2016-08-23 04:55:14 -0700
commit8aa6fa75955db416644e6c9367b350c25cb6745c (patch)
treedb5c4ef8c8cf97794c1c02cf389960e2bb9e8305
parent6112c4d4e3408cf2aea5a626a3e39ba3922094c5 (diff)
downloadkernel_samsung_smdk4412-8aa6fa75955db416644e6c9367b350c25cb6745c.tar.gz
kernel_samsung_smdk4412-8aa6fa75955db416644e6c9367b350c25cb6745c.tar.bz2
kernel_samsung_smdk4412-8aa6fa75955db416644e6c9367b350c25cb6745c.zip
mm: add a field to store names for private anonymous memory
Userspace processes often have multiple allocators that each do anonymous mmaps to get memory. When examining memory usage of individual processes or systems as a whole, it is useful to be able to break down the various heaps that were allocated by each layer and examine their size, RSS, and physical memory usage. This patch adds a user pointer to the shared union in vm_area_struct that points to a null terminated string inside the user process containing a name for the vma. vmas that point to the same address will be merged, but vmas that point to equivalent strings at different addresses will not be merged. Userspace can set the name for a region of memory by calling prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, start, len, (unsigned long)name); Setting the name to NULL clears it. The names of named anonymous vmas are shown in /proc/pid/maps as [anon:<name>] and in /proc/pid/smaps in a new "Name" field that is only present for named vmas. If the userspace pointer is no longer valid all or part of the name will be replaced with "<fault>". The idea to store a userspace pointer to reduce the complexity within mm (at the expense of the complexity of reading /proc/pid/mem) came from Dave Hansen. This results in no runtime overhead in the mm subsystem other than comparing the anon_name pointers when considering vma merging. The pointer is stored in a union with fieds that are only used on file-backed mappings, so it does not increase memory usage. Change-Id: I53b093d98dc24f41377824f34e076edced4a6f07
-rw-r--r--Documentation/filesystems/proc.txt6
-rw-r--r--fs/proc/task_mmu.c63
-rw-r--r--include/linux/mm.h2
-rw-r--r--include/linux/mm_types.h15
-rw-r--r--include/linux/prctl.h3
-rw-r--r--kernel/sys.c146
-rw-r--r--mm/madvise.c3
-rw-r--r--mm/mempolicy.c5
-rw-r--r--mm/mlock.c3
-rw-r--r--mm/mmap.c44
-rw-r--r--mm/mprotect.c3
11 files changed, 269 insertions, 24 deletions
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index db3b1aba32a..6b0d09873be 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -354,6 +354,8 @@ is not associated with a file:
[stack] = the stack of the main process
[vdso] = the "virtual dynamic shared object",
the kernel system call handler
+ [anon:<name>] = an anonymous mapping that has been
+ named by userspace
or if empty, the mapping is anonymous.
@@ -376,6 +378,7 @@ Swap: 0 kB
KernelPageSize: 4 kB
MMUPageSize: 4 kB
Locked: 374 kB
+Name: name from userspace
The first of these lines shows the same information as is displayed for the
mapping in /proc/PID/maps. The remaining lines show the size of the mapping
@@ -391,6 +394,9 @@ and a page is modified, the file page is replaced by a private anonymous copy.
"Swap" shows how much would-be-anonymous memory is also used, but out on
swap.
+The "Name" field will only be present on a mapping that has been named by
+userspace, and will show the name passed in by userspace.
+
This file is only present if the CONFIG_MMU kernel configuration option is
enabled.
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 00e7ac486d9..f43c5406b56 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -88,6 +88,56 @@ static void pad_len_spaces(struct seq_file *m, int len)
seq_printf(m, "%*c", len, ' ');
}
+static void seq_print_vma_name(struct seq_file *m, struct vm_area_struct *vma)
+{
+ const char __user *name = vma_get_anon_name(vma);
+ struct mm_struct *mm = vma->vm_mm;
+
+ unsigned long page_start_vaddr;
+ unsigned long page_offset;
+ unsigned long num_pages;
+ unsigned long max_len = NAME_MAX;
+ int i;
+
+ page_start_vaddr = (unsigned long)name & PAGE_MASK;
+ page_offset = (unsigned long)name - page_start_vaddr;
+ num_pages = DIV_ROUND_UP(page_offset + max_len, PAGE_SIZE);
+
+ seq_puts(m, "[anon:");
+
+ for (i = 0; i < num_pages; i++) {
+ int len;
+ int write_len;
+ const char *kaddr;
+ long pages_pinned;
+ struct page *page;
+
+ pages_pinned = get_user_pages(current, mm, page_start_vaddr,
+ 1, 0, 0, &page, NULL);
+ if (pages_pinned < 1) {
+ seq_puts(m, "<fault>]");
+ return;
+ }
+
+ kaddr = (const char *)kmap(page);
+ len = min(max_len, PAGE_SIZE - page_offset);
+ write_len = strnlen(kaddr + page_offset, len);
+ seq_write(m, kaddr + page_offset, write_len);
+ kunmap(page);
+ put_page(page);
+
+ /* if strnlen hit a null terminator then we're done */
+ if (write_len != len)
+ break;
+
+ max_len -= len;
+ page_offset = 0;
+ page_start_vaddr += PAGE_SIZE;
+ }
+
+ seq_putc(m, ']');
+}
+
static void vma_stop(struct proc_maps_private *priv, struct vm_area_struct *vma)
{
if (vma && vma != priv->tail_vma) {
@@ -264,7 +314,14 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
} else {
name = "[vdso]";
}
+ goto done;
}
+
+ if (vma_get_anon_name(vma)) {
+ pad_len_spaces(m, len);
+ seq_print_vma_name(m, vma);
+ }
+done:
if (name) {
pad_len_spaces(m, len);
seq_puts(m, name);
@@ -474,6 +531,12 @@ static int show_smap(struct seq_file *m, void *v)
(vma->vm_flags & VM_LOCKED) ?
(unsigned long)(mss.pss >> (10 + PSS_SHIFT)) : 0);
+ if (vma_get_anon_name(vma)) {
+ seq_puts(m, "Name: ");
+ seq_print_vma_name(m, vma);
+ seq_putc(m, '\n');
+ }
+
if (m->count < m->size) /* vma is copied successfully */
m->version = (vma != get_gate_vma(task->mm))
? vma->vm_start : 0;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index d5dc6af4ff6..92a055bd3dd 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1418,7 +1418,7 @@ extern int vma_adjust(struct vm_area_struct *vma, unsigned long start,
extern struct vm_area_struct *vma_merge(struct mm_struct *,
struct vm_area_struct *prev, unsigned long addr, unsigned long end,
unsigned long vm_flags, struct anon_vma *, struct file *, pgoff_t,
- struct mempolicy *);
+ struct mempolicy *, const char __user *);
extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *);
extern int split_vma(struct mm_struct *,
struct vm_area_struct *, unsigned long addr, int new_below);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 059839c7000..557f9b69bfb 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -162,6 +162,10 @@ struct vm_area_struct {
* linkage into the address_space->i_mmap prio tree, or
* linkage to the list of like vmas hanging off its node, or
* linkage of vma in the address_space->i_mmap_nonlinear list.
+ *
+ * For private anonymous mappings, a pointer to a null terminated string
+ * in the user process containing the name given to the vma, or NULL
+ * if unnamed.
*/
union {
struct {
@@ -171,6 +175,7 @@ struct vm_area_struct {
} vm_set;
struct raw_prio_tree_node prio_tree_node;
+ const char __user *anon_name;
} shared;
/*
@@ -345,4 +350,14 @@ static inline cpumask_t *mm_cpumask(struct mm_struct *mm)
return mm->cpu_vm_mask_var;
}
+
+/* Return the name for an anonymous mapping or NULL for a file-backed mapping */
+static inline const char __user *vma_get_anon_name(struct vm_area_struct *vma)
+{
+ if (vma->vm_file)
+ return NULL;
+
+ return vma->shared.anon_name;
+}
+
#endif /* _LINUX_MM_TYPES_H */
diff --git a/include/linux/prctl.h b/include/linux/prctl.h
index a3baeb2c216..dd90287deeb 100644
--- a/include/linux/prctl.h
+++ b/include/linux/prctl.h
@@ -102,4 +102,7 @@
#define PR_MCE_KILL_GET 34
+#define PR_SET_VMA 0x53564d41
+# define PR_SET_VMA_ANON_NAME 0
+
#endif /* _LINUX_PRCTL_H */
diff --git a/kernel/sys.c b/kernel/sys.c
index be53817497c..79b66c09844 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -40,6 +40,9 @@
#include <linux/syscore_ops.h>
#include <linux/version.h>
#include <linux/ctype.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/mempolicy.h>
#include <linux/compat.h>
#include <linux/syscalls.h>
@@ -1660,6 +1663,146 @@ SYSCALL_DEFINE1(umask, int, mask)
return mask;
}
+
+static int prctl_update_vma_anon_name(struct vm_area_struct *vma,
+ struct vm_area_struct **prev,
+ unsigned long start, unsigned long end,
+ const char __user *name_addr)
+{
+ struct mm_struct * mm = vma->vm_mm;
+ int error = 0;
+ pgoff_t pgoff;
+
+ if (name_addr == vma_get_anon_name(vma)) {
+ *prev = vma;
+ goto out;
+ }
+
+ pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
+ *prev = vma_merge(mm, *prev, start, end, vma->vm_flags, vma->anon_vma,
+ vma->vm_file, pgoff, vma_policy(vma),
+ name_addr);
+ if (*prev) {
+ vma = *prev;
+ goto success;
+ }
+
+ *prev = vma;
+
+ if (start != vma->vm_start) {
+ error = split_vma(mm, vma, start, 1);
+ if (error)
+ goto out;
+ }
+
+ if (end != vma->vm_end) {
+ error = split_vma(mm, vma, end, 0);
+ if (error)
+ goto out;
+ }
+
+success:
+ if (!vma->vm_file)
+ vma->shared.anon_name = name_addr;
+
+out:
+ if (error == -ENOMEM)
+ error = -EAGAIN;
+ return error;
+}
+
+static int prctl_set_vma_anon_name(unsigned long start, unsigned long end,
+ unsigned long arg)
+{
+ unsigned long tmp;
+ struct vm_area_struct * vma, *prev;
+ int unmapped_error = 0;
+ int error = -EINVAL;
+
+ /*
+ * If the interval [start,end) covers some unmapped address
+ * ranges, just ignore them, but return -ENOMEM at the end.
+ * - this matches the handling in madvise.
+ */
+ vma = find_vma_prev(current->mm, start, &prev);
+ if (vma && start > vma->vm_start)
+ prev = vma;
+
+ for (;;) {
+ /* Still start < end. */
+ error = -ENOMEM;
+ if (!vma)
+ return error;
+
+ /* Here start < (end|vma->vm_end). */
+ if (start < vma->vm_start) {
+ unmapped_error = -ENOMEM;
+ start = vma->vm_start;
+ if (start >= end)
+ return error;
+ }
+
+ /* Here vma->vm_start <= start < (end|vma->vm_end) */
+ tmp = vma->vm_end;
+ if (end < tmp)
+ tmp = end;
+
+ /* Here vma->vm_start <= start < tmp <= (end|vma->vm_end). */
+ error = prctl_update_vma_anon_name(vma, &prev, start, end,
+ (const char __user *)arg);
+ if (error)
+ return error;
+ start = tmp;
+ if (prev && start < prev->vm_end)
+ start = prev->vm_end;
+ error = unmapped_error;
+ if (start >= end)
+ return error;
+ if (prev)
+ vma = prev->vm_next;
+ else /* madvise_remove dropped mmap_sem */
+ vma = find_vma(current->mm, start);
+ }
+}
+
+static int prctl_set_vma(unsigned long opt, unsigned long start,
+ unsigned long len_in, unsigned long arg)
+{
+ struct mm_struct *mm = current->mm;
+ int error;
+ unsigned long len;
+ unsigned long end;
+
+ if (start & ~PAGE_MASK)
+ return -EINVAL;
+ len = (len_in + ~PAGE_MASK) & PAGE_MASK;
+
+ /* Check to see whether len was rounded up from small -ve to zero */
+ if (len_in && !len)
+ return -EINVAL;
+
+ end = start + len;
+ if (end < start)
+ return -EINVAL;
+
+ if (end == start)
+ return 0;
+
+ down_write(&mm->mmap_sem);
+
+ switch (opt) {
+ case PR_SET_VMA_ANON_NAME:
+ error = prctl_set_vma_anon_name(start, end, arg);
+ break;
+ default:
+ error = -EINVAL;
+ }
+
+ up_write(&mm->mmap_sem);
+
+ return error;
+}
+
SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
unsigned long, arg4, unsigned long, arg5)
{
@@ -1808,6 +1951,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
else
error = PR_MCE_KILL_DEFAULT;
break;
+ case PR_SET_VMA:
+ error = prctl_set_vma(arg2, arg3, arg4, arg5);
+ break;
default:
error = -EINVAL;
break;
diff --git a/mm/madvise.c b/mm/madvise.c
index deabe5f603a..5a273afd199 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -87,7 +87,8 @@ static long madvise_behavior(struct vm_area_struct * vma,
pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
*prev = vma_merge(mm, *prev, start, end, new_flags, vma->anon_vma,
- vma->vm_file, pgoff, vma_policy(vma));
+ vma->vm_file, pgoff, vma_policy(vma),
+ vma_get_anon_name(vma));
if (*prev) {
vma = *prev;
goto success;
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 7065fb3a2b4..7c8caa9bc0a 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -664,8 +664,9 @@ static int mbind_range(struct mm_struct *mm, unsigned long start,
vmend = min(end, vma->vm_end);
pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
- prev = vma_merge(mm, prev, vmstart, vmend, vma->vm_flags,
- vma->anon_vma, vma->vm_file, pgoff, new_pol);
+ prev = vma_merge(mm, prev, vmstart, vmend, vma->vm_flags,
+ vma->anon_vma, vma->vm_file, pgoff,
+ new_pol, vma_get_anon_name(name));
if (prev) {
vma = prev;
next = vma->vm_next;
diff --git a/mm/mlock.c b/mm/mlock.c
index b41b3b7788f..734fd7f08f9 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -321,7 +321,8 @@ static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
*prev = vma_merge(mm, *prev, start, end, newflags, vma->anon_vma,
- vma->vm_file, pgoff, vma_policy(vma));
+ vma->vm_file, pgoff, vma_policy(vma),
+ vma_get_anon_name(vma));
if (*prev) {
vma = *prev;
goto success;
diff --git a/mm/mmap.c b/mm/mmap.c
index d1cf520e511..96f124e15a8 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -656,7 +656,8 @@ again: remove_next = 1 + (end > next->vm_end);
* per-vma resources, so we don't attempt to merge those.
*/
static inline int is_mergeable_vma(struct vm_area_struct *vma,
- struct file *file, unsigned long vm_flags)
+ struct file *file, unsigned long vm_flags,
+ const char __user *anon_name)
{
/* VM_CAN_NONLINEAR may get set later by f_op->mmap() */
if ((vma->vm_flags ^ vm_flags) & ~VM_CAN_NONLINEAR)
@@ -665,6 +666,8 @@ static inline int is_mergeable_vma(struct vm_area_struct *vma,
return 0;
if (vma->vm_ops && vma->vm_ops->close)
return 0;
+ if (vma_get_anon_name(vma) != anon_name)
+ return 0;
return 1;
}
@@ -695,9 +698,10 @@ static inline int is_mergeable_anon_vma(struct anon_vma *anon_vma1,
*/
static int
can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags,
- struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff)
+ struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff,
+ const char __user *anon_name)
{
- if (is_mergeable_vma(vma, file, vm_flags) &&
+ if (is_mergeable_vma(vma, file, vm_flags, anon_name) &&
is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
if (vma->vm_pgoff == vm_pgoff)
return 1;
@@ -714,9 +718,10 @@ can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags,
*/
static int
can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
- struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff)
+ struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff,
+ const char __user *anon_name)
{
- if (is_mergeable_vma(vma, file, vm_flags) &&
+ if (is_mergeable_vma(vma, file, vm_flags, anon_name) &&
is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
pgoff_t vm_pglen;
vm_pglen = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
@@ -727,9 +732,9 @@ can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
}
/*
- * Given a mapping request (addr,end,vm_flags,file,pgoff), figure out
- * whether that can be merged with its predecessor or its successor.
- * Or both (it neatly fills a hole).
+ * Given a mapping request (addr,end,vm_flags,file,pgoff,anon_name),
+ * figure out whether that can be merged with its predecessor or its
+ * successor. Or both (it neatly fills a hole).
*
* In most cases - when called for mmap, brk or mremap - [addr,end) is
* certain not to be mapped by the time vma_merge is called; but when
@@ -759,7 +764,8 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
struct vm_area_struct *prev, unsigned long addr,
unsigned long end, unsigned long vm_flags,
struct anon_vma *anon_vma, struct file *file,
- pgoff_t pgoff, struct mempolicy *policy)
+ pgoff_t pgoff, struct mempolicy *policy,
+ const char __user *anon_name)
{
pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
struct vm_area_struct *area, *next;
@@ -785,15 +791,15 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
*/
if (prev && prev->vm_end == addr &&
mpol_equal(vma_policy(prev), policy) &&
- can_vma_merge_after(prev, vm_flags,
- anon_vma, file, pgoff)) {
+ can_vma_merge_after(prev, vm_flags, anon_vma,
+ file, pgoff, anon_name)) {
/*
* OK, it can. Can we now merge in the successor as well?
*/
if (next && end == next->vm_start &&
mpol_equal(policy, vma_policy(next)) &&
- can_vma_merge_before(next, vm_flags,
- anon_vma, file, pgoff+pglen) &&
+ can_vma_merge_before(next, vm_flags, anon_vma,
+ file, pgoff+pglen, anon_name) &&
is_mergeable_anon_vma(prev->anon_vma,
next->anon_vma, NULL)) {
/* cases 1, 6 */
@@ -813,8 +819,8 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
*/
if (next && end == next->vm_start &&
mpol_equal(policy, vma_policy(next)) &&
- can_vma_merge_before(next, vm_flags,
- anon_vma, file, pgoff+pglen)) {
+ can_vma_merge_before(next, vm_flags, anon_vma,
+ file, pgoff+pglen, anon_name)) {
if (prev && addr < prev->vm_end) /* case 4 */
err = vma_adjust(prev, prev->vm_start,
addr, prev->vm_pgoff, NULL);
@@ -1251,7 +1257,8 @@ munmap_back:
/*
* Can we just expand an old mapping?
*/
- vma = vma_merge(mm, prev, addr, addr + len, vm_flags, NULL, file, pgoff, NULL);
+ vma = vma_merge(mm, prev, addr, addr + len, vm_flags, NULL, file, pgoff,
+ NULL, NULL);
if (vma)
goto out;
@@ -2202,7 +2209,7 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
/* Can we just expand an old private anonymous mapping? */
vma = vma_merge(mm, prev, addr, addr + len, flags,
- NULL, NULL, pgoff, NULL);
+ NULL, NULL, pgoff, NULL, NULL);
if (vma)
goto out;
@@ -2340,7 +2347,8 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
- vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma));
+ vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
+ vma_get_anon_name(vma));
if (new_vma) {
/*
* Source vma may have been merged into new_vma
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 5a688a2756b..5f168ebd9fb 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -179,7 +179,8 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
*/
pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
*pprev = vma_merge(mm, *pprev, start, end, newflags,
- vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma));
+ vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
+ vma_get_anon_name(vma));
if (*pprev) {
vma = *pprev;
goto success;