aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorColin Cross <ccross@android.com>2015-10-05 16:13:04 +0200
committerSimon Shields <keepcalm444@gmail.com>2016-08-23 04:55:14 -0700
commit8aa6fa75955db416644e6c9367b350c25cb6745c (patch)
treedb5c4ef8c8cf97794c1c02cf389960e2bb9e8305 /kernel
parent6112c4d4e3408cf2aea5a626a3e39ba3922094c5 (diff)
downloadkernel_samsung_smdk4412-8aa6fa75955db416644e6c9367b350c25cb6745c.tar.gz
kernel_samsung_smdk4412-8aa6fa75955db416644e6c9367b350c25cb6745c.tar.bz2
kernel_samsung_smdk4412-8aa6fa75955db416644e6c9367b350c25cb6745c.zip
mm: add a field to store names for private anonymous memory
Userspace processes often have multiple allocators that each do anonymous mmaps to get memory. When examining memory usage of individual processes or systems as a whole, it is useful to be able to break down the various heaps that were allocated by each layer and examine their size, RSS, and physical memory usage. This patch adds a user pointer to the shared union in vm_area_struct that points to a null terminated string inside the user process containing a name for the vma. vmas that point to the same address will be merged, but vmas that point to equivalent strings at different addresses will not be merged. Userspace can set the name for a region of memory by calling prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, start, len, (unsigned long)name); Setting the name to NULL clears it. The names of named anonymous vmas are shown in /proc/pid/maps as [anon:<name>] and in /proc/pid/smaps in a new "Name" field that is only present for named vmas. If the userspace pointer is no longer valid all or part of the name will be replaced with "<fault>". The idea to store a userspace pointer to reduce the complexity within mm (at the expense of the complexity of reading /proc/pid/mem) came from Dave Hansen. This results in no runtime overhead in the mm subsystem other than comparing the anon_name pointers when considering vma merging. The pointer is stored in a union with fieds that are only used on file-backed mappings, so it does not increase memory usage. Change-Id: I53b093d98dc24f41377824f34e076edced4a6f07
Diffstat (limited to 'kernel')
-rw-r--r--kernel/sys.c146
1 files changed, 146 insertions, 0 deletions
diff --git a/kernel/sys.c b/kernel/sys.c
index be53817497c..79b66c09844 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -40,6 +40,9 @@
#include <linux/syscore_ops.h>
#include <linux/version.h>
#include <linux/ctype.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/mempolicy.h>
#include <linux/compat.h>
#include <linux/syscalls.h>
@@ -1660,6 +1663,146 @@ SYSCALL_DEFINE1(umask, int, mask)
return mask;
}
+
+static int prctl_update_vma_anon_name(struct vm_area_struct *vma,
+ struct vm_area_struct **prev,
+ unsigned long start, unsigned long end,
+ const char __user *name_addr)
+{
+ struct mm_struct * mm = vma->vm_mm;
+ int error = 0;
+ pgoff_t pgoff;
+
+ if (name_addr == vma_get_anon_name(vma)) {
+ *prev = vma;
+ goto out;
+ }
+
+ pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
+ *prev = vma_merge(mm, *prev, start, end, vma->vm_flags, vma->anon_vma,
+ vma->vm_file, pgoff, vma_policy(vma),
+ name_addr);
+ if (*prev) {
+ vma = *prev;
+ goto success;
+ }
+
+ *prev = vma;
+
+ if (start != vma->vm_start) {
+ error = split_vma(mm, vma, start, 1);
+ if (error)
+ goto out;
+ }
+
+ if (end != vma->vm_end) {
+ error = split_vma(mm, vma, end, 0);
+ if (error)
+ goto out;
+ }
+
+success:
+ if (!vma->vm_file)
+ vma->shared.anon_name = name_addr;
+
+out:
+ if (error == -ENOMEM)
+ error = -EAGAIN;
+ return error;
+}
+
+static int prctl_set_vma_anon_name(unsigned long start, unsigned long end,
+ unsigned long arg)
+{
+ unsigned long tmp;
+ struct vm_area_struct * vma, *prev;
+ int unmapped_error = 0;
+ int error = -EINVAL;
+
+ /*
+ * If the interval [start,end) covers some unmapped address
+ * ranges, just ignore them, but return -ENOMEM at the end.
+ * - this matches the handling in madvise.
+ */
+ vma = find_vma_prev(current->mm, start, &prev);
+ if (vma && start > vma->vm_start)
+ prev = vma;
+
+ for (;;) {
+ /* Still start < end. */
+ error = -ENOMEM;
+ if (!vma)
+ return error;
+
+ /* Here start < (end|vma->vm_end). */
+ if (start < vma->vm_start) {
+ unmapped_error = -ENOMEM;
+ start = vma->vm_start;
+ if (start >= end)
+ return error;
+ }
+
+ /* Here vma->vm_start <= start < (end|vma->vm_end) */
+ tmp = vma->vm_end;
+ if (end < tmp)
+ tmp = end;
+
+ /* Here vma->vm_start <= start < tmp <= (end|vma->vm_end). */
+ error = prctl_update_vma_anon_name(vma, &prev, start, end,
+ (const char __user *)arg);
+ if (error)
+ return error;
+ start = tmp;
+ if (prev && start < prev->vm_end)
+ start = prev->vm_end;
+ error = unmapped_error;
+ if (start >= end)
+ return error;
+ if (prev)
+ vma = prev->vm_next;
+ else /* madvise_remove dropped mmap_sem */
+ vma = find_vma(current->mm, start);
+ }
+}
+
+static int prctl_set_vma(unsigned long opt, unsigned long start,
+ unsigned long len_in, unsigned long arg)
+{
+ struct mm_struct *mm = current->mm;
+ int error;
+ unsigned long len;
+ unsigned long end;
+
+ if (start & ~PAGE_MASK)
+ return -EINVAL;
+ len = (len_in + ~PAGE_MASK) & PAGE_MASK;
+
+ /* Check to see whether len was rounded up from small -ve to zero */
+ if (len_in && !len)
+ return -EINVAL;
+
+ end = start + len;
+ if (end < start)
+ return -EINVAL;
+
+ if (end == start)
+ return 0;
+
+ down_write(&mm->mmap_sem);
+
+ switch (opt) {
+ case PR_SET_VMA_ANON_NAME:
+ error = prctl_set_vma_anon_name(start, end, arg);
+ break;
+ default:
+ error = -EINVAL;
+ }
+
+ up_write(&mm->mmap_sem);
+
+ return error;
+}
+
SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
unsigned long, arg4, unsigned long, arg5)
{
@@ -1808,6 +1951,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
else
error = PR_MCE_KILL_DEFAULT;
break;
+ case PR_SET_VMA:
+ error = prctl_set_vma(arg2, arg3, arg4, arg5);
+ break;
default:
error = -EINVAL;
break;