From 3ce696d12b26c9c1cfedeb194a02606bef2854a4 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Sat, 31 Dec 2011 11:44:01 -0800 Subject: futex: Fix uninterruptible loop due to gate_area commit e6780f7243eddb133cc20ec37fa69317c218b709 upstream. It was found (by Sasha) that if you use a futex located in the gate area we get stuck in an uninterruptible infinite loop, much like the ZERO_PAGE issue. While looking at this problem, PeterZ realized you'll get into similar trouble when hitting any install_special_pages() mapping. And are there still drivers setting up their own special mmaps without page->mapping, and without special VM or pte flags to make get_user_pages fail? In most cases, if page->mapping is NULL, we do not need to retry at all: Linus points out that even /proc/sys/vm/drop_caches poses no problem, because it ends up using remove_mapping(), which takes care not to interfere when the page reference count is raised. But there is still one case which does need a retry: if memory pressure called shmem_writepage in between get_user_pages_fast dropping page table lock and our acquiring page lock, then the page gets switched from filecache to swapcache (and ->mapping set to NULL) whatever the refcount. Fault it back in to get the page->mapping needed for key->shared.inode. Reported-by: Sasha Levin Signed-off-by: Hugh Dickins Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/futex.c | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) (limited to 'kernel/futex.c') diff --git a/kernel/futex.c b/kernel/futex.c index 8b6da250723..6487e4cb993 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -314,17 +314,29 @@ again: #endif lock_page(page_head); + + /* + * If page_head->mapping is NULL, then it cannot be a PageAnon + * page; but it might be the ZERO_PAGE or in the gate area or + * in a special mapping (all cases which we are happy to fail); + * or it may have been a good file page when get_user_pages_fast + * found it, but truncated or holepunched or subjected to + * invalidate_complete_page2 before we got the page lock (also + * cases which we are happy to fail). And we hold a reference, + * so refcount care in invalidate_complete_page's remove_mapping + * prevents drop_caches from setting mapping to NULL beneath us. + * + * The case we do have to guard against is when memory pressure made + * shmem_writepage move it from filecache to swapcache beneath us: + * an unlikely race, but we do need to retry for page_head->mapping. + */ if (!page_head->mapping) { + int shmem_swizzled = PageSwapCache(page_head); unlock_page(page_head); put_page(page_head); - /* - * ZERO_PAGE pages don't have a mapping. Avoid a busy loop - * trying to find one. RW mapping would have COW'd (and thus - * have a mapping) so this page is RO and won't ever change. - */ - if ((page_head == ZERO_PAGE(address))) - return -EFAULT; - goto again; + if (shmem_swizzled) + goto again; + return -EFAULT; } /* -- cgit v1.2.3 From be455802bf55a051bb467b22ad26653a0e78bc61 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 15 Feb 2012 12:08:34 +0100 Subject: futex: Cover all PI opcodes with cmpxchg enabled check commit 59263b513c11398cd66a52d4c5b2b118ce1e0359 upstream. Some of the newer futex PI opcodes do not check the cmpxchg enabled variable and call unconditionally into the handling functions. Cover all PI opcodes in a separate check. Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Darren Hart Signed-off-by: Greg Kroah-Hartman --- kernel/futex.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) (limited to 'kernel/futex.c') diff --git a/kernel/futex.c b/kernel/futex.c index 6487e4cb993..a2a01ef0ee3 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -2640,6 +2640,16 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, return -ENOSYS; } + switch (cmd) { + case FUTEX_LOCK_PI: + case FUTEX_UNLOCK_PI: + case FUTEX_TRYLOCK_PI: + case FUTEX_WAIT_REQUEUE_PI: + case FUTEX_CMP_REQUEUE_PI: + if (!futex_cmpxchg_enabled) + return -ENOSYS; + } + switch (cmd) { case FUTEX_WAIT: val3 = FUTEX_BITSET_MATCH_ANY; @@ -2661,16 +2671,13 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, ret = futex_wake_op(uaddr, flags, uaddr2, val, val2, val3); break; case FUTEX_LOCK_PI: - if (futex_cmpxchg_enabled) - ret = futex_lock_pi(uaddr, flags, val, timeout, 0); + ret = futex_lock_pi(uaddr, flags, val, timeout, 0); break; case FUTEX_UNLOCK_PI: - if (futex_cmpxchg_enabled) - ret = futex_unlock_pi(uaddr, flags); + ret = futex_unlock_pi(uaddr, flags); break; case FUTEX_TRYLOCK_PI: - if (futex_cmpxchg_enabled) - ret = futex_lock_pi(uaddr, flags, 0, timeout, 1); + ret = futex_lock_pi(uaddr, flags, 0, timeout, 1); break; case FUTEX_WAIT_REQUEUE_PI: val3 = FUTEX_BITSET_MATCH_ANY; -- cgit v1.2.3 From 631792ffa5b781301812b8514b8c0a10eb8ce5d7 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 19 Mar 2012 16:12:53 -0700 Subject: futex: Do not leak robust list to unprivileged process commit bdbb776f882f5ad431aa1e694c69c1c3d6a4a5b8 upstream. It was possible to extract the robust list head address from a setuid process if it had used set_robust_list(), allowing an ASLR info leak. This changes the permission checks to be the same as those used for similar info that comes out of /proc. Running a setuid program that uses robust futexes would have had: cred->euid != pcred->euid cred->euid == pcred->uid so the old permissions check would allow it. I'm not aware of any setuid programs that use robust futexes, so this is just a preventative measure. (This patch is based on changes from grsecurity.) Signed-off-by: Kees Cook Cc: Darren Hart Cc: Peter Zijlstra Cc: Jiri Kosina Cc: Eric W. Biederman Cc: David Howells Cc: Serge E. Hallyn Cc: kernel-hardening@lists.openwall.com Cc: spender@grsecurity.net Link: http://lkml.kernel.org/r/20120319231253.GA20893@www.outflux.net Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- kernel/futex.c | 36 +++++++++++++----------------------- 1 file changed, 13 insertions(+), 23 deletions(-) (limited to 'kernel/futex.c') diff --git a/kernel/futex.c b/kernel/futex.c index a2a01ef0ee3..11e8924b6ee 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -59,6 +59,7 @@ #include #include #include +#include #include @@ -2443,40 +2444,29 @@ SYSCALL_DEFINE3(get_robust_list, int, pid, { struct robust_list_head __user *head; unsigned long ret; - const struct cred *cred = current_cred(), *pcred; + struct task_struct *p; if (!futex_cmpxchg_enabled) return -ENOSYS; + rcu_read_lock(); + + ret = -ESRCH; if (!pid) - head = current->robust_list; + p = current; else { - struct task_struct *p; - - ret = -ESRCH; - rcu_read_lock(); p = find_task_by_vpid(pid); if (!p) goto err_unlock; - ret = -EPERM; - pcred = __task_cred(p); - /* If victim is in different user_ns, then uids are not - comparable, so we must have CAP_SYS_PTRACE */ - if (cred->user->user_ns != pcred->user->user_ns) { - if (!ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE)) - goto err_unlock; - goto ok; - } - /* If victim is in same user_ns, then uids are comparable */ - if (cred->euid != pcred->euid && - cred->euid != pcred->uid && - !ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE)) - goto err_unlock; -ok: - head = p->robust_list; - rcu_read_unlock(); } + ret = -EPERM; + if (!ptrace_may_access(p, PTRACE_MODE_READ)) + goto err_unlock; + + head = p->robust_list; + rcu_read_unlock(); + if (put_user(sizeof(*head), len_ptr)) return -EFAULT; return put_user(head, head_ptr); -- cgit v1.2.3