diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-09-09 10:30:07 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-09-09 10:30:07 -0700 |
commit | fbf4432ff71b7a25bef993a5312906946d27f446 (patch) | |
tree | cf3e0024af4b8f9376eff75743f1fa1526e40900 /fs | |
parent | c054be10ffdbd5507a1fd738067d76acfb4808fd (diff) | |
parent | 0cfb6aee70bddbef6ec796b255f588ce0e126766 (diff) | |
download | kernel_replicant_linux-fbf4432ff71b7a25bef993a5312906946d27f446.tar.gz kernel_replicant_linux-fbf4432ff71b7a25bef993a5312906946d27f446.tar.bz2 kernel_replicant_linux-fbf4432ff71b7a25bef993a5312906946d27f446.zip |
Merge branch 'akpm' (patches from Andrew)
Merge more updates from Andrew Morton:
- most of the rest of MM
- a small number of misc things
- lib/ updates
- checkpatch
- autofs updates
- ipc/ updates
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (126 commits)
ipc: optimize semget/shmget/msgget for lots of keys
ipc/sem: play nicer with large nsops allocations
ipc/sem: drop sem_checkid helper
ipc: convert kern_ipc_perm.refcount from atomic_t to refcount_t
ipc: convert sem_undo_list.refcnt from atomic_t to refcount_t
ipc: convert ipc_namespace.count from atomic_t to refcount_t
kcov: support compat processes
sh: defconfig: cleanup from old Kconfig options
mn10300: defconfig: cleanup from old Kconfig options
m32r: defconfig: cleanup from old Kconfig options
drivers/pps: use surrounding "if PPS" to remove numerous dependency checks
drivers/pps: aesthetic tweaks to PPS-related content
cpumask: make cpumask_next() out-of-line
kmod: move #ifdef CONFIG_MODULES wrapper to Makefile
kmod: split off umh headers into its own file
MAINTAINERS: clarify kmod is just a kernel module loader
kmod: split out umh code into its own file
test_kmod: flip INT checks to be consistent
test_kmod: remove paranoid UINT_MAX check on uint range processing
vfat: deduplicate hex2bin()
...
Diffstat (limited to 'fs')
-rw-r--r-- | fs/aio.c | 8 | ||||
-rw-r--r-- | fs/autofs4/autofs_i.h | 22 | ||||
-rw-r--r-- | fs/autofs4/dev-ioctl.c | 44 | ||||
-rw-r--r-- | fs/binfmt_flat.c | 8 | ||||
-rw-r--r-- | fs/eventpoll.c | 30 | ||||
-rw-r--r-- | fs/f2fs/data.c | 5 | ||||
-rw-r--r-- | fs/fat/namei_vfat.c | 35 | ||||
-rw-r--r-- | fs/hugetlbfs/inode.c | 11 | ||||
-rw-r--r-- | fs/inode.c | 2 | ||||
-rw-r--r-- | fs/namei.c | 15 | ||||
-rw-r--r-- | fs/proc/generic.c | 34 | ||||
-rw-r--r-- | fs/proc/internal.h | 2 | ||||
-rw-r--r-- | fs/proc/proc_net.c | 2 | ||||
-rw-r--r-- | fs/proc/root.c | 2 | ||||
-rw-r--r-- | fs/proc/task_mmu.c | 81 | ||||
-rw-r--r-- | fs/proc/task_nommu.c | 5 | ||||
-rw-r--r-- | fs/ubifs/file.c | 5 | ||||
-rw-r--r-- | fs/userfaultfd.c | 20 |
18 files changed, 198 insertions, 133 deletions
@@ -373,6 +373,14 @@ static int aio_migratepage(struct address_space *mapping, struct page *new, pgoff_t idx; int rc; + /* + * We cannot support the _NO_COPY case here, because copy needs to + * happen under the ctx->completion_lock. That does not work with the + * migration workflow of MIGRATE_SYNC_NO_COPY. + */ + if (mode == MIGRATE_SYNC_NO_COPY) + return -EINVAL; + rc = 0; /* mapping->private_lock here protects against the kioctx teardown. */ diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index beef981aa54f..4737615f0eaa 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -11,10 +11,21 @@ #include <linux/auto_fs4.h> #include <linux/auto_dev-ioctl.h> + +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/time.h> +#include <linux/string.h> +#include <linux/wait.h> +#include <linux/sched.h> +#include <linux/mount.h> +#include <linux/namei.h> +#include <linux/uaccess.h> #include <linux/mutex.h> #include <linux/spinlock.h> #include <linux/list.h> #include <linux/completion.h> +#include <asm/current.h> /* This is the range of ioctl() numbers we claim as ours */ #define AUTOFS_IOC_FIRST AUTOFS_IOC_READY @@ -24,17 +35,6 @@ #define AUTOFS_DEV_IOCTL_IOC_COUNT \ (AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD - AUTOFS_DEV_IOCTL_VERSION_CMD) -#include <linux/kernel.h> -#include <linux/slab.h> -#include <linux/time.h> -#include <linux/string.h> -#include <linux/wait.h> -#include <linux/sched.h> -#include <linux/mount.h> -#include <linux/namei.h> -#include <asm/current.h> -#include <linux/uaccess.h> - #ifdef pr_fmt #undef pr_fmt #endif diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c index dd9f1bebb5a3..b7c816f39404 100644 --- a/fs/autofs4/dev-ioctl.c +++ b/fs/autofs4/dev-ioctl.c @@ -93,17 +93,17 @@ static int check_dev_ioctl_version(int cmd, struct autofs_dev_ioctl *param) * at the end of the struct. */ static struct autofs_dev_ioctl * - copy_dev_ioctl(struct autofs_dev_ioctl __user *in) +copy_dev_ioctl(struct autofs_dev_ioctl __user *in) { struct autofs_dev_ioctl tmp, *res; - if (copy_from_user(&tmp, in, sizeof(tmp))) + if (copy_from_user(&tmp, in, AUTOFS_DEV_IOCTL_SIZE)) return ERR_PTR(-EFAULT); - if (tmp.size < sizeof(tmp)) + if (tmp.size < AUTOFS_DEV_IOCTL_SIZE) return ERR_PTR(-EINVAL); - if (tmp.size > (PATH_MAX + sizeof(tmp))) + if (tmp.size > AUTOFS_DEV_IOCTL_SIZE + PATH_MAX) return ERR_PTR(-ENAMETOOLONG); res = memdup_user(in, tmp.size); @@ -133,8 +133,8 @@ static int validate_dev_ioctl(int cmd, struct autofs_dev_ioctl *param) goto out; } - if (param->size > sizeof(*param)) { - err = invalid_str(param->path, param->size - sizeof(*param)); + if (param->size > AUTOFS_DEV_IOCTL_SIZE) { + err = invalid_str(param->path, param->size - AUTOFS_DEV_IOCTL_SIZE); if (err) { pr_warn( "path string terminator missing for cmd(0x%08x)\n", @@ -258,11 +258,6 @@ static int autofs_dev_ioctl_open_mountpoint(const char *name, dev_t devid) if (err) goto out; - /* - * Find autofs super block that has the device number - * corresponding to the autofs fs we want to open. - */ - filp = dentry_open(&path, O_RDONLY, current_cred()); path_put(&path); if (IS_ERR(filp)) { @@ -451,7 +446,7 @@ static int autofs_dev_ioctl_requester(struct file *fp, dev_t devid; int err = -ENOENT; - if (param->size <= sizeof(*param)) { + if (param->size <= AUTOFS_DEV_IOCTL_SIZE) { err = -EINVAL; goto out; } @@ -539,7 +534,7 @@ static int autofs_dev_ioctl_ismountpoint(struct file *fp, unsigned int devid, magic; int err = -ENOENT; - if (param->size <= sizeof(*param)) { + if (param->size <= AUTOFS_DEV_IOCTL_SIZE) { err = -EINVAL; goto out; } @@ -628,10 +623,6 @@ static int _autofs_dev_ioctl(unsigned int command, ioctl_fn fn = NULL; int err = 0; - /* only root can play with this */ - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - cmd_first = _IOC_NR(AUTOFS_DEV_IOCTL_IOC_FIRST); cmd = _IOC_NR(command); @@ -640,6 +631,14 @@ static int _autofs_dev_ioctl(unsigned int command, return -ENOTTY; } + /* Only root can use ioctls other than AUTOFS_DEV_IOCTL_VERSION_CMD + * and AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD + */ + if (cmd != AUTOFS_DEV_IOCTL_VERSION_CMD && + cmd != AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD && + !capable(CAP_SYS_ADMIN)) + return -EPERM; + /* Copy the parameters into kernel space. */ param = copy_dev_ioctl(user); if (IS_ERR(param)) @@ -706,7 +705,8 @@ out: return err; } -static long autofs_dev_ioctl(struct file *file, uint command, ulong u) +static long autofs_dev_ioctl(struct file *file, unsigned int command, + unsigned long u) { int err; @@ -715,9 +715,10 @@ static long autofs_dev_ioctl(struct file *file, uint command, ulong u) } #ifdef CONFIG_COMPAT -static long autofs_dev_ioctl_compat(struct file *file, uint command, ulong u) +static long autofs_dev_ioctl_compat(struct file *file, unsigned int command, + unsigned long u) { - return (long) autofs_dev_ioctl(file, command, (ulong) compat_ptr(u)); + return autofs_dev_ioctl(file, command, (unsigned long) compat_ptr(u)); } #else #define autofs_dev_ioctl_compat NULL @@ -733,7 +734,8 @@ static const struct file_operations _dev_ioctl_fops = { static struct miscdevice _autofs_dev_ioctl_misc = { .minor = AUTOFS_MINOR, .name = AUTOFS_DEVICE_NAME, - .fops = &_dev_ioctl_fops + .fops = &_dev_ioctl_fops, + .mode = 0644, }; MODULE_ALIAS_MISCDEV(AUTOFS_MINOR); diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index 604a176df0c2..ce6537c50ec1 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -192,13 +192,11 @@ static int decompress_exec( memset(&strm, 0, sizeof(strm)); strm.workspace = kmalloc(zlib_inflate_workspacesize(), GFP_KERNEL); - if (strm.workspace == NULL) { - pr_debug("no memory for decompress workspace\n"); + if (!strm.workspace) return -ENOMEM; - } + buf = kmalloc(LBUFSIZE, GFP_KERNEL); - if (buf == NULL) { - pr_debug("no memory for read buffer\n"); + if (!buf) { retval = -ENOMEM; goto out_free; } diff --git a/fs/eventpoll.c b/fs/eventpoll.c index adbe328b957c..2fabd19cdeea 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -205,7 +205,7 @@ struct eventpoll { struct list_head rdllist; /* RB tree root used to store monitored fd structs */ - struct rb_root rbr; + struct rb_root_cached rbr; /* * This is a single linked list that chains all the "struct epitem" that @@ -796,7 +796,7 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi) list_del_rcu(&epi->fllink); spin_unlock(&file->f_lock); - rb_erase(&epi->rbn, &ep->rbr); + rb_erase_cached(&epi->rbn, &ep->rbr); spin_lock_irqsave(&ep->lock, flags); if (ep_is_linked(&epi->rdllink)) @@ -840,7 +840,7 @@ static void ep_free(struct eventpoll *ep) /* * Walks through the whole tree by unregistering poll callbacks. */ - for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) { + for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = rb_next(rbp)) { epi = rb_entry(rbp, struct epitem, rbn); ep_unregister_pollwait(ep, epi); @@ -856,7 +856,7 @@ static void ep_free(struct eventpoll *ep) * a lockdep warning. */ mutex_lock(&ep->mtx); - while ((rbp = rb_first(&ep->rbr)) != NULL) { + while ((rbp = rb_first_cached(&ep->rbr)) != NULL) { epi = rb_entry(rbp, struct epitem, rbn); ep_remove(ep, epi); cond_resched(); @@ -963,7 +963,7 @@ static void ep_show_fdinfo(struct seq_file *m, struct file *f) struct rb_node *rbp; mutex_lock(&ep->mtx); - for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) { + for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = rb_next(rbp)) { struct epitem *epi = rb_entry(rbp, struct epitem, rbn); struct inode *inode = file_inode(epi->ffd.file); @@ -1040,7 +1040,7 @@ static int ep_alloc(struct eventpoll **pep) init_waitqueue_head(&ep->wq); init_waitqueue_head(&ep->poll_wait); INIT_LIST_HEAD(&ep->rdllist); - ep->rbr = RB_ROOT; + ep->rbr = RB_ROOT_CACHED; ep->ovflist = EP_UNACTIVE_PTR; ep->user = user; @@ -1066,7 +1066,7 @@ static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd) struct epoll_filefd ffd; ep_set_ffd(&ffd, file, fd); - for (rbp = ep->rbr.rb_node; rbp; ) { + for (rbp = ep->rbr.rb_root.rb_node; rbp; ) { epi = rb_entry(rbp, struct epitem, rbn); kcmp = ep_cmp_ffd(&ffd, &epi->ffd); if (kcmp > 0) @@ -1088,7 +1088,7 @@ static struct epitem *ep_find_tfd(struct eventpoll *ep, int tfd, unsigned long t struct rb_node *rbp; struct epitem *epi; - for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) { + for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = rb_next(rbp)) { epi = rb_entry(rbp, struct epitem, rbn); if (epi->ffd.fd == tfd) { if (toff == 0) @@ -1273,20 +1273,22 @@ static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead, static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi) { int kcmp; - struct rb_node **p = &ep->rbr.rb_node, *parent = NULL; + struct rb_node **p = &ep->rbr.rb_root.rb_node, *parent = NULL; struct epitem *epic; + bool leftmost = true; while (*p) { parent = *p; epic = rb_entry(parent, struct epitem, rbn); kcmp = ep_cmp_ffd(&epi->ffd, &epic->ffd); - if (kcmp > 0) + if (kcmp > 0) { p = &parent->rb_right; - else + leftmost = false; + } else p = &parent->rb_left; } rb_link_node(&epi->rbn, parent, p); - rb_insert_color(&epi->rbn, &ep->rbr); + rb_insert_color_cached(&epi->rbn, &ep->rbr, leftmost); } @@ -1530,7 +1532,7 @@ error_remove_epi: list_del_rcu(&epi->fllink); spin_unlock(&tfile->f_lock); - rb_erase(&epi->rbn, &ep->rbr); + rb_erase_cached(&epi->rbn, &ep->rbr); error_unregister: ep_unregister_pollwait(ep, epi); @@ -1878,7 +1880,7 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests) mutex_lock_nested(&ep->mtx, call_nests + 1); ep->visited = 1; list_add(&ep->visited_list_link, &visited_list); - for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) { + for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = rb_next(rbp)) { epi = rb_entry(rbp, struct epitem, rbn); if (unlikely(is_file_epoll(epi->ffd.file))) { ep_tovisit = epi->ffd.file->private_data; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index a791aac4c5af..fb96bb71da00 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2253,7 +2253,10 @@ int f2fs_migrate_page(struct address_space *mapping, SetPagePrivate(newpage); set_page_private(newpage, page_private(page)); - migrate_page_copy(newpage, page); + if (mode != MIGRATE_SYNC_NO_COPY) + migrate_page_copy(newpage, page); + else + migrate_page_states(newpage, page); return MIGRATEPAGE_SUCCESS; } diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index 6a7152d0c250..02c066663a3a 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -19,6 +19,8 @@ #include <linux/ctype.h> #include <linux/slab.h> #include <linux/namei.h> +#include <linux/kernel.h> + #include "fat.h" static inline unsigned long vfat_d_version(struct dentry *dentry) @@ -510,10 +512,8 @@ xlate_to_uni(const unsigned char *name, int len, unsigned char *outname, struct nls_table *nls) { const unsigned char *ip; - unsigned char nc; unsigned char *op; - unsigned int ec; - int i, k, fill; + int i, fill; int charlen; if (utf8) { @@ -530,33 +530,22 @@ xlate_to_uni(const unsigned char *name, int len, unsigned char *outname, i < len && *outlen < FAT_LFN_LEN; *outlen += 1) { if (escape && (*ip == ':')) { + u8 uc[2]; + if (i > len - 5) return -EINVAL; - ec = 0; - for (k = 1; k < 5; k++) { - nc = ip[k]; - ec <<= 4; - if (nc >= '0' && nc <= '9') { - ec |= nc - '0'; - continue; - } - if (nc >= 'a' && nc <= 'f') { - ec |= nc - ('a' - 10); - continue; - } - if (nc >= 'A' && nc <= 'F') { - ec |= nc - ('A' - 10); - continue; - } + + if (hex2bin(uc, ip + 1, 2) < 0) return -EINVAL; - } - *op++ = ec & 0xFF; - *op++ = ec >> 8; + + *(wchar_t *)op = uc[0] << 8 | uc[1]; + + op += 2; ip += 5; i += 5; } else { charlen = nls->char2uni(ip, len - i, - (wchar_t *)op); + (wchar_t *)op); if (charlen < 0) return -EINVAL; ip += charlen; diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 7c02b3f738e1..59073e9f01a4 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -334,7 +334,7 @@ static void remove_huge_page(struct page *page) } static void -hugetlb_vmdelete_list(struct rb_root *root, pgoff_t start, pgoff_t end) +hugetlb_vmdelete_list(struct rb_root_cached *root, pgoff_t start, pgoff_t end) { struct vm_area_struct *vma; @@ -498,7 +498,7 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset) i_size_write(inode, offset); i_mmap_lock_write(mapping); - if (!RB_EMPTY_ROOT(&mapping->i_mmap)) + if (!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root)) hugetlb_vmdelete_list(&mapping->i_mmap, pgoff, 0); i_mmap_unlock_write(mapping); remove_inode_hugepages(inode, offset, LLONG_MAX); @@ -523,7 +523,7 @@ static long hugetlbfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) inode_lock(inode); i_mmap_lock_write(mapping); - if (!RB_EMPTY_ROOT(&mapping->i_mmap)) + if (!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root)) hugetlb_vmdelete_list(&mapping->i_mmap, hole_start >> PAGE_SHIFT, hole_end >> PAGE_SHIFT); @@ -830,7 +830,10 @@ static int hugetlbfs_migrate_page(struct address_space *mapping, rc = migrate_huge_page_move_mapping(mapping, newpage, page); if (rc != MIGRATEPAGE_SUCCESS) return rc; - migrate_page_copy(newpage, page); + if (mode != MIGRATE_SYNC_NO_COPY) + migrate_page_copy(newpage, page); + else + migrate_page_states(newpage, page); return MIGRATEPAGE_SUCCESS; } diff --git a/fs/inode.c b/fs/inode.c index 6a1626e0edaf..210054157a49 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -353,7 +353,7 @@ void address_space_init_once(struct address_space *mapping) init_rwsem(&mapping->i_mmap_rwsem); INIT_LIST_HEAD(&mapping->private_list); spin_lock_init(&mapping->private_lock); - mapping->i_mmap = RB_ROOT; + mapping->i_mmap = RB_ROOT_CACHED; } EXPORT_SYMBOL(address_space_init_once); diff --git a/fs/namei.c b/fs/namei.c index ddb6a7c2b3d4..1180f9c58093 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1129,9 +1129,18 @@ static int follow_automount(struct path *path, struct nameidata *nd, * of the daemon to instantiate them before they can be used. */ if (!(nd->flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY | - LOOKUP_OPEN | LOOKUP_CREATE | LOOKUP_AUTOMOUNT)) && - path->dentry->d_inode) - return -EISDIR; + LOOKUP_OPEN | LOOKUP_CREATE | + LOOKUP_AUTOMOUNT))) { + /* Positive dentry that isn't meant to trigger an + * automount, EISDIR will allow it to be used, + * otherwise there's no mount here "now" so return + * ENOENT. + */ + if (path->dentry->d_inode) + return -EISDIR; + else + return -ENOENT; + } if (path->dentry->d_sb->s_user_ns != &init_user_ns) return -EACCES; diff --git a/fs/proc/generic.c b/fs/proc/generic.c index e3cda0b5968f..793a67574668 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -40,8 +40,8 @@ static int proc_match(unsigned int len, const char *name, struct proc_dir_entry static struct proc_dir_entry *pde_subdir_first(struct proc_dir_entry *dir) { - return rb_entry_safe(rb_first(&dir->subdir), struct proc_dir_entry, - subdir_node); + return rb_entry_safe(rb_first_cached(&dir->subdir), + struct proc_dir_entry, subdir_node); } static struct proc_dir_entry *pde_subdir_next(struct proc_dir_entry *dir) @@ -54,7 +54,7 @@ static struct proc_dir_entry *pde_subdir_find(struct proc_dir_entry *dir, const char *name, unsigned int len) { - struct rb_node *node = dir->subdir.rb_node; + struct rb_node *node = dir->subdir.rb_root.rb_node; while (node) { struct proc_dir_entry *de = rb_entry(node, @@ -75,8 +75,9 @@ static struct proc_dir_entry *pde_subdir_find(struct proc_dir_entry *dir, static bool pde_subdir_insert(struct proc_dir_entry *dir, struct proc_dir_entry *de) { - struct rb_root *root = &dir->subdir; - struct rb_node **new = &root->rb_node, *parent = NULL; + struct rb_root_cached *root = &dir->subdir; + struct rb_node **new = &root->rb_root.rb_node, *parent = NULL; + bool leftmost = true; /* Figure out where to put new node */ while (*new) { @@ -88,15 +89,16 @@ static bool pde_subdir_insert(struct proc_dir_entry *dir, parent = *new; if (result < 0) new = &(*new)->rb_left; - else if (result > 0) + else if (result > 0) { new = &(*new)->rb_right; - else + leftmost = false; + } else return false; } /* Add new node and rebalance tree. */ rb_link_node(&de->subdir_node, parent, new); - rb_insert_color(&de->subdir_node, root); + rb_insert_color_cached(&de->subdir_node, root, leftmost); return true; } @@ -369,7 +371,7 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent, ent->namelen = qstr.len; ent->mode = mode; ent->nlink = nlink; - ent->subdir = RB_ROOT; + ent->subdir = RB_ROOT_CACHED; atomic_set(&ent->count, 1); spin_lock_init(&ent->pde_unload_lock); INIT_LIST_HEAD(&ent->pde_openers); @@ -499,6 +501,14 @@ out: } EXPORT_SYMBOL(proc_create_data); +struct proc_dir_entry *proc_create(const char *name, umode_t mode, + struct proc_dir_entry *parent, + const struct file_operations *proc_fops) +{ + return proc_create_data(name, mode, parent, proc_fops, NULL); +} +EXPORT_SYMBOL(proc_create); + void proc_set_size(struct proc_dir_entry *de, loff_t size) { de->size = size; @@ -545,7 +555,7 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent) de = pde_subdir_find(parent, fn, len); if (de) - rb_erase(&de->subdir_node, &parent->subdir); + rb_erase_cached(&de->subdir_node, &parent->subdir); write_unlock(&proc_subdir_lock); if (!de) { WARN(1, "name '%s'\n", name); @@ -582,13 +592,13 @@ int remove_proc_subtree(const char *name, struct proc_dir_entry *parent) write_unlock(&proc_subdir_lock); return -ENOENT; } - rb_erase(&root->subdir_node, &parent->subdir); + rb_erase_cached(&root->subdir_node, &parent->subdir); de = root; while (1) { next = pde_subdir_first(de); if (next) { - rb_erase(&next->subdir_node, &de->subdir); + rb_erase_cached(&next->subdir_node, &de->subdir); de = next; continue; } diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 2cbfcd32e884..a34195e92b20 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -40,7 +40,7 @@ struct proc_dir_entry { const struct inode_operations *proc_iops; const struct file_operations *proc_fops; struct proc_dir_entry *parent; - struct rb_root subdir; + struct rb_root_cached subdir; struct rb_node subdir_node; void *data; atomic_t count; /* use count */ diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index d72fc40241d9..a2bf369c923d 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -196,7 +196,7 @@ static __net_init int proc_net_ns_init(struct net *net) if (!netd) goto out; - netd->subdir = RB_ROOT; + netd->subdir = RB_ROOT_CACHED; netd->data = net; netd->nlink = 2; netd->namelen = 3; diff --git a/fs/proc/root.c b/fs/proc/root.c index deecb397daa3..926fb27f4ca2 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -210,7 +210,7 @@ struct proc_dir_entry proc_root = { .proc_iops = &proc_root_inode_operations, .proc_fops = &proc_root_operations, .parent = &proc_root, - .subdir = RB_ROOT, + .subdir = RB_ROOT_CACHED, .name = "/proc", }; diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index a290966f91ec..7b40e11ede9b 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -268,8 +268,7 @@ static int do_maps_open(struct inode *inode, struct file *file, * Indicate if the VMA is a stack for the given task; for * /proc/PID/maps that is the stack of the main task. */ -static int is_stack(struct proc_maps_private *priv, - struct vm_area_struct *vma) +static int is_stack(struct vm_area_struct *vma) { /* * We make no effort to guess what a given thread considers to be @@ -302,7 +301,6 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) { struct mm_struct *mm = vma->vm_mm; struct file *file = vma->vm_file; - struct proc_maps_private *priv = m->private; vm_flags_t flags = vma->vm_flags; unsigned long ino = 0; unsigned long long pgoff = 0; @@ -350,7 +348,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) goto done; } - if (is_stack(priv, vma)) + if (is_stack(vma)) name = "[stack]"; } @@ -549,6 +547,8 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr, } } else if (is_migration_entry(swpent)) page = migration_entry_to_page(swpent); + else if (is_device_private_entry(swpent)) + page = device_private_entry_to_page(swpent); } else if (unlikely(IS_ENABLED(CONFIG_SHMEM) && mss->check_shmem_swap && pte_none(*pte))) { page = find_get_entry(vma->vm_file->f_mapping, @@ -608,13 +608,14 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, ptl = pmd_trans_huge_lock(pmd, vma); if (ptl) { - smaps_pmd_entry(pmd, addr, walk); + if (pmd_present(*pmd)) + smaps_pmd_entry(pmd, addr, walk); spin_unlock(ptl); - return 0; + goto out; } if (pmd_trans_unstable(pmd)) - return 0; + goto out; /* * The mmap_sem held all the way back in m_start() is what * keeps khugepaged out of here and from collapsing things @@ -624,6 +625,7 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, for (; addr != end; pte++, addr += PAGE_SIZE) smaps_pte_entry(pte, addr, walk); pte_unmap_unlock(pte - 1, ptl); +out: cond_resched(); return 0; } @@ -712,6 +714,8 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask, if (is_migration_entry(swpent)) page = migration_entry_to_page(swpent); + else if (is_device_private_entry(swpent)) + page = device_private_entry_to_page(swpent); } if (page) { int mapcount = page_mapcount(page); @@ -977,17 +981,22 @@ static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma, { pmd_t pmd = *pmdp; - /* See comment in change_huge_pmd() */ - pmdp_invalidate(vma, addr, pmdp); - if (pmd_dirty(*pmdp)) - pmd = pmd_mkdirty(pmd); - if (pmd_young(*pmdp)) - pmd = pmd_mkyoung(pmd); - - pmd = pmd_wrprotect(pmd); - pmd = pmd_clear_soft_dirty(pmd); - - set_pmd_at(vma->vm_mm, addr, pmdp, pmd); + if (pmd_present(pmd)) { + /* See comment in change_huge_pmd() */ + pmdp_invalidate(vma, addr, pmdp); + if (pmd_dirty(*pmdp)) + pmd = pmd_mkdirty(pmd); + if (pmd_young(*pmdp)) + pmd = pmd_mkyoung(pmd); + + pmd = pmd_wrprotect(pmd); + pmd = pmd_clear_soft_dirty(pmd); + + set_pmd_at(vma->vm_mm, addr, pmdp, pmd); + } else if (is_migration_entry(pmd_to_swp_entry(pmd))) { + pmd = pmd_swp_clear_soft_dirty(pmd); + set_pmd_at(vma->vm_mm, addr, pmdp, pmd); + } } #else static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma, @@ -1012,6 +1021,9 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, goto out; } + if (!pmd_present(*pmd)) + goto out; + page = pmd_page(*pmd); /* Clear accessed and referenced bits. */ @@ -1254,7 +1266,7 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm, if (pm->show_pfn) frame = pte_pfn(pte); flags |= PM_PRESENT; - page = vm_normal_page(vma, addr, pte); + page = _vm_normal_page(vma, addr, pte, true); if (pte_soft_dirty(pte)) flags |= PM_SOFT_DIRTY; } else if (is_swap_pte(pte)) { @@ -1267,6 +1279,9 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm, flags |= PM_SWAP; if (is_migration_entry(entry)) page = migration_entry_to_page(entry); + + if (is_device_private_entry(entry)) + page = device_private_entry_to_page(entry); } if (page && !PageAnon(page)) @@ -1293,27 +1308,33 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end, if (ptl) { u64 flags = 0, frame = 0; pmd_t pmd = *pmdp; + struct page *page = NULL; if ((vma->vm_flags & VM_SOFTDIRTY) || pmd_soft_dirty(pmd)) flags |= PM_SOFT_DIRTY; - /* - * Currently pmd for thp is always present because thp - * can not be swapped-out, migrated, or HWPOISONed - * (split in such cases instead.) - * This if-check is just to prepare for future implementation. - */ if (pmd_present(pmd)) { - struct page *page = pmd_page(pmd); - - if (page_mapcount(page) == 1) - flags |= PM_MMAP_EXCLUSIVE; + page = pmd_page(pmd); flags |= PM_PRESENT; if (pm->show_pfn) frame = pmd_pfn(pmd) + ((addr & ~PMD_MASK) >> PAGE_SHIFT); } +#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION + else if (is_swap_pmd(pmd)) { + swp_entry_t entry = pmd_to_swp_entry(pmd); + + frame = swp_type(entry) | + (swp_offset(entry) << MAX_SWAPFILES_SHIFT); + flags |= PM_SWAP; + VM_BUG_ON(!is_pmd_migration_entry(pmd)); + page = migration_entry_to_page(entry); + } +#endif + + if (page && page_mapcount(page) == 1) + flags |= PM_MMAP_EXCLUSIVE; for (; addr != end; addr += PAGE_SIZE) { pagemap_entry_t pme = make_pme(frame, flags); @@ -1746,7 +1767,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) seq_file_path(m, file, "\n\t= "); } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) { seq_puts(m, " heap"); - } else if (is_stack(proc_priv, vma)) { + } else if (is_stack(vma)) { seq_puts(m, " stack"); } diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 23266694db11..dea90b566a6e 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -125,8 +125,7 @@ unsigned long task_statm(struct mm_struct *mm, return size; } -static int is_stack(struct proc_maps_private *priv, - struct vm_area_struct *vma) +static int is_stack(struct vm_area_struct *vma) { struct mm_struct *mm = vma->vm_mm; @@ -178,7 +177,7 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma, if (file) { seq_pad(m, ' '); seq_file_path(m, file, ""); - } else if (mm && is_stack(priv, vma)) { + } else if (mm && is_stack(vma)) { seq_pad(m, ' '); seq_printf(m, "[stack]"); } diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index f90a466ea5db..a02aa59d1e24 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -1490,7 +1490,10 @@ static int ubifs_migrate_page(struct address_space *mapping, SetPagePrivate(newpage); } - migrate_page_copy(newpage, page); + if (mode != MIGRATE_SYNC_NO_COPY) + migrate_page_copy(newpage, page); + else + migrate_page_states(newpage, page); return MIGRATEPAGE_SUCCESS; } #endif diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 5419e7da82ba..ef4b48d1ea42 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -381,8 +381,26 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason) * in __get_user_pages if userfaultfd_release waits on the * caller of handle_userfault to release the mmap_sem. */ - if (unlikely(ACCESS_ONCE(ctx->released))) + if (unlikely(ACCESS_ONCE(ctx->released))) { + /* + * Don't return VM_FAULT_SIGBUS in this case, so a non + * cooperative manager can close the uffd after the + * last UFFDIO_COPY, without risking to trigger an + * involuntary SIGBUS if the process was starting the + * userfaultfd while the userfaultfd was still armed + * (but after the last UFFDIO_COPY). If the uffd + * wasn't already closed when the userfault reached + * this point, that would normally be solved by + * userfaultfd_must_wait returning 'false'. + * + * If we were to return VM_FAULT_SIGBUS here, the non + * cooperative manager would be instead forced to + * always call UFFDIO_UNREGISTER before it can safely + * close the uffd. + */ + ret = VM_FAULT_NOPAGE; goto out; + } /* * Check that we can return VM_FAULT_RETRY. |