From 9d8a765211335cfdad464b90fb19f546af5706ae Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Fri, 20 Nov 2015 15:57:21 -0800 Subject: kernel/signal.c: unexport sigsuspend() sigsuspend() is nowhere used except in signal.c itself, so we can mark it static do not pollute the global namespace. But this patch is more than a boring cleanup patch, it fixes a real issue on UserModeLinux. UML has a special console driver to display ttys using xterm, or other terminal emulators, on the host side. Vegard reported that sometimes UML is unable to spawn a xterm and he's facing the following warning: WARNING: CPU: 0 PID: 908 at include/linux/thread_info.h:128 sigsuspend+0xab/0xc0() It turned out that this warning makes absolutely no sense as the UML xterm code calls sigsuspend() on the host side, at least it tries. But as the kernel itself offers a sigsuspend() symbol the linker choose this one instead of the glibc wrapper. Interestingly this code used to work since ever but always blocked signals on the wrong side. Some recent kernel change made the WARN_ON() trigger and uncovered the bug. It is a wonderful example of how much works by chance on computers. :-) Fixes: 68f3f16d9ad0f1 ("new helper: sigsuspend()") Signed-off-by: Richard Weinberger Reported-by: Vegard Nossum Tested-by: Vegard Nossum Acked-by: Oleg Nesterov Cc: [3.5+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/signal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/signal.c b/kernel/signal.c index c0b01fe24bbd..f3f1f7a972fd 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -3503,7 +3503,7 @@ SYSCALL_DEFINE0(pause) #endif -int sigsuspend(sigset_t *set) +static int sigsuspend(sigset_t *set) { current->saved_sigmask = current->blocked; set_current_blocked(set); -- cgit v1.2.3 From 7625b3a0007decf2b135cb47ca67abc78a7b1bc1 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Fri, 20 Nov 2015 15:57:24 -0800 Subject: kernel/panic.c: turn off locks debug before releasing console lock Commit 08d78658f393 ("panic: release stale console lock to always get the logbuf printed out") introduced an unwanted bad unlock balance report when panic() is called directly and not from OOPS (e.g. from out_of_memory()). The difference is that in case of OOPS we disable locks debug in oops_enter() and on direct panic call nobody does that. Fixes: 08d78658f393 ("panic: release stale console lock to always get the logbuf printed out") Reported-by: kernel test robot Signed-off-by: Vitaly Kuznetsov Cc: HATAYAMA Daisuke Cc: Masami Hiramatsu Cc: Jiri Kosina Cc: Baoquan He Cc: Prarit Bhargava Cc: Xie XiuQi Cc: Seth Jennings Cc: "K. Y. Srinivasan" Cc: Jan Kara Cc: Petr Mladek Cc: Yasuaki Ishimatsu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/panic.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/panic.c b/kernel/panic.c index 4579dbb7ed87..4b150bc0c6c1 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -152,8 +152,11 @@ void panic(const char *fmt, ...) * We may have ended up stopping the CPU holding the lock (in * smp_send_stop()) while still having some valuable data in the console * buffer. Try to acquire the lock then release it regardless of the - * result. The release will also print the buffers out. + * result. The release will also print the buffers out. Locks debug + * should be disabled to avoid reporting bad unlock balance when + * panic() is not being callled from OOPS. */ + debug_locks_off(); console_trylock(); console_unlock(); -- cgit v1.2.3 From 81b1a832d79749058863cffe2c0ed4ef40f6e6ec Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 24 Nov 2015 11:39:54 -0800 Subject: pidns: fix NULL dereference in __task_pid_nr_ns() I got a crash during a "perf top" session that was caused by a race in __task_pid_nr_ns() : pid_nr_ns() was inlined, but apparently compiler chose to read task->pids[type].pid twice, and the pid->level dereference crashed because we got a NULL pointer at the second read : if (pid && ns->level <= pid->level) { // CRASH Just use RCU API properly to solve this race, and not worry about "perf top" crashing hosts :( get_task_pid() can benefit from same fix. Signed-off-by: Eric Dumazet Signed-off-by: Linus Torvalds --- kernel/pid.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/pid.c b/kernel/pid.c index ca368793808e..78b3d9f80d44 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -467,7 +467,7 @@ struct pid *get_task_pid(struct task_struct *task, enum pid_type type) rcu_read_lock(); if (type != PIDTYPE_PID) task = task->group_leader; - pid = get_pid(task->pids[type].pid); + pid = get_pid(rcu_dereference(task->pids[type].pid)); rcu_read_unlock(); return pid; } @@ -528,7 +528,7 @@ pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type, if (likely(pid_alive(task))) { if (type != PIDTYPE_PID) task = task->group_leader; - nr = pid_nr_ns(task->pids[type].pid, ns); + nr = pid_nr_ns(rcu_dereference(task->pids[type].pid), ns); } rcu_read_unlock(); -- cgit v1.2.3 From 20ef10c1b3068f105004e247d8e7dd8120fa4b9a Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 26 Nov 2015 09:42:08 +1030 Subject: module: Use the same logic for setting and unsetting RO/NX When setting a module's RO and NX permissions, set_section_ro_nx() is used, but when clearing them, unset_module_{init,core}_ro_nx() are used. The unset functions don't have the same checks the set function has for partial page protections. It's probably harmless, but it's still confusingly asymmetrical. Instead, use the same logic to do both. Also add some new set_module_{init,core}_ro_nx() helper functions for more symmetry with the unset functions. Signed-off-by: Josh Poimboeuf Signed-off-by: Rusty Russell Signed-off-by: Jiri Kosina --- kernel/module.c | 57 ++++++++++++++++++++++++++++++--------------------------- 1 file changed, 30 insertions(+), 27 deletions(-) (limited to 'kernel') diff --git a/kernel/module.c b/kernel/module.c index 8f051a106676..14b224967e7b 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1886,7 +1886,9 @@ void set_page_attributes(void *start, void *end, int (*set)(unsigned long start, static void set_section_ro_nx(void *base, unsigned long text_size, unsigned long ro_size, - unsigned long total_size) + unsigned long total_size, + int (*set_ro)(unsigned long start, int num_pages), + int (*set_nx)(unsigned long start, int num_pages)) { /* begin and end PFNs of the current subsection */ unsigned long begin_pfn; @@ -1898,7 +1900,7 @@ static void set_section_ro_nx(void *base, * - Do not protect last partial page. */ if (ro_size > 0) - set_page_attributes(base, base + ro_size, set_memory_ro); + set_page_attributes(base, base + ro_size, set_ro); /* * Set NX permissions for module data: @@ -1909,28 +1911,36 @@ static void set_section_ro_nx(void *base, begin_pfn = PFN_UP((unsigned long)base + text_size); end_pfn = PFN_UP((unsigned long)base + total_size); if (end_pfn > begin_pfn) - set_memory_nx(begin_pfn << PAGE_SHIFT, end_pfn - begin_pfn); + set_nx(begin_pfn << PAGE_SHIFT, end_pfn - begin_pfn); } } +static void set_module_core_ro_nx(struct module *mod) +{ + set_section_ro_nx(mod->module_core, mod->core_text_size, + mod->core_ro_size, mod->core_size, + set_memory_ro, set_memory_nx); +} + static void unset_module_core_ro_nx(struct module *mod) { - set_page_attributes(mod->module_core + mod->core_text_size, - mod->module_core + mod->core_size, - set_memory_x); - set_page_attributes(mod->module_core, - mod->module_core + mod->core_ro_size, - set_memory_rw); + set_section_ro_nx(mod->module_core, mod->core_text_size, + mod->core_ro_size, mod->core_size, + set_memory_rw, set_memory_x); +} + +static void set_module_init_ro_nx(struct module *mod) +{ + set_section_ro_nx(mod->module_init, mod->init_text_size, + mod->init_ro_size, mod->init_size, + set_memory_ro, set_memory_nx); } static void unset_module_init_ro_nx(struct module *mod) { - set_page_attributes(mod->module_init + mod->init_text_size, - mod->module_init + mod->init_size, - set_memory_x); - set_page_attributes(mod->module_init, - mod->module_init + mod->init_ro_size, - set_memory_rw); + set_section_ro_nx(mod->module_init, mod->init_text_size, + mod->init_ro_size, mod->init_size, + set_memory_rw, set_memory_x); } /* Iterate through all modules and set each module's text as RW */ @@ -1979,7 +1989,8 @@ void set_all_modules_text_ro(void) mutex_unlock(&module_mutex); } #else -static inline void set_section_ro_nx(void *base, unsigned long text_size, unsigned long ro_size, unsigned long total_size) { } +static void set_module_core_ro_nx(struct module *mod) { } +static void set_module_init_ro_nx(struct module *mod) { } static void unset_module_core_ro_nx(struct module *mod) { } static void unset_module_init_ro_nx(struct module *mod) { } #endif @@ -3373,17 +3384,9 @@ static int complete_formation(struct module *mod, struct load_info *info) /* This relies on module_mutex for list integrity. */ module_bug_finalize(info->hdr, info->sechdrs, mod); - /* Set RO and NX regions for core */ - set_section_ro_nx(mod->module_core, - mod->core_text_size, - mod->core_ro_size, - mod->core_size); - - /* Set RO and NX regions for init */ - set_section_ro_nx(mod->module_init, - mod->init_text_size, - mod->init_ro_size, - mod->init_size); + /* Set RO and NX regions */ + set_module_init_ro_nx(mod); + set_module_core_ro_nx(mod); /* Mark state as coming so strong_try_module_get() ignores us, * but kallsyms etc. can see us. */ -- cgit v1.2.3 From c65abf358f211c3f88c8ed714dff25775ab49fc1 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 26 Nov 2015 09:43:08 +1030 Subject: gcov: use within_module() helper. An exact mapping would be within_module_core(), but at this stage (MODULE_STATE_GOING) the init section is empty, and this is clearer. Reviewed-by: Peter Oberparleiter Signed-off-by: Rusty Russell Signed-off-by: Jiri Kosina --- kernel/gcov/base.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'kernel') diff --git a/kernel/gcov/base.c b/kernel/gcov/base.c index 7080ae1eb6c1..2f9df37940a0 100644 --- a/kernel/gcov/base.c +++ b/kernel/gcov/base.c @@ -123,11 +123,6 @@ void gcov_enable_events(void) } #ifdef CONFIG_MODULES -static inline int within(void *addr, void *start, unsigned long size) -{ - return ((addr >= start) && (addr < start + size)); -} - /* Update list and generate events when modules are unloaded. */ static int gcov_module_notifier(struct notifier_block *nb, unsigned long event, void *data) @@ -142,7 +137,7 @@ static int gcov_module_notifier(struct notifier_block *nb, unsigned long event, /* Remove entries located in module from linked list. */ while ((info = gcov_info_next(info))) { - if (within(info, mod->module_core, mod->core_size)) { + if (within_module((unsigned long)info, mod)) { gcov_info_unlink(prev, info); if (gcov_events_enabled) gcov_event(GCOV_REMOVE, info); -- cgit v1.2.3 From 7523e4dc5057e157212b4741abd6256e03404cf1 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 26 Nov 2015 09:44:08 +1030 Subject: module: use a structure to encapsulate layout. Makes it easier to handle init vs core cleanly, though the change is fairly invasive across random architectures. It simplifies the rbtree code immediately, however, while keeping the core data together in the same cachline (now iff the rbtree code is enabled). Acked-by: Peter Zijlstra Reviewed-by: Josh Poimboeuf Signed-off-by: Rusty Russell Signed-off-by: Jiri Kosina --- kernel/debug/kdb/kdb_main.c | 4 +- kernel/module.c | 199 ++++++++++++++++++++------------------------ 2 files changed, 94 insertions(+), 109 deletions(-) (limited to 'kernel') diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index 4121345498e0..2a20c0dfdafc 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -2021,7 +2021,7 @@ static int kdb_lsmod(int argc, const char **argv) continue; kdb_printf("%-20s%8u 0x%p ", mod->name, - mod->core_size, (void *)mod); + mod->core_layout.size, (void *)mod); #ifdef CONFIG_MODULE_UNLOAD kdb_printf("%4d ", module_refcount(mod)); #endif @@ -2031,7 +2031,7 @@ static int kdb_lsmod(int argc, const char **argv) kdb_printf(" (Loading)"); else kdb_printf(" (Live)"); - kdb_printf(" 0x%p", mod->module_core); + kdb_printf(" 0x%p", mod->core_layout.base); #ifdef CONFIG_MODULE_UNLOAD { diff --git a/kernel/module.c b/kernel/module.c index 14b224967e7b..a0a3d6d9d5e8 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -108,13 +108,6 @@ static LIST_HEAD(modules); * Use a latched RB-tree for __module_address(); this allows us to use * RCU-sched lookups of the address from any context. * - * Because modules have two address ranges: init and core, we need two - * latch_tree_nodes entries. Therefore we need the back-pointer from - * mod_tree_node. - * - * Because init ranges are short lived we mark them unlikely and have placed - * them outside the critical cacheline in struct module. - * * This is conditional on PERF_EVENTS || TRACING because those can really hit * __module_address() hard by doing a lot of stack unwinding; potentially from * NMI context. @@ -122,24 +115,16 @@ static LIST_HEAD(modules); static __always_inline unsigned long __mod_tree_val(struct latch_tree_node *n) { - struct mod_tree_node *mtn = container_of(n, struct mod_tree_node, node); - struct module *mod = mtn->mod; + struct module_layout *layout = container_of(n, struct module_layout, mtn.node); - if (unlikely(mtn == &mod->mtn_init)) - return (unsigned long)mod->module_init; - - return (unsigned long)mod->module_core; + return (unsigned long)layout->base; } static __always_inline unsigned long __mod_tree_size(struct latch_tree_node *n) { - struct mod_tree_node *mtn = container_of(n, struct mod_tree_node, node); - struct module *mod = mtn->mod; - - if (unlikely(mtn == &mod->mtn_init)) - return (unsigned long)mod->init_size; + struct module_layout *layout = container_of(n, struct module_layout, mtn.node); - return (unsigned long)mod->core_size; + return (unsigned long)layout->size; } static __always_inline bool @@ -197,23 +182,23 @@ static void __mod_tree_remove(struct mod_tree_node *node) */ static void mod_tree_insert(struct module *mod) { - mod->mtn_core.mod = mod; - mod->mtn_init.mod = mod; + mod->core_layout.mtn.mod = mod; + mod->init_layout.mtn.mod = mod; - __mod_tree_insert(&mod->mtn_core); - if (mod->init_size) - __mod_tree_insert(&mod->mtn_init); + __mod_tree_insert(&mod->core_layout.mtn); + if (mod->init_layout.size) + __mod_tree_insert(&mod->init_layout.mtn); } static void mod_tree_remove_init(struct module *mod) { - if (mod->init_size) - __mod_tree_remove(&mod->mtn_init); + if (mod->init_layout.size) + __mod_tree_remove(&mod->init_layout.mtn); } static void mod_tree_remove(struct module *mod) { - __mod_tree_remove(&mod->mtn_core); + __mod_tree_remove(&mod->core_layout.mtn); mod_tree_remove_init(mod); } @@ -267,9 +252,9 @@ static void __mod_update_bounds(void *base, unsigned int size) static void mod_update_bounds(struct module *mod) { - __mod_update_bounds(mod->module_core, mod->core_size); - if (mod->init_size) - __mod_update_bounds(mod->module_init, mod->init_size); + __mod_update_bounds(mod->core_layout.base, mod->core_layout.size); + if (mod->init_layout.size) + __mod_update_bounds(mod->init_layout.base, mod->init_layout.size); } #ifdef CONFIG_KGDB_KDB @@ -1214,7 +1199,7 @@ struct module_attribute module_uevent = static ssize_t show_coresize(struct module_attribute *mattr, struct module_kobject *mk, char *buffer) { - return sprintf(buffer, "%u\n", mk->mod->core_size); + return sprintf(buffer, "%u\n", mk->mod->core_layout.size); } static struct module_attribute modinfo_coresize = @@ -1223,7 +1208,7 @@ static struct module_attribute modinfo_coresize = static ssize_t show_initsize(struct module_attribute *mattr, struct module_kobject *mk, char *buffer) { - return sprintf(buffer, "%u\n", mk->mod->init_size); + return sprintf(buffer, "%u\n", mk->mod->init_layout.size); } static struct module_attribute modinfo_initsize = @@ -1917,29 +1902,29 @@ static void set_section_ro_nx(void *base, static void set_module_core_ro_nx(struct module *mod) { - set_section_ro_nx(mod->module_core, mod->core_text_size, - mod->core_ro_size, mod->core_size, + set_section_ro_nx(mod->core_layout.base, mod->core_layout.text_size, + mod->core_layout.ro_size, mod->core_layout.size, set_memory_ro, set_memory_nx); } static void unset_module_core_ro_nx(struct module *mod) { - set_section_ro_nx(mod->module_core, mod->core_text_size, - mod->core_ro_size, mod->core_size, + set_section_ro_nx(mod->core_layout.base, mod->core_layout.text_size, + mod->core_layout.ro_size, mod->core_layout.size, set_memory_rw, set_memory_x); } static void set_module_init_ro_nx(struct module *mod) { - set_section_ro_nx(mod->module_init, mod->init_text_size, - mod->init_ro_size, mod->init_size, + set_section_ro_nx(mod->init_layout.base, mod->init_layout.text_size, + mod->init_layout.ro_size, mod->init_layout.size, set_memory_ro, set_memory_nx); } static void unset_module_init_ro_nx(struct module *mod) { - set_section_ro_nx(mod->module_init, mod->init_text_size, - mod->init_ro_size, mod->init_size, + set_section_ro_nx(mod->init_layout.base, mod->init_layout.text_size, + mod->init_layout.ro_size, mod->init_layout.size, set_memory_rw, set_memory_x); } @@ -1952,14 +1937,14 @@ void set_all_modules_text_rw(void) list_for_each_entry_rcu(mod, &modules, list) { if (mod->state == MODULE_STATE_UNFORMED) continue; - if ((mod->module_core) && (mod->core_text_size)) { - set_page_attributes(mod->module_core, - mod->module_core + mod->core_text_size, + if ((mod->core_layout.base) && (mod->core_layout.text_size)) { + set_page_attributes(mod->core_layout.base, + mod->core_layout.base + mod->core_layout.text_size, set_memory_rw); } - if ((mod->module_init) && (mod->init_text_size)) { - set_page_attributes(mod->module_init, - mod->module_init + mod->init_text_size, + if ((mod->init_layout.base) && (mod->init_layout.text_size)) { + set_page_attributes(mod->init_layout.base, + mod->init_layout.base + mod->init_layout.text_size, set_memory_rw); } } @@ -1975,14 +1960,14 @@ void set_all_modules_text_ro(void) list_for_each_entry_rcu(mod, &modules, list) { if (mod->state == MODULE_STATE_UNFORMED) continue; - if ((mod->module_core) && (mod->core_text_size)) { - set_page_attributes(mod->module_core, - mod->module_core + mod->core_text_size, + if ((mod->core_layout.base) && (mod->core_layout.text_size)) { + set_page_attributes(mod->core_layout.base, + mod->core_layout.base + mod->core_layout.text_size, set_memory_ro); } - if ((mod->module_init) && (mod->init_text_size)) { - set_page_attributes(mod->module_init, - mod->module_init + mod->init_text_size, + if ((mod->init_layout.base) && (mod->init_layout.text_size)) { + set_page_attributes(mod->init_layout.base, + mod->init_layout.base + mod->init_layout.text_size, set_memory_ro); } } @@ -2047,16 +2032,16 @@ static void free_module(struct module *mod) /* This may be NULL, but that's OK */ unset_module_init_ro_nx(mod); module_arch_freeing_init(mod); - module_memfree(mod->module_init); + module_memfree(mod->init_layout.base); kfree(mod->args); percpu_modfree(mod); /* Free lock-classes; relies on the preceding sync_rcu(). */ - lockdep_free_key_range(mod->module_core, mod->core_size); + lockdep_free_key_range(mod->core_layout.base, mod->core_layout.size); /* Finally, free the core (containing the module structure) */ unset_module_core_ro_nx(mod); - module_memfree(mod->module_core); + module_memfree(mod->core_layout.base); #ifdef CONFIG_MPU update_protections(current->mm); @@ -2259,20 +2244,20 @@ static void layout_sections(struct module *mod, struct load_info *info) || s->sh_entsize != ~0UL || strstarts(sname, ".init")) continue; - s->sh_entsize = get_offset(mod, &mod->core_size, s, i); + s->sh_entsize = get_offset(mod, &mod->core_layout.size, s, i); pr_debug("\t%s\n", sname); } switch (m) { case 0: /* executable */ - mod->core_size = debug_align(mod->core_size); - mod->core_text_size = mod->core_size; + mod->core_layout.size = debug_align(mod->core_layout.size); + mod->core_layout.text_size = mod->core_layout.size; break; case 1: /* RO: text and ro-data */ - mod->core_size = debug_align(mod->core_size); - mod->core_ro_size = mod->core_size; + mod->core_layout.size = debug_align(mod->core_layout.size); + mod->core_layout.ro_size = mod->core_layout.size; break; case 3: /* whole core */ - mod->core_size = debug_align(mod->core_size); + mod->core_layout.size = debug_align(mod->core_layout.size); break; } } @@ -2288,21 +2273,21 @@ static void layout_sections(struct module *mod, struct load_info *info) || s->sh_entsize != ~0UL || !strstarts(sname, ".init")) continue; - s->sh_entsize = (get_offset(mod, &mod->init_size, s, i) + s->sh_entsize = (get_offset(mod, &mod->init_layout.size, s, i) | INIT_OFFSET_MASK); pr_debug("\t%s\n", sname); } switch (m) { case 0: /* executable */ - mod->init_size = debug_align(mod->init_size); - mod->init_text_size = mod->init_size; + mod->init_layout.size = debug_align(mod->init_layout.size); + mod->init_layout.text_size = mod->init_layout.size; break; case 1: /* RO: text and ro-data */ - mod->init_size = debug_align(mod->init_size); - mod->init_ro_size = mod->init_size; + mod->init_layout.size = debug_align(mod->init_layout.size); + mod->init_layout.ro_size = mod->init_layout.size; break; case 3: /* whole init */ - mod->init_size = debug_align(mod->init_size); + mod->init_layout.size = debug_align(mod->init_layout.size); break; } } @@ -2477,7 +2462,7 @@ static void layout_symtab(struct module *mod, struct load_info *info) /* Put symbol section at end of init part of module. */ symsect->sh_flags |= SHF_ALLOC; - symsect->sh_entsize = get_offset(mod, &mod->init_size, symsect, + symsect->sh_entsize = get_offset(mod, &mod->init_layout.size, symsect, info->index.sym) | INIT_OFFSET_MASK; pr_debug("\t%s\n", info->secstrings + symsect->sh_name); @@ -2494,16 +2479,16 @@ static void layout_symtab(struct module *mod, struct load_info *info) } /* Append room for core symbols at end of core part. */ - info->symoffs = ALIGN(mod->core_size, symsect->sh_addralign ?: 1); - info->stroffs = mod->core_size = info->symoffs + ndst * sizeof(Elf_Sym); - mod->core_size += strtab_size; - mod->core_size = debug_align(mod->core_size); + info->symoffs = ALIGN(mod->core_layout.size, symsect->sh_addralign ?: 1); + info->stroffs = mod->core_layout.size = info->symoffs + ndst * sizeof(Elf_Sym); + mod->core_layout.size += strtab_size; + mod->core_layout.size = debug_align(mod->core_layout.size); /* Put string table section at end of init part of module. */ strsect->sh_flags |= SHF_ALLOC; - strsect->sh_entsize = get_offset(mod, &mod->init_size, strsect, + strsect->sh_entsize = get_offset(mod, &mod->init_layout.size, strsect, info->index.str) | INIT_OFFSET_MASK; - mod->init_size = debug_align(mod->init_size); + mod->init_layout.size = debug_align(mod->init_layout.size); pr_debug("\t%s\n", info->secstrings + strsect->sh_name); } @@ -2524,8 +2509,8 @@ static void add_kallsyms(struct module *mod, const struct load_info *info) for (i = 0; i < mod->num_symtab; i++) mod->symtab[i].st_info = elf_type(&mod->symtab[i], info); - mod->core_symtab = dst = mod->module_core + info->symoffs; - mod->core_strtab = s = mod->module_core + info->stroffs; + mod->core_symtab = dst = mod->core_layout.base + info->symoffs; + mod->core_strtab = s = mod->core_layout.base + info->stroffs; src = mod->symtab; for (ndst = i = 0; i < mod->num_symtab; i++) { if (i == 0 || @@ -2975,7 +2960,7 @@ static int move_module(struct module *mod, struct load_info *info) void *ptr; /* Do the allocs. */ - ptr = module_alloc(mod->core_size); + ptr = module_alloc(mod->core_layout.size); /* * The pointer to this block is stored in the module structure * which is inside the block. Just mark it as not being a @@ -2985,11 +2970,11 @@ static int move_module(struct module *mod, struct load_info *info) if (!ptr) return -ENOMEM; - memset(ptr, 0, mod->core_size); - mod->module_core = ptr; + memset(ptr, 0, mod->core_layout.size); + mod->core_layout.base = ptr; - if (mod->init_size) { - ptr = module_alloc(mod->init_size); + if (mod->init_layout.size) { + ptr = module_alloc(mod->init_layout.size); /* * The pointer to this block is stored in the module structure * which is inside the block. This block doesn't need to be @@ -2998,13 +2983,13 @@ static int move_module(struct module *mod, struct load_info *info) */ kmemleak_ignore(ptr); if (!ptr) { - module_memfree(mod->module_core); + module_memfree(mod->core_layout.base); return -ENOMEM; } - memset(ptr, 0, mod->init_size); - mod->module_init = ptr; + memset(ptr, 0, mod->init_layout.size); + mod->init_layout.base = ptr; } else - mod->module_init = NULL; + mod->init_layout.base = NULL; /* Transfer each section which specifies SHF_ALLOC */ pr_debug("final section addresses:\n"); @@ -3016,10 +3001,10 @@ static int move_module(struct module *mod, struct load_info *info) continue; if (shdr->sh_entsize & INIT_OFFSET_MASK) - dest = mod->module_init + dest = mod->init_layout.base + (shdr->sh_entsize & ~INIT_OFFSET_MASK); else - dest = mod->module_core + shdr->sh_entsize; + dest = mod->core_layout.base + shdr->sh_entsize; if (shdr->sh_type != SHT_NOBITS) memcpy(dest, (void *)shdr->sh_addr, shdr->sh_size); @@ -3081,12 +3066,12 @@ static void flush_module_icache(const struct module *mod) * Do it before processing of module parameters, so the module * can provide parameter accessor functions of its own. */ - if (mod->module_init) - flush_icache_range((unsigned long)mod->module_init, - (unsigned long)mod->module_init - + mod->init_size); - flush_icache_range((unsigned long)mod->module_core, - (unsigned long)mod->module_core + mod->core_size); + if (mod->init_layout.base) + flush_icache_range((unsigned long)mod->init_layout.base, + (unsigned long)mod->init_layout.base + + mod->init_layout.size); + flush_icache_range((unsigned long)mod->core_layout.base, + (unsigned long)mod->core_layout.base + mod->core_layout.size); set_fs(old_fs); } @@ -3144,8 +3129,8 @@ static void module_deallocate(struct module *mod, struct load_info *info) { percpu_modfree(mod); module_arch_freeing_init(mod); - module_memfree(mod->module_init); - module_memfree(mod->module_core); + module_memfree(mod->init_layout.base); + module_memfree(mod->core_layout.base); } int __weak module_finalize(const Elf_Ehdr *hdr, @@ -3232,7 +3217,7 @@ static noinline int do_init_module(struct module *mod) ret = -ENOMEM; goto fail; } - freeinit->module_init = mod->module_init; + freeinit->module_init = mod->init_layout.base; /* * We want to find out whether @mod uses async during init. Clear @@ -3292,10 +3277,10 @@ static noinline int do_init_module(struct module *mod) mod_tree_remove_init(mod); unset_module_init_ro_nx(mod); module_arch_freeing_init(mod); - mod->module_init = NULL; - mod->init_size = 0; - mod->init_ro_size = 0; - mod->init_text_size = 0; + mod->init_layout.base = NULL; + mod->init_layout.size = 0; + mod->init_layout.ro_size = 0; + mod->init_layout.text_size = 0; /* * We want to free module_init, but be aware that kallsyms may be * walking this with preempt disabled. In all the failure paths, we @@ -3575,7 +3560,7 @@ static int load_module(struct load_info *info, const char __user *uargs, mutex_unlock(&module_mutex); free_module: /* Free lock-classes; relies on the preceding sync_rcu() */ - lockdep_free_key_range(mod->module_core, mod->core_size); + lockdep_free_key_range(mod->core_layout.base, mod->core_layout.size); module_deallocate(mod, info); free_copy: @@ -3653,9 +3638,9 @@ static const char *get_ksymbol(struct module *mod, /* At worse, next value is at end of module */ if (within_module_init(addr, mod)) - nextval = (unsigned long)mod->module_init+mod->init_text_size; + nextval = (unsigned long)mod->init_layout.base+mod->init_layout.text_size; else - nextval = (unsigned long)mod->module_core+mod->core_text_size; + nextval = (unsigned long)mod->core_layout.base+mod->core_layout.text_size; /* Scan for closest preceding symbol, and next symbol. (ELF starts real symbols at 1). */ @@ -3902,7 +3887,7 @@ static int m_show(struct seq_file *m, void *p) return 0; seq_printf(m, "%s %u", - mod->name, mod->init_size + mod->core_size); + mod->name, mod->init_layout.size + mod->core_layout.size); print_unload_info(m, mod); /* Informative for users. */ @@ -3911,7 +3896,7 @@ static int m_show(struct seq_file *m, void *p) mod->state == MODULE_STATE_COMING ? "Loading" : "Live"); /* Used by oprofile and other similar tools. */ - seq_printf(m, " 0x%pK", mod->module_core); + seq_printf(m, " 0x%pK", mod->core_layout.base); /* Taints info */ if (mod->taints) @@ -4054,8 +4039,8 @@ struct module *__module_text_address(unsigned long addr) struct module *mod = __module_address(addr); if (mod) { /* Make sure it's within the text section. */ - if (!within(addr, mod->module_init, mod->init_text_size) - && !within(addr, mod->module_core, mod->core_text_size)) + if (!within(addr, mod->init_layout.base, mod->init_layout.text_size) + && !within(addr, mod->core_layout.base, mod->core_layout.text_size)) mod = NULL; } return mod; -- cgit v1.2.3 From 85c898db6327353d38f3dd428457384cf81f83f8 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 26 Nov 2015 09:45:08 +1030 Subject: module: clean up RO/NX handling. Modules have three sections: text, rodata and writable data. The code handled the case where these overlapped, however they never can: debug_align() ensures they are always page-aligned. This is why we got away with manually traversing the pages in set_all_modules_text_rw() without rounding. We create three helper functions: frob_text(), frob_rodata() and frob_writable_data(). We then call these explicitly at every point, so it's clear what we're doing. We also expose module_enable_ro() and module_disable_ro() for livepatch to use. Reviewed-by: Josh Poimboeuf Signed-off-by: Rusty Russell Signed-off-by: Jiri Kosina --- kernel/module.c | 168 ++++++++++++++++++++++++++------------------------------ 1 file changed, 77 insertions(+), 91 deletions(-) (limited to 'kernel') diff --git a/kernel/module.c b/kernel/module.c index a0a3d6d9d5e8..77212128f34a 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -80,15 +80,6 @@ # define debug_align(X) (X) #endif -/* - * Given BASE and SIZE this macro calculates the number of pages the - * memory regions occupies - */ -#define MOD_NUMBER_OF_PAGES(BASE, SIZE) (((SIZE) > 0) ? \ - (PFN_DOWN((unsigned long)(BASE) + (SIZE) - 1) - \ - PFN_DOWN((unsigned long)BASE) + 1) \ - : (0UL)) - /* If this is set, the section belongs in the init part of the module */ #define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1)) @@ -1858,74 +1849,75 @@ static void mod_sysfs_teardown(struct module *mod) /* * LKM RO/NX protection: protect module's text/ro-data * from modification and any data from execution. + * + * General layout of module is: + * [text] [read-only-data] [writable data] + * text_size -----^ ^ ^ + * ro_size ------------------------| | + * size -------------------------------------------| + * + * These values are always page-aligned (as is base) */ -void set_page_attributes(void *start, void *end, int (*set)(unsigned long start, int num_pages)) +static void frob_text(const struct module_layout *layout, + int (*set_memory)(unsigned long start, int num_pages)) { - unsigned long begin_pfn = PFN_DOWN((unsigned long)start); - unsigned long end_pfn = PFN_DOWN((unsigned long)end); - - if (end_pfn > begin_pfn) - set(begin_pfn << PAGE_SHIFT, end_pfn - begin_pfn); + BUG_ON((unsigned long)layout->base & (PAGE_SIZE-1)); + BUG_ON((unsigned long)layout->text_size & (PAGE_SIZE-1)); + set_memory((unsigned long)layout->base, + layout->text_size >> PAGE_SHIFT); } -static void set_section_ro_nx(void *base, - unsigned long text_size, - unsigned long ro_size, - unsigned long total_size, - int (*set_ro)(unsigned long start, int num_pages), - int (*set_nx)(unsigned long start, int num_pages)) +static void frob_rodata(const struct module_layout *layout, + int (*set_memory)(unsigned long start, int num_pages)) { - /* begin and end PFNs of the current subsection */ - unsigned long begin_pfn; - unsigned long end_pfn; - - /* - * Set RO for module text and RO-data: - * - Always protect first page. - * - Do not protect last partial page. - */ - if (ro_size > 0) - set_page_attributes(base, base + ro_size, set_ro); + BUG_ON((unsigned long)layout->base & (PAGE_SIZE-1)); + BUG_ON((unsigned long)layout->text_size & (PAGE_SIZE-1)); + BUG_ON((unsigned long)layout->ro_size & (PAGE_SIZE-1)); + set_memory((unsigned long)layout->base + layout->text_size, + (layout->ro_size - layout->text_size) >> PAGE_SHIFT); +} - /* - * Set NX permissions for module data: - * - Do not protect first partial page. - * - Always protect last page. - */ - if (total_size > text_size) { - begin_pfn = PFN_UP((unsigned long)base + text_size); - end_pfn = PFN_UP((unsigned long)base + total_size); - if (end_pfn > begin_pfn) - set_nx(begin_pfn << PAGE_SHIFT, end_pfn - begin_pfn); - } +static void frob_writable_data(const struct module_layout *layout, + int (*set_memory)(unsigned long start, int num_pages)) +{ + BUG_ON((unsigned long)layout->base & (PAGE_SIZE-1)); + BUG_ON((unsigned long)layout->ro_size & (PAGE_SIZE-1)); + BUG_ON((unsigned long)layout->size & (PAGE_SIZE-1)); + set_memory((unsigned long)layout->base + layout->ro_size, + (layout->size - layout->ro_size) >> PAGE_SHIFT); } -static void set_module_core_ro_nx(struct module *mod) +/* livepatching wants to disable read-only so it can frob module. */ +void module_disable_ro(const struct module *mod) { - set_section_ro_nx(mod->core_layout.base, mod->core_layout.text_size, - mod->core_layout.ro_size, mod->core_layout.size, - set_memory_ro, set_memory_nx); + frob_text(&mod->core_layout, set_memory_rw); + frob_rodata(&mod->core_layout, set_memory_rw); + frob_text(&mod->init_layout, set_memory_rw); + frob_rodata(&mod->init_layout, set_memory_rw); } -static void unset_module_core_ro_nx(struct module *mod) +void module_enable_ro(const struct module *mod) { - set_section_ro_nx(mod->core_layout.base, mod->core_layout.text_size, - mod->core_layout.ro_size, mod->core_layout.size, - set_memory_rw, set_memory_x); + frob_text(&mod->core_layout, set_memory_ro); + frob_rodata(&mod->core_layout, set_memory_ro); + frob_text(&mod->init_layout, set_memory_ro); + frob_rodata(&mod->init_layout, set_memory_ro); } -static void set_module_init_ro_nx(struct module *mod) +static void module_enable_nx(const struct module *mod) { - set_section_ro_nx(mod->init_layout.base, mod->init_layout.text_size, - mod->init_layout.ro_size, mod->init_layout.size, - set_memory_ro, set_memory_nx); + frob_rodata(&mod->core_layout, set_memory_nx); + frob_writable_data(&mod->core_layout, set_memory_nx); + frob_rodata(&mod->init_layout, set_memory_nx); + frob_writable_data(&mod->init_layout, set_memory_nx); } -static void unset_module_init_ro_nx(struct module *mod) +static void module_disable_nx(const struct module *mod) { - set_section_ro_nx(mod->init_layout.base, mod->init_layout.text_size, - mod->init_layout.ro_size, mod->init_layout.size, - set_memory_rw, set_memory_x); + frob_rodata(&mod->core_layout, set_memory_x); + frob_writable_data(&mod->core_layout, set_memory_x); + frob_rodata(&mod->init_layout, set_memory_x); + frob_writable_data(&mod->init_layout, set_memory_x); } /* Iterate through all modules and set each module's text as RW */ @@ -1937,16 +1929,9 @@ void set_all_modules_text_rw(void) list_for_each_entry_rcu(mod, &modules, list) { if (mod->state == MODULE_STATE_UNFORMED) continue; - if ((mod->core_layout.base) && (mod->core_layout.text_size)) { - set_page_attributes(mod->core_layout.base, - mod->core_layout.base + mod->core_layout.text_size, - set_memory_rw); - } - if ((mod->init_layout.base) && (mod->init_layout.text_size)) { - set_page_attributes(mod->init_layout.base, - mod->init_layout.base + mod->init_layout.text_size, - set_memory_rw); - } + + frob_text(&mod->core_layout, set_memory_rw); + frob_text(&mod->init_layout, set_memory_rw); } mutex_unlock(&module_mutex); } @@ -1960,24 +1945,25 @@ void set_all_modules_text_ro(void) list_for_each_entry_rcu(mod, &modules, list) { if (mod->state == MODULE_STATE_UNFORMED) continue; - if ((mod->core_layout.base) && (mod->core_layout.text_size)) { - set_page_attributes(mod->core_layout.base, - mod->core_layout.base + mod->core_layout.text_size, - set_memory_ro); - } - if ((mod->init_layout.base) && (mod->init_layout.text_size)) { - set_page_attributes(mod->init_layout.base, - mod->init_layout.base + mod->init_layout.text_size, - set_memory_ro); - } + + frob_text(&mod->core_layout, set_memory_ro); + frob_text(&mod->init_layout, set_memory_ro); } mutex_unlock(&module_mutex); } + +static void disable_ro_nx(const struct module_layout *layout) +{ + frob_text(layout, set_memory_rw); + frob_rodata(layout, set_memory_rw); + frob_rodata(layout, set_memory_x); + frob_writable_data(layout, set_memory_x); +} + #else -static void set_module_core_ro_nx(struct module *mod) { } -static void set_module_init_ro_nx(struct module *mod) { } -static void unset_module_core_ro_nx(struct module *mod) { } -static void unset_module_init_ro_nx(struct module *mod) { } +static void disable_ro_nx(const struct module_layout *layout) { } +static void module_enable_nx(const struct module *mod) { } +static void module_disable_nx(const struct module *mod) { } #endif void __weak module_memfree(void *module_region) @@ -2029,8 +2015,8 @@ static void free_module(struct module *mod) synchronize_sched(); mutex_unlock(&module_mutex); - /* This may be NULL, but that's OK */ - unset_module_init_ro_nx(mod); + /* This may be empty, but that's OK */ + disable_ro_nx(&mod->init_layout); module_arch_freeing_init(mod); module_memfree(mod->init_layout.base); kfree(mod->args); @@ -2040,7 +2026,7 @@ static void free_module(struct module *mod) lockdep_free_key_range(mod->core_layout.base, mod->core_layout.size); /* Finally, free the core (containing the module structure) */ - unset_module_core_ro_nx(mod); + disable_ro_nx(&mod->core_layout); module_memfree(mod->core_layout.base); #ifdef CONFIG_MPU @@ -3275,7 +3261,7 @@ static noinline int do_init_module(struct module *mod) mod->strtab = mod->core_strtab; #endif mod_tree_remove_init(mod); - unset_module_init_ro_nx(mod); + disable_ro_nx(&mod->init_layout); module_arch_freeing_init(mod); mod->init_layout.base = NULL; mod->init_layout.size = 0; @@ -3370,8 +3356,8 @@ static int complete_formation(struct module *mod, struct load_info *info) module_bug_finalize(info->hdr, info->sechdrs, mod); /* Set RO and NX regions */ - set_module_init_ro_nx(mod); - set_module_core_ro_nx(mod); + module_enable_ro(mod); + module_enable_nx(mod); /* Mark state as coming so strong_try_module_get() ignores us, * but kallsyms etc. can see us. */ @@ -3536,8 +3522,8 @@ static int load_module(struct load_info *info, const char __user *uargs, MODULE_STATE_GOING, mod); /* we can't deallocate the module until we clear memory protection */ - unset_module_init_ro_nx(mod); - unset_module_core_ro_nx(mod); + module_disable_ro(mod); + module_disable_nx(mod); ddebug_cleanup: dynamic_debug_remove(info->debug); -- cgit v1.2.3 From e0224418516b4d8a6c2160574bac18447c354ef0 Mon Sep 17 00:00:00 2001 From: Miroslav Benes Date: Thu, 26 Nov 2015 13:18:06 +1030 Subject: module: keep percpu symbols in module's symtab Currently, percpu symbols from .data..percpu ELF section of a module are not copied over and stored in final symtab array of struct module. Consequently such symbol cannot be returned via kallsyms API (for example kallsyms_lookup_name). This can be especially confusing when the percpu symbol is exported. Only its __ksymtab et al. are present in its symtab. The culprit is in layout_and_allocate() function where SHF_ALLOC flag is dropped for .data..percpu section. There is in fact no need to copy the section to final struct module, because kernel module loader allocates extra percpu section by itself. Unfortunately only symbols from SHF_ALLOC sections are copied due to a check in is_core_symbol(). The patch changes is_core_symbol() function to copy over also percpu symbols (their st_shndx points to .data..percpu ELF section). We do it only if CONFIG_KALLSYMS_ALL is set to be consistent with the rest of the function (ELF section is SHF_ALLOC but !SHF_EXECINSTR). Finally elf_type() returns type 'a' for a percpu symbol because its address is absolute. Signed-off-by: Miroslav Benes Signed-off-by: Rusty Russell Signed-off-by: Jiri Kosina --- kernel/module.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/module.c b/kernel/module.c index 77212128f34a..912e891e0e2f 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2383,7 +2383,7 @@ static char elf_type(const Elf_Sym *sym, const struct load_info *info) } if (sym->st_shndx == SHN_UNDEF) return 'U'; - if (sym->st_shndx == SHN_ABS) + if (sym->st_shndx == SHN_ABS || sym->st_shndx == info->index.pcpu) return 'a'; if (sym->st_shndx >= SHN_LORESERVE) return '?'; @@ -2412,7 +2412,7 @@ static char elf_type(const Elf_Sym *sym, const struct load_info *info) } static bool is_core_symbol(const Elf_Sym *src, const Elf_Shdr *sechdrs, - unsigned int shnum) + unsigned int shnum, unsigned int pcpundx) { const Elf_Shdr *sec; @@ -2421,6 +2421,11 @@ static bool is_core_symbol(const Elf_Sym *src, const Elf_Shdr *sechdrs, || !src->st_name) return false; +#ifdef CONFIG_KALLSYMS_ALL + if (src->st_shndx == pcpundx) + return true; +#endif + sec = sechdrs + src->st_shndx; if (!(sec->sh_flags & SHF_ALLOC) #ifndef CONFIG_KALLSYMS_ALL @@ -2458,7 +2463,8 @@ static void layout_symtab(struct module *mod, struct load_info *info) /* Compute total space required for the core symbols' strtab. */ for (ndst = i = 0; i < nsrc; i++) { if (i == 0 || - is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum)) { + is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum, + info->index.pcpu)) { strtab_size += strlen(&info->strtab[src[i].st_name])+1; ndst++; } @@ -2500,7 +2506,8 @@ static void add_kallsyms(struct module *mod, const struct load_info *info) src = mod->symtab; for (ndst = i = 0; i < mod->num_symtab; i++) { if (i == 0 || - is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum)) { + is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum, + info->index.pcpu)) { dst[ndst] = src[i]; dst[ndst++].st_name = s - mod->core_strtab; s += strlcpy(s, &mod->strtab[src[i].st_name], -- cgit v1.2.3