diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/cgroup/cgroup.c | 2 | ||||
-rw-r--r-- | kernel/events/core.c | 2 | ||||
-rw-r--r-- | kernel/kprobes.c | 10 | ||||
-rw-r--r-- | kernel/livepatch/patch.c | 4 | ||||
-rw-r--r-- | kernel/livepatch/transition.c | 4 | ||||
-rw-r--r-- | kernel/locking/lockdep.c | 2 | ||||
-rw-r--r-- | kernel/locking/mutex-debug.c | 4 | ||||
-rw-r--r-- | kernel/module.c | 14 | ||||
-rw-r--r-- | kernel/rcu/sync.c | 25 | ||||
-rw-r--r-- | kernel/rcu/tree.c | 50 | ||||
-rw-r--r-- | kernel/rcu/tree.h | 15 | ||||
-rw-r--r-- | kernel/rcu/tree_exp.h | 10 | ||||
-rw-r--r-- | kernel/rcu/tree_plugin.h | 46 | ||||
-rw-r--r-- | kernel/rcu/update.c | 9 | ||||
-rw-r--r-- | kernel/sched/core.c | 2 | ||||
-rw-r--r-- | kernel/sched/membarrier.c | 6 | ||||
-rw-r--r-- | kernel/trace/ftrace.c | 24 | ||||
-rw-r--r-- | kernel/trace/ring_buffer.c | 12 | ||||
-rw-r--r-- | kernel/trace/trace.c | 10 | ||||
-rw-r--r-- | kernel/trace/trace_events_filter.c | 4 | ||||
-rw-r--r-- | kernel/trace/trace_kprobe.c | 2 | ||||
-rw-r--r-- | kernel/tracepoint.c | 4 | ||||
-rw-r--r-- | kernel/workqueue.c | 8 |
23 files changed, 140 insertions, 129 deletions
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 6aaf5dd5383b..7a8429f8e280 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -5343,7 +5343,7 @@ int __init cgroup_init(void) cgroup_rstat_boot(); /* - * The latency of the synchronize_sched() is too high for cgroups, + * The latency of the synchronize_rcu() is too high for cgroups, * avoid it at the cost of forcing all readers into the slow path. */ rcu_sync_enter_start(&cgroup_threadgroup_rwsem.rss); diff --git a/kernel/events/core.c b/kernel/events/core.c index 84530ab358c3..c4b90cf7734a 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -9918,7 +9918,7 @@ static void account_event(struct perf_event *event) * call the perf scheduling hooks before proceeding to * install events that need them. */ - synchronize_sched(); + synchronize_rcu(); } /* * Now that we have waited for the sync_sched(), allow further diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 90e98e233647..08e31d863191 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -229,7 +229,7 @@ static int collect_garbage_slots(struct kprobe_insn_cache *c) struct kprobe_insn_page *kip, *next; /* Ensure no-one is interrupted on the garbages */ - synchronize_sched(); + synchronize_rcu(); list_for_each_entry_safe(kip, next, &c->pages, list) { int i; @@ -1382,7 +1382,7 @@ out: if (ret) { ap->flags |= KPROBE_FLAG_DISABLED; list_del_rcu(&p->list); - synchronize_sched(); + synchronize_rcu(); } } } @@ -1597,7 +1597,7 @@ int register_kprobe(struct kprobe *p) ret = arm_kprobe(p); if (ret) { hlist_del_rcu(&p->hlist); - synchronize_sched(); + synchronize_rcu(); goto out; } } @@ -1776,7 +1776,7 @@ void unregister_kprobes(struct kprobe **kps, int num) kps[i]->addr = NULL; mutex_unlock(&kprobe_mutex); - synchronize_sched(); + synchronize_rcu(); for (i = 0; i < num; i++) if (kps[i]->addr) __unregister_kprobe_bottom(kps[i]); @@ -1966,7 +1966,7 @@ void unregister_kretprobes(struct kretprobe **rps, int num) rps[i]->kp.addr = NULL; mutex_unlock(&kprobe_mutex); - synchronize_sched(); + synchronize_rcu(); for (i = 0; i < num; i++) { if (rps[i]->kp.addr) { __unregister_kprobe_bottom(&rps[i]->kp); diff --git a/kernel/livepatch/patch.c b/kernel/livepatch/patch.c index 82d584225dc6..7702cb4064fc 100644 --- a/kernel/livepatch/patch.c +++ b/kernel/livepatch/patch.c @@ -61,7 +61,7 @@ static void notrace klp_ftrace_handler(unsigned long ip, ops = container_of(fops, struct klp_ops, fops); /* - * A variant of synchronize_sched() is used to allow patching functions + * A variant of synchronize_rcu() is used to allow patching functions * where RCU is not watching, see klp_synchronize_transition(). */ preempt_disable_notrace(); @@ -72,7 +72,7 @@ static void notrace klp_ftrace_handler(unsigned long ip, /* * func should never be NULL because preemption should be disabled here * and unregister_ftrace_function() does the equivalent of a - * synchronize_sched() before the func_stack removal. + * synchronize_rcu() before the func_stack removal. */ if (WARN_ON_ONCE(!func)) goto unlock; diff --git a/kernel/livepatch/transition.c b/kernel/livepatch/transition.c index 5bc349805e03..304d5eb8a98c 100644 --- a/kernel/livepatch/transition.c +++ b/kernel/livepatch/transition.c @@ -52,7 +52,7 @@ static DECLARE_DELAYED_WORK(klp_transition_work, klp_transition_work_fn); /* * This function is just a stub to implement a hard force - * of synchronize_sched(). This requires synchronizing + * of synchronize_rcu(). This requires synchronizing * tasks even in userspace and idle. */ static void klp_sync(struct work_struct *work) @@ -175,7 +175,7 @@ void klp_cancel_transition(void) void klp_update_patch_state(struct task_struct *task) { /* - * A variant of synchronize_sched() is used to allow patching functions + * A variant of synchronize_rcu() is used to allow patching functions * where RCU is not watching, see klp_synchronize_transition(). */ preempt_disable_notrace(); diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 1efada2dd9dd..ef27f98714c0 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -4195,7 +4195,7 @@ void lockdep_free_key_range(void *start, unsigned long size) * * sync_sched() is sufficient because the read-side is IRQ disable. */ - synchronize_sched(); + synchronize_rcu(); /* * XXX at this point we could return the resources to the pool; diff --git a/kernel/locking/mutex-debug.c b/kernel/locking/mutex-debug.c index 9aa713629387..771d4ca96dda 100644 --- a/kernel/locking/mutex-debug.c +++ b/kernel/locking/mutex-debug.c @@ -36,7 +36,7 @@ void debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter) void debug_mutex_wake_waiter(struct mutex *lock, struct mutex_waiter *waiter) { - SMP_DEBUG_LOCKS_WARN_ON(!spin_is_locked(&lock->wait_lock)); + lockdep_assert_held(&lock->wait_lock); DEBUG_LOCKS_WARN_ON(list_empty(&lock->wait_list)); DEBUG_LOCKS_WARN_ON(waiter->magic != waiter); DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list)); @@ -51,7 +51,7 @@ void debug_mutex_free_waiter(struct mutex_waiter *waiter) void debug_mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter, struct task_struct *task) { - SMP_DEBUG_LOCKS_WARN_ON(!spin_is_locked(&lock->wait_lock)); + lockdep_assert_held(&lock->wait_lock); /* Mark the current thread as blocked on the lock: */ task->blocked_on = waiter; diff --git a/kernel/module.c b/kernel/module.c index 49a405891587..99b46c32d579 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2159,7 +2159,7 @@ static void free_module(struct module *mod) /* Remove this module from bug list, this uses list_del_rcu */ module_bug_cleanup(mod); /* Wait for RCU-sched synchronizing before releasing mod->list and buglist. */ - synchronize_sched(); + synchronize_rcu(); mutex_unlock(&module_mutex); /* This may be empty, but that's OK */ @@ -3507,15 +3507,15 @@ static noinline int do_init_module(struct module *mod) /* * We want to free module_init, but be aware that kallsyms may be * walking this with preempt disabled. In all the failure paths, we - * call synchronize_sched(), but we don't want to slow down the success + * call synchronize_rcu(), but we don't want to slow down the success * path, so use actual RCU here. * Note that module_alloc() on most architectures creates W+X page * mappings which won't be cleaned up until do_free_init() runs. Any * code such as mark_rodata_ro() which depends on those mappings to * be cleaned up needs to sync with the queued work - ie - * rcu_barrier_sched() + * rcu_barrier() */ - call_rcu_sched(&freeinit->rcu, do_free_init); + call_rcu(&freeinit->rcu, do_free_init); mutex_unlock(&module_mutex); wake_up_all(&module_wq); @@ -3526,7 +3526,7 @@ fail_free_freeinit: fail: /* Try to protect us from buggy refcounters. */ mod->state = MODULE_STATE_GOING; - synchronize_sched(); + synchronize_rcu(); module_put(mod); blocking_notifier_call_chain(&module_notify_list, MODULE_STATE_GOING, mod); @@ -3819,7 +3819,7 @@ static int load_module(struct load_info *info, const char __user *uargs, ddebug_cleanup: ftrace_release_mod(mod); dynamic_debug_remove(mod, info->debug); - synchronize_sched(); + synchronize_rcu(); kfree(mod->args); free_arch_cleanup: module_arch_cleanup(mod); @@ -3834,7 +3834,7 @@ static int load_module(struct load_info *info, const char __user *uargs, mod_tree_remove(mod); wake_up_all(&module_wq); /* Wait for RCU-sched synchronizing before releasing mod->list. */ - synchronize_sched(); + synchronize_rcu(); mutex_unlock(&module_mutex); free_module: /* Free lock-classes; relies on the preceding sync_rcu() */ diff --git a/kernel/rcu/sync.c b/kernel/rcu/sync.c index 3f943efcf61c..be10036fa621 100644 --- a/kernel/rcu/sync.c +++ b/kernel/rcu/sync.c @@ -44,15 +44,15 @@ static const struct { __INIT_HELD(rcu_read_lock_held) }, [RCU_SCHED_SYNC] = { - .sync = synchronize_sched, - .call = call_rcu_sched, - .wait = rcu_barrier_sched, + .sync = synchronize_rcu, + .call = call_rcu, + .wait = rcu_barrier, __INIT_HELD(rcu_read_lock_sched_held) }, [RCU_BH_SYNC] = { - .sync = synchronize_rcu_bh, - .call = call_rcu_bh, - .wait = rcu_barrier_bh, + .sync = synchronize_rcu, + .call = call_rcu, + .wait = rcu_barrier, __INIT_HELD(rcu_read_lock_bh_held) }, }; @@ -125,8 +125,7 @@ void rcu_sync_enter(struct rcu_sync *rsp) rsp->gp_state = GP_PENDING; spin_unlock_irq(&rsp->rss_lock); - BUG_ON(need_wait && need_sync); - + WARN_ON_ONCE(need_wait && need_sync); if (need_sync) { gp_ops[rsp->gp_type].sync(); rsp->gp_state = GP_PASSED; @@ -139,7 +138,7 @@ void rcu_sync_enter(struct rcu_sync *rsp) * Nobody has yet been allowed the 'fast' path and thus we can * avoid doing any sync(). The callback will get 'dropped'. */ - BUG_ON(rsp->gp_state != GP_PASSED); + WARN_ON_ONCE(rsp->gp_state != GP_PASSED); } } @@ -166,8 +165,8 @@ static void rcu_sync_func(struct rcu_head *rhp) struct rcu_sync *rsp = container_of(rhp, struct rcu_sync, cb_head); unsigned long flags; - BUG_ON(rsp->gp_state != GP_PASSED); - BUG_ON(rsp->cb_state == CB_IDLE); + WARN_ON_ONCE(rsp->gp_state != GP_PASSED); + WARN_ON_ONCE(rsp->cb_state == CB_IDLE); spin_lock_irqsave(&rsp->rss_lock, flags); if (rsp->gp_count) { @@ -225,7 +224,7 @@ void rcu_sync_dtor(struct rcu_sync *rsp) { int cb_state; - BUG_ON(rsp->gp_count); + WARN_ON_ONCE(rsp->gp_count); spin_lock_irq(&rsp->rss_lock); if (rsp->cb_state == CB_REPLAY) @@ -235,6 +234,6 @@ void rcu_sync_dtor(struct rcu_sync *rsp) if (cb_state != CB_IDLE) { gp_ops[rsp->gp_type].wait(); - BUG_ON(rsp->cb_state != CB_IDLE); + WARN_ON_ONCE(rsp->cb_state != CB_IDLE); } } diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 121f833acd04..6ec3abbe90e2 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -500,16 +500,29 @@ void rcu_force_quiescent_state(void) EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); /* + * Convert a ->gp_state value to a character string. + */ +static const char *gp_state_getname(short gs) +{ + if (gs < 0 || gs >= ARRAY_SIZE(gp_state_names)) + return "???"; + return gp_state_names[gs]; +} + +/* * Show the state of the grace-period kthreads. */ void show_rcu_gp_kthreads(void) { int cpu; + unsigned long j; struct rcu_data *rdp; struct rcu_node *rnp; - pr_info("%s: wait state: %d ->state: %#lx\n", rcu_state.name, - rcu_state.gp_state, rcu_state.gp_kthread->state); + j = jiffies - READ_ONCE(rcu_state.gp_activity); + pr_info("%s: wait state: %s(%d) ->state: %#lx delta ->gp_activity %ld\n", + rcu_state.name, gp_state_getname(rcu_state.gp_state), + rcu_state.gp_state, rcu_state.gp_kthread->state, j); rcu_for_each_node_breadth_first(rnp) { if (ULONG_CMP_GE(rcu_state.gp_seq, rnp->gp_seq_needed)) continue; @@ -891,12 +904,12 @@ void rcu_irq_enter_irqson(void) } /** - * rcu_is_watching - see if RCU thinks that the current CPU is idle + * rcu_is_watching - see if RCU thinks that the current CPU is not idle * * Return true if RCU is watching the running CPU, which means that this * CPU can safely enter RCU read-side critical sections. In other words, - * if the current CPU is in its idle loop and is neither in an interrupt - * or NMI handler, return true. + * if the current CPU is not in its idle loop or is in an interrupt or + * NMI handler, return true. */ bool notrace rcu_is_watching(void) { @@ -1143,16 +1156,6 @@ static void record_gp_stall_check_time(void) } /* - * Convert a ->gp_state value to a character string. - */ -static const char *gp_state_getname(short gs) -{ - if (gs < 0 || gs >= ARRAY_SIZE(gp_state_names)) - return "???"; - return gp_state_names[gs]; -} - -/* * Complain about starvation of grace-period kthread. */ static void rcu_check_gp_kthread_starvation(void) @@ -2032,9 +2035,9 @@ static void rcu_gp_cleanup(void) rnp = rcu_get_root(); raw_spin_lock_irq_rcu_node(rnp); /* GP before ->gp_seq update. */ - /* Declare grace period done. */ - rcu_seq_end(&rcu_state.gp_seq); + /* Declare grace period done, trace first to use old GP number. */ trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq, TPS("end")); + rcu_seq_end(&rcu_state.gp_seq); rcu_state.gp_state = RCU_GP_IDLE; /* Check for GP requests since above loop. */ rdp = this_cpu_ptr(&rcu_data); @@ -2600,10 +2603,10 @@ static void force_quiescent_state(void) * This function checks for grace-period requests that fail to motivate * RCU to come out of its idle mode. */ -static void -rcu_check_gp_start_stall(struct rcu_node *rnp, struct rcu_data *rdp) +void +rcu_check_gp_start_stall(struct rcu_node *rnp, struct rcu_data *rdp, + const unsigned long gpssdelay) { - const unsigned long gpssdelay = rcu_jiffies_till_stall_check() * HZ; unsigned long flags; unsigned long j; struct rcu_node *rnp_root = rcu_get_root(); @@ -2690,7 +2693,7 @@ static __latent_entropy void rcu_process_callbacks(struct softirq_action *unused local_irq_restore(flags); } - rcu_check_gp_start_stall(rnp, rdp); + rcu_check_gp_start_stall(rnp, rdp, rcu_jiffies_till_stall_check()); /* If there are callbacks ready, invoke them. */ if (rcu_segcblist_ready_cbs(&rdp->cblist)) @@ -2826,7 +2829,7 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func, int cpu, bool lazy) * Very early boot, before rcu_init(). Initialize if needed * and then drop through to queue the callback. */ - BUG_ON(cpu != -1); + WARN_ON_ONCE(cpu != -1); WARN_ON_ONCE(!rcu_is_watching()); if (rcu_segcblist_empty(&rdp->cblist)) rcu_segcblist_init(&rdp->cblist); @@ -3485,7 +3488,8 @@ static int __init rcu_spawn_gp_kthread(void) rcu_scheduler_fully_active = 1; t = kthread_create(rcu_gp_kthread, NULL, "%s", rcu_state.name); - BUG_ON(IS_ERR(t)); + if (WARN_ONCE(IS_ERR(t), "%s: Could not start grace-period kthread, OOM is now expected behavior\n", __func__)) + return 0; rnp = rcu_get_root(); raw_spin_lock_irqsave_rcu_node(rnp, flags); rcu_state.gp_kthread = t; diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 703e19ff532d..c3e2807a834a 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -57,7 +57,7 @@ struct rcu_node { /* some rcu_state fields as well as */ /* following. */ unsigned long gp_seq; /* Track rsp->rcu_gp_seq. */ - unsigned long gp_seq_needed; /* Track rsp->rcu_gp_seq_needed. */ + unsigned long gp_seq_needed; /* Track furthest future GP request. */ unsigned long completedqs; /* All QSes done for this node. */ unsigned long qsmask; /* CPUs or groups that need to switch in */ /* order for current grace period to proceed.*/ @@ -163,7 +163,7 @@ union rcu_noqs { struct rcu_data { /* 1) quiescent-state and grace-period handling : */ unsigned long gp_seq; /* Track rsp->rcu_gp_seq counter. */ - unsigned long gp_seq_needed; /* Track rsp->rcu_gp_seq_needed ctr. */ + unsigned long gp_seq_needed; /* Track furthest future GP request. */ union rcu_noqs cpu_no_qs; /* No QSes yet for this CPU. */ bool core_needs_qs; /* Core waits for quiesc state. */ bool beenonline; /* CPU online at least once. */ @@ -398,17 +398,6 @@ static const char *tp_rcu_varname __used __tracepoint_string = rcu_name; #define RCU_NAME rcu_name #endif /* #else #ifdef CONFIG_TRACING */ -/* - * RCU implementation internal declarations: - */ -extern struct rcu_state rcu_sched_state; - -extern struct rcu_state rcu_bh_state; - -#ifdef CONFIG_PREEMPT_RCU -extern struct rcu_state rcu_preempt_state; -#endif /* #ifdef CONFIG_PREEMPT_RCU */ - int rcu_dynticks_snap(struct rcu_data *rdp); #ifdef CONFIG_RCU_BOOST diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index 8d18c1014e2b..928fe5893a57 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -450,10 +450,12 @@ static void sync_rcu_exp_select_cpus(smp_call_func_t func) } INIT_WORK(&rnp->rew.rew_work, sync_rcu_exp_select_node_cpus); preempt_disable(); - cpu = cpumask_next(rnp->grplo - 1, cpu_online_mask); + cpu = find_next_bit(&rnp->ffmask, BITS_PER_LONG, -1); /* If all offline, queue the work on an unbound CPU. */ - if (unlikely(cpu > rnp->grphi)) + if (unlikely(cpu > rnp->grphi - rnp->grplo)) cpu = WORK_CPU_UNBOUND; + else + cpu += rnp->grplo; queue_work_on(cpu, rcu_par_gp_wq, &rnp->rew.rew_work); preempt_enable(); rnp->exp_need_flush = true; @@ -690,8 +692,10 @@ static void sync_rcu_exp_handler(void *unused) */ if (t->rcu_read_lock_nesting > 0) { raw_spin_lock_irqsave_rcu_node(rnp, flags); - if (rnp->expmask & rdp->grpmask) + if (rnp->expmask & rdp->grpmask) { rdp->deferred_qs = true; + WRITE_ONCE(t->rcu_read_unlock_special.b.exp_hint, true); + } raw_spin_unlock_irqrestore_rcu_node(rnp, flags); } diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 05915e536336..605ff3b06098 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -397,6 +397,11 @@ static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp) return rnp->gp_tasks != NULL; } +/* Bias and limit values for ->rcu_read_lock_nesting. */ +#define RCU_NEST_BIAS INT_MAX +#define RCU_NEST_NMAX (-INT_MAX / 2) +#define RCU_NEST_PMAX (INT_MAX / 2) + /* * Preemptible RCU implementation for rcu_read_lock(). * Just increment ->rcu_read_lock_nesting, shared state will be updated @@ -405,6 +410,8 @@ static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp) void __rcu_read_lock(void) { current->rcu_read_lock_nesting++; + if (IS_ENABLED(CONFIG_PROVE_LOCKING)) + WARN_ON_ONCE(current->rcu_read_lock_nesting > RCU_NEST_PMAX); barrier(); /* critical section after entry code. */ } EXPORT_SYMBOL_GPL(__rcu_read_lock); @@ -424,20 +431,18 @@ void __rcu_read_unlock(void) --t->rcu_read_lock_nesting; } else { barrier(); /* critical section before exit code. */ - t->rcu_read_lock_nesting = INT_MIN; + t->rcu_read_lock_nesting = -RCU_NEST_BIAS; barrier(); /* assign before ->rcu_read_unlock_special load */ if (unlikely(READ_ONCE(t->rcu_read_unlock_special.s))) rcu_read_unlock_special(t); barrier(); /* ->rcu_read_unlock_special load before assign */ t->rcu_read_lock_nesting = 0; } -#ifdef CONFIG_PROVE_LOCKING - { - int rrln = READ_ONCE(t->rcu_read_lock_nesting); + if (IS_ENABLED(CONFIG_PROVE_LOCKING)) { + int rrln = t->rcu_read_lock_nesting; - WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2); + WARN_ON_ONCE(rrln < 0 && rrln > RCU_NEST_NMAX); } -#endif /* #ifdef CONFIG_PROVE_LOCKING */ } EXPORT_SYMBOL_GPL(__rcu_read_unlock); @@ -597,7 +602,7 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags) */ static bool rcu_preempt_need_deferred_qs(struct task_struct *t) { - return (this_cpu_ptr(&rcu_data)->deferred_qs || + return (__this_cpu_read(rcu_data.deferred_qs) || READ_ONCE(t->rcu_read_unlock_special.s)) && t->rcu_read_lock_nesting <= 0; } @@ -617,11 +622,11 @@ static void rcu_preempt_deferred_qs(struct task_struct *t) if (!rcu_preempt_need_deferred_qs(t)) return; if (couldrecurse) - t->rcu_read_lock_nesting -= INT_MIN; + t->rcu_read_lock_nesting -= RCU_NEST_BIAS; local_irq_save(flags); rcu_preempt_deferred_qs_irqrestore(t, flags); if (couldrecurse) - t->rcu_read_lock_nesting += INT_MIN; + t->rcu_read_lock_nesting += RCU_NEST_BIAS; } /* @@ -642,13 +647,21 @@ static void rcu_read_unlock_special(struct task_struct *t) local_irq_save(flags); irqs_were_disabled = irqs_disabled_flags(flags); - if ((preempt_bh_were_disabled || irqs_were_disabled) && - t->rcu_read_unlock_special.b.blocked) { + if (preempt_bh_were_disabled || irqs_were_disabled) { + WRITE_ONCE(t->rcu_read_unlock_special.b.exp_hint, false); /* Need to defer quiescent state until everything is enabled. */ - raise_softirq_irqoff(RCU_SOFTIRQ); + if (irqs_were_disabled) { + /* Enabling irqs does not reschedule, so... */ + raise_softirq_irqoff(RCU_SOFTIRQ); + } else { + /* Enabling BH or preempt does reschedule, so... */ + set_tsk_need_resched(current); + set_preempt_need_resched(); + } local_irq_restore(flags); return; } + WRITE_ONCE(t->rcu_read_unlock_special.b.exp_hint, false); rcu_preempt_deferred_qs_irqrestore(t, flags); } @@ -1464,7 +1477,8 @@ static void __init rcu_spawn_boost_kthreads(void) for_each_possible_cpu(cpu) per_cpu(rcu_cpu_has_work, cpu) = 0; - BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec)); + if (WARN_ONCE(smpboot_register_percpu_thread(&rcu_cpu_thread_spec), "%s: Could not start rcub kthread, OOM is now expected behavior\n", __func__)) + return; rcu_for_each_leaf_node(rnp) (void)rcu_spawn_one_boost_kthread(rnp); } @@ -2322,7 +2336,8 @@ static int rcu_nocb_kthread(void *arg) tail = rdp->nocb_follower_tail; rdp->nocb_follower_tail = &rdp->nocb_follower_head; raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags); - BUG_ON(!list); + if (WARN_ON_ONCE(!list)) + continue; trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WokeNonEmpty")); /* Each pass through the following loop invokes a callback. */ @@ -2495,7 +2510,8 @@ static void rcu_spawn_one_nocb_kthread(int cpu) /* Spawn the kthread for this CPU. */ t = kthread_run(rcu_nocb_kthread, rdp_spawn, "rcuo%c/%d", rcu_state.abbr, cpu); - BUG_ON(IS_ERR(t)); + if (WARN_ONCE(IS_ERR(t), "%s: Could not start rcuo kthread, OOM is now expected behavior\n", __func__)) + return; WRITE_ONCE(rdp_spawn->nocb_kthread, t); } diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index f203b94f6b5b..1971869c4072 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -335,8 +335,7 @@ void __wait_rcu_gp(bool checktiny, int n, call_rcu_func_t *crcu_array, /* Initialize and register callbacks for each crcu_array element. */ for (i = 0; i < n; i++) { if (checktiny && - (crcu_array[i] == call_rcu || - crcu_array[i] == call_rcu_bh)) { + (crcu_array[i] == call_rcu)) { might_sleep(); continue; } @@ -352,8 +351,7 @@ void __wait_rcu_gp(bool checktiny, int n, call_rcu_func_t *crcu_array, /* Wait for all callbacks to be invoked. */ for (i = 0; i < n; i++) { if (checktiny && - (crcu_array[i] == call_rcu || - crcu_array[i] == call_rcu_bh)) + (crcu_array[i] == call_rcu)) continue; for (j = 0; j < i; j++) if (crcu_array[j] == crcu_array[i]) @@ -822,7 +820,8 @@ static int __init rcu_spawn_tasks_kthread(void) struct task_struct *t; t = kthread_run(rcu_tasks_kthread, NULL, "rcu_tasks_kthread"); - BUG_ON(IS_ERR(t)); + if (WARN_ONCE(IS_ERR(t), "%s: Could not start Tasks-RCU grace-period kthread, OOM is now expected behavior\n", __func__)) + return 0; smp_mb(); /* Ensure others see full kthread. */ WRITE_ONCE(rcu_tasks_kthread_ptr, t); return 0; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index f12225f26b70..ea12ebc57840 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5788,7 +5788,7 @@ int sched_cpu_deactivate(unsigned int cpu) * * Do sync before park smpboot threads to take care the rcu boost case. */ - synchronize_rcu_mult(call_rcu, call_rcu_sched); + synchronize_rcu(); if (!sched_smp_initialized) return 0; diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c index 76e0eaf4654e..3cd8a3a795d2 100644 --- a/kernel/sched/membarrier.c +++ b/kernel/sched/membarrier.c @@ -210,7 +210,7 @@ static int membarrier_register_global_expedited(void) * future scheduler executions will observe the new * thread flag state for this mm. */ - synchronize_sched(); + synchronize_rcu(); } atomic_or(MEMBARRIER_STATE_GLOBAL_EXPEDITED_READY, &mm->membarrier_state); @@ -246,7 +246,7 @@ static int membarrier_register_private_expedited(int flags) * Ensure all future scheduler executions will observe the * new thread flag state for this process. */ - synchronize_sched(); + synchronize_rcu(); } atomic_or(state, &mm->membarrier_state); @@ -298,7 +298,7 @@ SYSCALL_DEFINE2(membarrier, int, cmd, int, flags) if (tick_nohz_full_enabled()) return -EINVAL; if (num_online_cpus() > 1) - synchronize_sched(); + synchronize_rcu(); return 0; case MEMBARRIER_CMD_GLOBAL_EXPEDITED: return membarrier_global_expedited(); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index f536f601bd46..5b4f73e4fd56 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -173,7 +173,7 @@ static void ftrace_sync(struct work_struct *work) { /* * This function is just a stub to implement a hard force - * of synchronize_sched(). This requires synchronizing + * of synchronize_rcu(). This requires synchronizing * tasks even in userspace and idle. * * Yes, function tracing is rude. @@ -934,7 +934,7 @@ ftrace_profile_write(struct file *filp, const char __user *ubuf, ftrace_profile_enabled = 0; /* * unregister_ftrace_profiler calls stop_machine - * so this acts like an synchronize_sched. + * so this acts like an synchronize_rcu. */ unregister_ftrace_profiler(); } @@ -1086,7 +1086,7 @@ struct ftrace_ops *ftrace_ops_trampoline(unsigned long addr) /* * Some of the ops may be dynamically allocated, - * they are freed after a synchronize_sched(). + * they are freed after a synchronize_rcu(). */ preempt_disable_notrace(); @@ -1286,7 +1286,7 @@ static void free_ftrace_hash_rcu(struct ftrace_hash *hash) { if (!hash || hash == EMPTY_HASH) return; - call_rcu_sched(&hash->rcu, __free_ftrace_hash_rcu); + call_rcu(&hash->rcu, __free_ftrace_hash_rcu); } void ftrace_free_filter(struct ftrace_ops *ops) @@ -1501,7 +1501,7 @@ static bool hash_contains_ip(unsigned long ip, * the ip is not in the ops->notrace_hash. * * This needs to be called with preemption disabled as - * the hashes are freed with call_rcu_sched(). + * the hashes are freed with call_rcu(). */ static int ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs) @@ -4496,7 +4496,7 @@ unregister_ftrace_function_probe_func(char *glob, struct trace_array *tr, if (ftrace_enabled && !ftrace_hash_empty(hash)) ftrace_run_modify_code(&probe->ops, FTRACE_UPDATE_CALLS, &old_hash_ops); - synchronize_sched(); + synchronize_rcu(); hlist_for_each_entry_safe(entry, tmp, &hhd, hlist) { hlist_del(&entry->hlist); @@ -5314,7 +5314,7 @@ ftrace_graph_release(struct inode *inode, struct file *file) mutex_unlock(&graph_lock); /* Wait till all users are no longer using the old hash */ - synchronize_sched(); + synchronize_rcu(); free_ftrace_hash(old_hash); } @@ -5707,7 +5707,7 @@ void ftrace_release_mod(struct module *mod) list_for_each_entry_safe(mod_map, n, &ftrace_mod_maps, list) { if (mod_map->mod == mod) { list_del_rcu(&mod_map->list); - call_rcu_sched(&mod_map->rcu, ftrace_free_mod_map); + call_rcu(&mod_map->rcu, ftrace_free_mod_map); break; } } @@ -5927,7 +5927,7 @@ ftrace_mod_address_lookup(unsigned long addr, unsigned long *size, struct ftrace_mod_map *mod_map; const char *ret = NULL; - /* mod_map is freed via call_rcu_sched() */ + /* mod_map is freed via call_rcu() */ preempt_disable(); list_for_each_entry_rcu(mod_map, &ftrace_mod_maps, list) { ret = ftrace_func_address_lookup(mod_map, addr, size, off, sym); @@ -6262,7 +6262,7 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, /* * Some of the ops may be dynamically allocated, - * they must be freed after a synchronize_sched(). + * they must be freed after a synchronize_rcu(). */ preempt_disable_notrace(); @@ -6433,7 +6433,7 @@ static void clear_ftrace_pids(struct trace_array *tr) rcu_assign_pointer(tr->function_pids, NULL); /* Wait till all users are no longer using pid filtering */ - synchronize_sched(); + synchronize_rcu(); trace_free_pid_list(pid_list); } @@ -6580,7 +6580,7 @@ ftrace_pid_write(struct file *filp, const char __user *ubuf, rcu_assign_pointer(tr->function_pids, pid_list); if (filtered_pids) { - synchronize_sched(); + synchronize_rcu(); trace_free_pid_list(filtered_pids); } else if (pid_list) { /* Register a probe to set whether to ignore the tracing of a task */ diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 65bd4616220d..4f3247a53259 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1834,7 +1834,7 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size, * There could have been a race between checking * record_disable and incrementing it. */ - synchronize_sched(); + synchronize_rcu(); for_each_buffer_cpu(buffer, cpu) { cpu_buffer = buffer->buffers[cpu]; rb_check_pages(cpu_buffer); @@ -3151,7 +3151,7 @@ static bool rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer) * This prevents all writes to the buffer. Any attempt to write * to the buffer after this will fail and return NULL. * - * The caller should call synchronize_sched() after this. + * The caller should call synchronize_rcu() after this. */ void ring_buffer_record_disable(struct ring_buffer *buffer) { @@ -3253,7 +3253,7 @@ bool ring_buffer_record_is_set_on(struct ring_buffer *buffer) * This prevents all writes to the buffer. Any attempt to write * to the buffer after this will fail and return NULL. * - * The caller should call synchronize_sched() after this. + * The caller should call synchronize_rcu() after this. */ void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu) { @@ -4191,7 +4191,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_read_prepare); void ring_buffer_read_prepare_sync(void) { - synchronize_sched(); + synchronize_rcu(); } EXPORT_SYMBOL_GPL(ring_buffer_read_prepare_sync); @@ -4363,7 +4363,7 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu) atomic_inc(&cpu_buffer->record_disabled); /* Make sure all commits have finished */ - synchronize_sched(); + synchronize_rcu(); raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); @@ -4496,7 +4496,7 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a, goto out; /* - * We can't do a synchronize_sched here because this + * We can't do a synchronize_rcu here because this * function can be called in atomic context. * Normally this will be called from the same CPU as cpu. * If not it's up to the caller to protect this. diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index ff1c4b20cd0a..51612b4a603f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1681,7 +1681,7 @@ void tracing_reset(struct trace_buffer *buf, int cpu) ring_buffer_record_disable(buffer); /* Make sure all commits have finished */ - synchronize_sched(); + synchronize_rcu(); ring_buffer_reset_cpu(buffer, cpu); ring_buffer_record_enable(buffer); @@ -1698,7 +1698,7 @@ void tracing_reset_online_cpus(struct trace_buffer *buf) ring_buffer_record_disable(buffer); /* Make sure all commits have finished */ - synchronize_sched(); + synchronize_rcu(); buf->time_start = buffer_ftrace_now(buf, buf->cpu); @@ -2250,7 +2250,7 @@ void trace_buffered_event_disable(void) preempt_enable(); /* Wait for all current users to finish */ - synchronize_sched(); + synchronize_rcu(); for_each_tracing_cpu(cpu) { free_page((unsigned long)per_cpu(trace_buffered_event, cpu)); @@ -5398,7 +5398,7 @@ static int tracing_set_tracer(struct trace_array *tr, const char *buf) if (tr->current_trace->reset) tr->current_trace->reset(tr); - /* Current trace needs to be nop_trace before synchronize_sched */ + /* Current trace needs to be nop_trace before synchronize_rcu */ tr->current_trace = &nop_trace; #ifdef CONFIG_TRACER_MAX_TRACE @@ -5412,7 +5412,7 @@ static int tracing_set_tracer(struct trace_array *tr, const char *buf) * The update_max_tr is called from interrupts disabled * so a synchronized_sched() is sufficient. */ - synchronize_sched(); + synchronize_rcu(); free_snapshot(tr); } #endif diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 84a65173b1e9..35f3aa55be85 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -1614,7 +1614,7 @@ static int process_system_preds(struct trace_subsystem_dir *dir, /* * The calls can still be using the old filters. - * Do a synchronize_sched() and to ensure all calls are + * Do a synchronize_rcu() and to ensure all calls are * done with them before we free them. */ tracepoint_synchronize_unregister(); @@ -1845,7 +1845,7 @@ int apply_subsystem_event_filter(struct trace_subsystem_dir *dir, if (filter) { /* * No event actually uses the system filter - * we can free it without synchronize_sched(). + * we can free it without synchronize_rcu(). */ __free_filter(system->filter); system->filter = filter; diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index fec67188c4d2..adc153ab51c0 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -333,7 +333,7 @@ disable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file) * event_call related objects, which will be accessed in * the kprobe_trace_func/kretprobe_trace_func. */ - synchronize_sched(); + synchronize_rcu(); kfree(link); /* Ignored if link == NULL */ } diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index a3be42304485..46f2ab1e08a9 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -92,7 +92,7 @@ static __init int release_early_probes(void) while (early_probes) { tmp = early_probes; early_probes = tmp->next; - call_rcu_sched(tmp, rcu_free_old_probes); + call_rcu(tmp, rcu_free_old_probes); } return 0; @@ -123,7 +123,7 @@ static inline void release_probes(struct tracepoint_func *old) * cover both cases. So let us chain the SRCU and sched RCU * callbacks to wait for both grace periods. */ - call_rcu_sched(&tp_probes->rcu, rcu_free_old_probes); + call_rcu(&tp_probes->rcu, rcu_free_old_probes); } } diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 0280deac392e..392be4b252f6 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -3396,7 +3396,7 @@ static void put_unbound_pool(struct worker_pool *pool) del_timer_sync(&pool->mayday_timer); /* sched-RCU protected to allow dereferences from get_work_pool() */ - call_rcu_sched(&pool->rcu, rcu_free_pool); + call_rcu(&pool->rcu, rcu_free_pool); } /** @@ -3503,14 +3503,14 @@ static void pwq_unbound_release_workfn(struct work_struct *work) put_unbound_pool(pool); mutex_unlock(&wq_pool_mutex); - call_rcu_sched(&pwq->rcu, rcu_free_pwq); + call_rcu(&pwq->rcu, rcu_free_pwq); /* * If we're the last pwq going away, @wq is already dead and no one * is gonna access it anymore. Schedule RCU free. */ if (is_last) - call_rcu_sched(&wq->rcu, rcu_free_wq); + call_rcu(&wq->rcu, rcu_free_wq); } /** @@ -4195,7 +4195,7 @@ void destroy_workqueue(struct workqueue_struct *wq) * The base ref is never dropped on per-cpu pwqs. Directly * schedule RCU free. */ - call_rcu_sched(&wq->rcu, rcu_free_wq); + call_rcu(&wq->rcu, rcu_free_wq); } else { /* * We're the sole accessor of @wq at this point. Directly |