aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/rcu/tree.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/rcu/tree.c')
-rw-r--r--kernel/rcu/tree.c138
1 files changed, 134 insertions, 4 deletions
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index e5903669e336..f288477ee1c2 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -238,7 +238,9 @@ void rcu_softirq_qs(void)
/*
* Record entry into an extended quiescent state. This is only to be
- * called when not already in an extended quiescent state.
+ * called when not already in an extended quiescent state, that is,
+ * RCU is watching prior to the call to this function and is no longer
+ * watching upon return.
*/
static void rcu_dynticks_eqs_enter(void)
{
@@ -250,8 +252,9 @@ static void rcu_dynticks_eqs_enter(void)
* critical sections, and we also must force ordering with the
* next idle sojourn.
*/
+ rcu_dynticks_task_trace_enter(); // Before ->dynticks update!
seq = atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdp->dynticks);
- /* Better be in an extended quiescent state! */
+ // RCU is no longer watching. Better be in extended quiescent state!
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
(seq & RCU_DYNTICK_CTRL_CTR));
/* Better not have special action (TLB flush) pending! */
@@ -261,7 +264,8 @@ static void rcu_dynticks_eqs_enter(void)
/*
* Record exit from an extended quiescent state. This is only to be
- * called from an extended quiescent state.
+ * called from an extended quiescent state, that is, RCU is not watching
+ * prior to the call to this function and is watching upon return.
*/
static void rcu_dynticks_eqs_exit(void)
{
@@ -274,6 +278,8 @@ static void rcu_dynticks_eqs_exit(void)
* critical section.
*/
seq = atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdp->dynticks);
+ // RCU is now watching. Better not be in an extended quiescent state!
+ rcu_dynticks_task_trace_exit(); // After ->dynticks update!
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
!(seq & RCU_DYNTICK_CTRL_CTR));
if (seq & RCU_DYNTICK_CTRL_MASK) {
@@ -346,6 +352,28 @@ static bool rcu_dynticks_in_eqs_since(struct rcu_data *rdp, int snap)
}
/*
+ * Return true if the referenced integer is zero while the specified
+ * CPU remains within a single extended quiescent state.
+ */
+bool rcu_dynticks_zero_in_eqs(int cpu, int *vp)
+{
+ struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
+ int snap;
+
+ // If not quiescent, force back to earlier extended quiescent state.
+ snap = atomic_read(&rdp->dynticks) & ~(RCU_DYNTICK_CTRL_MASK |
+ RCU_DYNTICK_CTRL_CTR);
+
+ smp_rmb(); // Order ->dynticks and *vp reads.
+ if (READ_ONCE(*vp))
+ return false; // Non-zero, so report failure;
+ smp_rmb(); // Order *vp read and ->dynticks re-read.
+
+ // If still in the same extended quiescent state, we are good!
+ return snap == (atomic_read(&rdp->dynticks) & ~RCU_DYNTICK_CTRL_MASK);
+}
+
+/*
* Set the special (bottom) bit of the specified CPU so that it
* will take special action (such as flushing its TLB) on the
* next exit from an extended quiescent state. Returns true if
@@ -584,6 +612,7 @@ static void rcu_eqs_enter(bool user)
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
rdp->dynticks_nesting == 0);
if (rdp->dynticks_nesting != 1) {
+ // RCU will still be watching, so just do accounting and leave.
rdp->dynticks_nesting--;
return;
}
@@ -596,7 +625,9 @@ static void rcu_eqs_enter(bool user)
rcu_prepare_for_idle();
rcu_preempt_deferred_qs(current);
WRITE_ONCE(rdp->dynticks_nesting, 0); /* Avoid irq-access tearing. */
+ // RCU is watching here ...
rcu_dynticks_eqs_enter();
+ // ... but is no longer watching here.
rcu_dynticks_task_enter();
}
@@ -676,7 +707,9 @@ static __always_inline void rcu_nmi_exit_common(bool irq)
if (irq)
rcu_prepare_for_idle();
+ // RCU is watching here ...
rcu_dynticks_eqs_enter();
+ // ... but is no longer watching here.
if (irq)
rcu_dynticks_task_enter();
@@ -751,11 +784,14 @@ static void rcu_eqs_exit(bool user)
oldval = rdp->dynticks_nesting;
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && oldval < 0);
if (oldval) {
+ // RCU was already watching, so just do accounting and leave.
rdp->dynticks_nesting++;
return;
}
rcu_dynticks_task_exit();
+ // RCU is not watching here ...
rcu_dynticks_eqs_exit();
+ // ... but is watching here.
rcu_cleanup_after_idle();
trace_rcu_dyntick(TPS("End"), rdp->dynticks_nesting, 1, atomic_read(&rdp->dynticks));
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
@@ -832,7 +868,9 @@ static __always_inline void rcu_nmi_enter_common(bool irq)
if (irq)
rcu_dynticks_task_exit();
+ // RCU is not watching here ...
rcu_dynticks_eqs_exit();
+ // ... but is watching here.
if (irq)
rcu_cleanup_after_idle();
@@ -842,9 +880,16 @@ static __always_inline void rcu_nmi_enter_common(bool irq)
rdp->dynticks_nmi_nesting == DYNTICK_IRQ_NONIDLE &&
READ_ONCE(rdp->rcu_urgent_qs) &&
!READ_ONCE(rdp->rcu_forced_tick)) {
+ // We get here only if we had already exited the extended
+ // quiescent state and this was an interrupt (not an NMI).
+ // Therefore, (1) RCU is already watching and (2) The fact
+ // that we are in an interrupt handler and that the rcu_node
+ // lock is an irq-disabled lock prevents self-deadlock.
+ // So we can safely recheck under the lock.
raw_spin_lock_rcu_node(rdp->mynode);
- // Recheck under lock.
if (rdp->rcu_urgent_qs && !rdp->rcu_forced_tick) {
+ // A nohz_full CPU is in the kernel and RCU
+ // needs a quiescent state. Turn on the tick!
WRITE_ONCE(rdp->rcu_forced_tick, true);
tick_dep_set_cpu(rdp->cpu, TICK_DEP_BIT_RCU);
}
@@ -1486,6 +1531,31 @@ static void rcu_gp_slow(int delay)
schedule_timeout_uninterruptible(delay);
}
+static unsigned long sleep_duration;
+
+/* Allow rcutorture to stall the grace-period kthread. */
+void rcu_gp_set_torture_wait(int duration)
+{
+ if (IS_ENABLED(CONFIG_RCU_TORTURE_TEST) && duration > 0)
+ WRITE_ONCE(sleep_duration, duration);
+}
+EXPORT_SYMBOL_GPL(rcu_gp_set_torture_wait);
+
+/* Actually implement the aforementioned wait. */
+static void rcu_gp_torture_wait(void)
+{
+ unsigned long duration;
+
+ if (!IS_ENABLED(CONFIG_RCU_TORTURE_TEST))
+ return;
+ duration = xchg(&sleep_duration, 0UL);
+ if (duration > 0) {
+ pr_alert("%s: Waiting %lu jiffies\n", __func__, duration);
+ schedule_timeout_uninterruptible(duration);
+ pr_alert("%s: Wait complete\n", __func__);
+ }
+}
+
/*
* Initialize a new grace period. Return false if no grace period required.
*/
@@ -1693,6 +1763,7 @@ static void rcu_gp_fqs_loop(void)
rcu_state.gp_state = RCU_GP_WAIT_FQS;
ret = swait_event_idle_timeout_exclusive(
rcu_state.gp_wq, rcu_gp_fqs_check_wake(&gf), j);
+ rcu_gp_torture_wait();
rcu_state.gp_state = RCU_GP_DOING_FQS;
/* Locking provides needed memory barriers. */
/* If grace period done, leave loop. */
@@ -1847,6 +1918,7 @@ static int __noreturn rcu_gp_kthread(void *unused)
swait_event_idle_exclusive(rcu_state.gp_wq,
READ_ONCE(rcu_state.gp_flags) &
RCU_GP_FLAG_INIT);
+ rcu_gp_torture_wait();
rcu_state.gp_state = RCU_GP_DONE_GPS;
/* Locking provides needed memory barrier. */
if (rcu_gp_init())
@@ -2837,6 +2909,8 @@ struct kfree_rcu_cpu {
struct delayed_work monitor_work;
bool monitor_todo;
bool initialized;
+ // Number of objects for which GP not started
+ int count;
};
static DEFINE_PER_CPU(struct kfree_rcu_cpu, krc);
@@ -2950,6 +3024,8 @@ static inline bool queue_kfree_rcu_work(struct kfree_rcu_cpu *krcp)
krcp->head = NULL;
}
+ WRITE_ONCE(krcp->count, 0);
+
/*
* One work is per one batch, so there are two "free channels",
* "bhead_free" and "head_free" the batch can handle. It can be
@@ -3086,6 +3162,8 @@ void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
krcp->head = head;
}
+ WRITE_ONCE(krcp->count, krcp->count + 1);
+
// Set timer to drain after KFREE_DRAIN_JIFFIES.
if (rcu_scheduler_active == RCU_SCHEDULER_RUNNING &&
!krcp->monitor_todo) {
@@ -3100,6 +3178,56 @@ unlock_return:
}
EXPORT_SYMBOL_GPL(kfree_call_rcu);
+static unsigned long
+kfree_rcu_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
+{
+ int cpu;
+ unsigned long count = 0;
+
+ /* Snapshot count of all CPUs */
+ for_each_online_cpu(cpu) {
+ struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
+
+ count += READ_ONCE(krcp->count);
+ }
+
+ return count;
+}
+
+static unsigned long
+kfree_rcu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
+{
+ int cpu, freed = 0;
+ unsigned long flags;
+
+ for_each_online_cpu(cpu) {
+ int count;
+ struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
+
+ count = krcp->count;
+ spin_lock_irqsave(&krcp->lock, flags);
+ if (krcp->monitor_todo)
+ kfree_rcu_drain_unlock(krcp, flags);
+ else
+ spin_unlock_irqrestore(&krcp->lock, flags);
+
+ sc->nr_to_scan -= count;
+ freed += count;
+
+ if (sc->nr_to_scan <= 0)
+ break;
+ }
+
+ return freed;
+}
+
+static struct shrinker kfree_rcu_shrinker = {
+ .count_objects = kfree_rcu_shrink_count,
+ .scan_objects = kfree_rcu_shrink_scan,
+ .batch = 0,
+ .seeks = DEFAULT_SEEKS,
+};
+
void __init kfree_rcu_scheduler_running(void)
{
int cpu;
@@ -4021,6 +4149,8 @@ static void __init kfree_rcu_batch_init(void)
INIT_DELAYED_WORK(&krcp->monitor_work, kfree_rcu_monitor);
krcp->initialized = true;
}
+ if (register_shrinker(&kfree_rcu_shrinker))
+ pr_err("Failed to register kfree_rcu() shrinker!\n");
}
void __init rcu_init(void)