Diffstat (limited to 'kernel/rcu/tree.c')
-rw-r--r--  kernel/rcu/tree.c  113
1 file changed, 106 insertions(+), 7 deletions(-)
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 573fd78a7bca..f288477ee1c2 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -113,7 +113,7 @@ static struct rcu_state rcu_state = {
static bool dump_tree;
module_param(dump_tree, bool, 0444);
/* By default, use RCU_SOFTIRQ instead of rcuc kthreads. */
-static bool use_softirq = 1;
+static bool use_softirq = true;
module_param(use_softirq, bool, 0444);
/* Control rcu_node-tree auto-balancing at boot time. */
static bool rcu_fanout_exact;
@@ -1275,7 +1275,7 @@ static bool rcu_start_this_gp(struct rcu_node *rnp_start, struct rcu_data *rdp,
trace_rcu_this_gp(rnp, rdp, gp_seq_req, TPS("NoGPkthread"));
goto unlock_out;
}
- trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq, TPS("newreq"));
+ trace_rcu_grace_period(rcu_state.name, data_race(rcu_state.gp_seq), TPS("newreq"));
ret = true; /* Caller must wake GP kthread. */
unlock_out:
/* Push furthest requested GP to leaf node and rcu_data structure. */
@@ -1531,6 +1531,31 @@ static void rcu_gp_slow(int delay)
schedule_timeout_uninterruptible(delay);
}
+static unsigned long sleep_duration;
+
+/* Allow rcutorture to stall the grace-period kthread. */
+void rcu_gp_set_torture_wait(int duration)
+{
+ if (IS_ENABLED(CONFIG_RCU_TORTURE_TEST) && duration > 0)
+ WRITE_ONCE(sleep_duration, duration);
+}
+EXPORT_SYMBOL_GPL(rcu_gp_set_torture_wait);
+
+/* Actually implement the aforementioned wait. */
+static void rcu_gp_torture_wait(void)
+{
+ unsigned long duration;
+
+ if (!IS_ENABLED(CONFIG_RCU_TORTURE_TEST))
+ return;
+ duration = xchg(&sleep_duration, 0UL);
+ if (duration > 0) {
+ pr_alert("%s: Waiting %lu jiffies\n", __func__, duration);
+ schedule_timeout_uninterruptible(duration);
+ pr_alert("%s: Wait complete\n", __func__);
+ }
+}
+
/*
* Initialize a new grace period. Return false if no grace period required.
*/
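Note (illustrative, not part of the patch): the two functions added above form a simple request/serve pair, with rcu_gp_set_torture_wait() recording a stall request and rcu_gp_torture_wait() consuming it at the grace-period kthread's wait points. A minimal sketch of a hypothetical torture-test caller follows; the helper name rcu_torture_stall_gp_kthread() and the 10-jiffy duration are assumptions for illustration only, and the call is a no-op unless CONFIG_RCU_TORTURE_TEST is enabled.

/* Hypothetical rcutorture-side caller of the new hook. */
static void rcu_torture_stall_gp_kthread(void)
{
	/* Ask the GP kthread to sleep 10 jiffies at its next wait point. */
	rcu_gp_set_torture_wait(10);
}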
@@ -1564,6 +1589,7 @@ static bool rcu_gp_init(void)
record_gp_stall_check_time();
/* Record GP times before starting GP, hence rcu_seq_start(). */
rcu_seq_start(&rcu_state.gp_seq);
+ ASSERT_EXCLUSIVE_WRITER(rcu_state.gp_seq);
trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq, TPS("start"));
raw_spin_unlock_irq_rcu_node(rnp);
@@ -1669,12 +1695,16 @@ static bool rcu_gp_fqs_check_wake(int *gfp)
{
struct rcu_node *rnp = rcu_get_root();
- /* Someone like call_rcu() requested a force-quiescent-state scan. */
+ // If under overload conditions, force an immediate FQS scan.
+ if (*gfp & RCU_GP_FLAG_OVLD)
+ return true;
+
+ // Someone like call_rcu() requested a force-quiescent-state scan.
*gfp = READ_ONCE(rcu_state.gp_flags);
if (*gfp & RCU_GP_FLAG_FQS)
return true;
- /* The current grace period has completed. */
+ // The current grace period has completed.
if (!READ_ONCE(rnp->qsmask) && !rcu_preempt_blocked_readers_cgp(rnp))
return true;
@@ -1712,13 +1742,15 @@ static void rcu_gp_fqs(bool first_time)
static void rcu_gp_fqs_loop(void)
{
bool first_gp_fqs;
- int gf;
+ int gf = 0;
unsigned long j;
int ret;
struct rcu_node *rnp = rcu_get_root();
first_gp_fqs = true;
j = READ_ONCE(jiffies_till_first_fqs);
+ if (rcu_state.cbovld)
+ gf = RCU_GP_FLAG_OVLD;
ret = 0;
for (;;) {
if (!ret) {
@@ -1731,6 +1763,7 @@ static void rcu_gp_fqs_loop(void)
rcu_state.gp_state = RCU_GP_WAIT_FQS;
ret = swait_event_idle_timeout_exclusive(
rcu_state.gp_wq, rcu_gp_fqs_check_wake(&gf), j);
+ rcu_gp_torture_wait();
rcu_state.gp_state = RCU_GP_DOING_FQS;
/* Locking provides needed memory barriers. */
/* If grace period done, leave loop. */
@@ -1738,12 +1771,16 @@ static void rcu_gp_fqs_loop(void)
!rcu_preempt_blocked_readers_cgp(rnp))
break;
/* If time for quiescent-state forcing, do it. */
- if (ULONG_CMP_GE(jiffies, rcu_state.jiffies_force_qs) ||
+ if (!time_after(rcu_state.jiffies_force_qs, jiffies) ||
(gf & RCU_GP_FLAG_FQS)) {
trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq,
TPS("fqsstart"));
rcu_gp_fqs(first_gp_fqs);
- first_gp_fqs = false;
+ gf = 0;
+ if (first_gp_fqs) {
+ first_gp_fqs = false;
+ gf = rcu_state.cbovld ? RCU_GP_FLAG_OVLD : 0;
+ }
trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq,
TPS("fqsend"));
cond_resched_tasks_rcu_qs();
@@ -1763,6 +1800,7 @@ static void rcu_gp_fqs_loop(void)
j = 1;
else
j = rcu_state.jiffies_force_qs - j;
+ gf = 0;
}
}
}
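Note (illustrative, not part of the patch): replacing ULONG_CMP_GE(jiffies, rcu_state.jiffies_force_qs) with !time_after(rcu_state.jiffies_force_qs, jiffies) above is behavior-preserving; both are wrap-safe tests that the force-quiescent-state deadline has been reached. A minimal sketch, assuming the standard time_after() macro from <linux/jiffies.h>; the helper name fqs_deadline_passed() is hypothetical.

/* Hypothetical helper: true once "deadline" is at or before the current jiffies. */
static bool fqs_deadline_passed(unsigned long deadline)
{
	return !time_after(deadline, jiffies); /* Equivalent to ULONG_CMP_GE(jiffies, deadline). */
}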
@@ -1839,6 +1877,7 @@ static void rcu_gp_cleanup(void)
/* Declare grace period done, trace first to use old GP number. */
trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq, TPS("end"));
rcu_seq_end(&rcu_state.gp_seq);
+ ASSERT_EXCLUSIVE_WRITER(rcu_state.gp_seq);
rcu_state.gp_state = RCU_GP_IDLE;
/* Check for GP requests since above loop. */
rdp = this_cpu_ptr(&rcu_data);
@@ -1879,6 +1918,7 @@ static int __noreturn rcu_gp_kthread(void *unused)
swait_event_idle_exclusive(rcu_state.gp_wq,
READ_ONCE(rcu_state.gp_flags) &
RCU_GP_FLAG_INIT);
+ rcu_gp_torture_wait();
rcu_state.gp_state = RCU_GP_DONE_GPS;
/* Locking provides needed memory barrier. */
if (rcu_gp_init())
@@ -2869,6 +2909,8 @@ struct kfree_rcu_cpu {
struct delayed_work monitor_work;
bool monitor_todo;
bool initialized;
+ // Number of objects for which GP not started
+ int count;
};
static DEFINE_PER_CPU(struct kfree_rcu_cpu, krc);
@@ -2982,6 +3024,8 @@ static inline bool queue_kfree_rcu_work(struct kfree_rcu_cpu *krcp)
krcp->head = NULL;
}
+ WRITE_ONCE(krcp->count, 0);
+
/*
* One work is per one batch, so there are two "free channels",
* "bhead_free" and "head_free" the batch can handle. It can be
@@ -3118,6 +3162,8 @@ void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
krcp->head = head;
}
+ WRITE_ONCE(krcp->count, krcp->count + 1);
+
// Set timer to drain after KFREE_DRAIN_JIFFIES.
if (rcu_scheduler_active == RCU_SCHEDULER_RUNNING &&
!krcp->monitor_todo) {
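Note (illustrative, not part of the patch): krcp->count now tracks objects queued by kfree_call_rcu() whose grace period has not yet started; it is updated here under krcp->lock and reset in queue_kfree_rcu_work(). A minimal sketch of a typical caller that feeds this counter; struct foo and foo_release() are hypothetical.

/* Hypothetical caller whose object is counted in krcp->count until batched. */
struct foo {
	struct list_head list;
	struct rcu_head rcu;
};

static void foo_release(struct foo *fp)
{
	kfree_rcu(fp, rcu); /* Routed through kfree_call_rcu() above. */
}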
@@ -3132,6 +3178,56 @@ unlock_return:
}
EXPORT_SYMBOL_GPL(kfree_call_rcu);
+static unsigned long
+kfree_rcu_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
+{
+ int cpu;
+ unsigned long count = 0;
+
+ /* Snapshot count of all CPUs */
+ for_each_online_cpu(cpu) {
+ struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
+
+ count += READ_ONCE(krcp->count);
+ }
+
+ return count;
+}
+
+static unsigned long
+kfree_rcu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
+{
+ int cpu, freed = 0;
+ unsigned long flags;
+
+ for_each_online_cpu(cpu) {
+ int count;
+ struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
+
+ count = krcp->count;
+ spin_lock_irqsave(&krcp->lock, flags);
+ if (krcp->monitor_todo)
+ kfree_rcu_drain_unlock(krcp, flags);
+ else
+ spin_unlock_irqrestore(&krcp->lock, flags);
+
+ sc->nr_to_scan -= count;
+ freed += count;
+
+ if (sc->nr_to_scan <= 0)
+ break;
+ }
+
+ return freed;
+}
+
+static struct shrinker kfree_rcu_shrinker = {
+ .count_objects = kfree_rcu_shrink_count,
+ .scan_objects = kfree_rcu_shrink_scan,
+ .batch = 0,
+ .seeks = DEFAULT_SEEKS,
+};
+
void __init kfree_rcu_scheduler_running(void)
{
int cpu;
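Note (illustrative, not part of the patch): the shrinker added above lets the memory-reclaim path drain pending kfree_rcu() batches under memory pressure; count_objects() reports the per-CPU totals and scan_objects() kicks the drain. A rough sketch of how the callbacks are expected to be driven follows (the real logic lives in mm/vmscan.c; the function name and the 128-object batch are assumptions for illustration).

/* Hypothetical driver of the callbacks, loosely mimicking the shrinker core. */
static unsigned long kfree_rcu_shrink_example(struct shrinker *s)
{
	struct shrink_control sc = {
		.gfp_mask = GFP_KERNEL,
		.nr_to_scan = 128, /* Arbitrary illustrative batch size. */
	};

	if (!s->count_objects(s, &sc)) /* Nothing queued for kfree_rcu(). */
		return 0;
	return s->scan_objects(s, &sc); /* Request a drain of per-CPU batches. */
}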
@@ -3657,6 +3753,7 @@ void rcu_cpu_starting(unsigned int cpu)
nbits = bitmap_weight(&oldmask, BITS_PER_LONG);
/* Allow lockless access for expedited grace periods. */
smp_store_release(&rcu_state.ncpus, rcu_state.ncpus + nbits); /* ^^^ */
+ ASSERT_EXCLUSIVE_WRITER(rcu_state.ncpus);
rcu_gpnum_ovf(rnp, rdp); /* Offline-induced counter wrap? */
rdp->rcu_onl_gp_seq = READ_ONCE(rcu_state.gp_seq);
rdp->rcu_onl_gp_flags = READ_ONCE(rcu_state.gp_flags);
@@ -4052,6 +4149,8 @@ static void __init kfree_rcu_batch_init(void)
INIT_DELAYED_WORK(&krcp->monitor_work, kfree_rcu_monitor);
krcp->initialized = true;
}
+ if (register_shrinker(&kfree_rcu_shrinker))
+ pr_err("Failed to register kfree_rcu() shrinker!\n");
}
void __init rcu_init(void)