Diffstat (limited to 'kernel/sched.c')
 kernel/sched.c | 167 ++++++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 146 insertions(+), 21 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index ce2ff4e2993..efb62f04e61 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -71,6 +71,7 @@
#include <linux/ctype.h>
#include <linux/ftrace.h>
#include <linux/slab.h>
+#include <linux/cpuacct.h>
#include <asm/tlb.h>
#include <asm/irq_regs.h>
@@ -2370,29 +2371,59 @@ EXPORT_SYMBOL_GPL(kick_process);
*/
static int select_fallback_rq(int cpu, struct task_struct *p)
{
- int dest_cpu;
const struct cpumask *nodemask = cpumask_of_node(cpu_to_node(cpu));
+ enum { cpuset, possible, fail } state = cpuset;
+ int dest_cpu;
/* Look for allowed, online CPU in same node. */
- for_each_cpu_and(dest_cpu, nodemask, cpu_active_mask)
- if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
+ for_each_cpu_mask(dest_cpu, *nodemask) {
+ if (!cpu_online(dest_cpu))
+ continue;
+ if (!cpu_active(dest_cpu))
+ continue;
+ if (cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p)))
return dest_cpu;
+ }
+
+ for (;;) {
+ /* Any allowed, online CPU? */
+ for_each_cpu_mask(dest_cpu, *tsk_cpus_allowed(p)) {
+ if (!cpu_online(dest_cpu))
+ continue;
+ if (!cpu_active(dest_cpu))
+ continue;
+ goto out;
+ }
- /* Any allowed, online CPU? */
- dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_active_mask);
- if (dest_cpu < nr_cpu_ids)
- return dest_cpu;
+ switch (state) {
+ case cpuset:
+ /* No more Mr. Nice Guy. */
+ cpuset_cpus_allowed_fallback(p);
+ state = possible;
+ break;
- /* No more Mr. Nice Guy. */
- dest_cpu = cpuset_cpus_allowed_fallback(p);
- /*
- * Don't tell them about moving exiting tasks or
- * kernel threads (both mm NULL), since they never
- * leave kernel.
- */
- if (p->mm && printk_ratelimit()) {
- printk(KERN_INFO "process %d (%s) no longer affine to cpu%d\n",
- task_pid_nr(p), p->comm, cpu);
+ case possible:
+ do_set_cpus_allowed(p, cpu_possible_mask);
+ state = fail;
+ break;
+
+ case fail:
+ BUG();
+ break;
+ }
+ }
+
+out:
+ if (state != cpuset) {
+ /*
+ * Don't tell them about moving exiting tasks or
+ * kernel threads (both mm NULL), since they never
+ * leave kernel.
+ */
+ if (p->mm && printk_ratelimit()) {
+ printk(KERN_INFO "process %d (%s) no longer affine to cpu%d\n",
+ task_pid_nr(p), p->comm, cpu);
+ }
}
return dest_cpu;
@@ -6479,7 +6510,7 @@ static int __cpuinit sched_cpu_active(struct notifier_block *nfb,
unsigned long action, void *hcpu)
{
switch (action & ~CPU_TASKS_FROZEN) {
- case CPU_ONLINE:
+ case CPU_STARTING:
case CPU_DOWN_FAILED:
set_cpu_active((long)hcpu, true);
return NOTIFY_OK;
@@ -7952,7 +7983,7 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
#ifdef CONFIG_SMP
rt_rq->rt_nr_migratory = 0;
rt_rq->overloaded = 0;
- plist_head_init_raw(&rt_rq->pushable_tasks, &rq->lock);
+ plist_head_init(&rt_rq->pushable_tasks);
#endif
rt_rq->rt_time = 0;
@@ -8157,7 +8188,7 @@ void __init sched_init(void)
#endif
#ifdef CONFIG_RT_MUTEXES
- plist_head_init_raw(&init_task.pi_waiters, &init_task.pi_lock);
+ plist_head_init(&init_task.pi_waiters);
#endif
/*
@@ -8208,13 +8239,24 @@ static inline int preempt_count_equals(int preempt_offset)
return (nested == preempt_offset);
}
+static int __might_sleep_init_called;
+int __init __might_sleep_init(void)
+{
+ __might_sleep_init_called = 1;
+ return 0;
+}
+early_initcall(__might_sleep_init);
+
void __might_sleep(const char *file, int line, int preempt_offset)
{
#ifdef in_atomic
static unsigned long prev_jiffy; /* ratelimiting */
if ((preempt_count_equals(preempt_offset) && !irqs_disabled()) ||
- system_state != SYSTEM_RUNNING || oops_in_progress)
+ oops_in_progress)
+ return;
+ if (system_state != SYSTEM_RUNNING &&
+ (!__might_sleep_init_called || system_state != SYSTEM_BOOTING))
return;
if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy)
return;
@@ -8965,6 +9007,20 @@ cpu_cgroup_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
}
static int
+cpu_cgroup_allow_attach(struct cgroup *cgrp, struct task_struct *tsk)
+{
+ const struct cred *cred = current_cred(), *tcred;
+
+ tcred = __task_cred(tsk);
+
+ if ((current != tsk) && !capable(CAP_SYS_NICE) &&
+ cred->euid != tcred->uid && cred->euid != tcred->suid)
+ return -EACCES;
+
+ return 0;
+}
+
+static int
cpu_cgroup_can_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
{
#ifdef CONFIG_RT_GROUP_SCHED
@@ -9069,6 +9125,7 @@ struct cgroup_subsys cpu_cgroup_subsys = {
.name = "cpu",
.create = cpu_cgroup_create,
.destroy = cpu_cgroup_destroy,
+ .allow_attach = cpu_cgroup_allow_attach,
.can_attach_task = cpu_cgroup_can_attach_task,
.attach_task = cpu_cgroup_attach_task,
.exit = cpu_cgroup_exit,
@@ -9095,8 +9152,30 @@ struct cpuacct {
u64 __percpu *cpuusage;
struct percpu_counter cpustat[CPUACCT_STAT_NSTATS];
struct cpuacct *parent;
+ struct cpuacct_charge_calls *cpufreq_fn;
+ void *cpuacct_data;
};
+static struct cpuacct *cpuacct_root;
+
+/* Default calls for cpufreq accounting */
+static struct cpuacct_charge_calls *cpuacct_cpufreq;
+int cpuacct_register_cpufreq(struct cpuacct_charge_calls *fn)
+{
+ cpuacct_cpufreq = fn;
+
+ /*
+ * Root node is created before platform can register callbacks,
+ * initialize it here.
+ */
+ if (cpuacct_root && fn) {
+ cpuacct_root->cpufreq_fn = fn;
+ if (fn->init)
+ fn->init(&cpuacct_root->cpuacct_data);
+ }
+ return 0;
+}
+
struct cgroup_subsys cpuacct_subsys;
/* return cpu accounting group corresponding to this container */
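Note: struct cpuacct_charge_calls is declared in the new <linux/cpuacct.h>
header, which is outside this diff. Reconstructed from the call sites in this
file (init, charge, cpufreq_show, power_usage), the interface plausibly looks
like the sketch below; the exact types and field order are inferred from
usage, not taken from the header itself:

struct cpuacct_charge_calls {
	/*
	 * Platforms can stash per-group state behind *cpuacct_data,
	 * e.g. per-cpu time-in-state or power counters.
	 */
	void (*init)(void **cpuacct_data);
	void (*charge)(void *cpuacct_data, u64 cputime, int cpu);
	void (*cpufreq_show)(void *cpuacct_data, struct cgroup_map_cb *cb);
	u64 (*power_usage)(void *cpuacct_data);
};

int cpuacct_register_cpufreq(struct cpuacct_charge_calls *fn);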
@@ -9131,8 +9210,16 @@ static struct cgroup_subsys_state *cpuacct_create(
if (percpu_counter_init(&ca->cpustat[i], 0))
goto out_free_counters;
+ ca->cpufreq_fn = cpuacct_cpufreq;
+
+ /* If available, have platform code initialize the cpu frequency table */
+ if (ca->cpufreq_fn && ca->cpufreq_fn->init)
+ ca->cpufreq_fn->init(&ca->cpuacct_data);
+
if (cgrp->parent)
ca->parent = cgroup_ca(cgrp->parent);
+ else
+ cpuacct_root = ca;
return &ca->css;
@@ -9260,6 +9347,32 @@ static int cpuacct_stats_show(struct cgroup *cgrp, struct cftype *cft,
return 0;
}
+static int cpuacct_cpufreq_show(struct cgroup *cgrp, struct cftype *cft,
+ struct cgroup_map_cb *cb)
+{
+ struct cpuacct *ca = cgroup_ca(cgrp);
+ if (ca->cpufreq_fn && ca->cpufreq_fn->cpufreq_show)
+ ca->cpufreq_fn->cpufreq_show(ca->cpuacct_data, cb);
+
+ return 0;
+}
+
+/* return total cpu power usage (in milliwatt-seconds) of a group */
+static u64 cpuacct_powerusage_read(struct cgroup *cgrp, struct cftype *cft)
+{
+ int i;
+ struct cpuacct *ca = cgroup_ca(cgrp);
+ u64 totalpower = 0;
+
+ if (ca->cpufreq_fn && ca->cpufreq_fn->power_usage)
+ for_each_present_cpu(i) {
+ totalpower += ca->cpufreq_fn->power_usage(
+ ca->cpuacct_data);
+ }
+
+ return totalpower;
+}
+
static struct cftype files[] = {
{
.name = "usage",
@@ -9274,6 +9387,14 @@ static struct cftype files[] = {
.name = "stat",
.read_map = cpuacct_stats_show,
},
+ {
+ .name = "cpufreq",
+ .read_map = cpuacct_cpufreq_show,
+ },
+ {
+ .name = "power",
+ .read_u64 = cpuacct_powerusage_read
+ },
};
static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
@@ -9303,6 +9424,10 @@ static void cpuacct_charge(struct task_struct *tsk, u64 cputime)
for (; ca; ca = ca->parent) {
u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
*cpuusage += cputime;
+
+ /* Call back into platform code to account for CPU speeds */
+ if (ca->cpufreq_fn && ca->cpufreq_fn->charge)
+ ca->cpufreq_fn->charge(ca->cpuacct_data, cputime, cpu);
}
rcu_read_unlock();
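
With this in place, a platform driver registers its accounting callbacks once
at boot. A minimal, hypothetical sketch of such glue code follows (all my_*
names are illustrative and not part of this patch; error handling elided):

/* Hypothetical platform hook-up for the cpufreq accounting callbacks. */
static void my_acct_init(void **cpuacct_data)
{
	/* Allocate per-cgroup time-in-state counters here. */
}

static void my_acct_charge(void *cpuacct_data, u64 cputime, int cpu)
{
	/* Bin @cputime against the current frequency of @cpu. */
}

static struct cpuacct_charge_calls my_acct_calls = {
	.init   = my_acct_init,
	.charge = my_acct_charge,
	/* .cpufreq_show and .power_usage would be filled in similarly. */
};

static int __init my_acct_register(void)
{
	/*
	 * The root cpuacct group is created before initcalls run;
	 * cpuacct_register_cpufreq() initializes it retroactively.
	 */
	return cpuacct_register_cpufreq(&my_acct_calls);
}
late_initcall(my_acct_register);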