aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorSuresh Siddha <suresh.b.siddha@intel.com>2005-06-25 14:57:25 -0700
committerLinus Torvalds <torvalds@ppc970.osdl.org>2005-06-25 16:24:43 -0700
commit245af2c7870bd5940f7bfad19a0a03b32751fbc5 (patch)
tree7c54e2b290a6b1a9fd15fa99f194c7ed5e9f0a11 /kernel
parent41c7ce9ad9a859871dffbe7dbc8b1f9571724e3c (diff)
downloadkernel_samsung_smdk4412-245af2c7870bd5940f7bfad19a0a03b32751fbc5.tar.gz
kernel_samsung_smdk4412-245af2c7870bd5940f7bfad19a0a03b32751fbc5.tar.bz2
kernel_samsung_smdk4412-245af2c7870bd5940f7bfad19a0a03b32751fbc5.zip
[PATCH] sched: remove degenerate domains
Remove degenerate scheduler domains during the sched-domain init. For example on x86_64, we always have NUMA configured in. On Intel EM64T systems, top most sched domain will be of NUMA and with only one sched_group in it. With fork/exec balances(recent Nick's fixes in -mm tree), we always endup taking wrong decisions because of this topmost domain (as it contains only one group and find_idlest_group always returns NULL). We will endup loading HT package completely first, letting active load balance kickin and correct it. In general, this patch also makes sense with out recent Nick's fixes in -mm. From: Nick Piggin <nickpiggin@yahoo.com.au> Modified to account for more than just sched_groups when scanning for degenerate domains by Nick Piggin. And allow a runqueue's sd to go NULL rather than keep a single degenerate domain around (this happens when you run with maxcpus=1). Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com> Signed-off-by: Nick Piggin <nickpiggin@yahoo.com.au> Acked-by: Ingo Molnar <mingo@elte.hu> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/sched.c64
1 files changed, 64 insertions, 0 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index 77c07c2928b..e75b301b534 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4712,6 +4712,57 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu)
#define sched_domain_debug(sd, cpu) {}
#endif
+static int __devinit sd_degenerate(struct sched_domain *sd)
+{
+ if (cpus_weight(sd->span) == 1)
+ return 1;
+
+ /* Following flags need at least 2 groups */
+ if (sd->flags & (SD_LOAD_BALANCE |
+ SD_BALANCE_NEWIDLE |
+ SD_BALANCE_FORK |
+ SD_BALANCE_EXEC)) {
+ if (sd->groups != sd->groups->next)
+ return 0;
+ }
+
+ /* Following flags don't use groups */
+ if (sd->flags & (SD_WAKE_IDLE |
+ SD_WAKE_AFFINE |
+ SD_WAKE_BALANCE))
+ return 0;
+
+ return 1;
+}
+
+static int __devinit sd_parent_degenerate(struct sched_domain *sd,
+ struct sched_domain *parent)
+{
+ unsigned long cflags = sd->flags, pflags = parent->flags;
+
+ if (sd_degenerate(parent))
+ return 1;
+
+ if (!cpus_equal(sd->span, parent->span))
+ return 0;
+
+ /* Does parent contain flags not in child? */
+ /* WAKE_BALANCE is a subset of WAKE_AFFINE */
+ if (cflags & SD_WAKE_AFFINE)
+ pflags &= ~SD_WAKE_BALANCE;
+ /* Flags needing groups don't count if only 1 group in parent */
+ if (parent->groups == parent->groups->next) {
+ pflags &= ~(SD_LOAD_BALANCE |
+ SD_BALANCE_NEWIDLE |
+ SD_BALANCE_FORK |
+ SD_BALANCE_EXEC);
+ }
+ if (~cflags & pflags)
+ return 0;
+
+ return 1;
+}
+
/*
* Attach the domain 'sd' to 'cpu' as its base domain. Callers must
* hold the hotplug lock.
@@ -4722,6 +4773,19 @@ void __devinit cpu_attach_domain(struct sched_domain *sd, int cpu)
unsigned long flags;
runqueue_t *rq = cpu_rq(cpu);
int local = 1;
+ struct sched_domain *tmp;
+
+ /* Remove the sched domains which do not contribute to scheduling. */
+ for (tmp = sd; tmp; tmp = tmp->parent) {
+ struct sched_domain *parent = tmp->parent;
+ if (!parent)
+ break;
+ if (sd_parent_degenerate(tmp, parent))
+ tmp->parent = parent->parent;
+ }
+
+ if (sd && sd_degenerate(sd))
+ sd = sd->parent;
sched_domain_debug(sd, cpu);