From: Nick Piggin

I was able to reproduce the half-load kernbench problems on the non-NUMA stp
8-way.  I made a pretty simple "lessidle" patch which tweaks some sched domain
parameters to be more inclined to move tasks, especially when idle.

That brought performance to nearly exactly the same as 2.6.3.  Context
switches are still up, but user and system time are down a bit.  That
indicates it is still less balance-happy, but it is obviously enough to bring
the idle time down.

2.6.3: http://khack.osdl.org/stp/288459/
2.6.3-mm4-lessidle: http://khack.osdl.org/stp/288995/

So it is more a matter of tuning than anything fundamental.  It may be that
the patch now makes balancing too aggressive, but it is probably better to err
on the side that is closer to 2.6 behaviour.

I haven't tested this on much else.


---

 25-akpm/include/linux/sched.h |   16 ++++++++--------
 25-akpm/kernel/sched.c        |    5 +++++
 2 files changed, 13 insertions(+), 8 deletions(-)

diff -puN include/linux/sched.h~sched-domains-improvements include/linux/sched.h
--- 25/include/linux/sched.h~sched-domains-improvements	Thu Mar 11 14:33:22 2004
+++ 25-akpm/include/linux/sched.h	Thu Mar 11 14:33:22 2004
@@ -565,11 +565,11 @@ struct sched_domain {
 	.parent			= NULL,			\
 	.groups			= NULL,			\
 	.min_interval		= 1,			\
-	.max_interval		= 8,			\
-	.busy_factor		= 32,			\
+	.max_interval		= 4,			\
+	.busy_factor		= 64,			\
 	.imbalance_pct		= 125,			\
-	.cache_hot_time		= (5*1000000),		\
-	.cache_nice_tries	= 2,			\
+	.cache_hot_time		= (5*1000000/2),	\
+	.cache_nice_tries	= 1,			\
 	.flags			= SD_FLAG_FASTMIGRATE | SD_FLAG_NEWIDLE,\
 	.balance_interval	= 1,			\
 	.nr_balance_failed	= 0,			\
@@ -581,11 +581,11 @@ struct sched_domain {
 	.span			= CPU_MASK_NONE,	\
 	.parent			= NULL,			\
 	.groups			= NULL,			\
-	.min_interval		= 20,			\
-	.max_interval		= 1000*fls(num_online_cpus()),\
-	.busy_factor		= 4,			\
+	.min_interval		= 8,			\
+	.max_interval		= 256*fls(num_online_cpus()),\
+	.busy_factor		= 8,			\
 	.imbalance_pct		= 125,			\
-	.cache_hot_time		= (5*1000000),		\
+	.cache_hot_time		= (10*1000000),		\
 	.cache_nice_tries	= 1,			\
 	.flags			= SD_FLAG_EXEC,		\
 	.balance_interval	= 1,			\
diff -puN kernel/sched.c~sched-domains-improvements kernel/sched.c
--- 25/kernel/sched.c~sched-domains-improvements	Thu Mar 11 14:33:22 2004
+++ 25-akpm/kernel/sched.c	Thu Mar 11 14:33:22 2004
@@ -1475,6 +1475,11 @@ nextgroup:
 	return busiest;
 
 out_balanced:
+	if (busiest && idle == NEWLY_IDLE) {
+		*imbalance = 1;
+		return busiest;
+	}
+
 	*imbalance = 0;
 	return NULL;
 }

_