From: Ingo Molnar Date: Fri, 9 Nov 2007 21:39:38 +0000 (+0100) Subject: sched: reintroduce SMP tunings again X-Git-Tag: v2.6.24-rc3~170^2~12 X-Git-Url: https://openfabrics.org/gitweb/?a=commitdiff_plain;h=19978ca610946ed57c071bad63f8f6642ca1298b;p=~emulex%2Finfiniband.git sched: reintroduce SMP tunings again Yanmin Zhang reported an aim7 regression and bisected it down to: | commit 38ad464d410dadceda1563f36bdb0be7fe4c8938 | Author: Ingo Molnar | Date: Mon Oct 15 17:00:02 2007 +0200 | | sched: uniform tunings | | use the same defaults on both UP and SMP. fix this by reintroducing similar SMP tunings again. This resolves the regression. (also update the comments to match the ilog2(nr_cpus) tuning effect) Signed-off-by: Ingo Molnar --- diff --git a/kernel/sched.c b/kernel/sched.c index 3f6bd111290..69cae271c63 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4992,6 +4992,32 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) */ cpumask_t nohz_cpu_mask = CPU_MASK_NONE; +/* + * Increase the granularity value when there are more CPUs, + * because with more CPUs the 'effective latency' as visible + * to users decreases. But the relationship is not linear, + * so pick a second-best guess by going with the log2 of the + * number of CPUs. 
+ * + * This idea comes from the SD scheduler of Con Kolivas: + */ +static inline void sched_init_granularity(void) +{ + unsigned int factor = 1 + ilog2(num_online_cpus()); + const unsigned long limit = 200000000; + + sysctl_sched_min_granularity *= factor; + if (sysctl_sched_min_granularity > limit) + sysctl_sched_min_granularity = limit; + + sysctl_sched_latency *= factor; + if (sysctl_sched_latency > limit) + sysctl_sched_latency = limit; + + sysctl_sched_wakeup_granularity *= factor; + sysctl_sched_batch_wakeup_granularity *= factor; +} + #ifdef CONFIG_SMP /* * This is how migration works: @@ -6688,10 +6714,12 @@ void __init sched_init_smp(void) /* Move init over to a non-isolated CPU */ if (set_cpus_allowed(current, non_isolated_cpus) < 0) BUG(); + sched_init_granularity(); } #else void __init sched_init_smp(void) { + sched_init_granularity(); } #endif /* CONFIG_SMP */ diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index c495dcf7031..7264814ba62 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -22,7 +22,7 @@ /* * Targeted preemption latency for CPU-bound tasks: - * (default: 20ms, units: nanoseconds) + * (default: 20ms * (1 + ilog2(ncpus)), units: nanoseconds) * * NOTE: this latency value is not the same as the concept of * 'timeslice length' - timeslices in CFS are of variable length @@ -32,18 +32,18 @@ * (to see the precise effective timeslice length of your workload, * run vmstat and monitor the context-switches (cs) field) */ -const_debug unsigned int sysctl_sched_latency = 20000000ULL; +unsigned int sysctl_sched_latency = 20000000ULL; /* * Minimal preemption granularity for CPU-bound tasks: - * (default: 1 msec, units: nanoseconds) + * (default: 1 msec * (1 + ilog2(ncpus)), units: nanoseconds) */ -const_debug unsigned int sysctl_sched_min_granularity = 1000000ULL; +unsigned int sysctl_sched_min_granularity = 1000000ULL; /* * is kept at sysctl_sched_latency / sysctl_sched_min_granularity */ -const_debug unsigned int sched_nr_latency = 20; +unsigned 
int sched_nr_latency = 20; /* * After fork, child runs first. (default) If set to 0 then @@ -61,23 +61,23 @@ unsigned int __read_mostly sysctl_sched_compat_yield; /* * SCHED_BATCH wake-up granularity. - * (default: 10 msec, units: nanoseconds) + * (default: 10 msec * (1 + ilog2(ncpus)), units: nanoseconds) * * This option delays the preemption effects of decoupled workloads * and reduces their over-scheduling. Synchronous workloads will still * have immediate wakeup/sleep latencies. */ -const_debug unsigned int sysctl_sched_batch_wakeup_granularity = 10000000UL; +unsigned int sysctl_sched_batch_wakeup_granularity = 10000000UL; /* * SCHED_OTHER wake-up granularity. - * (default: 10 msec, units: nanoseconds) + * (default: 10 msec * (1 + ilog2(ncpus)), units: nanoseconds) * * This option delays the preemption effects of decoupled workloads * and reduces their over-scheduling. Synchronous workloads will still * have immediate wakeup/sleep latencies. */ -const_debug unsigned int sysctl_sched_wakeup_granularity = 10000000UL; +unsigned int sysctl_sched_wakeup_granularity = 10000000UL; const_debug unsigned int sysctl_sched_migration_cost = 500000UL;