From: Ingo Molnar
Date: Mon, 15 Oct 2007 15:00:06 +0000 (+0200)
Subject: sched: remove wait_runtime fields and features
X-Git-Tag: v2.6.24-rc1~1289^2~107
X-Git-Url: https://openfabrics.org/gitweb/?a=commitdiff_plain;h=bbdba7c0e1161934ae881ad00e4db49830f5ef59;p=~shefty%2Frdma-dev.git

sched: remove wait_runtime fields and features

remove wait_runtime based fields and features, now that the CFS
math has been changed over to the vruntime metric.

Signed-off-by: Ingo Molnar
Signed-off-by: Peter Zijlstra
Signed-off-by: Mike Galbraith
Reviewed-by: Thomas Gleixner
---

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 353630d6ae4..572df1bbaee 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -888,13 +888,9 @@ struct load_weight {
  *  4 se->block_start
  *  4 se->run_node
  *  4 se->sleep_start
- *  4 se->sleep_start_fair
  *  6 se->load.weight
- *  7 se->delta_fair
- * 15 se->wait_runtime
  */
 struct sched_entity {
-	long wait_runtime;
 	s64 fair_key;
 	struct load_weight load;	/* for load-balancing */
 	struct rb_node run_node;
@@ -904,12 +900,10 @@ struct sched_entity {
 	u64 sum_exec_runtime;
 	u64 vruntime;
 	u64 prev_sum_exec_runtime;
-	u64 wait_start_fair;
 
 #ifdef CONFIG_SCHEDSTATS
 	u64 wait_start;
 	u64 wait_max;
-	s64 sum_wait_runtime;
 
 	u64 sleep_start;
 	u64 sleep_max;
@@ -919,9 +913,6 @@ struct sched_entity {
 	u64 block_max;
 	u64 exec_max;
 	u64 slice_max;
-
-	unsigned long wait_runtime_overruns;
-	unsigned long wait_runtime_underruns;
 #endif
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
diff --git a/kernel/sched.c b/kernel/sched.c
index 21cc3b2be02..0f0cf374c77 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -176,11 +176,8 @@ struct cfs_rq {
 	struct load_weight load;
 	unsigned long nr_running;
 
-	s64 fair_clock;
 	u64 exec_clock;
 	u64 min_vruntime;
-	s64 wait_runtime;
-	unsigned long wait_runtime_overruns, wait_runtime_underruns;
 
 	struct rb_root tasks_timeline;
 	struct rb_node *rb_leftmost;
@@ -389,20 +386,14 @@ static void update_rq_clock(struct rq *rq)
  * Debugging: various feature bits
  */
 enum {
-	SCHED_FEAT_FAIR_SLEEPERS	= 1,
-	SCHED_FEAT_NEW_FAIR_SLEEPERS	= 2,
-	SCHED_FEAT_SLEEPER_AVG		= 4,
-	SCHED_FEAT_SLEEPER_LOAD_AVG	= 8,
-	SCHED_FEAT_START_DEBIT		= 16,
-	SCHED_FEAT_USE_TREE_AVG		= 32,
-	SCHED_FEAT_APPROX_AVG		= 64,
+	SCHED_FEAT_NEW_FAIR_SLEEPERS	= 1,
+	SCHED_FEAT_START_DEBIT		= 2,
+	SCHED_FEAT_USE_TREE_AVG		= 4,
+	SCHED_FEAT_APPROX_AVG		= 8,
 };
 
 const_debug unsigned int sysctl_sched_features =
-		SCHED_FEAT_FAIR_SLEEPERS	*0 |
 		SCHED_FEAT_NEW_FAIR_SLEEPERS	*1 |
-		SCHED_FEAT_SLEEPER_AVG		*0 |
-		SCHED_FEAT_SLEEPER_LOAD_AVG	*1 |
 		SCHED_FEAT_START_DEBIT		*1 |
 		SCHED_FEAT_USE_TREE_AVG		*0 |
 		SCHED_FEAT_APPROX_AVG		*0;
@@ -716,15 +707,11 @@ calc_delta_fair(unsigned long delta_exec, struct load_weight *lw)
 static inline void update_load_add(struct load_weight *lw, unsigned long inc)
 {
 	lw->weight += inc;
-	if (sched_feat(FAIR_SLEEPERS))
-		lw->inv_weight = WMULT_CONST / lw->weight;
 }
 
 static inline void update_load_sub(struct load_weight *lw, unsigned long dec)
 {
 	lw->weight -= dec;
-	if (sched_feat(FAIR_SLEEPERS) && likely(lw->weight))
-		lw->inv_weight = WMULT_CONST / lw->weight;
 }
 
 /*
@@ -848,8 +835,6 @@ static void dec_nr_running(struct task_struct *p, struct rq *rq)
 
 static void set_load_weight(struct task_struct *p)
 {
-	p->se.wait_runtime = 0;
-
 	if (task_has_rt_policy(p)) {
 		p->se.load.weight = prio_to_weight[0] * 2;
 		p->se.load.inv_weight = prio_to_wmult[0] >> 1;
@@ -995,13 +980,9 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 {
 	int old_cpu = task_cpu(p);
 	struct rq *old_rq = cpu_rq(old_cpu), *new_rq = cpu_rq(new_cpu);
-	u64 clock_offset, fair_clock_offset;
+	u64 clock_offset;
 
 	clock_offset = old_rq->clock - new_rq->clock;
-	fair_clock_offset = old_rq->cfs.fair_clock - new_rq->cfs.fair_clock;
-
-	if (p->se.wait_start_fair)
-		p->se.wait_start_fair -= fair_clock_offset;
 
 #ifdef CONFIG_SCHEDSTATS
 	if (p->se.wait_start)
@@ -1571,15 +1552,12 @@ int fastcall wake_up_state(struct task_struct *p, unsigned int state)
  */
 static void __sched_fork(struct task_struct *p)
 {
-	p->se.wait_start_fair = 0;
 	p->se.exec_start = 0;
 	p->se.sum_exec_runtime = 0;
 	p->se.prev_sum_exec_runtime = 0;
-	p->se.wait_runtime = 0;
 
 #ifdef CONFIG_SCHEDSTATS
 	p->se.wait_start = 0;
-	p->se.sum_wait_runtime = 0;
 	p->se.sum_sleep_runtime = 0;
 	p->se.sleep_start = 0;
 	p->se.block_start = 0;
@@ -1588,8 +1566,6 @@ static void __sched_fork(struct task_struct *p)
 	p->se.exec_max = 0;
 	p->se.slice_max = 0;
 	p->se.wait_max = 0;
-	p->se.wait_runtime_overruns = 0;
-	p->se.wait_runtime_underruns = 0;
 #endif
 
 	INIT_LIST_HEAD(&p->run_list);
@@ -6436,7 +6412,6 @@ int in_sched_functions(unsigned long addr)
 static inline void init_cfs_rq(struct cfs_rq *cfs_rq, struct rq *rq)
 {
 	cfs_rq->tasks_timeline = RB_ROOT;
-	cfs_rq->fair_clock = 1;
#ifdef CONFIG_FAIR_GROUP_SCHED
 	cfs_rq->rq = rq;
 #endif
@@ -6562,15 +6537,12 @@ void normalize_rt_tasks(void)
 	read_lock_irq(&tasklist_lock);
 	do_each_thread(g, p) {
 		p->se.fair_key = 0;
-		p->se.wait_runtime = 0;
 		p->se.exec_start = 0;
-		p->se.wait_start_fair = 0;
 #ifdef CONFIG_SCHEDSTATS
 		p->se.wait_start = 0;
 		p->se.sleep_start = 0;
 		p->se.block_start = 0;
 #endif
-		task_rq(p)->cfs.fair_clock = 0;
 		task_rq(p)->clock = 0;
 
 		if (!rt_task(p)) {
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 3350169a7d2..e3b62324ac3 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -36,21 +36,16 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
 	else
 		SEQ_printf(m, " ");
 
-	SEQ_printf(m, "%15s %5d %15Ld %13Ld %13Ld %9Ld %5d ",
+	SEQ_printf(m, "%15s %5d %15Ld %13Ld %5d ",
 		p->comm, p->pid,
 		(long long)p->se.fair_key,
-		(long long)(p->se.fair_key - rq->cfs.fair_clock),
-		(long long)p->se.wait_runtime,
 		(long long)(p->nvcsw + p->nivcsw),
 		p->prio);
 #ifdef CONFIG_SCHEDSTATS
-	SEQ_printf(m, "%15Ld %15Ld %15Ld %15Ld %15Ld %15Ld\n",
+	SEQ_printf(m, "%15Ld %15Ld %15Ld\n",
 		(long long)p->se.vruntime,
 		(long long)p->se.sum_exec_runtime,
-		(long long)p->se.sum_wait_runtime,
-		(long long)p->se.sum_sleep_runtime,
-		(long long)p->se.wait_runtime_overruns,
-		(long long)p->se.wait_runtime_underruns);
+		(long long)p->se.sum_sleep_runtime);
 #else
 	SEQ_printf(m, "%15Ld %15Ld %15Ld %15Ld %15Ld\n",
 		0LL, 0LL, 0LL, 0LL, 0LL);
@@ -63,10 +58,8 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu)
 
 	SEQ_printf(m,
 	"\nrunnable tasks:\n"
-	" task PID tree-key delta waiting"
-	" switches prio"
-	" exec-runtime sum-exec sum-wait sum-sleep"
-	" wait-overrun wait-underrun\n"
+	" task PID tree-key switches prio"
+	" exec-runtime sum-exec sum-sleep\n"
 	"------------------------------------------------------------------"
 	"--------------------------------"
 	"------------------------------------------------"
@@ -84,29 +77,6 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu)
 	read_unlock_irq(&tasklist_lock);
 }
 
-static void
-print_cfs_rq_runtime_sum(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
-{
-	s64 wait_runtime_rq_sum = 0;
-	struct task_struct *p;
-	struct rb_node *curr;
-	unsigned long flags;
-	struct rq *rq = &per_cpu(runqueues, cpu);
-
-	spin_lock_irqsave(&rq->lock, flags);
-	curr = first_fair(cfs_rq);
-	while (curr) {
-		p = rb_entry(curr, struct task_struct, se.run_node);
-		wait_runtime_rq_sum += p->se.wait_runtime;
-
-		curr = rb_next(curr);
-	}
-	spin_unlock_irqrestore(&rq->lock, flags);
-
-	SEQ_printf(m, " .%-30s: %Ld\n", "wait_runtime_rq_sum",
-		(long long)wait_runtime_rq_sum);
-}
-
 void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 {
 	s64 MIN_vruntime = -1, min_vruntime, max_vruntime = -1,
@@ -120,7 +90,6 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 
 #define P(x) \
 	SEQ_printf(m, " .%-30s: %Ld\n", #x, (long long)(cfs_rq->x))
-	P(fair_clock);
 	P(exec_clock);
 
 	spin_lock_irqsave(&rq->lock, flags);
@@ -144,13 +113,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 	spread0 = min_vruntime - rq0_min_vruntime;
 	SEQ_printf(m, " .%-30s: %Ld\n", "spread0",
 			(long long)spread0);
-
-	P(wait_runtime);
-	P(wait_runtime_overruns);
-	P(wait_runtime_underruns);
 #undef P
-
-	print_cfs_rq_runtime_sum(m, cpu, cfs_rq);
 }
 
 static void print_cpu(struct seq_file *m, int cpu)
@@ -268,8 +231,6 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
 #define P(F) \
 	SEQ_printf(m, "%-25s:%20Ld\n", #F, (long long)p->F)
 
-	P(se.wait_runtime);
-	P(se.wait_start_fair);
 	P(se.exec_start);
 	P(se.vruntime);
 	P(se.sum_exec_runtime);
@@ -283,9 +244,6 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
 	P(se.exec_max);
 	P(se.slice_max);
 	P(se.wait_max);
-	P(se.wait_runtime_overruns);
-	P(se.wait_runtime_underruns);
-	P(se.sum_wait_runtime);
 #endif
 	SEQ_printf(m, "%-25s:%20Ld\n", "nr_switches",
 		(long long)(p->nvcsw + p->nivcsw));
@@ -312,8 +270,6 @@ void proc_sched_set_task(struct task_struct *p)
 	p->se.exec_max = 0;
 	p->se.slice_max = 0;
 	p->se.wait_max = 0;
-	p->se.wait_runtime_overruns = 0;
-	p->se.wait_runtime_underruns = 0;
 #endif
 	p->se.sum_exec_runtime = 0;
 	p->se.prev_sum_exec_runtime = 0;
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index a94189c42d1..2df5a646781 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -178,8 +178,6 @@ __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	update_load_add(&cfs_rq->load, se->load.weight);
 	cfs_rq->nr_running++;
 	se->on_rq = 1;
-
-	schedstat_add(cfs_rq, wait_runtime, se->wait_runtime);
 }
 
 static void
@@ -192,8 +190,6 @@ __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	update_load_sub(&cfs_rq->load, se->load.weight);
 	cfs_rq->nr_running--;
 	se->on_rq = 0;
-
-	schedstat_add(cfs_rq, wait_runtime, -se->wait_runtime);
 }
 
 static inline struct rb_node *first_fair(struct cfs_rq *cfs_rq)
@@ -249,13 +245,6 @@ static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	return period;
 }
 
-static void
-add_wait_runtime(struct cfs_rq *cfs_rq, struct sched_entity *se, long delta)
-{
-	se->wait_runtime += delta;
-	schedstat_add(cfs_rq, wait_runtime, delta);
-}
-
 /*
  * Update the current task's runtime statistics. Skip current tasks that
  * are not in our scheduling class.
@@ -264,9 +253,7 @@ static inline void
 __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
 	      unsigned long delta_exec)
 {
-	unsigned long delta_fair, delta_mine, delta_exec_weighted;
-	struct load_weight *lw = &cfs_rq->load;
-	unsigned long load = lw->weight;
+	unsigned long delta_exec_weighted;
 
 	schedstat_set(curr->exec_max, max((u64)delta_exec, curr->exec_max));
 
@@ -278,25 +265,6 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
 			&curr->load);
 	}
 	curr->vruntime += delta_exec_weighted;
-
-	if (!sched_feat(FAIR_SLEEPERS))
-		return;
-
-	if (unlikely(!load))
-		return;
-
-	delta_fair = calc_delta_fair(delta_exec, lw);
-	delta_mine = calc_delta_mine(delta_exec, curr->load.weight, lw);
-
-	cfs_rq->fair_clock += delta_fair;
-	/*
-	 * We executed delta_exec amount of time on the CPU,
-	 * but we were only entitled to delta_mine amount of
-	 * time during that period (if nr_running == 1 then
-	 * the two values are equal)
-	 * [Note: delta_mine - delta_exec is negative]:
-	 */
-	add_wait_runtime(cfs_rq, curr, delta_mine - delta_exec);
 }
 
 static void update_curr(struct cfs_rq *cfs_rq)
@@ -322,7 +290,6 @@ static void update_curr(struct cfs_rq *cfs_rq)
 static inline void
 update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-	se->wait_start_fair = cfs_rq->fair_clock;
 	schedstat_set(se->wait_start, rq_of(cfs_rq)->clock);
 }
 
@@ -354,35 +321,11 @@ static void update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	se->fair_key = se->vruntime;
 }
 
-/*
- * Note: must be called with a freshly updated rq->fair_clock.
- */
-static inline void
-__update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se,
-			unsigned long delta_fair)
-{
-	schedstat_set(se->wait_max, max(se->wait_max,
-			rq_of(cfs_rq)->clock - se->wait_start));
-
-	delta_fair = calc_weighted(delta_fair, se);
-
-	add_wait_runtime(cfs_rq, se, delta_fair);
-}
-
 static void
 update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-	unsigned long delta_fair;
-
-	if (unlikely(!se->wait_start_fair))
-		return;
-
-	delta_fair = (unsigned long)min((u64)(2*sysctl_sched_runtime_limit),
-			(u64)(cfs_rq->fair_clock - se->wait_start_fair));
-
-	__update_stats_wait_end(cfs_rq, se, delta_fair);
-
-	se->wait_start_fair = 0;
+	schedstat_set(se->wait_max, max(se->wait_max,
+			rq_of(cfs_rq)->clock - se->wait_start));
 	schedstat_set(se->wait_start, 0);
 }
 
@@ -552,9 +495,7 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	/*
 	 * Any task has to be enqueued before it get to execute on
 	 * a CPU. So account for the time it spent waiting on the
-	 * runqueue. (note, here we rely on pick_next_task() having
-	 * done a put_prev_task_fair() shortly before this, which
-	 * updated rq->fair_clock - used by update_stats_wait_end())
+	 * runqueue.
 	 */
 	update_stats_wait_end(cfs_rq, se);
 	update_stats_curr_start(cfs_rq, se);
@@ -989,13 +930,6 @@ static void task_new_fair(struct rq *rq, struct task_struct *p)
 	update_curr(cfs_rq);
 	place_entity(cfs_rq, se, 1);
 
-	/*
-	 * The statistical average of wait_runtime is about
-	 * -granularity/2, so initialize the task with that:
-	 */
-	if (sched_feat(START_DEBIT))
-		se->wait_runtime = -(__sched_period(cfs_rq->nr_running+1) / 2);
-
 	if (sysctl_sched_child_runs_first && curr->vruntime < se->vruntime) {
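
For readers unfamiliar with the vruntime metric the commit message refers to, below is a minimal standalone sketch (not part of the patch above) of the kind of weighted accounting that __update_curr() keeps after this change, instead of tracking a signed wait_runtime balance. The struct, function name, NICE_0_WEIGHT constant and the nice-level weights here are illustrative stand-ins, not kernel code; only the idea (vruntime advances by executed time scaled inversely to load weight, so the leftmost/smallest-vruntime entity is the fairest to run next) is taken from the patch.

/* Illustrative sketch only -- simplified model of vruntime accrual. */
#include <stdio.h>
#include <stdint.h>

#define NICE_0_WEIGHT 1024ULL	/* assumed reference weight for nice 0 */

struct entity {
	const char *name;
	uint64_t weight;	/* load weight; higher means a larger CPU share */
	uint64_t vruntime;	/* weighted virtual runtime, in nanoseconds */
};

/* Advance an entity's vruntime by delta_exec nanoseconds of real CPU time. */
static void account_exec(struct entity *e, uint64_t delta_exec)
{
	/* Lower weight => vruntime grows faster for the same real runtime. */
	e->vruntime += delta_exec * NICE_0_WEIGHT / e->weight;
}

int main(void)
{
	struct entity a = { "nice 0 ", 1024, 0 };
	struct entity b = { "nice +5",  335, 0 };	/* approximate lower weight */

	/* Both run 10ms of real time; the lighter entity ages ~3x faster. */
	account_exec(&a, 10000000ULL);
	account_exec(&b, 10000000ULL);

	printf("%s vruntime=%llu\n", a.name, (unsigned long long)a.vruntime);
	printf("%s vruntime=%llu\n", b.name, (unsigned long long)b.vruntime);
	return 0;
}

With this single per-entity clock, the scheduler no longer needs the wait_runtime credit/debit bookkeeping (and its overrun/underrun statistics) that this patch removes.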