From: Dietmar Eggemann Date: Thu, 18 Nov 2021 16:42:40 +0000 (+0100) Subject: sched/fair: Replace CFS internal cpu_util() with cpu_util_cfs() X-Git-Tag: baikal/aarch64/sdk6.1~4917^2 X-Git-Url: https://git.baikalelectronics.ru/sdk/?a=commitdiff_plain;h=b3124f0c786e076ad262886686144ac4235bb63b;p=kernel.git sched/fair: Replace CFS internal cpu_util() with cpu_util_cfs() cpu_util_cfs() was created by commit 437f7aa0c95d ("sched/cpufreq: Use the DEADLINE utilization signal") to enable the access to CPU utilization from the Schedutil CPUfreq governor. Commit a0142ed55983 ("sched/cpufreq/schedutil: Use util_est for OPP selection") added util_est support later. The only thing cpu_util() is doing on top of what cpu_util_cfs() already does is to clamp the return value to the [0..capacity_orig] capacity range of the CPU. Integrating this into cpu_util_cfs() is not harming the existing users (Schedutil and CPUfreq cooling (latter via sched_cpu_util() wrapper)). For straightforwardness, prefer to keep using `int cpu` as the function parameter over using `struct rq *rq` which might avoid some calls to cpu_rq(cpu) -> per_cpu(runqueues, cpu) -> RELOC_HIDE(). Update cfs_util()'s documentation and reuse it for cpu_util_cfs(). Remove cpu_util(). Signed-off-by: Dietmar Eggemann Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Vincent Guittot Link: https://lore.kernel.org/r/20211118164240.623551-1-dietmar.eggemann@arm.com --- diff --git a/kernel/sched/core.c b/kernel/sched/core.c index beaa8be6241e1..fe53e510e7114 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -7166,7 +7166,7 @@ unsigned long effective_cpu_util(int cpu, unsigned long util_cfs, unsigned long sched_cpu_util(int cpu, unsigned long max) { - return effective_cpu_util(cpu, cpu_util_cfs(cpu_rq(cpu)), max, + return effective_cpu_util(cpu, cpu_util_cfs(cpu), max, ENERGY_UTIL, NULL); } #endif /* CONFIG_SMP */ diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index e7af18857371e..26778884d9ab1 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -168,7 +168,7 @@ static void sugov_get_util(struct sugov_cpu *sg_cpu) sg_cpu->max = max; sg_cpu->bw_dl = cpu_bw_dl(rq); - sg_cpu->util = effective_cpu_util(sg_cpu->cpu, cpu_util_cfs(rq), max, + sg_cpu->util = effective_cpu_util(sg_cpu->cpu, cpu_util_cfs(sg_cpu->cpu), max, FREQUENCY_UTIL, NULL); } diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index ac5e55441cab0..095b0aa378df0 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -1502,7 +1502,6 @@ struct task_numa_env { static unsigned long cpu_load(struct rq *rq); static unsigned long cpu_runnable(struct rq *rq); -static unsigned long cpu_util(int cpu); static inline long adjust_numa_imbalance(int imbalance, int dst_running, int dst_weight); @@ -1569,7 +1568,7 @@ static void update_numa_stats(struct task_numa_env *env, ns->load += cpu_load(rq); ns->runnable += cpu_runnable(rq); - ns->util += cpu_util(cpu); + ns->util += cpu_util_cfs(cpu); ns->nr_running += rq->cfs.h_nr_running; ns->compute_capacity += capacity_of(cpu); @@ -3240,7 +3239,7 @@ static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq, int flags) * As is, the util number is not freq-invariant (we'd have to * implement arch_scale_freq_capacity() for that). * - * See cpu_util(). + * See cpu_util_cfs(). */ cpufreq_update_util(rq, flags); } @@ -5510,11 +5509,9 @@ static inline void hrtick_update(struct rq *rq) #endif #ifdef CONFIG_SMP -static inline unsigned long cpu_util(int cpu); - static inline bool cpu_overutilized(int cpu) { - return !fits_capacity(cpu_util(cpu), capacity_of(cpu)); + return !fits_capacity(cpu_util_cfs(cpu), capacity_of(cpu)); } static inline void update_overutilized_status(struct rq *rq) @@ -6459,58 +6456,6 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) return target; } -/** - * cpu_util - Estimates the amount of capacity of a CPU used by CFS tasks. - * @cpu: the CPU to get the utilization of - * - * The unit of the return value must be the one of capacity so we can compare - * the utilization with the capacity of the CPU that is available for CFS task - * (ie cpu_capacity). - * - * cfs_rq.avg.util_avg is the sum of running time of runnable tasks plus the - * recent utilization of currently non-runnable tasks on a CPU. It represents - * the amount of utilization of a CPU in the range [0..capacity_orig] where - * capacity_orig is the cpu_capacity available at the highest frequency - * (arch_scale_freq_capacity()). - * The utilization of a CPU converges towards a sum equal to or less than the - * current capacity (capacity_curr <= capacity_orig) of the CPU because it is - * the running time on this CPU scaled by capacity_curr. - * - * The estimated utilization of a CPU is defined to be the maximum between its - * cfs_rq.avg.util_avg and the sum of the estimated utilization of the tasks - * currently RUNNABLE on that CPU. - * This allows to properly represent the expected utilization of a CPU which - * has just got a big task running since a long sleep period. At the same time - * however it preserves the benefits of the "blocked utilization" in - * describing the potential for other tasks waking up on the same CPU. - * - * Nevertheless, cfs_rq.avg.util_avg can be higher than capacity_curr or even - * higher than capacity_orig because of unfortunate rounding in - * cfs.avg.util_avg or just after migrating tasks and new task wakeups until - * the average stabilizes with the new running time. We need to check that the - * utilization stays within the range of [0..capacity_orig] and cap it if - * necessary. Without utilization capping, a group could be seen as overloaded - * (CPU0 utilization at 121% + CPU1 utilization at 80%) whereas CPU1 has 20% of - * available capacity. We allow utilization to overshoot capacity_curr (but not - * capacity_orig) as it useful for predicting the capacity required after task - * migrations (scheduler-driven DVFS). - * - * Return: the (estimated) utilization for the specified CPU - */ -static inline unsigned long cpu_util(int cpu) -{ - struct cfs_rq *cfs_rq; - unsigned int util; - - cfs_rq = &cpu_rq(cpu)->cfs; - util = READ_ONCE(cfs_rq->avg.util_avg); - - if (sched_feat(UTIL_EST)) - util = max(util, READ_ONCE(cfs_rq->avg.util_est.enqueued)); - - return min_t(unsigned long, util, capacity_orig_of(cpu)); -} - /* * cpu_util_without: compute cpu utilization without any contributions from *p * @cpu: the CPU which utilization is requested @@ -6531,7 +6476,7 @@ static unsigned long cpu_util_without(int cpu, struct task_struct *p) /* Task has no contribution or is new */ if (cpu != task_cpu(p) || !READ_ONCE(p->se.avg.last_update_time)) - return cpu_util(cpu); + return cpu_util_cfs(cpu); cfs_rq = &cpu_rq(cpu)->cfs; util = READ_ONCE(cfs_rq->avg.util_avg); @@ -6595,7 +6540,7 @@ static unsigned long cpu_util_without(int cpu, struct task_struct *p) /* * Utilization (estimated) can exceed the CPU capacity, thus let's * clamp to the maximum CPU capacity to ensure consistency with - * the cpu_util call. + * cpu_util. */ return min_t(unsigned long, util, capacity_orig_of(cpu)); } @@ -6627,7 +6572,7 @@ static unsigned long cpu_util_next(int cpu, struct task_struct *p, int dst_cpu) * During wake-up, the task isn't enqueued yet and doesn't * appear in the cfs_rq->avg.util_est.enqueued of any rq, * so just add it (if needed) to "simulate" what will be - * cpu_util() after the task has been enqueued. + * cpu_util after the task has been enqueued. */ if (dst_cpu == cpu) util_est += _task_util_est(p); @@ -8689,7 +8634,7 @@ static inline void update_sg_lb_stats(struct lb_env *env, struct rq *rq = cpu_rq(i); sgs->group_load += cpu_load(rq); - sgs->group_util += cpu_util(i); + sgs->group_util += cpu_util_cfs(i); sgs->group_runnable += cpu_runnable(rq); sgs->sum_h_nr_running += rq->cfs.h_nr_running; @@ -9707,7 +9652,7 @@ static struct rq *find_busiest_queue(struct lb_env *env, break; case migrate_util: - util = cpu_util(cpu_of(rq)); + util = cpu_util_cfs(i); /* * Don't try to pull utilization from a CPU with one diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index eb971151e7e45..de53be9057390 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -2966,16 +2966,52 @@ static inline unsigned long cpu_util_dl(struct rq *rq) return READ_ONCE(rq->avg_dl.util_avg); } -static inline unsigned long cpu_util_cfs(struct rq *rq) +/** + * cpu_util_cfs() - Estimates the amount of CPU capacity used by CFS tasks. + * @cpu: the CPU to get the utilization for. + * + * The unit of the return value must be the same as the one of CPU capacity + * so that CPU utilization can be compared with CPU capacity. + * + * CPU utilization is the sum of running time of runnable tasks plus the + * recent utilization of currently non-runnable tasks on that CPU. + * It represents the amount of CPU capacity currently used by CFS tasks in + * the range [0..max CPU capacity] with max CPU capacity being the CPU + * capacity at f_max. + * + * The estimated CPU utilization is defined as the maximum between CPU + * utilization and sum of the estimated utilization of the currently + * runnable tasks on that CPU. It preserves a utilization "snapshot" of + * previously-executed tasks, which helps better deduce how busy a CPU will + * be when a long-sleeping task wakes up. The contribution to CPU utilization + * of such a task would be significantly decayed at this point of time. + * + * CPU utilization can be higher than the current CPU capacity + * (f_curr/f_max * max CPU capacity) or even the max CPU capacity because + * of rounding errors as well as task migrations or wakeups of new tasks. + * CPU utilization has to be capped to fit into the [0..max CPU capacity] + * range. Otherwise a group of CPUs (CPU0 util = 121% + CPU1 util = 80%) + * could be seen as over-utilized even though CPU1 has 20% of spare CPU + * capacity. CPU utilization is allowed to overshoot current CPU capacity + * though since this is useful for predicting the CPU capacity required + * after task migrations (scheduler-driven DVFS). + * + * Return: (Estimated) utilization for the specified CPU. + */ +static inline unsigned long cpu_util_cfs(int cpu) { - unsigned long util = READ_ONCE(rq->cfs.avg.util_avg); + struct cfs_rq *cfs_rq; + unsigned long util; + + cfs_rq = &cpu_rq(cpu)->cfs; + util = READ_ONCE(cfs_rq->avg.util_avg); if (sched_feat(UTIL_EST)) { util = max_t(unsigned long, util, - READ_ONCE(rq->cfs.avg.util_est.enqueued)); + READ_ONCE(cfs_rq->avg.util_est.enqueued)); } - return util; + return min(util, capacity_orig_of(cpu)); } static inline unsigned long cpu_util_rt(struct rq *rq)