git.baikalelectronics.ru Git - kernel.git/commitdiff
sched/core: add forced idle accounting for cgroups
author Josh Don <joshdon@google.com>
Wed, 29 Jun 2022 21:14:26 +0000 (14:14 -0700)
committer Peter Zijlstra <peterz@infradead.org>
Mon, 4 Jul 2022 07:23:07 +0000 (09:23 +0200)
e51fca93496 previously added per-task forced idle accounting. This patch
extends this to also include cgroups.

rstat is used for cgroup accounting, except for the root, which uses
kcpustat in order to bypass the need for doing an rstat flush when
reading root stats.

Only cgroup v2 is supported. Similar to the task accounting, the cgroup
accounting requires that schedstats is enabled.

Signed-off-by: Josh Don <joshdon@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Tejun Heo <tj@kernel.org>
Link: https://lkml.kernel.org/r/20220629211426.3329954-1-joshdon@google.com
include/linux/cgroup-defs.h
include/linux/kernel_stat.h
kernel/cgroup/rstat.c
kernel/sched/core_sched.c
kernel/sched/cputime.c

index 1bfcfb1af3524f46388d240a4246218a6b042f6f..025fd0e84a316489e9ec11b0dde908a40d4c19b4 100644 (file)
@@ -287,6 +287,10 @@ struct css_set {
 
 struct cgroup_base_stat {
        struct task_cputime cputime;
+
+#ifdef CONFIG_SCHED_CORE
+       u64 forceidle_sum;
+#endif
 };
 
 /*
index 69ae6b27846452392b9beacb2ba2fbb7278c731c..ddb5a358fd829f453d20f0c702f84cda168eef25 100644 (file)
@@ -28,6 +28,9 @@ enum cpu_usage_stat {
        CPUTIME_STEAL,
        CPUTIME_GUEST,
        CPUTIME_GUEST_NICE,
+#ifdef CONFIG_SCHED_CORE
+       CPUTIME_FORCEIDLE,
+#endif
        NR_STATS,
 };
 
@@ -115,4 +118,8 @@ extern void account_process_tick(struct task_struct *, int user);
 
 extern void account_idle_ticks(unsigned long ticks);
 
+#ifdef CONFIG_SCHED_CORE
+extern void __account_forceidle_time(struct task_struct *tsk, u64 delta);
+#endif
+
 #endif /* _LINUX_KERNEL_STAT_H */
index 24b5c2ab55983abb4321d4e0ec792d5c8b2e1b7b..feb59380c89627e30dbe197722ff2c5fc2f830ba 100644 (file)
@@ -310,6 +310,9 @@ static void cgroup_base_stat_add(struct cgroup_base_stat *dst_bstat,
        dst_bstat->cputime.utime += src_bstat->cputime.utime;
        dst_bstat->cputime.stime += src_bstat->cputime.stime;
        dst_bstat->cputime.sum_exec_runtime += src_bstat->cputime.sum_exec_runtime;
+#ifdef CONFIG_SCHED_CORE
+       dst_bstat->forceidle_sum += src_bstat->forceidle_sum;
+#endif
 }
 
 static void cgroup_base_stat_sub(struct cgroup_base_stat *dst_bstat,
@@ -318,6 +321,9 @@ static void cgroup_base_stat_sub(struct cgroup_base_stat *dst_bstat,
        dst_bstat->cputime.utime -= src_bstat->cputime.utime;
        dst_bstat->cputime.stime -= src_bstat->cputime.stime;
        dst_bstat->cputime.sum_exec_runtime -= src_bstat->cputime.sum_exec_runtime;
+#ifdef CONFIG_SCHED_CORE
+       dst_bstat->forceidle_sum -= src_bstat->forceidle_sum;
+#endif
 }
 
 static void cgroup_base_stat_flush(struct cgroup *cgrp, int cpu)
@@ -398,6 +404,11 @@ void __cgroup_account_cputime_field(struct cgroup *cgrp,
        case CPUTIME_SOFTIRQ:
                rstatc->bstat.cputime.stime += delta_exec;
                break;
+#ifdef CONFIG_SCHED_CORE
+       case CPUTIME_FORCEIDLE:
+               rstatc->bstat.forceidle_sum += delta_exec;
+               break;
+#endif
        default:
                break;
        }
@@ -411,8 +422,9 @@ void __cgroup_account_cputime_field(struct cgroup *cgrp,
  * with how it is done by __cgroup_account_cputime_field for each bit of
  * cpu time attributed to a cgroup.
  */
-static void root_cgroup_cputime(struct task_cputime *cputime)
+static void root_cgroup_cputime(struct cgroup_base_stat *bstat)
 {
+       struct task_cputime *cputime = &bstat->cputime;
        int i;
 
        cputime->stime = 0;
@@ -438,6 +450,10 @@ static void root_cgroup_cputime(struct task_cputime *cputime)
                cputime->sum_exec_runtime += user;
                cputime->sum_exec_runtime += sys;
                cputime->sum_exec_runtime += cpustat[CPUTIME_STEAL];
+
+#ifdef CONFIG_SCHED_CORE
+               bstat->forceidle_sum += cpustat[CPUTIME_FORCEIDLE];
+#endif
        }
 }
 
@@ -445,27 +461,43 @@ void cgroup_base_stat_cputime_show(struct seq_file *seq)
 {
        struct cgroup *cgrp = seq_css(seq)->cgroup;
        u64 usage, utime, stime;
-       struct task_cputime cputime;
+       struct cgroup_base_stat bstat;
+#ifdef CONFIG_SCHED_CORE
+       u64 forceidle_time;
+#endif
 
        if (cgroup_parent(cgrp)) {
                cgroup_rstat_flush_hold(cgrp);
                usage = cgrp->bstat.cputime.sum_exec_runtime;
                cputime_adjust(&cgrp->bstat.cputime, &cgrp->prev_cputime,
                               &utime, &stime);
+#ifdef CONFIG_SCHED_CORE
+               forceidle_time = cgrp->bstat.forceidle_sum;
+#endif
                cgroup_rstat_flush_release();
        } else {
-               root_cgroup_cputime(&cputime);
-               usage = cputime.sum_exec_runtime;
-               utime = cputime.utime;
-               stime = cputime.stime;
+               root_cgroup_cputime(&bstat);
+               usage = bstat.cputime.sum_exec_runtime;
+               utime = bstat.cputime.utime;
+               stime = bstat.cputime.stime;
+#ifdef CONFIG_SCHED_CORE
+               forceidle_time = bstat.forceidle_sum;
+#endif
        }
 
        do_div(usage, NSEC_PER_USEC);
        do_div(utime, NSEC_PER_USEC);
        do_div(stime, NSEC_PER_USEC);
+#ifdef CONFIG_SCHED_CORE
+       do_div(forceidle_time, NSEC_PER_USEC);
+#endif
 
        seq_printf(seq, "usage_usec %llu\n"
                   "user_usec %llu\n"
                   "system_usec %llu\n",
                   usage, utime, stime);
+
+#ifdef CONFIG_SCHED_CORE
+       seq_printf(seq, "core_sched.force_idle_usec %llu\n", forceidle_time);
+#endif
 }
index 38a2cec21014d8805f433cdb87e71553546fe600..5103502da7baa651ef9ad330e7145328a1cb5773 100644 (file)
@@ -277,7 +277,11 @@ void __sched_core_account_forceidle(struct rq *rq)
                if (p == rq_i->idle)
                        continue;
 
-               __schedstat_add(p->stats.core_forceidle_sum, delta);
+               /*
+                * Note: this will account forceidle to the current cpu, even
+                * if it comes from our SMT sibling.
+                */
+               __account_forceidle_time(p, delta);
        }
 }
 
index 78a233d43757fca50aa693092790f8e41bdd649f..95fc778537434da8f1689cc2a788450da8fc2b1d 100644 (file)
@@ -226,6 +226,21 @@ void account_idle_time(u64 cputime)
                cpustat[CPUTIME_IDLE] += cputime;
 }
 
+
+#ifdef CONFIG_SCHED_CORE
+/*
+ * Account for forceidle time due to core scheduling.
+ *
+ * REQUIRES: schedstat is enabled.
+ */
+void __account_forceidle_time(struct task_struct *p, u64 delta)
+{
+       __schedstat_add(p->stats.core_forceidle_sum, delta);
+
+       task_group_account_field(p, CPUTIME_FORCEIDLE, delta);
+}
+#endif
+
 /*
  * When a guest is interrupted for a longer amount of time, missed clock
  * ticks are not redelivered later. Due to that, this function may on