]> git.baikalelectronics.ru Git - kernel.git/commitdiff
mm/memcg: add oom_group_kill memory event
author Dan Schatzberg <schatzberg.dan@gmail.com>
Fri, 14 Jan 2022 22:05:35 +0000 (14:05 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sat, 15 Jan 2022 14:30:27 +0000 (16:30 +0200)
When a container exits, our container agent wants to know whether it was
OOM killed so that it can report this to the user.  We use
memory.oom.group=1 to ensure that OOM kills within the container's cgroup
kill everything.  Existing memory.events are insufficient for knowing
whether this triggered:

1) Our current approach reads memory.events oom_kill and reports that
   the container was killed if the value is non-zero. This is erroneous
   in some cases where containers create their child cgroups with
   memory.oom.group=1, because such OOM kills are counted against the
   container cgroup's oom_kill counter despite not actually OOM killing
   the entire container.

2) Reading memory.events.local will fail to identify OOM kills in leaf
   cgroups (that don't set memory.oom.group) within the container
   cgroup.

This patch adds a new oom_group_kill event, raised when memory.oom.group
triggers, to allow userspace to cleanly identify when an entire cgroup is
OOM killed.

[schatzberg.dan@gmail.com: changes from Johannes and Chris]
Link: https://lkml.kernel.org/r/20211213162511.2492267-1-schatzberg.dan@gmail.com
Link: https://lkml.kernel.org/r/20211203162426.3375036-1-schatzberg.dan@gmail.com
Signed-off-by: Dan Schatzberg <schatzberg.dan@gmail.com>
Reviewed-by: Roman Gushchin <guro@fb.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Chris Down <chris@chrisdown.name>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Zefan Li <lizefan.x@bytedance.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Alex Shi <alexs@kernel.org>
Cc: Wei Yang <richard.weiyang@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Documentation/admin-guide/cgroup-v2.rst
include/linux/memcontrol.h
mm/memcontrol.c
mm/oom_kill.c

index 2aeb7ae8b39348e65bd52ca455fd848e398a8e8d..8269bfa240f46108f397f5569c18ba96b56a04ce 100644 (file)
@@ -1268,6 +1268,9 @@ PAGE_SIZE multiple when read back.
                The number of processes belonging to this cgroup
                killed by any kind of OOM killer.
 
+          oom_group_kill
+                The number of times a group OOM has occurred.
+
   memory.events.local
        Similar to memory.events but the fields in the file are local
        to the cgroup i.e. not hierarchical. The file modified event
index 0c5c403f4be6ba111600fad90cda0862b2a216da..951f24f4214778f0d1e8527cb5f071c8d236aec9 100644 (file)
@@ -42,6 +42,7 @@ enum memcg_memory_event {
        MEMCG_MAX,
        MEMCG_OOM,
        MEMCG_OOM_KILL,
+       MEMCG_OOM_GROUP_KILL,
        MEMCG_SWAP_HIGH,
        MEMCG_SWAP_MAX,
        MEMCG_SWAP_FAIL,
index bfe9bdec192b6de000c086baf961aae29da49a00..2d39d58baccfde48a9bb7549b6681a02d5ba6104 100644 (file)
@@ -6318,6 +6318,8 @@ static void __memory_events_show(struct seq_file *m, atomic_long_t *events)
        seq_printf(m, "oom %lu\n", atomic_long_read(&events[MEMCG_OOM]));
        seq_printf(m, "oom_kill %lu\n",
                   atomic_long_read(&events[MEMCG_OOM_KILL]));
+       seq_printf(m, "oom_group_kill %lu\n",
+                  atomic_long_read(&events[MEMCG_OOM_GROUP_KILL]));
 }
 
 static int memory_events_show(struct seq_file *m, void *v)
index 1ddabefcfb5aba566205264d127702976e622ec6..e52ce0b1465d6a426889dffbf6c93999e7812b37 100644 (file)
@@ -994,6 +994,7 @@ static void oom_kill_process(struct oom_control *oc, const char *message)
         * If necessary, kill all tasks in the selected memory cgroup.
         */
        if (oom_group) {
+               memcg_memory_event(oom_group, MEMCG_OOM_GROUP_KILL);
                mem_cgroup_print_oom_group(oom_group);
                mem_cgroup_scan_tasks(oom_group, oom_kill_memcg_member,
                                      (void *)message);