mm: memcontrol: fix cgroup creation failure after many small jobs

author Johannes Weiner <hannes@cmpxchg.org>

Wed, 20 Jul 2016 22:44:57 +0000 (15:44 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Sat, 23 Jul 2016 01:25:54 +0000 (10:25 +0900)
author Johannes Weiner <hannes@cmpxchg.org>
Wed, 20 Jul 2016 22:44:57 +0000 (15:44 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sat, 23 Jul 2016 01:25:54 +0000 (10:25 +0900)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h

index a805474df4abd8c70c83bdbd3b383ad9c02eca1c..56e6069d245271539f14bf34c204122665e28ab6 100644 (file)
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -97,6 +97,11 @@ enum mem_cgroup_events_target {
  #define MEM_CGROUP_ID_SHIFT    16
  #define MEM_CGROUP_ID_MAX      USHRT_MAX
  
+struct mem_cgroup_id {
+       int id;
+       atomic_t ref;
+};
+
  struct mem_cgroup_stat_cpu {
         long count[MEMCG_NR_STAT];
         unsigned long events[MEMCG_NR_EVENTS];
@@ -172,6 +177,9 @@ enum memcg_kmem_state {
  struct mem_cgroup {
         struct cgroup_subsys_state css;
  
+       /* Private memcg ID. Used to ID objects that outlive the cgroup */
+       struct mem_cgroup_id id;
+
         /* Accounted resources */
         struct page_counter memory;
         struct page_counter swap;
@@ -330,22 +338,9 @@ static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg)
         if (mem_cgroup_disabled())
                 return 0;
  
-       return memcg->css.id;
-}
-
-/**
- * mem_cgroup_from_id - look up a memcg from an id
- * @id: the id to look up
- *
- * Caller must hold rcu_read_lock() and use css_tryget() as necessary.
- */
-static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id)
-{
-       struct cgroup_subsys_state *css;
-
-       css = css_from_id(id, &memory_cgrp_subsys);
-       return mem_cgroup_from_css(css);
+       return memcg->id.id;
  }
+struct mem_cgroup *mem_cgroup_from_id(unsigned short id);
  
  /**
   * parent_mem_cgroup - find the accounting parent of a memcg
diff --git a/mm/memcontrol.c b/mm/memcontrol.c

index ac8664db38232f5ede345ee4b2f2f9ec0c5ac79d..5339c89dff6317510b2710e9ab2770c23ac71e1e 100644 (file)
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4057,6 +4057,60 @@ static struct cftype mem_cgroup_legacy_files[] = {
         { },    /* terminate */
  };
  
+/*
+ * Private memory cgroup IDR
+ *
+ * Swap-out records and page cache shadow entries need to store memcg
+ * references in constrained space, so we maintain an ID space that is
+ * limited to 16 bit (MEM_CGROUP_ID_MAX), limiting the total number of
+ * memory-controlled cgroups to 64k.
+ *
+ * However, there usually are many references to the offline CSS after
+ * the cgroup has been destroyed, such as page cache or reclaimable
+ * slab objects, that don't need to hang on to the ID. We want to keep
+ * those dead CSS from occupying IDs, or we might quickly exhaust the
+ * relatively small ID space and prevent the creation of new cgroups
+ * even when there are far fewer than 64k cgroups - possibly none.
+ *
+ * Maintain a private 16-bit ID space for memcg, and allow the ID to
+ * be freed and recycled when it's no longer needed, which is usually
+ * when the CSS is offlined.
+ *
+ * The only exceptions to that are records of swapped out tmpfs/shmem
+ * pages that need to be attributed to live ancestors on swapin. But
+ * those references are manageable from userspace.
+ */
+
+static DEFINE_IDR(mem_cgroup_idr);
+
+static void mem_cgroup_id_get(struct mem_cgroup *memcg)
+{
+       atomic_inc(&memcg->id.ref);
+}
+
+static void mem_cgroup_id_put(struct mem_cgroup *memcg)
+{
+       if (atomic_dec_and_test(&memcg->id.ref)) {
+               idr_remove(&mem_cgroup_idr, memcg->id.id);
+               memcg->id.id = 0;
+
+               /* Memcg ID pins CSS */
+               css_put(&memcg->css);
+       }
+}
+
+/**
+ * mem_cgroup_from_id - look up a memcg from a memcg id
+ * @id: the memcg id to look up
+ *
+ * Caller must hold rcu_read_lock().
+ */
+struct mem_cgroup *mem_cgroup_from_id(unsigned short id)
+{
+       WARN_ON_ONCE(!rcu_read_lock_held());
+       return idr_find(&mem_cgroup_idr, id);
+}
+
  static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node)
  {
         struct mem_cgroup_per_node *pn;
@@ -4116,6 +4170,12 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
         if (!memcg)
                 return NULL;
  
+       memcg->id.id = idr_alloc(&mem_cgroup_idr, NULL,
+                                1, MEM_CGROUP_ID_MAX,
+                                GFP_KERNEL);
+       if (memcg->id.id < 0)
+               goto fail;
+
         memcg->stat = alloc_percpu(struct mem_cgroup_stat_cpu);
         if (!memcg->stat)
                 goto fail;
@@ -4142,8 +4202,11 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
  #ifdef CONFIG_CGROUP_WRITEBACK
         INIT_LIST_HEAD(&memcg->cgwb_list);
  #endif
+       idr_replace(&mem_cgroup_idr, memcg, memcg->id.id);
         return memcg;
  fail:
+       if (memcg->id.id > 0)
+               idr_remove(&mem_cgroup_idr, memcg->id.id);
         mem_cgroup_free(memcg);
         return NULL;
  }
@@ -4206,12 +4269,11 @@ fail:
         return ERR_PTR(-ENOMEM);
  }
  
-static int
-mem_cgroup_css_online(struct cgroup_subsys_state *css)
+static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
  {
-       if (css->id > MEM_CGROUP_ID_MAX)
-               return -ENOSPC;
-
+       /* Online state pins memcg ID, memcg ID pins CSS */
+       mem_cgroup_id_get(mem_cgroup_from_css(css));
+       css_get(css);
         return 0;
  }
  
@@ -4234,6 +4296,8 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
  
         memcg_offline_kmem(memcg);
         wb_memcg_offline(memcg);
+
+       mem_cgroup_id_put(memcg);
  }
  
  static void mem_cgroup_css_released(struct cgroup_subsys_state *css)
@@ -5756,6 +5820,7 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
         if (!memcg)
                 return;
  
+       mem_cgroup_id_get(memcg);
         oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg));
         VM_BUG_ON_PAGE(oldid, page);
         mem_cgroup_swap_statistics(memcg, true);
@@ -5774,6 +5839,9 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
         VM_BUG_ON(!irqs_disabled());
         mem_cgroup_charge_statistics(memcg, page, false, -1);
         memcg_check_events(memcg, page);
+
+       if (!mem_cgroup_is_root(memcg))
+               css_put(&memcg->css);
  }
  
  /*
@@ -5804,11 +5872,11 @@ int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry)
             !page_counter_try_charge(&memcg->swap, 1, &counter))
                 return -ENOMEM;
  
+       mem_cgroup_id_get(memcg);
         oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg));
         VM_BUG_ON_PAGE(oldid, page);
         mem_cgroup_swap_statistics(memcg, true);
  
-       css_get(&memcg->css);
         return 0;
  }
  
@@ -5837,7 +5905,7 @@ void mem_cgroup_uncharge_swap(swp_entry_t entry)
                                 page_counter_uncharge(&memcg->memsw, 1);
                 }
                 mem_cgroup_swap_statistics(memcg, false);
-               css_put(&memcg->css);
+               mem_cgroup_id_put(memcg);
         }
         rcu_read_unlock();
  }
diff --git a/mm/slab_common.c b/mm/slab_common.c

index a65dad7fdcd12495a51eabd91fc76ed96edb0576..82317abb03edc7aa2c89e0a20032fc57a532a723 100644 (file)
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -526,8 +526,8 @@ void memcg_create_kmem_cache(struct mem_cgroup *memcg,
                 goto out_unlock;
  
         cgroup_name(css->cgroup, memcg_name_buf, sizeof(memcg_name_buf));
-       cache_name = kasprintf(GFP_KERNEL, "%s(%d:%s)", root_cache->name,
-                              css->id, memcg_name_buf);
+       cache_name = kasprintf(GFP_KERNEL, "%s(%llu:%s)", root_cache->name,
+                              css->serial_nr, memcg_name_buf);
         if (!cache_name)
                 goto out_unlock;
author	Johannes Weiner <hannes@cmpxchg.org>
	Wed, 20 Jul 2016 22:44:57 +0000 (15:44 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Sat, 23 Jul 2016 01:25:54 +0000 (10:25 +0900)
include/linux/memcontrol.h		patch \| blob \| history
mm/memcontrol.c		patch \| blob \| history
mm/slab_common.c		patch \| blob \| history