fs: fsnotify: account fsnotify metadata to kmemcg

author Shakeel Butt <shakeelb@google.com>

Fri, 17 Aug 2018 22:46:39 +0000 (15:46 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Fri, 17 Aug 2018 23:20:30 +0000 (16:20 -0700)
author Shakeel Butt <shakeelb@google.com>
Fri, 17 Aug 2018 22:46:39 +0000 (15:46 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 17 Aug 2018 23:20:30 +0000 (16:20 -0700)
diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c

index e2bea2ac5dfb2809ae9460edce3a58916456c9ef..a6365e6bc04704fd8f0735a1a90af5e72658b6ba 100644 (file)
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -384,8 +384,9 @@ out_err:
  
  static int __init dnotify_init(void)
  {
-       dnotify_struct_cache = KMEM_CACHE(dnotify_struct, SLAB_PANIC);
-       dnotify_mark_cache = KMEM_CACHE(dnotify_mark, SLAB_PANIC);
+       dnotify_struct_cache = KMEM_CACHE(dnotify_struct,
+                                         SLAB_PANIC|SLAB_ACCOUNT);
+       dnotify_mark_cache = KMEM_CACHE(dnotify_mark, SLAB_PANIC|SLAB_ACCOUNT);
  
         dnotify_group = fsnotify_alloc_group(&dnotify_fsnotify_ops);
         if (IS_ERR(dnotify_group))
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c

index f90842efea13c95390e85cb22897cadbaa376700..eb4e75175cfb77bac8490e1a7c5a713aaeebc594 100644 (file)
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -11,6 +11,7 @@
  #include <linux/types.h>
  #include <linux/wait.h>
  #include <linux/audit.h>
+#include <linux/sched/mm.h>
  
  #include "fanotify.h"
  
@@ -140,8 +141,8 @@ struct fanotify_event_info *fanotify_alloc_event(struct fsnotify_group *group,
                                                  struct inode *inode, u32 mask,
                                                  const struct path *path)
  {
-       struct fanotify_event_info *event;
-       gfp_t gfp = GFP_KERNEL;
+       struct fanotify_event_info *event = NULL;
+       gfp_t gfp = GFP_KERNEL_ACCOUNT;
  
         /*
          * For queues with unlimited length lost events are not expected and
@@ -151,19 +152,22 @@ struct fanotify_event_info *fanotify_alloc_event(struct fsnotify_group *group,
         if (group->max_events == UINT_MAX)
                 gfp |= __GFP_NOFAIL;
  
+       /* Whoever is interested in the event, pays for the allocation. */
+       memalloc_use_memcg(group->memcg);
+
         if (fanotify_is_perm_event(mask)) {
                 struct fanotify_perm_event_info *pevent;
  
                 pevent = kmem_cache_alloc(fanotify_perm_event_cachep, gfp);
                 if (!pevent)
-                       return NULL;
+                       goto out;
                 event = &pevent->fae;
                 pevent->response = 0;
                 goto init;
         }
         event = kmem_cache_alloc(fanotify_event_cachep, gfp);
         if (!event)
-               return NULL;
+               goto out;
  init: __maybe_unused
         fsnotify_init_event(&event->fse, inode, mask);
         event->tgid = get_pid(task_tgid(current));
@@ -174,6 +178,8 @@ init: __maybe_unused
                 event->path.mnt = NULL;
                 event->path.dentry = NULL;
         }
+out:
+       memalloc_unuse_memcg();
         return event;
  }
  
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c

index ec4d8c59d0e379df56efef0c86d3d303326ff071..0cf45041dc326e1bfa2c5f834bb9d47de88094cb 100644 (file)
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -16,6 +16,7 @@
  #include <linux/uaccess.h>
  #include <linux/compat.h>
  #include <linux/sched/signal.h>
+#include <linux/memcontrol.h>
  
  #include <asm/ioctls.h>
  
@@ -756,6 +757,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
  
         group->fanotify_data.user = user;
         atomic_inc(&user->fanotify_listeners);
+       group->memcg = get_mem_cgroup_from_mm(current->mm);
  
         oevent = fanotify_alloc_event(group, NULL, FS_Q_OVERFLOW, NULL);
         if (unlikely(!oevent)) {
@@ -957,7 +959,8 @@ COMPAT_SYSCALL_DEFINE6(fanotify_mark,
   */
  static int __init fanotify_user_setup(void)
  {
-       fanotify_mark_cache = KMEM_CACHE(fsnotify_mark, SLAB_PANIC);
+       fanotify_mark_cache = KMEM_CACHE(fsnotify_mark,
+                                        SLAB_PANIC|SLAB_ACCOUNT);
         fanotify_event_cachep = KMEM_CACHE(fanotify_event_info, SLAB_PANIC);
         if (IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS)) {
                 fanotify_perm_event_cachep =
diff --git a/fs/notify/group.c b/fs/notify/group.c

index aa5468f23e45ceea0b524019d2c0968b72c02eea..c03b836628769ab5dac9adc6576ad20230ef9711 100644 (file)
--- a/fs/notify/group.c
+++ b/fs/notify/group.c
@@ -22,6 +22,7 @@
  #include <linux/srcu.h>
  #include <linux/rculist.h>
  #include <linux/wait.h>
+#include <linux/memcontrol.h>
  
  #include <linux/fsnotify_backend.h>
  #include "fsnotify.h"
@@ -36,6 +37,8 @@ static void fsnotify_final_destroy_group(struct fsnotify_group *group)
         if (group->ops->free_group_priv)
                 group->ops->free_group_priv(group);
  
+       mem_cgroup_put(group->memcg);
+
         kfree(group);
  }
  
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c

index 9ab6dde38a14c346b000716786f1552f87f85698..f4184b4f38154443816a61e89d461f352e83d76f 100644 (file)
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -31,6 +31,7 @@
  #include <linux/types.h>
  #include <linux/sched.h>
  #include <linux/sched/user.h>
+#include <linux/sched/mm.h>
  
  #include "inotify.h"
  
@@ -98,7 +99,11 @@ int inotify_handle_event(struct fsnotify_group *group,
         i_mark = container_of(inode_mark, struct inotify_inode_mark,
                               fsn_mark);
  
-       event = kmalloc(alloc_len, GFP_KERNEL);
+       /* Whoever is interested in the event, pays for the allocation. */
+       memalloc_use_memcg(group->memcg);
+       event = kmalloc(alloc_len, GFP_KERNEL_ACCOUNT);
+       memalloc_unuse_memcg();
+
         if (unlikely(!event)) {
                 /*
                  * Treat lost event due to ENOMEM the same way as queue
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c

index 1cf5b779d862dc81f9b00454d06babd8fc3acb1f..749c46ababa0d493f14e73e5882c75faaef29d72 100644 (file)
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -38,6 +38,7 @@
  #include <linux/uaccess.h>
  #include <linux/poll.h>
  #include <linux/wait.h>
+#include <linux/memcontrol.h>
  
  #include "inotify.h"
  #include "../fdinfo.h"
@@ -636,6 +637,7 @@ static struct fsnotify_group *inotify_new_group(unsigned int max_events)
         oevent->name_len = 0;
  
         group->max_events = max_events;
+       group->memcg = get_mem_cgroup_from_mm(current->mm);
  
         spin_lock_init(&group->inotify_data.idr_lock);
         idr_init(&group->inotify_data.idr);
@@ -808,7 +810,8 @@ static int __init inotify_user_setup(void)
  
         BUG_ON(hweight32(ALL_INOTIFY_BITS) != 21);
  
-       inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark, SLAB_PANIC);
+       inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark,
+                                              SLAB_PANIC|SLAB_ACCOUNT);
  
         inotify_max_queued_events = 16384;
         init_user_ns.ucount_max[UCOUNT_INOTIFY_INSTANCES] = 128;
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h

index b38964a7a521e5d204db057a4cd3ac09ac97f3e5..a0c4790c5302c290ea5d557eff46ccd2e14d25ff 100644 (file)
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -84,6 +84,8 @@ struct fsnotify_event_private_data;
  struct fsnotify_fname;
  struct fsnotify_iter_info;
  
+struct mem_cgroup;
+
  /*
   * Each group much define these ops.  The fsnotify infrastructure will call
   * these operations for each relevant group.
@@ -127,6 +129,8 @@ struct fsnotify_event {
   * everything will be cleaned up.
   */
  struct fsnotify_group {
+       const struct fsnotify_ops *ops; /* how this group handles things */
+
         /*
          * How the refcnt is used is up to each group.  When the refcnt hits 0
          * fsnotify will clean up all of the resources associated with this group.
@@ -137,8 +141,6 @@ struct fsnotify_group {
          */
         refcount_t refcnt;              /* things with interest in this group */
  
-       const struct fsnotify_ops *ops; /* how this group handles things */
-
         /* needed to send notification to userspace */
         spinlock_t notification_lock;           /* protect the notification_list */
         struct list_head notification_list;     /* list of event_holder this group needs to send to userspace */
@@ -160,6 +162,8 @@ struct fsnotify_group {
         atomic_t num_marks;             /* 1 for each mark and 1 for not being
                                          * past the point of no return when freeing
                                          * a group */
+       atomic_t user_waits;            /* Number of tasks waiting for user
+                                        * response */
         struct list_head marks_list;    /* all inode marks for this group */
  
         struct fasync_struct *fsn_fa;    /* async notification */
@@ -167,8 +171,8 @@ struct fsnotify_group {
         struct fsnotify_event *overflow_event;  /* Event we queue when the
                                                  * notification list is too
                                                  * full */
-       atomic_t user_waits;            /* Number of tasks waiting for user
-                                        * response */
+
+       struct mem_cgroup *memcg;       /* memcg to charge allocations */
  
         /* groups can define private fields here or use the void *private */
         union {
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h

index 42f4719def3205cfc77284507665da719954e132..121e218d2a21808322d1638b67dac27fa5757add 100644 (file)
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -373,6 +373,8 @@ struct lruvec *mem_cgroup_page_lruvec(struct page *, struct pglist_data *);
  bool task_in_mem_cgroup(struct task_struct *task, struct mem_cgroup *memcg);
  struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
  
+struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm);
+
  static inline
  struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css){
         return css ? container_of(css, struct mem_cgroup, css) : NULL;
@@ -380,7 +382,8 @@ struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css){
  
  static inline void mem_cgroup_put(struct mem_cgroup *memcg)
  {
-       css_put(&memcg->css);
+       if (memcg)
+               css_put(&memcg->css);
  }
  
  #define mem_cgroup_from_counter(counter, member)       \
@@ -855,6 +858,11 @@ static inline bool task_in_mem_cgroup(struct task_struct *task,
         return true;
  }
  
+static inline struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
+{
+       return NULL;
+}
+
  static inline void mem_cgroup_put(struct mem_cgroup *memcg)
  {
  }
diff --git a/include/linux/sched.h b/include/linux/sched.h

index 95a5018c338ea6a8b2f84bc32e1ee48fd29da2fe..1827f4a7a6de07501313dd12233c31e06792f638 100644 (file)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1152,6 +1152,9 @@ struct task_struct {
  
         /* Number of pages to reclaim on returning to userland: */
         unsigned int                    memcg_nr_pages_over_high;
+
+       /* Used by memcontrol for targeted memcg charge: */
+       struct mem_cgroup               *active_memcg;
  #endif
  
  #ifdef CONFIG_BLK_CGROUP
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h

index 44d356f5e47c9c99a21a6f6aaca8cb1b49490497..aebb370a000624f0258a7c0a2465a4ea2d3e6acf 100644 (file)
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -248,6 +248,43 @@ static inline void memalloc_noreclaim_restore(unsigned int flags)
         current->flags = (current->flags & ~PF_MEMALLOC) | flags;
  }
  
+#ifdef CONFIG_MEMCG
+/**
+ * memalloc_use_memcg - Starts the remote memcg charging scope.
+ * @memcg: memcg to charge.
+ *
+ * This function marks the beginning of the remote memcg charging scope. All the
+ * __GFP_ACCOUNT allocations till the end of the scope will be charged to the
+ * given memcg.
+ *
+ * NOTE: This function is not nesting safe.
+ */
+static inline void memalloc_use_memcg(struct mem_cgroup *memcg)
+{
+       WARN_ON_ONCE(current->active_memcg);
+       current->active_memcg = memcg;
+}
+
+/**
+ * memalloc_unuse_memcg - Ends the remote memcg charging scope.
+ *
+ * This function marks the end of the remote memcg charging scope started by
+ * memalloc_use_memcg().
+ */
+static inline void memalloc_unuse_memcg(void)
+{
+       current->active_memcg = NULL;
+}
+#else
+static inline void memalloc_use_memcg(struct mem_cgroup *memcg)
+{
+}
+
+static inline void memalloc_unuse_memcg(void)
+{
+}
+#endif
+
  #ifdef CONFIG_MEMBARRIER
  enum {
         MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY                = (1U << 0),
diff --git a/kernel/fork.c b/kernel/fork.c

index 33112315b5c0ed4d32f96375fcf0cdf77594f27f..5ee74c113381b1959b27bb24940ff05c9221bbc2 100644 (file)
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -871,6 +871,9 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
         tsk->use_memdelay = 0;
  #endif
  
+#ifdef CONFIG_MEMCG
+       tsk->active_memcg = NULL;
+#endif
         return tsk;
  
  free_stack:
diff --git a/mm/memcontrol.c b/mm/memcontrol.c

index b836e7f003094778331ac9c469fe2450d9fac98f..bf9cf738c836849d9fb9c238ee4ac437bd9a4901 100644 (file)
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -678,9 +678,20 @@ struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p)
  }
  EXPORT_SYMBOL(mem_cgroup_from_task);
  
-static struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
+/**
+ * get_mem_cgroup_from_mm: Obtain a reference on given mm_struct's memcg.
+ * @mm: mm from which memcg should be extracted. It can be NULL.
+ *
+ * Obtains a reference on mm->memcg and returns it if successful. Otherwise
+ * root_mem_cgroup is returned. However, if mem_cgroup is disabled, NULL is
+ * returned.
+ */
+struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
  {
-       struct mem_cgroup *memcg = NULL;
+       struct mem_cgroup *memcg;
+
+       if (mem_cgroup_disabled())
+               return NULL;
  
         rcu_read_lock();
         do {
@@ -700,6 +711,24 @@ static struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
         rcu_read_unlock();
         return memcg;
  }
+EXPORT_SYMBOL(get_mem_cgroup_from_mm);
+
+/*
+ * If current->active_memcg is non-NULL, do not fallback to current->mm->memcg.
+ */
+static __always_inline struct mem_cgroup *get_mem_cgroup_from_current(void)
+{
+       if (unlikely(current->active_memcg)) {
+               struct mem_cgroup *memcg = root_mem_cgroup;
+
+               rcu_read_lock();
+               if (css_tryget_online(&current->active_memcg->css))
+                       memcg = current->active_memcg;
+               rcu_read_unlock();
+               return memcg;
+       }
+       return get_mem_cgroup_from_mm(current->mm);
+}
  
  /**
   * mem_cgroup_iter - iterate over memory cgroup hierarchy
@@ -2261,7 +2290,7 @@ struct kmem_cache *memcg_kmem_get_cache(struct kmem_cache *cachep)
         if (current->memcg_kmem_skip_account)
                 return cachep;
  
-       memcg = get_mem_cgroup_from_mm(current->mm);
+       memcg = get_mem_cgroup_from_current();
         kmemcg_id = READ_ONCE(memcg->kmemcg_id);
         if (kmemcg_id < 0)
                 goto out;
@@ -2345,7 +2374,7 @@ int memcg_kmem_charge(struct page *page, gfp_t gfp, int order)
         if (memcg_kmem_bypass())
                 return 0;
  
-       memcg = get_mem_cgroup_from_mm(current->mm);
+       memcg = get_mem_cgroup_from_current();
         if (!mem_cgroup_is_root(memcg)) {
                 ret = memcg_kmem_charge_memcg(page, gfp, order, memcg);
                 if (!ret)
author	Shakeel Butt <shakeelb@google.com>
	Fri, 17 Aug 2018 22:46:39 +0000 (15:46 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 17 Aug 2018 23:20:30 +0000 (16:20 -0700)
fs/notify/dnotify/dnotify.c		patch \| blob \| history
fs/notify/fanotify/fanotify.c		patch \| blob \| history
fs/notify/fanotify/fanotify_user.c		patch \| blob \| history
fs/notify/group.c		patch \| blob \| history
fs/notify/inotify/inotify_fsnotify.c		patch \| blob \| history
fs/notify/inotify/inotify_user.c		patch \| blob \| history
include/linux/fsnotify_backend.h		patch \| blob \| history
include/linux/memcontrol.h		patch \| blob \| history
include/linux/sched.h		patch \| blob \| history
include/linux/sched/mm.h		patch \| blob \| history
kernel/fork.c		patch \| blob \| history
mm/memcontrol.c		patch \| blob \| history