]> git.baikalelectronics.ru Git - kernel.git/commitdiff
memcg: enable accounting for IP address and routing-related objects
author Vasily Averin <vvs@virtuozzo.com>
Mon, 19 Jul 2021 10:44:31 +0000 (13:44 +0300)
committer David S. Miller <davem@davemloft.net>
Tue, 20 Jul 2021 13:00:38 +0000 (06:00 -0700)
A netadmin inside a container can use 'ip a a' and 'ip r a'
to assign a large number of ipv4/ipv6 addresses and routing entries
and force the kernel to allocate megabytes of unaccounted memory
for long-lived per-netdevice related kernel objects:
'struct in_ifaddr', 'struct inet6_ifaddr', 'struct fib6_node',
'struct rt6_info', 'struct fib_rule' and ip_fib caches.

These objects can be manually removed, though usually they live
in memory until their net namespace is destroyed.

It makes sense to account for them to restrict the host's memory
consumption from inside the memcg-limited container.

One such object is 'struct fib6_node', which is mostly allocated in
net/ipv6/route.c::__ip6_ins_rt() inside the write_lock_bh()/write_unlock_bh() section:

 write_lock_bh(&table->tb6_lock);
 err = fib6_add(&table->tb6_root, rt, info, mxc);
 write_unlock_bh(&table->tb6_lock);

In this case it is not enough to simply add SLAB_ACCOUNT to the corresponding
kmem cache. The proper memory cgroup still cannot be found due to the
incorrect 'in_interrupt()' check used in memcg_kmem_bypass().

The obsolete in_interrupt() does not describe the real execution context properly.
From include/linux/preempt.h:

 The following macros are deprecated and should not be used in new code:
 in_interrupt() - We're in NMI,IRQ,SoftIRQ context or have BH disabled

To verify the current execution context, the new macro should be used instead:
 in_task() - We're in task context

Signed-off-by: Vasily Averin <vvs@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
mm/memcontrol.c
net/core/fib_rules.c
net/ipv4/devinet.c
net/ipv4/fib_trie.c
net/ipv6/addrconf.c
net/ipv6/ip6_fib.c
net/ipv6/route.c

index ae1f5d0cb58105c0957e2ac802965bebc3c36742..1bbf239b06f2a0206b68b3ef50409708ee7c0421 100644 (file)
@@ -968,7 +968,7 @@ static __always_inline bool memcg_kmem_bypass(void)
                return false;
 
        /* Memcg to charge can't be determined. */
-       if (in_interrupt() || !current->mm || (current->flags & PF_KTHREAD))
+       if (!in_task() || !current->mm || (current->flags & PF_KTHREAD))
                return true;
 
        return false;
index a9f9379750802b8003b404cc98476aa6d784bd86..79df7cd9dbc16d5bd91394bce15ba5e3fd8244c9 100644 (file)
@@ -57,7 +57,7 @@ int fib_default_rule_add(struct fib_rules_ops *ops,
 {
        struct fib_rule *r;
 
-       r = kzalloc(ops->rule_size, GFP_KERNEL);
+       r = kzalloc(ops->rule_size, GFP_KERNEL_ACCOUNT);
        if (r == NULL)
                return -ENOMEM;
 
@@ -541,7 +541,7 @@ static int fib_nl2rule(struct sk_buff *skb, struct nlmsghdr *nlh,
                        goto errout;
        }
 
-       nlrule = kzalloc(ops->rule_size, GFP_KERNEL);
+       nlrule = kzalloc(ops->rule_size, GFP_KERNEL_ACCOUNT);
        if (!nlrule) {
                err = -ENOMEM;
                goto errout;
index 73721a4448bd4cc1d4057e69f4b18c105ec5e947..d38124bd1b94d33e265479bc0090ce047be0a2d7 100644 (file)
@@ -215,7 +215,7 @@ static void devinet_sysctl_unregister(struct in_device *idev)
 
 static struct in_ifaddr *inet_alloc_ifa(void)
 {
-       return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
+       return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL_ACCOUNT);
 }
 
 static void inet_rcu_free_ifa(struct rcu_head *head)
index 25cf387cca5b2dd2eadd731359e620615910c11d..8060524f425667d008470c2826f2ac835c8e25d2 100644 (file)
@@ -2380,11 +2380,11 @@ void __init fib_trie_init(void)
 {
        fn_alias_kmem = kmem_cache_create("ip_fib_alias",
                                          sizeof(struct fib_alias),
-                                         0, SLAB_PANIC, NULL);
+                                         0, SLAB_PANIC | SLAB_ACCOUNT, NULL);
 
        trie_leaf_kmem = kmem_cache_create("ip_fib_trie",
                                           LEAF_SIZE,
-                                          0, SLAB_PANIC, NULL);
+                                          0, SLAB_PANIC | SLAB_ACCOUNT, NULL);
 }
 
 struct fib_table *fib_trie_table(u32 id, struct fib_table *alias)
index e2f625e394557ad05f197cdd08fee1c157ec76e6..bc330fffb4a8e86fc4cec11fb48b09ec9c41b28d 100644 (file)
@@ -1080,7 +1080,7 @@ ipv6_add_addr(struct inet6_dev *idev, struct ifa6_config *cfg,
                        goto out;
        }
 
-       ifa = kzalloc(sizeof(*ifa), gfp_flags);
+       ifa = kzalloc(sizeof(*ifa), gfp_flags | __GFP_ACCOUNT);
        if (!ifa) {
                err = -ENOBUFS;
                goto out;
index 2d650dc24349b3369c271328c70a2214b9651fda..a8f118e469b795cf60f8b4312112ecd22cc7df7a 100644 (file)
@@ -2449,8 +2449,8 @@ int __init fib6_init(void)
        int ret = -ENOMEM;
 
        fib6_node_kmem = kmem_cache_create("fib6_nodes",
-                                          sizeof(struct fib6_node),
-                                          0, SLAB_HWCACHE_ALIGN,
+                                          sizeof(struct fib6_node), 0,
+                                          SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT,
                                           NULL);
        if (!fib6_node_kmem)
                goto out;
index 7b756a7dc03636c63d92491a72643e9a3a3d39c0..5f7286acca338327b107849c8a25acecf6e80509 100644 (file)
@@ -6638,7 +6638,7 @@ int __init ip6_route_init(void)
        ret = -ENOMEM;
        ip6_dst_ops_template.kmem_cachep =
                kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
-                                 SLAB_HWCACHE_ALIGN, NULL);
+                                 SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL);
        if (!ip6_dst_ops_template.kmem_cachep)
                goto out;