From 5d097056c9a017a3b720849efb5432f37acabbac Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Thu, 14 Jan 2016 15:18:21 -0800 Subject: kmemcg: account certain kmem allocations to memcg Mark those kmem allocations that are known to be easily triggered from userspace as __GFP_ACCOUNT/SLAB_ACCOUNT, which makes them accounted to memcg. For the list, see below: - threadinfo - task_struct - task_delay_info - pid - cred - mm_struct - vm_area_struct and vm_region (nommu) - anon_vma and anon_vma_chain - signal_struct - sighand_struct - fs_struct - files_struct - fdtable and fdtable->full_fds_bits - dentry and external_name - inode for all filesystems. This is the most tedious part, because most filesystems overwrite the alloc_inode method. The list is far from complete, so feel free to add more objects. Nevertheless, it should be close to "account everything" approach and keep most workloads within bounds. Malevolent users will be able to breach the limit, but this was possible even with the former "account everything" approach (simply because it did not account everything in fact). 
[akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Vladimir Davydov Acked-by: Johannes Weiner Acked-by: Michal Hocko Cc: Tejun Heo Cc: Greg Thelen Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- net/socket.c | 2 +- net/sunrpc/rpc_pipe.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/socket.c b/net/socket.c index 91c2de6f5020..c044d1e8508c 100644 --- a/net/socket.c +++ b/net/socket.c @@ -294,7 +294,7 @@ static int init_inodecache(void) 0, (SLAB_HWCACHE_ALIGN | SLAB_RECLAIM_ACCOUNT | - SLAB_MEM_SPREAD), + SLAB_MEM_SPREAD | SLAB_ACCOUNT), init_once); if (sock_inode_cachep == NULL) return -ENOMEM; diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index d81186d34558..14f45bf0410c 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -1500,7 +1500,7 @@ int register_rpc_pipefs(void) rpc_inode_cachep = kmem_cache_create("rpc_inode_cache", sizeof(struct rpc_inode), 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| - SLAB_MEM_SPREAD), + SLAB_MEM_SPREAD|SLAB_ACCOUNT), init_once); if (!rpc_inode_cachep) return -ENOMEM; -- cgit v1.2.3-58-ga151 From 9ee11ba4251dddf1b0e507d184b25b1bd7820773 Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Thu, 14 Jan 2016 15:19:41 -0800 Subject: memcg: do not allow to disable tcp accounting after limit is set There are two bits defined for cg_proto->flags - MEMCG_SOCK_ACTIVATED and MEMCG_SOCK_ACTIVE - both are set in tcp_update_limit, but the former is never cleared while the latter can be cleared by unsetting the limit. This allows to disable tcp socket accounting for new sockets after it was enabled by writing -1 to memory.kmem.tcp.limit_in_bytes while still guaranteeing that memcg_socket_limit_enabled static key will be decremented on memcg destruction. This functionality looks dubious, because it is not clear what a use case would be. 
By enabling tcp accounting a user accepts the price. If they then find the performance degradation unacceptable, they can always restart their workload with tcp accounting disabled. It does not seem there is any need to flip it while the workload is running. Besides, it contradicts how the kmem accounting API works: writing whatever to memory.kmem.limit_in_bytes enables kmem accounting for the cgroup in question, after which it cannot be disabled. Therefore one might expect that writing -1 to memory.kmem.tcp.limit_in_bytes just enables socket accounting w/o limiting it, which might be useful by itself, but it isn't true. Since this API peculiarity is not documented anywhere, I propose to drop it. This will allow us to simplify the code by dropping cg_proto->flags. Signed-off-by: Vladimir Davydov Cc: Johannes Weiner Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 12 +----------- mm/memcontrol.c | 2 +- net/ipv4/tcp_memcontrol.c | 17 +++++------------ 3 files changed, 7 insertions(+), 24 deletions(-) (limited to 'net') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 5c97265c1c6e..78a1ec2e23fc 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -85,22 +85,12 @@ enum mem_cgroup_events_target { MEM_CGROUP_NTARGETS, }; -/* - * Bits in struct cg_proto.flags - */ -enum cg_proto_flags { - /* Currently active and new sockets should be assigned to cgroups */ - MEMCG_SOCK_ACTIVE, - /* It was ever activated; we must disarm static keys on destruction */ - MEMCG_SOCK_ACTIVATED, -}; - struct cg_proto { struct page_counter memory_allocated; /* Current allocated memory. */ struct percpu_counter sockets_allocated; /* Current number of sockets. 
*/ int memory_pressure; + bool active; long sysctl_mem[3]; - unsigned long flags; /* * memcg field is used to find which memcg we belong directly * Each memcg struct can hold more than one cg_proto, so container_of diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 4bd6c4513393..0bc140d998ad 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -316,7 +316,7 @@ void sock_update_memcg(struct sock *sk) rcu_read_lock(); memcg = mem_cgroup_from_task(current); cg_proto = sk->sk_prot->proto_cgroup(memcg); - if (cg_proto && test_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags) && + if (cg_proto && cg_proto->active && css_tryget_online(&memcg->css)) { sk->sk_cgrp = cg_proto; } diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c index 2379c1b4efb2..d07579ada001 100644 --- a/net/ipv4/tcp_memcontrol.c +++ b/net/ipv4/tcp_memcontrol.c @@ -48,7 +48,7 @@ void tcp_destroy_cgroup(struct mem_cgroup *memcg) percpu_counter_destroy(&cg_proto->sockets_allocated); - if (test_bit(MEMCG_SOCK_ACTIVATED, &cg_proto->flags)) + if (cg_proto->active) static_key_slow_dec(&memcg_socket_limit_enabled); } @@ -72,11 +72,9 @@ static int tcp_update_limit(struct mem_cgroup *memcg, unsigned long nr_pages) cg_proto->sysctl_mem[i] = min_t(long, nr_pages, sysctl_tcp_mem[i]); - if (nr_pages == PAGE_COUNTER_MAX) - clear_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags); - else { + if (!cg_proto->active) { /* - * The active bit needs to be written after the static_key + * The active flag needs to be written after the static_key * update. This is what guarantees that the socket activation * function is the last one to run. See sock_update_memcg() for * details, and note that we don't mark any socket as belonging @@ -90,14 +88,9 @@ static int tcp_update_limit(struct mem_cgroup *memcg, unsigned long nr_pages) * We never race with the readers in sock_update_memcg(), * because when this value change, the code to process it is not * patched in yet. 
- * - * The activated bit is used to guarantee that no two writers - * will do the update in the same memcg. Without that, we can't - * properly shutdown the static key. */ - if (!test_and_set_bit(MEMCG_SOCK_ACTIVATED, &cg_proto->flags)) - static_key_slow_inc(&memcg_socket_limit_enabled); - set_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags); + static_key_slow_inc(&memcg_socket_limit_enabled); + cg_proto->active = true; } return 0; -- cgit v1.2.3-58-ga151 From 3d596f7b907b0281b997cf30c92994a71ad0a1a9 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 14 Jan 2016 15:21:05 -0800 Subject: net: tcp_memcontrol: protect all tcp_memcontrol calls by jump-label Move the jump-label from sock_update_memcg() and sock_release_memcg() to the callsite, and so eliminate those function calls when socket accounting is not enabled. This also eliminates the need for dummy functions because the calls will be optimized away if the Kconfig options are not enabled. Signed-off-by: Johannes Weiner Acked-by: David S. 
Miller Reviewed-by: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 9 -------- mm/memcontrol.c | 56 +++++++++++++++++++++------------------------- net/core/sock.c | 9 ++------ net/ipv4/tcp.c | 3 ++- net/ipv4/tcp_ipv4.c | 4 +++- 5 files changed, 32 insertions(+), 49 deletions(-) (limited to 'net') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index d0c724f53691..85c437b0cbc0 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -694,17 +694,8 @@ static inline void mem_cgroup_wb_stats(struct bdi_writeback *wb, #endif /* CONFIG_CGROUP_WRITEBACK */ struct sock; -#if defined(CONFIG_INET) && defined(CONFIG_MEMCG_KMEM) void sock_update_memcg(struct sock *sk); void sock_release_memcg(struct sock *sk); -#else -static inline void sock_update_memcg(struct sock *sk) -{ -} -static inline void sock_release_memcg(struct sock *sk) -{ -} -#endif /* CONFIG_INET && CONFIG_MEMCG_KMEM */ #ifdef CONFIG_MEMCG_KMEM extern struct static_key memcg_kmem_enabled_key; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 44ed2dee8f0c..d9344dad207e 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -293,46 +293,40 @@ static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id) void sock_update_memcg(struct sock *sk) { - if (mem_cgroup_sockets_enabled) { - struct mem_cgroup *memcg; - struct cg_proto *cg_proto; + struct mem_cgroup *memcg; + struct cg_proto *cg_proto; - BUG_ON(!sk->sk_prot->proto_cgroup); + BUG_ON(!sk->sk_prot->proto_cgroup); - /* Socket cloning can throw us here with sk_cgrp already - * filled. It won't however, necessarily happen from - * process context. So the test for root memcg given - * the current task's memcg won't help us in this case. - * - * Respecting the original socket's memcg is a better - * decision in this case. 
- */ - if (sk->sk_cgrp) { - BUG_ON(mem_cgroup_is_root(sk->sk_cgrp->memcg)); - css_get(&sk->sk_cgrp->memcg->css); - return; - } + /* Socket cloning can throw us here with sk_cgrp already + * filled. It won't however, necessarily happen from + * process context. So the test for root memcg given + * the current task's memcg won't help us in this case. + * + * Respecting the original socket's memcg is a better + * decision in this case. + */ + if (sk->sk_cgrp) { + BUG_ON(mem_cgroup_is_root(sk->sk_cgrp->memcg)); + css_get(&sk->sk_cgrp->memcg->css); + return; + } - rcu_read_lock(); - memcg = mem_cgroup_from_task(current); - cg_proto = sk->sk_prot->proto_cgroup(memcg); - if (cg_proto && cg_proto->active && - css_tryget_online(&memcg->css)) { - sk->sk_cgrp = cg_proto; - } - rcu_read_unlock(); + rcu_read_lock(); + memcg = mem_cgroup_from_task(current); + cg_proto = sk->sk_prot->proto_cgroup(memcg); + if (cg_proto && cg_proto->active && + css_tryget_online(&memcg->css)) { + sk->sk_cgrp = cg_proto; } + rcu_read_unlock(); } EXPORT_SYMBOL(sock_update_memcg); void sock_release_memcg(struct sock *sk) { - if (mem_cgroup_sockets_enabled && sk->sk_cgrp) { - struct mem_cgroup *memcg; - WARN_ON(!sk->sk_cgrp->memcg); - memcg = sk->sk_cgrp->memcg; - css_put(&sk->sk_cgrp->memcg->css); - } + WARN_ON(!sk->sk_cgrp->memcg); + css_put(&sk->sk_cgrp->memcg->css); } struct cg_proto *tcp_proto_cgroup(struct mem_cgroup *memcg) diff --git a/net/core/sock.c b/net/core/sock.c index 51270238e269..6c5dab01105b 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1507,12 +1507,6 @@ void sk_free(struct sock *sk) } EXPORT_SYMBOL(sk_free); -static void sk_update_clone(const struct sock *sk, struct sock *newsk) -{ - if (mem_cgroup_sockets_enabled && sk->sk_cgrp) - sock_update_memcg(newsk); -} - /** * sk_clone_lock - clone a socket, and lock its clone * @sk: the socket to clone @@ -1607,7 +1601,8 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) sk_set_socket(newsk, NULL); 
newsk->sk_wq = NULL; - sk_update_clone(sk, newsk); + if (mem_cgroup_sockets_enabled && sk->sk_cgrp) + sock_update_memcg(newsk); if (newsk->sk_prot->sockets_allocated) sk_sockets_allocated_inc(newsk); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 7bb1b091efd1..fd17eec93525 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -422,7 +422,8 @@ void tcp_init_sock(struct sock *sk) sk->sk_rcvbuf = sysctl_tcp_rmem[1]; local_bh_disable(); - sock_update_memcg(sk); + if (mem_cgroup_sockets_enabled) + sock_update_memcg(sk); sk_sockets_allocated_inc(sk); local_bh_enable(); } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 65947c1f4733..eb39e02899e5 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1818,7 +1818,9 @@ void tcp_v4_destroy_sock(struct sock *sk) tcp_saved_syn_free(tp); sk_sockets_allocated_dec(sk); - sock_release_memcg(sk); + + if (mem_cgroup_sockets_enabled && sk->sk_cgrp) + sock_release_memcg(sk); } EXPORT_SYMBOL(tcp_v4_destroy_sock); -- cgit v1.2.3-58-ga151 From af95d7df4059cfeab7e7c244f3564214aada7dad Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 14 Jan 2016 15:21:08 -0800 Subject: net: tcp_memcontrol: remove dead per-memcg count of allocated sockets The number of allocated sockets is used for calculations in the soft limit phase, where packets are accepted but the socket is under memory pressure. Since there is no soft limit phase in tcp_memcontrol, and memory pressure is only entered when packets are already dropped, this is actually dead code. Remove it. As this is the last user of parent_cg_proto(), remove that too. Signed-off-by: Johannes Weiner Acked-by: David S. 
Miller Reviewed-by: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 1 - include/net/sock.h | 39 +++------------------------------------ net/ipv4/tcp_memcontrol.c | 3 --- 3 files changed, 3 insertions(+), 40 deletions(-) (limited to 'net') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 85c437b0cbc0..15acc04ebdd3 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -87,7 +87,6 @@ enum mem_cgroup_events_target { struct cg_proto { struct page_counter memory_allocated; /* Current allocated memory. */ - struct percpu_counter sockets_allocated; /* Current number of sockets. */ int memory_pressure; bool active; long sysctl_mem[3]; diff --git a/include/net/sock.h b/include/net/sock.h index d3b035c7362b..1f15937ec208 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1098,19 +1098,9 @@ static inline void sk_refcnt_debug_release(const struct sock *sk) #if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_NET) extern struct static_key memcg_socket_limit_enabled; -static inline struct cg_proto *parent_cg_proto(struct proto *proto, - struct cg_proto *cg_proto) -{ - return proto->proto_cgroup(parent_mem_cgroup(cg_proto->memcg)); -} #define mem_cgroup_sockets_enabled static_key_false(&memcg_socket_limit_enabled) #else #define mem_cgroup_sockets_enabled 0 -static inline struct cg_proto *parent_cg_proto(struct proto *proto, - struct cg_proto *cg_proto) -{ - return NULL; -} #endif static inline bool sk_stream_memory_free(const struct sock *sk) @@ -1236,41 +1226,18 @@ sk_memory_allocated_sub(struct sock *sk, int amt) static inline void sk_sockets_allocated_dec(struct sock *sk) { - struct proto *prot = sk->sk_prot; - - if (mem_cgroup_sockets_enabled && sk->sk_cgrp) { - struct cg_proto *cg_proto = sk->sk_cgrp; - - for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto)) - percpu_counter_dec(&cg_proto->sockets_allocated); - } - - 
percpu_counter_dec(prot->sockets_allocated); + percpu_counter_dec(sk->sk_prot->sockets_allocated); } static inline void sk_sockets_allocated_inc(struct sock *sk) { - struct proto *prot = sk->sk_prot; - - if (mem_cgroup_sockets_enabled && sk->sk_cgrp) { - struct cg_proto *cg_proto = sk->sk_cgrp; - - for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto)) - percpu_counter_inc(&cg_proto->sockets_allocated); - } - - percpu_counter_inc(prot->sockets_allocated); + percpu_counter_inc(sk->sk_prot->sockets_allocated); } static inline int sk_sockets_allocated_read_positive(struct sock *sk) { - struct proto *prot = sk->sk_prot; - - if (mem_cgroup_sockets_enabled && sk->sk_cgrp) - return percpu_counter_read_positive(&sk->sk_cgrp->sockets_allocated); - - return percpu_counter_read_positive(prot->sockets_allocated); + return percpu_counter_read_positive(sk->sk_prot->sockets_allocated); } static inline int diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c index d07579ada001..6759e0d6bba1 100644 --- a/net/ipv4/tcp_memcontrol.c +++ b/net/ipv4/tcp_memcontrol.c @@ -32,7 +32,6 @@ int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss) counter_parent = &parent_cg->memory_allocated; page_counter_init(&cg_proto->memory_allocated, counter_parent); - percpu_counter_init(&cg_proto->sockets_allocated, 0, GFP_KERNEL); return 0; } @@ -46,8 +45,6 @@ void tcp_destroy_cgroup(struct mem_cgroup *memcg) if (!cg_proto) return; - percpu_counter_destroy(&cg_proto->sockets_allocated); - if (cg_proto->active) static_key_slow_dec(&memcg_socket_limit_enabled); -- cgit v1.2.3-58-ga151 From 80f23124f57c77915a7b4201d8dcba38a38b23f0 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 14 Jan 2016 15:21:11 -0800 Subject: net: tcp_memcontrol: simplify the per-memcg limit access tcp_memcontrol replicates the global sysctl_mem limit array per cgroup, but it only ever sets these entries to the value of the memory_allocated page_counter limit. Use the latter directly. 
Signed-off-by: Johannes Weiner Reviewed-by: Vladimir Davydov Acked-by: David S. Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 1 - include/net/sock.h | 8 +++++--- net/ipv4/tcp_memcontrol.c | 8 -------- 3 files changed, 5 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 15acc04ebdd3..6c91c1b73951 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -89,7 +89,6 @@ struct cg_proto { struct page_counter memory_allocated; /* Current allocated memory. */ int memory_pressure; bool active; - long sysctl_mem[3]; /* * memcg field is used to find which memcg we belong directly * Each memcg struct can hold more than one cg_proto, so container_of diff --git a/include/net/sock.h b/include/net/sock.h index 1f15937ec208..8b1f8e5d3a48 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1162,10 +1162,12 @@ static inline void sk_enter_memory_pressure(struct sock *sk) static inline long sk_prot_mem_limits(const struct sock *sk, int index) { - long *prot = sk->sk_prot->sysctl_mem; + long limit = sk->sk_prot->sysctl_mem[index]; + if (mem_cgroup_sockets_enabled && sk->sk_cgrp) - prot = sk->sk_cgrp->sysctl_mem; - return prot[index]; + limit = min_t(long, limit, sk->sk_cgrp->memory_allocated.limit); + + return limit; } static inline void memcg_memory_allocated_add(struct cg_proto *prot, diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c index 6759e0d6bba1..ef4268d12e43 100644 --- a/net/ipv4/tcp_memcontrol.c +++ b/net/ipv4/tcp_memcontrol.c @@ -21,9 +21,6 @@ int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss) if (!cg_proto) return 0; - cg_proto->sysctl_mem[0] = sysctl_tcp_mem[0]; - cg_proto->sysctl_mem[1] = sysctl_tcp_mem[1]; - cg_proto->sysctl_mem[2] = sysctl_tcp_mem[2]; cg_proto->memory_pressure = 0; cg_proto->memcg = memcg; @@ -54,7 +51,6 @@ EXPORT_SYMBOL(tcp_destroy_cgroup); static int 
tcp_update_limit(struct mem_cgroup *memcg, unsigned long nr_pages) { struct cg_proto *cg_proto; - int i; int ret; cg_proto = tcp_prot.proto_cgroup(memcg); @@ -65,10 +61,6 @@ static int tcp_update_limit(struct mem_cgroup *memcg, unsigned long nr_pages) if (ret) return ret; - for (i = 0; i < 3; i++) - cg_proto->sysctl_mem[i] = min_t(long, nr_pages, - sysctl_tcp_mem[i]); - if (!cg_proto->active) { /* * The active flag needs to be written after the static_key -- cgit v1.2.3-58-ga151 From e805605c721021879a1469bdae45c6f80bc985f4 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 14 Jan 2016 15:21:14 -0800 Subject: net: tcp_memcontrol: sanitize tcp memory accounting callbacks There won't be a tcp control soft limit, so integrating the memcg code into the global skmem limiting scheme complicates things unnecessarily. Replace this with simple and clear charge and uncharge calls--hidden behind a jump label--to account skb memory. Note that this is not purely aesthetic: as a result of shoehorning the per-memcg code into the same memory accounting functions that handle the global level, the old code would compare the per-memcg consumption against the smaller of the per-memcg limit and the global limit. This allowed the total consumption of multiple sockets to exceed the global limit, as long as the individual sockets stayed within bounds. After this change, the code will always compare the per-memcg consumption to the per-memcg limit, and the global consumption to the global limit, and thus close this loophole. Without a soft limit, the per-memcg memory pressure state in sockets is generally questionable. However, we did it until now, so we continue to enter it when the hard limit is hit, and packets are dropped, to let other sockets in the cgroup know that they shouldn't grow their transmit windows, either. 
However, keep it simple in the new callback model and leave memory pressure lazily when the next packet is accepted (as opposed to doing it synchronously when packets are processed). When packets are dropped, network performance will already be in the toilet, so that should be a reasonable trade-off. As described above, consumption is now checked on the per-memcg level and the global level separately. Likewise, memory pressure states are maintained on both the per-memcg level and the global level, and a socket is considered under pressure when either level asserts as much. Signed-off-by: Johannes Weiner Reviewed-by: Vladimir Davydov Acked-by: David S. Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 19 +++++++++----- include/net/sock.h | 64 ++++++---------------------------------------- include/net/tcp.h | 5 ++-- mm/memcontrol.c | 32 +++++++++++++++++++++++ net/core/sock.c | 26 +++++++++++-------- net/ipv4/tcp_output.c | 7 +++-- 6 files changed, 77 insertions(+), 76 deletions(-) (limited to 'net') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 6c91c1b73951..e4e77bd1dd39 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -660,12 +660,6 @@ void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx) } #endif /* CONFIG_MEMCG */ -enum { - UNDER_LIMIT, - SOFT_LIMIT, - OVER_LIMIT, -}; - #ifdef CONFIG_CGROUP_WRITEBACK struct list_head *mem_cgroup_cgwb_list(struct mem_cgroup *memcg); @@ -694,6 +688,19 @@ static inline void mem_cgroup_wb_stats(struct bdi_writeback *wb, struct sock; void sock_update_memcg(struct sock *sk); void sock_release_memcg(struct sock *sk); +bool mem_cgroup_charge_skmem(struct cg_proto *proto, unsigned int nr_pages); +void mem_cgroup_uncharge_skmem(struct cg_proto *proto, unsigned int nr_pages); +#if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_INET) +static inline bool mem_cgroup_under_socket_pressure(struct cg_proto *proto) +{ + 
return proto->memory_pressure; +} +#else +static inline bool mem_cgroup_under_pressure(struct cg_proto *proto) +{ + return false; +} +#endif #ifdef CONFIG_MEMCG_KMEM extern struct static_key memcg_kmem_enabled_key; diff --git a/include/net/sock.h b/include/net/sock.h index 8b1f8e5d3a48..94a6c1a740b9 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1129,8 +1129,9 @@ static inline bool sk_under_memory_pressure(const struct sock *sk) if (!sk->sk_prot->memory_pressure) return false; - if (mem_cgroup_sockets_enabled && sk->sk_cgrp) - return !!sk->sk_cgrp->memory_pressure; + if (mem_cgroup_sockets_enabled && sk->sk_cgrp && + mem_cgroup_under_socket_pressure(sk->sk_cgrp)) + return true; return !!*sk->sk_prot->memory_pressure; } @@ -1144,9 +1145,6 @@ static inline void sk_leave_memory_pressure(struct sock *sk) if (*memory_pressure) *memory_pressure = 0; - - if (mem_cgroup_sockets_enabled && sk->sk_cgrp) - sk->sk_cgrp->memory_pressure = 0; } static inline void sk_enter_memory_pressure(struct sock *sk) @@ -1154,76 +1152,30 @@ static inline void sk_enter_memory_pressure(struct sock *sk) if (!sk->sk_prot->enter_memory_pressure) return; - if (mem_cgroup_sockets_enabled && sk->sk_cgrp) - sk->sk_cgrp->memory_pressure = 1; - sk->sk_prot->enter_memory_pressure(sk); } static inline long sk_prot_mem_limits(const struct sock *sk, int index) { - long limit = sk->sk_prot->sysctl_mem[index]; - - if (mem_cgroup_sockets_enabled && sk->sk_cgrp) - limit = min_t(long, limit, sk->sk_cgrp->memory_allocated.limit); - - return limit; -} - -static inline void memcg_memory_allocated_add(struct cg_proto *prot, - unsigned long amt, - int *parent_status) -{ - struct page_counter *counter; - - if (page_counter_try_charge(&prot->memory_allocated, amt, &counter)) - return; - - page_counter_charge(&prot->memory_allocated, amt); - *parent_status = OVER_LIMIT; -} - -static inline void memcg_memory_allocated_sub(struct cg_proto *prot, - unsigned long amt) -{ - 
page_counter_uncharge(&prot->memory_allocated, amt); + return sk->sk_prot->sysctl_mem[index]; } static inline long sk_memory_allocated(const struct sock *sk) { - struct proto *prot = sk->sk_prot; - - if (mem_cgroup_sockets_enabled && sk->sk_cgrp) - return page_counter_read(&sk->sk_cgrp->memory_allocated); - - return atomic_long_read(prot->memory_allocated); + return atomic_long_read(sk->sk_prot->memory_allocated); } static inline long -sk_memory_allocated_add(struct sock *sk, int amt, int *parent_status) +sk_memory_allocated_add(struct sock *sk, int amt) { - struct proto *prot = sk->sk_prot; - - if (mem_cgroup_sockets_enabled && sk->sk_cgrp) { - memcg_memory_allocated_add(sk->sk_cgrp, amt, parent_status); - /* update the root cgroup regardless */ - atomic_long_add_return(amt, prot->memory_allocated); - return page_counter_read(&sk->sk_cgrp->memory_allocated); - } - - return atomic_long_add_return(amt, prot->memory_allocated); + return atomic_long_add_return(amt, sk->sk_prot->memory_allocated); } static inline void sk_memory_allocated_sub(struct sock *sk, int amt) { - struct proto *prot = sk->sk_prot; - - if (mem_cgroup_sockets_enabled && sk->sk_cgrp) - memcg_memory_allocated_sub(sk->sk_cgrp, amt); - - atomic_long_sub(amt, prot->memory_allocated); + atomic_long_sub(amt, sk->sk_prot->memory_allocated); } static inline void sk_sockets_allocated_dec(struct sock *sk) diff --git a/include/net/tcp.h b/include/net/tcp.h index a80255f4ca33..d9df80deba31 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -289,8 +289,9 @@ extern int tcp_memory_pressure; /* optimized version of sk_under_memory_pressure() for TCP sockets */ static inline bool tcp_under_memory_pressure(const struct sock *sk) { - if (mem_cgroup_sockets_enabled && sk->sk_cgrp) - return !!sk->sk_cgrp->memory_pressure; + if (mem_cgroup_sockets_enabled && sk->sk_cgrp && + mem_cgroup_under_socket_pressure(sk->sk_cgrp)) + return true; return tcp_memory_pressure; } diff --git a/mm/memcontrol.c b/mm/memcontrol.c 
index d9344dad207e..f5de783860b8 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -338,6 +338,38 @@ struct cg_proto *tcp_proto_cgroup(struct mem_cgroup *memcg) } EXPORT_SYMBOL(tcp_proto_cgroup); +/** + * mem_cgroup_charge_skmem - charge socket memory + * @proto: proto to charge + * @nr_pages: number of pages to charge + * + * Charges @nr_pages to @proto. Returns %true if the charge fit within + * @proto's configured limit, %false if the charge had to be forced. + */ +bool mem_cgroup_charge_skmem(struct cg_proto *proto, unsigned int nr_pages) +{ + struct page_counter *counter; + + if (page_counter_try_charge(&proto->memory_allocated, + nr_pages, &counter)) { + proto->memory_pressure = 0; + return true; + } + page_counter_charge(&proto->memory_allocated, nr_pages); + proto->memory_pressure = 1; + return false; +} + +/** + * mem_cgroup_uncharge_skmem - uncharge socket memory + * @proto - proto to uncharge + * @nr_pages - number of pages to uncharge + */ +void mem_cgroup_uncharge_skmem(struct cg_proto *proto, unsigned int nr_pages) +{ + page_counter_uncharge(&proto->memory_allocated, nr_pages); +} + #endif #ifdef CONFIG_MEMCG_KMEM diff --git a/net/core/sock.c b/net/core/sock.c index 6c5dab01105b..89ae859d2dc5 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2084,27 +2084,27 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind) struct proto *prot = sk->sk_prot; int amt = sk_mem_pages(size); long allocated; - int parent_status = UNDER_LIMIT; sk->sk_forward_alloc += amt * SK_MEM_QUANTUM; - allocated = sk_memory_allocated_add(sk, amt, &parent_status); + allocated = sk_memory_allocated_add(sk, amt); + + if (mem_cgroup_sockets_enabled && sk->sk_cgrp && + !mem_cgroup_charge_skmem(sk->sk_cgrp, amt)) + goto suppress_allocation; /* Under limit. */ - if (parent_status == UNDER_LIMIT && - allocated <= sk_prot_mem_limits(sk, 0)) { + if (allocated <= sk_prot_mem_limits(sk, 0)) { sk_leave_memory_pressure(sk); return 1; } - /* Under pressure. 
(we or our parents) */ - if ((parent_status > SOFT_LIMIT) || - allocated > sk_prot_mem_limits(sk, 1)) + /* Under pressure. */ + if (allocated > sk_prot_mem_limits(sk, 1)) sk_enter_memory_pressure(sk); - /* Over hard limit (we or our parents) */ - if ((parent_status == OVER_LIMIT) || - (allocated > sk_prot_mem_limits(sk, 2))) + /* Over hard limit. */ + if (allocated > sk_prot_mem_limits(sk, 2)) goto suppress_allocation; /* guarantee minimum buffer size under pressure */ @@ -2153,6 +2153,9 @@ suppress_allocation: sk_memory_allocated_sub(sk, amt); + if (mem_cgroup_sockets_enabled && sk->sk_cgrp) + mem_cgroup_uncharge_skmem(sk->sk_cgrp, amt); + return 0; } EXPORT_SYMBOL(__sk_mem_schedule); @@ -2168,6 +2171,9 @@ void __sk_mem_reclaim(struct sock *sk, int amount) sk_memory_allocated_sub(sk, amount); sk->sk_forward_alloc -= amount << SK_MEM_QUANTUM_SHIFT; + if (mem_cgroup_sockets_enabled && sk->sk_cgrp) + mem_cgroup_uncharge_skmem(sk->sk_cgrp, amount); + if (sk_under_memory_pressure(sk) && (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0))) sk_leave_memory_pressure(sk); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 412a920fe0ec..493b48945f0c 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2813,13 +2813,16 @@ begin_fwd: */ void sk_forced_mem_schedule(struct sock *sk, int size) { - int amt, status; + int amt; if (size <= sk->sk_forward_alloc) return; amt = sk_mem_pages(size); sk->sk_forward_alloc += amt * SK_MEM_QUANTUM; - sk_memory_allocated_add(sk, amt, &status); + sk_memory_allocated_add(sk, amt); + + if (mem_cgroup_sockets_enabled && sk->sk_cgrp) + mem_cgroup_charge_skmem(sk->sk_cgrp, amt); } /* Send a FIN. The caller locks the socket for us. 
-- cgit v1.2.3-58-ga151 From baac50bbc3cdfd184ebf586b1704edbfcee866df Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 14 Jan 2016 15:21:17 -0800 Subject: net: tcp_memcontrol: simplify linkage between socket and page counter There won't be any separate counters for socket memory consumed by protocols other than TCP in the future. Remove the indirection and link sockets directly to their owning memory cgroup. Signed-off-by: Johannes Weiner Reviewed-by: Vladimir Davydov Acked-by: David S. Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 20 ++++--------- include/net/sock.h | 25 +++-------------- include/net/tcp.h | 4 +-- include/net/tcp_memcontrol.h | 1 - mm/memcontrol.c | 57 +++++++++++++++---------------------- net/core/sock.c | 52 +++++----------------------------- net/ipv4/tcp_ipv4.c | 7 +---- net/ipv4/tcp_memcontrol.c | 67 +++++++++++++++++--------------------------- net/ipv4/tcp_output.c | 4 +-- net/ipv6/tcp_ipv6.c | 3 -- 10 files changed, 69 insertions(+), 171 deletions(-) (limited to 'net') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index e4e77bd1dd39..7c085e4636ba 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -89,16 +89,6 @@ struct cg_proto { struct page_counter memory_allocated; /* Current allocated memory. */ int memory_pressure; bool active; - /* - * memcg field is used to find which memcg we belong directly - * Each memcg struct can hold more than one cg_proto, so container_of - * won't really cut. - * - * The elegant solution would be having an inverse function to - * proto_cgroup in struct proto, but that means polluting the structure - * for everybody, instead of just for memcg users. 
- */ - struct mem_cgroup *memcg; }; #ifdef CONFIG_MEMCG @@ -688,15 +678,15 @@ static inline void mem_cgroup_wb_stats(struct bdi_writeback *wb, struct sock; void sock_update_memcg(struct sock *sk); void sock_release_memcg(struct sock *sk); -bool mem_cgroup_charge_skmem(struct cg_proto *proto, unsigned int nr_pages); -void mem_cgroup_uncharge_skmem(struct cg_proto *proto, unsigned int nr_pages); +bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages); +void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages); #if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_INET) -static inline bool mem_cgroup_under_socket_pressure(struct cg_proto *proto) +static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg) { - return proto->memory_pressure; + return memcg->tcp_mem.memory_pressure; } #else -static inline bool mem_cgroup_under_pressure(struct cg_proto *proto) +static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg) { return false; } diff --git a/include/net/sock.h b/include/net/sock.h index 94a6c1a740b9..be96a8dcbc74 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -71,22 +71,6 @@ #include #include -struct cgroup; -struct cgroup_subsys; -#ifdef CONFIG_NET -int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss); -void mem_cgroup_sockets_destroy(struct mem_cgroup *memcg); -#else -static inline -int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss) -{ - return 0; -} -static inline -void mem_cgroup_sockets_destroy(struct mem_cgroup *memcg) -{ -} -#endif /* * This structure really needs to be cleaned up. 
* Most of it is for TCP, and not used by any of @@ -245,7 +229,6 @@ struct sock_common { /* public: */ }; -struct cg_proto; /** * struct sock - network layer representation of sockets * @__sk_common: shared layout with inet_timewait_sock @@ -310,7 +293,7 @@ struct cg_proto; * @sk_security: used by security modules * @sk_mark: generic packet mark * @sk_cgrp_data: cgroup data for this cgroup - * @sk_cgrp: this socket's cgroup-specific proto data + * @sk_memcg: this socket's memory cgroup association * @sk_write_pending: a write to stream socket waits to start * @sk_state_change: callback to indicate change in the state of the sock * @sk_data_ready: callback to indicate there is data to be processed @@ -446,7 +429,7 @@ struct sock { void *sk_security; #endif struct sock_cgroup_data sk_cgrp_data; - struct cg_proto *sk_cgrp; + struct mem_cgroup *sk_memcg; void (*sk_state_change)(struct sock *sk); void (*sk_data_ready)(struct sock *sk); void (*sk_write_space)(struct sock *sk); @@ -1129,8 +1112,8 @@ static inline bool sk_under_memory_pressure(const struct sock *sk) if (!sk->sk_prot->memory_pressure) return false; - if (mem_cgroup_sockets_enabled && sk->sk_cgrp && - mem_cgroup_under_socket_pressure(sk->sk_cgrp)) + if (mem_cgroup_sockets_enabled && sk->sk_memcg && + mem_cgroup_under_socket_pressure(sk->sk_memcg)) return true; return !!*sk->sk_prot->memory_pressure; diff --git a/include/net/tcp.h b/include/net/tcp.h index d9df80deba31..8ea19977ea53 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -289,8 +289,8 @@ extern int tcp_memory_pressure; /* optimized version of sk_under_memory_pressure() for TCP sockets */ static inline bool tcp_under_memory_pressure(const struct sock *sk) { - if (mem_cgroup_sockets_enabled && sk->sk_cgrp && - mem_cgroup_under_socket_pressure(sk->sk_cgrp)) + if (mem_cgroup_sockets_enabled && sk->sk_memcg && + mem_cgroup_under_socket_pressure(sk->sk_memcg)) return true; return tcp_memory_pressure; diff --git a/include/net/tcp_memcontrol.h 
b/include/net/tcp_memcontrol.h index 05b94d9453de..3a17b16ae8aa 100644 --- a/include/net/tcp_memcontrol.h +++ b/include/net/tcp_memcontrol.h @@ -1,7 +1,6 @@ #ifndef _TCP_MEMCG_H #define _TCP_MEMCG_H -struct cg_proto *tcp_proto_cgroup(struct mem_cgroup *memcg); int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss); void tcp_destroy_cgroup(struct mem_cgroup *memcg); #endif /* _TCP_MEMCG_H */ diff --git a/mm/memcontrol.c b/mm/memcontrol.c index f5de783860b8..eaaa86126277 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -294,9 +294,6 @@ static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id) void sock_update_memcg(struct sock *sk) { struct mem_cgroup *memcg; - struct cg_proto *cg_proto; - - BUG_ON(!sk->sk_prot->proto_cgroup); /* Socket cloning can throw us here with sk_cgrp already * filled. It won't however, necessarily happen from @@ -306,68 +303,58 @@ void sock_update_memcg(struct sock *sk) * Respecting the original socket's memcg is a better * decision in this case. 
*/ - if (sk->sk_cgrp) { - BUG_ON(mem_cgroup_is_root(sk->sk_cgrp->memcg)); - css_get(&sk->sk_cgrp->memcg->css); + if (sk->sk_memcg) { + BUG_ON(mem_cgroup_is_root(sk->sk_memcg)); + css_get(&sk->sk_memcg->css); return; } rcu_read_lock(); memcg = mem_cgroup_from_task(current); - cg_proto = sk->sk_prot->proto_cgroup(memcg); - if (cg_proto && cg_proto->active && - css_tryget_online(&memcg->css)) { - sk->sk_cgrp = cg_proto; - } + if (memcg != root_mem_cgroup && + memcg->tcp_mem.active && + css_tryget_online(&memcg->css)) + sk->sk_memcg = memcg; rcu_read_unlock(); } EXPORT_SYMBOL(sock_update_memcg); void sock_release_memcg(struct sock *sk) { - WARN_ON(!sk->sk_cgrp->memcg); - css_put(&sk->sk_cgrp->memcg->css); -} - -struct cg_proto *tcp_proto_cgroup(struct mem_cgroup *memcg) -{ - if (!memcg || mem_cgroup_is_root(memcg)) - return NULL; - - return &memcg->tcp_mem; + WARN_ON(!sk->sk_memcg); + css_put(&sk->sk_memcg->css); } -EXPORT_SYMBOL(tcp_proto_cgroup); /** * mem_cgroup_charge_skmem - charge socket memory - * @proto: proto to charge + * @memcg: memcg to charge * @nr_pages: number of pages to charge * - * Charges @nr_pages to @proto. Returns %true if the charge fit within - * @proto's configured limit, %false if the charge had to be forced. + * Charges @nr_pages to @memcg. Returns %true if the charge fit within + * @memcg's configured limit, %false if the charge had to be forced. 
*/ -bool mem_cgroup_charge_skmem(struct cg_proto *proto, unsigned int nr_pages) +bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages) { struct page_counter *counter; - if (page_counter_try_charge(&proto->memory_allocated, + if (page_counter_try_charge(&memcg->tcp_mem.memory_allocated, nr_pages, &counter)) { - proto->memory_pressure = 0; + memcg->tcp_mem.memory_pressure = 0; return true; } - page_counter_charge(&proto->memory_allocated, nr_pages); - proto->memory_pressure = 1; + page_counter_charge(&memcg->tcp_mem.memory_allocated, nr_pages); + memcg->tcp_mem.memory_pressure = 1; return false; } /** * mem_cgroup_uncharge_skmem - uncharge socket memory - * @proto - proto to uncharge + * @memcg - memcg to uncharge * @nr_pages - number of pages to uncharge */ -void mem_cgroup_uncharge_skmem(struct cg_proto *proto, unsigned int nr_pages) +void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages) { - page_counter_uncharge(&proto->memory_allocated, nr_pages); + page_counter_uncharge(&memcg->tcp_mem.memory_allocated, nr_pages); } #endif @@ -3653,7 +3640,7 @@ static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss) if (ret) return ret; - return mem_cgroup_sockets_init(memcg, ss); + return tcp_init_cgroup(memcg, ss); } static void memcg_deactivate_kmem(struct mem_cgroup *memcg) @@ -3709,7 +3696,7 @@ static void memcg_destroy_kmem(struct mem_cgroup *memcg) static_key_slow_dec(&memcg_kmem_enabled_key); WARN_ON(page_counter_read(&memcg->kmem)); } - mem_cgroup_sockets_destroy(memcg); + tcp_destroy_cgroup(memcg); } #else static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss) diff --git a/net/core/sock.c b/net/core/sock.c index 89ae859d2dc5..3535bffa45f3 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -195,44 +195,6 @@ bool sk_net_capable(const struct sock *sk, int cap) } EXPORT_SYMBOL(sk_net_capable); - -#ifdef CONFIG_MEMCG_KMEM -int mem_cgroup_sockets_init(struct mem_cgroup *memcg, 
struct cgroup_subsys *ss) -{ - struct proto *proto; - int ret = 0; - - mutex_lock(&proto_list_mutex); - list_for_each_entry(proto, &proto_list, node) { - if (proto->init_cgroup) { - ret = proto->init_cgroup(memcg, ss); - if (ret) - goto out; - } - } - - mutex_unlock(&proto_list_mutex); - return ret; -out: - list_for_each_entry_continue_reverse(proto, &proto_list, node) - if (proto->destroy_cgroup) - proto->destroy_cgroup(memcg); - mutex_unlock(&proto_list_mutex); - return ret; -} - -void mem_cgroup_sockets_destroy(struct mem_cgroup *memcg) -{ - struct proto *proto; - - mutex_lock(&proto_list_mutex); - list_for_each_entry_reverse(proto, &proto_list, node) - if (proto->destroy_cgroup) - proto->destroy_cgroup(memcg); - mutex_unlock(&proto_list_mutex); -} -#endif - /* * Each address family might have different locking rules, so we have * one slock key per address family: @@ -1601,7 +1563,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) sk_set_socket(newsk, NULL); newsk->sk_wq = NULL; - if (mem_cgroup_sockets_enabled && sk->sk_cgrp) + if (mem_cgroup_sockets_enabled && sk->sk_memcg) sock_update_memcg(newsk); if (newsk->sk_prot->sockets_allocated) @@ -2089,8 +2051,8 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind) allocated = sk_memory_allocated_add(sk, amt); - if (mem_cgroup_sockets_enabled && sk->sk_cgrp && - !mem_cgroup_charge_skmem(sk->sk_cgrp, amt)) + if (mem_cgroup_sockets_enabled && sk->sk_memcg && + !mem_cgroup_charge_skmem(sk->sk_memcg, amt)) goto suppress_allocation; /* Under limit. 
*/ @@ -2153,8 +2115,8 @@ suppress_allocation: sk_memory_allocated_sub(sk, amt); - if (mem_cgroup_sockets_enabled && sk->sk_cgrp) - mem_cgroup_uncharge_skmem(sk->sk_cgrp, amt); + if (mem_cgroup_sockets_enabled && sk->sk_memcg) + mem_cgroup_uncharge_skmem(sk->sk_memcg, amt); return 0; } @@ -2171,8 +2133,8 @@ void __sk_mem_reclaim(struct sock *sk, int amount) sk_memory_allocated_sub(sk, amount); sk->sk_forward_alloc -= amount << SK_MEM_QUANTUM_SHIFT; - if (mem_cgroup_sockets_enabled && sk->sk_cgrp) - mem_cgroup_uncharge_skmem(sk->sk_cgrp, amount); + if (mem_cgroup_sockets_enabled && sk->sk_memcg) + mem_cgroup_uncharge_skmem(sk->sk_memcg, amount); if (sk_under_memory_pressure(sk) && (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0))) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index eb39e02899e5..c7d1fb50f381 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1819,7 +1819,7 @@ void tcp_v4_destroy_sock(struct sock *sk) sk_sockets_allocated_dec(sk); - if (mem_cgroup_sockets_enabled && sk->sk_cgrp) + if (mem_cgroup_sockets_enabled && sk->sk_memcg) sock_release_memcg(sk); } EXPORT_SYMBOL(tcp_v4_destroy_sock); @@ -2343,11 +2343,6 @@ struct proto tcp_prot = { #ifdef CONFIG_COMPAT .compat_setsockopt = compat_tcp_setsockopt, .compat_getsockopt = compat_tcp_getsockopt, -#endif -#ifdef CONFIG_MEMCG_KMEM - .init_cgroup = tcp_init_cgroup, - .destroy_cgroup = tcp_destroy_cgroup, - .proto_cgroup = tcp_proto_cgroup, #endif .diag_destroy = tcp_abort, }; diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c index ef4268d12e43..e5078259cbe3 100644 --- a/net/ipv4/tcp_memcontrol.c +++ b/net/ipv4/tcp_memcontrol.c @@ -8,60 +8,47 @@ int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss) { + struct mem_cgroup *parent = parent_mem_cgroup(memcg); + struct page_counter *counter_parent = NULL; /* * The root cgroup does not use page_counters, but rather, * rely on the data already collected by the network * subsystem */ - struct mem_cgroup 
*parent = parent_mem_cgroup(memcg); - struct page_counter *counter_parent = NULL; - struct cg_proto *cg_proto, *parent_cg; - - cg_proto = tcp_prot.proto_cgroup(memcg); - if (!cg_proto) + if (memcg == root_mem_cgroup) return 0; - cg_proto->memory_pressure = 0; - cg_proto->memcg = memcg; + memcg->tcp_mem.memory_pressure = 0; - parent_cg = tcp_prot.proto_cgroup(parent); - if (parent_cg) - counter_parent = &parent_cg->memory_allocated; + if (parent) + counter_parent = &parent->tcp_mem.memory_allocated; - page_counter_init(&cg_proto->memory_allocated, counter_parent); + page_counter_init(&memcg->tcp_mem.memory_allocated, counter_parent); return 0; } -EXPORT_SYMBOL(tcp_init_cgroup); void tcp_destroy_cgroup(struct mem_cgroup *memcg) { - struct cg_proto *cg_proto; - - cg_proto = tcp_prot.proto_cgroup(memcg); - if (!cg_proto) + if (memcg == root_mem_cgroup) return; - if (cg_proto->active) + if (memcg->tcp_mem.active) static_key_slow_dec(&memcg_socket_limit_enabled); - } -EXPORT_SYMBOL(tcp_destroy_cgroup); static int tcp_update_limit(struct mem_cgroup *memcg, unsigned long nr_pages) { - struct cg_proto *cg_proto; int ret; - cg_proto = tcp_prot.proto_cgroup(memcg); - if (!cg_proto) + if (memcg == root_mem_cgroup) return -EINVAL; - ret = page_counter_limit(&cg_proto->memory_allocated, nr_pages); + ret = page_counter_limit(&memcg->tcp_mem.memory_allocated, nr_pages); if (ret) return ret; - if (!cg_proto->active) { + if (!memcg->tcp_mem.active) { /* * The active flag needs to be written after the static_key * update. This is what guarantees that the socket activation @@ -79,7 +66,7 @@ static int tcp_update_limit(struct mem_cgroup *memcg, unsigned long nr_pages) * patched in yet. 
*/ static_key_slow_inc(&memcg_socket_limit_enabled); - cg_proto->active = true; + memcg->tcp_mem.active = true; } return 0; @@ -123,32 +110,32 @@ static ssize_t tcp_cgroup_write(struct kernfs_open_file *of, static u64 tcp_cgroup_read(struct cgroup_subsys_state *css, struct cftype *cft) { struct mem_cgroup *memcg = mem_cgroup_from_css(css); - struct cg_proto *cg_proto = tcp_prot.proto_cgroup(memcg); u64 val; switch (cft->private) { case RES_LIMIT: - if (!cg_proto) - return PAGE_COUNTER_MAX; - val = cg_proto->memory_allocated.limit; + if (memcg == root_mem_cgroup) + val = PAGE_COUNTER_MAX; + else + val = memcg->tcp_mem.memory_allocated.limit; val *= PAGE_SIZE; break; case RES_USAGE: - if (!cg_proto) + if (memcg == root_mem_cgroup) val = atomic_long_read(&tcp_memory_allocated); else - val = page_counter_read(&cg_proto->memory_allocated); + val = page_counter_read(&memcg->tcp_mem.memory_allocated); val *= PAGE_SIZE; break; case RES_FAILCNT: - if (!cg_proto) + if (memcg == root_mem_cgroup) return 0; - val = cg_proto->memory_allocated.failcnt; + val = memcg->tcp_mem.memory_allocated.failcnt; break; case RES_MAX_USAGE: - if (!cg_proto) + if (memcg == root_mem_cgroup) return 0; - val = cg_proto->memory_allocated.watermark; + val = memcg->tcp_mem.memory_allocated.watermark; val *= PAGE_SIZE; break; default: @@ -161,19 +148,17 @@ static ssize_t tcp_cgroup_reset(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { struct mem_cgroup *memcg; - struct cg_proto *cg_proto; memcg = mem_cgroup_from_css(of_css(of)); - cg_proto = tcp_prot.proto_cgroup(memcg); - if (!cg_proto) + if (memcg == root_mem_cgroup) return nbytes; switch (of_cft(of)->private) { case RES_MAX_USAGE: - page_counter_reset_watermark(&cg_proto->memory_allocated); + page_counter_reset_watermark(&memcg->tcp_mem.memory_allocated); break; case RES_FAILCNT: - cg_proto->memory_allocated.failcnt = 0; + memcg->tcp_mem.memory_allocated.failcnt = 0; break; } diff --git a/net/ipv4/tcp_output.c 
b/net/ipv4/tcp_output.c index 493b48945f0c..fda379cd600d 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2821,8 +2821,8 @@ void sk_forced_mem_schedule(struct sock *sk, int size) sk->sk_forward_alloc += amt * SK_MEM_QUANTUM; sk_memory_allocated_add(sk, amt); - if (mem_cgroup_sockets_enabled && sk->sk_cgrp) - mem_cgroup_charge_skmem(sk->sk_cgrp, amt); + if (mem_cgroup_sockets_enabled && sk->sk_memcg) + mem_cgroup_charge_skmem(sk->sk_memcg, amt); } /* Send a FIN. The caller locks the socket for us. diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index db9f1c318afc..4ad8edb46f7c 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1888,9 +1888,6 @@ struct proto tcpv6_prot = { #ifdef CONFIG_COMPAT .compat_setsockopt = compat_tcp_setsockopt, .compat_getsockopt = compat_tcp_getsockopt, -#endif -#ifdef CONFIG_MEMCG_KMEM - .proto_cgroup = tcp_proto_cgroup, #endif .clear_sk = tcp_v6_clear_sk, .diag_destroy = tcp_abort, -- cgit v1.2.3-58-ga151 From 80e95fe0fdcde2812c341ad4209d62dc1a7af53b Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 14 Jan 2016 15:21:20 -0800 Subject: mm: memcontrol: generalize the socket accounting jump label The unified hierarchy memory controller is going to use this jump label as well to control the networking callbacks. Move it to the memory controller code and give it a more generic name. Signed-off-by: Johannes Weiner Acked-by: Michal Hocko Reviewed-by: Vladimir Davydov Acked-by: David S. 
Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 3 +++ include/net/sock.h | 7 ------- mm/memcontrol.c | 3 +++ net/core/sock.c | 5 ----- net/ipv4/tcp_memcontrol.c | 4 ++-- 5 files changed, 8 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 7c085e4636ba..03090e8e7fff 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -681,11 +681,14 @@ void sock_release_memcg(struct sock *sk); bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages); void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages); #if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_INET) +extern struct static_key memcg_sockets_enabled_key; +#define mem_cgroup_sockets_enabled static_key_false(&memcg_sockets_enabled_key) static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg) { return memcg->tcp_mem.memory_pressure; } #else +#define mem_cgroup_sockets_enabled 0 static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg) { return false; diff --git a/include/net/sock.h b/include/net/sock.h index be96a8dcbc74..b9e7b3d863a0 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1079,13 +1079,6 @@ static inline void sk_refcnt_debug_release(const struct sock *sk) #define sk_refcnt_debug_release(sk) do { } while (0) #endif /* SOCK_REFCNT_DEBUG */ -#if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_NET) -extern struct static_key memcg_socket_limit_enabled; -#define mem_cgroup_sockets_enabled static_key_false(&memcg_socket_limit_enabled) -#else -#define mem_cgroup_sockets_enabled 0 -#endif - static inline bool sk_stream_memory_free(const struct sock *sk) { if (sk->sk_wmem_queued >= sk->sk_sndbuf) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index eaaa86126277..08ef3d2ca663 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -291,6 +291,9 @@ static inline struct mem_cgroup 
*mem_cgroup_from_id(unsigned short id) /* Writing them here to avoid exposing memcg's inner layout */ #if defined(CONFIG_INET) && defined(CONFIG_MEMCG_KMEM) +struct static_key memcg_sockets_enabled_key; +EXPORT_SYMBOL(memcg_sockets_enabled_key); + void sock_update_memcg(struct sock *sk) { struct mem_cgroup *memcg; diff --git a/net/core/sock.c b/net/core/sock.c index 3535bffa45f3..6c1c8bc93412 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -202,11 +202,6 @@ EXPORT_SYMBOL(sk_net_capable); static struct lock_class_key af_family_keys[AF_MAX]; static struct lock_class_key af_family_slock_keys[AF_MAX]; -#if defined(CONFIG_MEMCG_KMEM) -struct static_key memcg_socket_limit_enabled; -EXPORT_SYMBOL(memcg_socket_limit_enabled); -#endif - /* * Make lock validator output more readable. (we pre-construct these * strings build-time, so that runtime initialization of socket diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c index e5078259cbe3..9a22e2dfd64a 100644 --- a/net/ipv4/tcp_memcontrol.c +++ b/net/ipv4/tcp_memcontrol.c @@ -34,7 +34,7 @@ void tcp_destroy_cgroup(struct mem_cgroup *memcg) return; if (memcg->tcp_mem.active) - static_key_slow_dec(&memcg_socket_limit_enabled); + static_key_slow_dec(&memcg_sockets_enabled_key); } static int tcp_update_limit(struct mem_cgroup *memcg, unsigned long nr_pages) @@ -65,7 +65,7 @@ static int tcp_update_limit(struct mem_cgroup *memcg, unsigned long nr_pages) * because when this value change, the code to process it is not * patched in yet. */ - static_key_slow_inc(&memcg_socket_limit_enabled); + static_key_slow_inc(&memcg_sockets_enabled_key); memcg->tcp_mem.active = true; } -- cgit v1.2.3-58-ga151 From ef12947c9c5a96af549c49f10e5503f0612a397c Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 14 Jan 2016 15:21:34 -0800 Subject: mm: memcontrol: switch to the updated jump-label API According to <linux/jump_label.h> the direct use of struct static_key is deprecated. Update the socket and slab accounting code accordingly.
Signed-off-by: Johannes Weiner Acked-by: David S. Miller Reported-by: Jason Baron Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 8 ++++---- mm/memcontrol.c | 12 ++++++------ net/ipv4/tcp_memcontrol.c | 4 ++-- 3 files changed, 12 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index c5a51039df57..2292468f2a30 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -704,8 +704,8 @@ void sock_release_memcg(struct sock *sk); bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages); void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages); #if defined(CONFIG_MEMCG) && defined(CONFIG_INET) -extern struct static_key memcg_sockets_enabled_key; -#define mem_cgroup_sockets_enabled static_key_false(&memcg_sockets_enabled_key) +extern struct static_key_false memcg_sockets_enabled_key; +#define mem_cgroup_sockets_enabled static_branch_unlikely(&memcg_sockets_enabled_key) static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg) { #ifdef CONFIG_MEMCG_KMEM @@ -727,7 +727,7 @@ static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg) #endif #ifdef CONFIG_MEMCG_KMEM -extern struct static_key memcg_kmem_enabled_key; +extern struct static_key_false memcg_kmem_enabled_key; extern int memcg_nr_cache_ids; void memcg_get_cache_ids(void); @@ -743,7 +743,7 @@ void memcg_put_cache_ids(void); static inline bool memcg_kmem_enabled(void) { - return static_key_false(&memcg_kmem_enabled_key); + return static_branch_unlikely(&memcg_kmem_enabled_key); } static inline bool memcg_kmem_is_active(struct mem_cgroup *memcg) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index df7f144a5a4b..54eae4f19d80 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -346,7 +346,7 @@ void memcg_put_cache_ids(void) * conditional to this static branch, we'll have to allow modules that does * 
kmem_cache_alloc and the such to see this symbol as well */ -struct static_key memcg_kmem_enabled_key; +DEFINE_STATIC_KEY_FALSE(memcg_kmem_enabled_key); EXPORT_SYMBOL(memcg_kmem_enabled_key); #endif /* CONFIG_MEMCG_KMEM */ @@ -2907,7 +2907,7 @@ static int memcg_activate_kmem(struct mem_cgroup *memcg, err = page_counter_limit(&memcg->kmem, nr_pages); VM_BUG_ON(err); - static_key_slow_inc(&memcg_kmem_enabled_key); + static_branch_inc(&memcg_kmem_enabled_key); /* * A memory cgroup is considered kmem-active as soon as it gets * kmemcg_id. Setting the id after enabling static branching will @@ -3646,7 +3646,7 @@ static void memcg_destroy_kmem(struct mem_cgroup *memcg) { if (memcg->kmem_acct_activated) { memcg_destroy_kmem_caches(memcg); - static_key_slow_dec(&memcg_kmem_enabled_key); + static_branch_dec(&memcg_kmem_enabled_key); WARN_ON(page_counter_read(&memcg->kmem)); } tcp_destroy_cgroup(memcg); @@ -4282,7 +4282,7 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css) #ifdef CONFIG_INET if (cgroup_subsys_on_dfl(memory_cgrp_subsys) && !cgroup_memory_nosocket) - static_key_slow_inc(&memcg_sockets_enabled_key); + static_branch_inc(&memcg_sockets_enabled_key); #endif /* @@ -4333,7 +4333,7 @@ static void mem_cgroup_css_free(struct cgroup_subsys_state *css) memcg_destroy_kmem(memcg); #ifdef CONFIG_INET if (cgroup_subsys_on_dfl(memory_cgrp_subsys) && !cgroup_memory_nosocket) - static_key_slow_dec(&memcg_sockets_enabled_key); + static_branch_dec(&memcg_sockets_enabled_key); #endif __mem_cgroup_free(memcg); } @@ -5557,7 +5557,7 @@ void mem_cgroup_replace_page(struct page *oldpage, struct page *newpage) #ifdef CONFIG_INET -struct static_key memcg_sockets_enabled_key; +DEFINE_STATIC_KEY_FALSE(memcg_sockets_enabled_key); EXPORT_SYMBOL(memcg_sockets_enabled_key); void sock_update_memcg(struct sock *sk) diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c index 9a22e2dfd64a..18bc7f745e9c 100644 --- a/net/ipv4/tcp_memcontrol.c +++ b/net/ipv4/tcp_memcontrol.c @@ 
-34,7 +34,7 @@ void tcp_destroy_cgroup(struct mem_cgroup *memcg) return; if (memcg->tcp_mem.active) - static_key_slow_dec(&memcg_sockets_enabled_key); + static_branch_dec(&memcg_sockets_enabled_key); } static int tcp_update_limit(struct mem_cgroup *memcg, unsigned long nr_pages) @@ -65,7 +65,7 @@ static int tcp_update_limit(struct mem_cgroup *memcg, unsigned long nr_pages) * because when this value change, the code to process it is not * patched in yet. */ - static_key_slow_inc(&memcg_sockets_enabled_key); + static_branch_inc(&memcg_sockets_enabled_key); memcg->tcp_mem.active = true; } -- cgit v1.2.3-58-ga151