From ec8f24b7faaf3d4799a7c3f4c1b87f6b02778ad1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 19 May 2019 13:07:45 +0100 Subject: treewide: Add SPDX license identifier - Makefile/Kconfig Add SPDX license identifiers to all Make/Kconfig files which: - Have no license information of any form These files fall under the project license, GPL v2 only. The resulting SPDX license identifier is: GPL-2.0-only Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- net/rds/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'net/rds') diff --git a/net/rds/Kconfig b/net/rds/Kconfig index b9092111bc45..38ea7f0f2699 100644 --- a/net/rds/Kconfig +++ b/net/rds/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only config RDS tristate "The Reliable Datagram Sockets Protocol" -- cgit v1.2.3-58-ga151 From b50e058746ba29f517e27299447831ab3d93f896 Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Mon, 3 Jun 2019 08:48:19 -0400 Subject: net: rds: fix memory leak when unload rds_rdma MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When KASAN is enabled, after several rds connections are created, then "rmmod rds_rdma" is run. The following will appear. " BUG rds_ib_incoming (Not tainted): Objects remaining in rds_ib_incoming on __kmem_cache_shutdown() Call Trace: dump_stack+0x71/0xab slab_err+0xad/0xd0 __kmem_cache_shutdown+0x17d/0x370 shutdown_cache+0x17/0x130 kmem_cache_destroy+0x1df/0x210 rds_ib_recv_exit+0x11/0x20 [rds_rdma] rds_ib_exit+0x7a/0x90 [rds_rdma] __x64_sys_delete_module+0x224/0x2c0 ? __ia32_sys_delete_module+0x2c0/0x2c0 do_syscall_64+0x73/0x190 entry_SYSCALL_64_after_hwframe+0x44/0xa9 " This is rds connection memory leak. The root cause is: When "rmmod rds_rdma" is run, rds_ib_remove_one will call rds_ib_dev_shutdown to drop the rds connections. rds_ib_dev_shutdown will call rds_conn_drop to drop rds connections as below. " rds_conn_path_drop(&conn->c_path[0], false); " In the above, destroy is set to false. void rds_conn_path_drop(struct rds_conn_path *cp, bool destroy) { atomic_set(&cp->cp_state, RDS_CONN_ERROR); rcu_read_lock(); if (!destroy && rds_destroy_pending(cp->cp_conn)) { rcu_read_unlock(); return; } queue_work(rds_wq, &cp->cp_down_w); rcu_read_unlock(); } In the above function, destroy is set to false. rds_destroy_pending is called. This does not move rds connections to ib_nodev_conns. So destroy is set to true to move rds connections to ib_nodev_conns. In rds_ib_unregister_client, flush_workqueue is called to make rds_wq finsh shutdown rds connections. The function rds_ib_destroy_nodev_conns is called to shutdown rds connections finally. Then rds_ib_recv_exit is called to destroy slab. void rds_ib_recv_exit(void) { kmem_cache_destroy(rds_ib_incoming_slab); kmem_cache_destroy(rds_ib_frag_slab); } The above slab memory leak will not occur again. >From tests, 256 rds connections [root@ca-dev14 ~]# time rmmod rds_rdma real 0m16.522s user 0m0.000s sys 0m8.152s 512 rds connections [root@ca-dev14 ~]# time rmmod rds_rdma real 0m32.054s user 0m0.000s sys 0m15.568s To rmmod rds_rdma with 256 rds connections, about 16 seconds are needed. And with 512 rds connections, about 32 seconds are needed. >From ftrace, when one rds connection is destroyed, " 19) | rds_conn_destroy [rds]() { 19) 7.782 us | rds_conn_path_drop [rds](); 15) | rds_shutdown_worker [rds]() { 15) | rds_conn_shutdown [rds]() { 15) 1.651 us | rds_send_path_reset [rds](); 15) 7.195 us | } 15) + 11.434 us | } 19) 2.285 us | rds_cong_remove_conn [rds](); 19) * 24062.76 us | } " So if many rds connections will be destroyed, this function rds_ib_destroy_nodev_conns uses most of time. Suggested-by: HÃ¥kon Bugge Signed-off-by: Zhu Yanjun Signed-off-by: David S. Miller --- net/rds/ib.c | 2 +- net/rds/ib_recv.c | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'net/rds') diff --git a/net/rds/ib.c b/net/rds/ib.c index 2da9b75bad16..b8d581b779b2 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -87,7 +87,7 @@ static void rds_ib_dev_shutdown(struct rds_ib_device *rds_ibdev) spin_lock_irqsave(&rds_ibdev->spinlock, flags); list_for_each_entry(ic, &rds_ibdev->conn_list, ib_node) - rds_conn_drop(ic->conn); + rds_conn_path_drop(&ic->conn->c_path[0], true); spin_unlock_irqrestore(&rds_ibdev->spinlock, flags); } diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c index 8946c89d7392..3cae88cbdaa0 100644 --- a/net/rds/ib_recv.c +++ b/net/rds/ib_recv.c @@ -168,6 +168,7 @@ void rds_ib_recv_free_caches(struct rds_ib_connection *ic) list_del(&inc->ii_cache_entry); WARN_ON(!list_empty(&inc->ii_frags)); kmem_cache_free(rds_ib_incoming_slab, inc); + atomic_dec(&rds_ib_allocation); } rds_ib_cache_xfer_to_ready(&ic->i_cache_frags); @@ -1057,6 +1058,8 @@ out: void rds_ib_recv_exit(void) { + WARN_ON(atomic_read(&rds_ib_allocation)); + kmem_cache_destroy(rds_ib_incoming_slab); kmem_cache_destroy(rds_ib_frag_slab); } -- cgit v1.2.3-58-ga151 From 85cb928787eab6a2f4ca9d2a798b6f3bed53ced1 Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Thu, 6 Jun 2019 04:00:03 -0400 Subject: net: rds: fix memory leak in rds_ib_flush_mr_pool When the following tests last for several hours, the problem will occur. Server: rds-stress -r 1.1.1.16 -D 1M Client: rds-stress -r 1.1.1.14 -s 1.1.1.16 -D 1M -T 30 The following will occur. " Starting up.... tsks tx/s rx/s tx+rx K/s mbi K/s mbo K/s tx us/c rtt us cpu % 1 0 0 0.00 0.00 0.00 0.00 0.00 -1.00 1 0 0 0.00 0.00 0.00 0.00 0.00 -1.00 1 0 0 0.00 0.00 0.00 0.00 0.00 -1.00 1 0 0 0.00 0.00 0.00 0.00 0.00 -1.00 " >From vmcore, we can find that clean_list is NULL. >From the source code, rds_mr_flushd calls rds_ib_mr_pool_flush_worker. Then rds_ib_mr_pool_flush_worker calls " rds_ib_flush_mr_pool(pool, 0, NULL); " Then in function " int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, int free_all, struct rds_ib_mr **ibmr_ret) " ibmr_ret is NULL. In the source code, " ... list_to_llist_nodes(pool, &unmap_list, &clean_nodes, &clean_tail); if (ibmr_ret) *ibmr_ret = llist_entry(clean_nodes, struct rds_ib_mr, llnode); /* more than one entry in llist nodes */ if (clean_nodes->next) llist_add_batch(clean_nodes->next, clean_tail, &pool->clean_list); ... " When ibmr_ret is NULL, llist_entry is not executed. clean_nodes->next instead of clean_nodes is added in clean_list. So clean_nodes is discarded. It can not be used again. The workqueue is executed periodically. So more and more clean_nodes are discarded. Finally the clean_list is NULL. Then this problem will occur. Fixes: 1bc144b62524 ("net, rds, Replace xlist in net/rds/xlist.h with llist") Signed-off-by: Zhu Yanjun Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- net/rds/ib_rdma.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'net/rds') diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c index d664e9ade74d..0b347f46b2f4 100644 --- a/net/rds/ib_rdma.c +++ b/net/rds/ib_rdma.c @@ -428,12 +428,14 @@ int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, wait_clean_list_grace(); list_to_llist_nodes(pool, &unmap_list, &clean_nodes, &clean_tail); - if (ibmr_ret) + if (ibmr_ret) { *ibmr_ret = llist_entry(clean_nodes, struct rds_ib_mr, llnode); - + clean_nodes = clean_nodes->next; + } /* more than one entry in llist nodes */ - if (clean_nodes->next) - llist_add_batch(clean_nodes->next, clean_tail, &pool->clean_list); + if (clean_nodes) + llist_add_batch(clean_nodes, clean_tail, + &pool->clean_list); } -- cgit v1.2.3-58-ga151