From efd5a4c04e1835acc64eb44818247ca88e80b294 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 8 Jan 2021 16:47:55 -0800 Subject: mptcp: add the address ID assignment bitmap Currently the address ID set by the netlink PM from user-space is overridden by the kernel. This patch added the address ID assignment bitmap to allow user-space to set the address ID. Use a per netns bitmask id_bitmap (256 bits) to keep track of in-use IDs. And use next_id to keep track of the highest ID currently in use. If the user-space provides an ID at endpoint creation time, try to use it. If already in use, endpoint creation fails. Otherwise pick the first ID available after the highest currently in use, with wrap-around. Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/pm_netlink.c | 72 +++++++++++++++++++++++++++++++++++++------------- 1 file changed, 54 insertions(+), 18 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index a6d983d80576..7fe7be4eef7e 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -36,6 +36,9 @@ struct mptcp_pm_add_entry { u8 retrans_times; }; +#define MAX_ADDR_ID 255 +#define BITMAP_SZ DIV_ROUND_UP(MAX_ADDR_ID + 1, BITS_PER_LONG) + struct pm_nl_pernet { /* protects pernet updates */ spinlock_t lock; @@ -46,6 +49,7 @@ struct pm_nl_pernet { unsigned int local_addr_max; unsigned int subflows_max; unsigned int next_id; + unsigned long id_bitmap[BITMAP_SZ]; }; #define MPTCP_PM_ADDR_MAX 8 @@ -524,10 +528,12 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet, /* to keep the code simple, don't do IDR-like allocation for address ID, * just bail when we exceed limits */ - if (pernet->next_id > 255) - goto out; + if (pernet->next_id == MAX_ADDR_ID) + pernet->next_id = 1; if (pernet->addrs >= MPTCP_PM_ADDR_MAX) goto out; + if (test_bit(entry->addr.id, pernet->id_bitmap)) + goto out; /* do not insert duplicate address, differentiate on port only * singled addresses @@ -539,12 +545,30 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet, goto out; } + if (!entry->addr.id) { +find_next: + entry->addr.id = find_next_zero_bit(pernet->id_bitmap, + MAX_ADDR_ID + 1, + pernet->next_id); + if ((!entry->addr.id || entry->addr.id > MAX_ADDR_ID) && + pernet->next_id != 1) { + pernet->next_id = 1; + goto find_next; + } + } + + if (!entry->addr.id || entry->addr.id > MAX_ADDR_ID) + goto out; + + __set_bit(entry->addr.id, pernet->id_bitmap); + if (entry->addr.id > pernet->next_id) + pernet->next_id = entry->addr.id; + if (entry->addr.flags & MPTCP_PM_ADDR_FLAG_SIGNAL) pernet->add_addr_signal_max++; if (entry->addr.flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) pernet->local_addr_max++; - entry->addr.id = pernet->next_id++; pernet->addrs++; list_add_tail_rcu(&entry->list, &pernet->local_addr_list); ret = entry->addr.id; @@ -597,6 +621,7 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc) entry->addr = skc_local; entry->addr.ifindex = 0; entry->addr.flags = 0; + entry->addr.id = 0; ret = mptcp_pm_nl_append_new_local_addr(pernet, entry); if (ret < 0) kfree(entry); @@ -857,6 +882,7 @@ static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct genl_info *info) pernet->addrs--; list_del_rcu(&entry->list); + __clear_bit(entry->addr.id, pernet->id_bitmap); spin_unlock_bh(&pernet->lock); mptcp_nl_remove_subflow_and_signal_addr(sock_net(skb->sk), &entry->addr); @@ -894,6 +920,8 @@ static int mptcp_nl_cmd_flush_addrs(struct sk_buff *skb, struct genl_info *info) spin_lock_bh(&pernet->lock); list_splice_init(&pernet->local_addr_list, &free_list); __reset_counters(pernet); + pernet->next_id = 1; + bitmap_zero(pernet->id_bitmap, MAX_ADDR_ID + 1); spin_unlock_bh(&pernet->lock); __flush_addrs(sock_net(skb->sk), &free_list); return 0; @@ -994,27 +1022,34 @@ static int mptcp_nl_cmd_dump_addrs(struct sk_buff *msg, struct pm_nl_pernet *pernet; int id = cb->args[0]; void *hdr; + int i; pernet = net_generic(net, pm_nl_pernet_id); spin_lock_bh(&pernet->lock); - list_for_each_entry(entry, &pernet->local_addr_list, list) { - if (entry->addr.id <= id) - continue; - - hdr = genlmsg_put(msg, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, &mptcp_genl_family, - NLM_F_MULTI, MPTCP_PM_CMD_GET_ADDR); - if (!hdr) - break; + for (i = id; i < MAX_ADDR_ID + 1; i++) { + if (test_bit(i, pernet->id_bitmap)) { + entry = __lookup_addr_by_id(pernet, i); + if (!entry) + break; + + if (entry->addr.id <= id) + continue; + + hdr = genlmsg_put(msg, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, &mptcp_genl_family, + NLM_F_MULTI, MPTCP_PM_CMD_GET_ADDR); + if (!hdr) + break; + + if (mptcp_nl_fill_addr(msg, entry) < 0) { + genlmsg_cancel(msg, hdr); + break; + } - if (mptcp_nl_fill_addr(msg, entry) < 0) { - genlmsg_cancel(msg, hdr); - break; + id = entry->addr.id; + genlmsg_end(msg, hdr); } - - id = entry->addr.id; - genlmsg_end(msg, hdr); } spin_unlock_bh(&pernet->lock); @@ -1148,6 +1183,7 @@ static int __net_init pm_nl_init_net(struct net *net) INIT_LIST_HEAD_RCU(&pernet->local_addr_list); __reset_counters(pernet); pernet->next_id = 1; + bitmap_zero(pernet->id_bitmap, MAX_ADDR_ID + 1); spin_lock_init(&pernet->lock); return 0; } -- cgit v1.2.3-58-ga151 From dc8eb10e95a88143b1957dfcce37fc4c91218e69 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 8 Jan 2021 16:47:56 -0800 Subject: selftests: mptcp: add testcases for setting the address ID Since the address ID can be set from user-space, some of the tests in pm_netlink.sh will fail. This patch fixed the failures, and add the testcases for setting the address ID. Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/pm_netlink.sh | 41 +++++++++++++++++++++++-- 1 file changed, 39 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh index 15f4f46ca3a9..a617e293734c 100755 --- a/tools/testing/selftests/net/mptcp/pm_netlink.sh +++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh @@ -91,7 +91,7 @@ id 3 flags signal,backup 10.0.1.3" "dump addrs after del" ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.3 check "ip netns exec $ns1 ./pm_nl_ctl get 4" "" "duplicate addr" -ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.4 id 10 flags signal +ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.4 flags signal check "ip netns exec $ns1 ./pm_nl_ctl get 4" "id 4 flags signal 10.0.1.4" "id addr increment" for i in `seq 5 9`; do @@ -102,9 +102,10 @@ check "ip netns exec $ns1 ./pm_nl_ctl get 10" "" "above hard addr limit" for i in `seq 9 256`; do ip netns exec $ns1 ./pm_nl_ctl del $i - ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.9 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.9 id $((i+1)) done check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags 10.0.1.1 +id 2 flags 10.0.0.9 id 3 flags signal,backup 10.0.1.3 id 4 flags signal 10.0.1.4 id 5 flags signal 10.0.1.5 @@ -127,4 +128,40 @@ ip netns exec $ns1 ./pm_nl_ctl limits 8 8 check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 8 subflows 8" "set limits" +ip netns exec $ns1 ./pm_nl_ctl flush +ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.1 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.2 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.3 id 100 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.4 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.5 id 254 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.6 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.7 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.8 +check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags 10.0.1.1 +id 2 flags 10.0.1.2 +id 3 flags 10.0.1.7 +id 4 flags 10.0.1.8 +id 100 flags 10.0.1.3 +id 101 flags 10.0.1.4 +id 254 flags 10.0.1.5 +id 255 flags 10.0.1.6" "set ids" + +ip netns exec $ns1 ./pm_nl_ctl flush +ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.1 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.2 id 254 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.3 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.4 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.5 id 253 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.6 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.7 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.8 +check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags 10.0.0.1 +id 2 flags 10.0.0.4 +id 3 flags 10.0.0.6 +id 4 flags 10.0.0.7 +id 5 flags 10.0.0.8 +id 253 flags 10.0.0.5 +id 254 flags 10.0.0.2 +id 255 flags 10.0.0.3" "wrap-around ids" + exit $ret -- cgit v1.2.3-58-ga151 From 067065422fcd625492efb7ba130adb8ac1bd8078 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 8 Jan 2021 16:47:57 -0800 Subject: mptcp: add the outgoing MP_PRIO support This patch added the outgoing MP_PRIO logic: In mptcp_pm_nl_mp_prio_send_ack, find the related subflow and subsocket according to the input parameter addr. Save the input priority value to suflow's backup, then set subflow's send_mp_prio flag to true, and save the input priority value to suflow's request_bkup. Finally, send out a pure ACK on the related subsocket. In mptcp_established_options_mp_prio, check whether the subflow's send_mp_prio is set. If it is, this is the packet for sending MP_PRIO. So save subflow->request_bkup value to mptcp_out_options's backup, and change the option type to OPTION_MPTCP_PRIO. In mptcp_write_options, clear the send_mp_prio flag and send out the MP_PRIO suboption with mptcp_out_options's backup value. Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/options.c | 40 ++++++++++++++++++++++++++++++++++++++++ net/mptcp/pm_netlink.c | 33 +++++++++++++++++++++++++++++++++ net/mptcp/protocol.h | 6 ++++++ 3 files changed, 79 insertions(+) diff --git a/net/mptcp/options.c b/net/mptcp/options.c index e0d21c0607e5..ef50a8628d77 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -679,6 +679,28 @@ static bool mptcp_established_options_rm_addr(struct sock *sk, return true; } +static bool mptcp_established_options_mp_prio(struct sock *sk, + unsigned int *size, + unsigned int remaining, + struct mptcp_out_options *opts) +{ + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); + + if (!subflow->send_mp_prio) + return false; + + if (remaining < TCPOLEN_MPTCP_PRIO) + return false; + + *size = TCPOLEN_MPTCP_PRIO; + opts->suboptions |= OPTION_MPTCP_PRIO; + opts->backup = subflow->request_bkup; + + pr_debug("prio=%d", opts->backup); + + return true; +} + bool mptcp_established_options(struct sock *sk, struct sk_buff *skb, unsigned int *size, unsigned int remaining, struct mptcp_out_options *opts) @@ -721,6 +743,12 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb, ret = true; } + if (mptcp_established_options_mp_prio(sk, &opt_size, remaining, opts)) { + *size += opt_size; + remaining -= opt_size; + ret = true; + } + return ret; } @@ -1168,6 +1196,18 @@ mp_capable_done: 0, opts->rm_id); } + if (OPTION_MPTCP_PRIO & opts->suboptions) { + const struct sock *ssk = (const struct sock *)tp; + struct mptcp_subflow_context *subflow; + + subflow = mptcp_subflow_ctx(ssk); + subflow->send_mp_prio = 0; + + *ptr++ = mptcp_option(MPTCPOPT_MP_PRIO, + TCPOLEN_MPTCP_PRIO, + opts->backup, TCPOPT_NOP); + } + if (OPTION_MPTCP_MPJ_SYN & opts->suboptions) { *ptr++ = mptcp_option(MPTCPOPT_MP_JOIN, TCPOLEN_MPTCP_MPJ_SYN, diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 7fe7be4eef7e..bf0d13c85a68 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -442,6 +442,39 @@ void mptcp_pm_nl_add_addr_send_ack(struct mptcp_sock *msk) } } +int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk, + struct mptcp_addr_info *addr, + u8 bkup) +{ + struct mptcp_subflow_context *subflow; + + pr_debug("bkup=%d", bkup); + + mptcp_for_each_subflow(msk, subflow) { + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + struct mptcp_addr_info local; + + local_address((struct sock_common *)ssk, &local); + if (!addresses_equal(&local, addr, addr->port)) + continue; + + subflow->backup = bkup; + subflow->send_mp_prio = 1; + subflow->request_bkup = bkup; + + spin_unlock_bh(&msk->pm.lock); + pr_debug("send ack for mp_prio"); + lock_sock(ssk); + tcp_send_ack(ssk); + release_sock(ssk); + spin_lock_bh(&msk->pm.lock); + + return 0; + } + + return -EINVAL; +} + void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk) { struct mptcp_subflow_context *subflow, *tmp; diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index d67de793d363..21763e00d990 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -24,6 +24,7 @@ #define OPTION_MPTCP_ADD_ADDR6 BIT(7) #define OPTION_MPTCP_RM_ADDR BIT(8) #define OPTION_MPTCP_FASTCLOSE BIT(9) +#define OPTION_MPTCP_PRIO BIT(10) /* MPTCP option subtypes */ #define MPTCPOPT_MP_CAPABLE 0 @@ -59,6 +60,7 @@ #define TCPOLEN_MPTCP_ADD_ADDR6_BASE_PORT 24 #define TCPOLEN_MPTCP_PORT_LEN 4 #define TCPOLEN_MPTCP_RM_ADDR_BASE 4 +#define TCPOLEN_MPTCP_PRIO 4 #define TCPOLEN_MPTCP_FASTCLOSE 12 /* MPTCP MP_JOIN flags */ @@ -396,6 +398,7 @@ struct mptcp_subflow_context { map_valid : 1, mpc_map : 1, backup : 1, + send_mp_prio : 1, rx_eof : 1, can_ack : 1, /* only after processing the remote a key */ disposable : 1; /* ctx can be free at ulp release time */ @@ -550,6 +553,9 @@ void mptcp_pm_add_addr_received(struct mptcp_sock *msk, const struct mptcp_addr_info *addr); void mptcp_pm_add_addr_send_ack(struct mptcp_sock *msk); void mptcp_pm_rm_addr_received(struct mptcp_sock *msk, u8 rm_id); +int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk, + struct mptcp_addr_info *addr, + u8 bkup); void mptcp_pm_free_anno_list(struct mptcp_sock *msk); struct mptcp_pm_add_entry * mptcp_pm_del_add_timer(struct mptcp_sock *msk, -- cgit v1.2.3-58-ga151 From 40453a5c61f4dc43bbbdbf7cefed4eb1bc8d69b7 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 8 Jan 2021 16:47:58 -0800 Subject: mptcp: add the incoming MP_PRIO support This patch added the incoming MP_PRIO logic: Added a flag named mp_prio in struct mptcp_options_received, to mark the MP_PRIO is received, and save the priority value to struct mptcp_options_received's backup member. Then invoke mptcp_pm_mp_prio_received with the receiving subsocket and the backup value. In mptcp_pm_mp_prio_received, get the subflow context according the input subsocket, and change the subflow's backup as the incoming priority value. Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/options.c | 15 +++++++++++++++ net/mptcp/pm.c | 8 ++++++++ net/mptcp/protocol.h | 5 +++++ 3 files changed, 28 insertions(+) diff --git a/net/mptcp/options.c b/net/mptcp/options.c index ef50a8628d77..adfa96dd991c 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -282,6 +282,15 @@ static void mptcp_parse_option(const struct sk_buff *skb, pr_debug("RM_ADDR: id=%d", mp_opt->rm_id); break; + case MPTCPOPT_MP_PRIO: + if (opsize != TCPOLEN_MPTCP_PRIO) + break; + + mp_opt->mp_prio = 1; + mp_opt->backup = *ptr++ & MPTCP_PRIO_BKUP; + pr_debug("MP_PRIO: prio=%d", mp_opt->backup); + break; + case MPTCPOPT_MP_FASTCLOSE: if (opsize != TCPOLEN_MPTCP_FASTCLOSE) break; @@ -313,6 +322,7 @@ void mptcp_get_options(const struct sk_buff *skb, mp_opt->port = 0; mp_opt->rm_addr = 0; mp_opt->dss = 0; + mp_opt->mp_prio = 0; length = (th->doff * 4) - sizeof(struct tcphdr); ptr = (const unsigned char *)(th + 1); @@ -1022,6 +1032,11 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) mp_opt.rm_addr = 0; } + if (mp_opt.mp_prio) { + mptcp_pm_mp_prio_received(sk, mp_opt.backup); + mp_opt.mp_prio = 0; + } + if (!mp_opt.dss) return; diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index da2ed576f289..0a6ebd0642ec 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -207,6 +207,14 @@ void mptcp_pm_rm_addr_received(struct mptcp_sock *msk, u8 rm_id) spin_unlock_bh(&pm->lock); } +void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup) +{ + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); + + pr_debug("subflow->backup=%d, bkup=%d\n", subflow->backup, bkup); + subflow->backup = bkup; +} + /* path manager helpers */ bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, unsigned int remaining, diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 21763e00d990..d6400ad2d615 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -88,6 +88,9 @@ #define MPTCP_ADDR_IPVERSION_4 4 #define MPTCP_ADDR_IPVERSION_6 6 +/* MPTCP MP_PRIO flags */ +#define MPTCP_PRIO_BKUP BIT(0) + /* MPTCP socket flags */ #define MPTCP_DATA_READY 0 #define MPTCP_NOSPACE 1 @@ -118,6 +121,7 @@ struct mptcp_options_received { dss : 1, add_addr : 1, rm_addr : 1, + mp_prio : 1, family : 4, echo : 1, backup : 1; @@ -553,6 +557,7 @@ void mptcp_pm_add_addr_received(struct mptcp_sock *msk, const struct mptcp_addr_info *addr); void mptcp_pm_add_addr_send_ack(struct mptcp_sock *msk); void mptcp_pm_rm_addr_received(struct mptcp_sock *msk, u8 rm_id); +void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup); int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk, struct mptcp_addr_info *addr, u8 bkup); -- cgit v1.2.3-58-ga151 From 0f9f696a502e1b01fbb137a08f56f157da9d95eb Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 8 Jan 2021 16:47:59 -0800 Subject: mptcp: add set_flags command in PM netlink This patch added a new command MPTCP_PM_CMD_SET_FLAGS in PM netlink: In mptcp_nl_cmd_set_flags, parse the input address, get the backup value according to whether the address's FLAG_BACKUP flag is set from the user-space. Then check whether this address had been added in the local address list. If it had been, then call mptcp_nl_addr_backup to deal with this address. In mptcp_nl_addr_backup, traverse all the existing msk sockets to find the relevant sockets, and call mptcp_pm_nl_mp_prio_send_ack to send out a MP_PRIO ACK packet. Finally in mptcp_nl_cmd_set_flags, set or clear the address's FLAG_BACKUP flag. Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- include/uapi/linux/mptcp.h | 1 + net/mptcp/pm_netlink.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+) diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h index 9762660df741..3674a451a18c 100644 --- a/include/uapi/linux/mptcp.h +++ b/include/uapi/linux/mptcp.h @@ -82,6 +82,7 @@ enum { MPTCP_PM_CMD_FLUSH_ADDRS, MPTCP_PM_CMD_SET_LIMITS, MPTCP_PM_CMD_GET_LIMITS, + MPTCP_PM_CMD_SET_FLAGS, __MPTCP_PM_CMD_AFTER_LAST }; diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index bf0d13c85a68..8f80099f1657 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -1164,6 +1164,66 @@ fail: return -EMSGSIZE; } +static int mptcp_nl_addr_backup(struct net *net, + struct mptcp_addr_info *addr, + u8 bkup) +{ + long s_slot = 0, s_num = 0; + struct mptcp_sock *msk; + int ret = -EINVAL; + + while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { + struct sock *sk = (struct sock *)msk; + + if (list_empty(&msk->conn_list)) + goto next; + + lock_sock(sk); + spin_lock_bh(&msk->pm.lock); + ret = mptcp_pm_nl_mp_prio_send_ack(msk, addr, bkup); + spin_unlock_bh(&msk->pm.lock); + release_sock(sk); + +next: + sock_put(sk); + cond_resched(); + } + + return ret; +} + +static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR]; + struct pm_nl_pernet *pernet = genl_info_pm_nl(info); + struct mptcp_pm_addr_entry addr, *entry; + struct net *net = sock_net(skb->sk); + u8 bkup = 0; + int ret; + + ret = mptcp_pm_parse_addr(attr, info, true, &addr); + if (ret < 0) + return ret; + + if (addr.addr.flags & MPTCP_PM_ADDR_FLAG_BACKUP) + bkup = 1; + + list_for_each_entry(entry, &pernet->local_addr_list, list) { + if (addresses_equal(&entry->addr, &addr.addr, true)) { + ret = mptcp_nl_addr_backup(net, &entry->addr, bkup); + if (ret) + return ret; + + if (bkup) + entry->addr.flags |= MPTCP_PM_ADDR_FLAG_BACKUP; + else + entry->addr.flags &= ~MPTCP_PM_ADDR_FLAG_BACKUP; + } + } + + return 0; +} + static const struct genl_small_ops mptcp_pm_ops[] = { { .cmd = MPTCP_PM_CMD_ADD_ADDR, @@ -1194,6 +1254,11 @@ static const struct genl_small_ops mptcp_pm_ops[] = { .cmd = MPTCP_PM_CMD_GET_LIMITS, .doit = mptcp_nl_cmd_get_limits, }, + { + .cmd = MPTCP_PM_CMD_SET_FLAGS, + .doit = mptcp_nl_cmd_set_flags, + .flags = GENL_ADMIN_PERM, + }, }; static struct genl_family mptcp_genl_family __ro_after_init = { -- cgit v1.2.3-58-ga151 From 6e8b244a3e9d989f8356c77211b678c106f23a27 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 8 Jan 2021 16:48:00 -0800 Subject: selftests: mptcp: add set_flags command in pm_nl_ctl This patch added the set_flags command in pm_nl_ctl, currently we can only set two flags: backup and nobackup. The set_flags command can be used like this: # pm_nl_ctl set 10.0.0.1 flags backup # pm_nl_ctl set 10.0.0.1 flags nobackup Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/pm_nl_ctl.c | 87 ++++++++++++++++++++++++++- 1 file changed, 86 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c index b24a2f17d415..abc269e96a07 100644 --- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c +++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c @@ -24,10 +24,11 @@ static void syntax(char *argv[]) { - fprintf(stderr, "%s add|get|del|flush|dump|accept []\n", argv[0]); + fprintf(stderr, "%s add|get|set|del|flush|dump|accept []\n", argv[0]); fprintf(stderr, "\tadd [flags signal|subflow|backup] [id ] [dev ] \n"); fprintf(stderr, "\tdel \n"); fprintf(stderr, "\tget \n"); + fprintf(stderr, "\tset [flags backup|nobackup]\n"); fprintf(stderr, "\tflush\n"); fprintf(stderr, "\tdump\n"); fprintf(stderr, "\tlimits [ ]\n"); @@ -584,6 +585,88 @@ int get_set_limits(int fd, int pm_family, int argc, char *argv[]) return 0; } +int set_flags(int fd, int pm_family, int argc, char *argv[]) +{ + char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + + NLMSG_ALIGN(sizeof(struct genlmsghdr)) + + 1024]; + struct rtattr *rta, *nest; + struct nlmsghdr *nh; + u_int32_t flags = 0; + u_int16_t family; + int nest_start; + int off = 0; + int arg; + + memset(data, 0, sizeof(data)); + nh = (void *)data; + off = init_genl_req(data, pm_family, MPTCP_PM_CMD_SET_FLAGS, + MPTCP_PM_VER); + + if (argc < 3) + syntax(argv); + + nest_start = off; + nest = (void *)(data + off); + nest->rta_type = NLA_F_NESTED | MPTCP_PM_ATTR_ADDR; + nest->rta_len = RTA_LENGTH(0); + off += NLMSG_ALIGN(nest->rta_len); + + /* addr data */ + rta = (void *)(data + off); + if (inet_pton(AF_INET, argv[2], RTA_DATA(rta))) { + family = AF_INET; + rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR4; + rta->rta_len = RTA_LENGTH(4); + } else if (inet_pton(AF_INET6, argv[2], RTA_DATA(rta))) { + family = AF_INET6; + rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR6; + rta->rta_len = RTA_LENGTH(16); + } else { + error(1, errno, "can't parse ip %s", argv[2]); + } + off += NLMSG_ALIGN(rta->rta_len); + + /* family */ + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ADDR_ATTR_FAMILY; + rta->rta_len = RTA_LENGTH(2); + memcpy(RTA_DATA(rta), &family, 2); + off += NLMSG_ALIGN(rta->rta_len); + + for (arg = 3; arg < argc; arg++) { + if (!strcmp(argv[arg], "flags")) { + char *tok, *str; + + /* flags */ + if (++arg >= argc) + error(1, 0, " missing flags value"); + + /* do not support flag list yet */ + for (str = argv[arg]; (tok = strtok(str, ",")); + str = NULL) { + if (!strcmp(tok, "backup")) + flags |= MPTCP_PM_ADDR_FLAG_BACKUP; + else if (strcmp(tok, "nobackup")) + error(1, errno, + "unknown flag %s", argv[arg]); + } + + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ADDR_ATTR_FLAGS; + rta->rta_len = RTA_LENGTH(4); + memcpy(RTA_DATA(rta), &flags, 4); + off += NLMSG_ALIGN(rta->rta_len); + } else { + error(1, 0, "unknown keyword %s", argv[arg]); + } + } + nest->rta_len = off - nest_start; + + do_nl_req(fd, nh, off, 0); + return 0; +} + int main(int argc, char *argv[]) { int fd, pm_family; @@ -609,6 +692,8 @@ int main(int argc, char *argv[]) return dump_addrs(fd, pm_family, argc, argv); else if (!strcmp(argv[1], "limits")) return get_set_limits(fd, pm_family, argc, argv); + else if (!strcmp(argv[1], "set")) + return set_flags(fd, pm_family, argc, argv); fprintf(stderr, "unknown sub-command: %s", argv[1]); syntax(argv); -- cgit v1.2.3-58-ga151 From 0be2ac287bcc8a5b60d7c9ab11892a774052d269 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 8 Jan 2021 16:48:01 -0800 Subject: mptcp: add the mibs for MP_PRIO This patch added the mibs for MP_PRIO, MPTCP_MIB_MPPRIOTX for transmitting of the MP_PRIO suboption, and MPTCP_MIB_MPPRIORX for receiving of it. Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/mib.c | 2 ++ net/mptcp/mib.h | 2 ++ net/mptcp/options.c | 1 + net/mptcp/pm_netlink.c | 2 ++ 4 files changed, 7 insertions(+) diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c index b921cbdd9aaa..8ca196489893 100644 --- a/net/mptcp/mib.c +++ b/net/mptcp/mib.c @@ -31,6 +31,8 @@ static const struct snmp_mib mptcp_snmp_list[] = { SNMP_MIB_ITEM("EchoAdd", MPTCP_MIB_ECHOADD), SNMP_MIB_ITEM("RmAddr", MPTCP_MIB_RMADDR), SNMP_MIB_ITEM("RmSubflow", MPTCP_MIB_RMSUBFLOW), + SNMP_MIB_ITEM("MPPrioTx", MPTCP_MIB_MPPRIOTX), + SNMP_MIB_ITEM("MPPrioRx", MPTCP_MIB_MPPRIORX), SNMP_MIB_SENTINEL }; diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h index 47bcecce1106..63914a5ef6a5 100644 --- a/net/mptcp/mib.h +++ b/net/mptcp/mib.h @@ -24,6 +24,8 @@ enum linux_mptcp_mib_field { MPTCP_MIB_ECHOADD, /* Received ADD_ADDR with echo-flag=1 */ MPTCP_MIB_RMADDR, /* Received RM_ADDR */ MPTCP_MIB_RMSUBFLOW, /* Remove a subflow */ + MPTCP_MIB_MPPRIOTX, /* Transmit a MP_PRIO */ + MPTCP_MIB_MPPRIORX, /* Received a MP_PRIO */ __MPTCP_MIB_MAX }; diff --git a/net/mptcp/options.c b/net/mptcp/options.c index adfa96dd991c..c9643344a8d7 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -1034,6 +1034,7 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) if (mp_opt.mp_prio) { mptcp_pm_mp_prio_received(sk, mp_opt.backup); + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPPRIORX); mp_opt.mp_prio = 0; } diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 8f80099f1657..9b1f6298bbdb 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -452,6 +452,7 @@ int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk, mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + struct sock *sk = (struct sock *)msk; struct mptcp_addr_info local; local_address((struct sock_common *)ssk, &local); @@ -461,6 +462,7 @@ int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk, subflow->backup = bkup; subflow->send_mp_prio = 1; subflow->request_bkup = bkup; + __MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPPRIOTX); spin_unlock_bh(&msk->pm.lock); pr_debug("send ack for mp_prio"); -- cgit v1.2.3-58-ga151 From 718eb44e5c1e9594d6cebc1798a73c1a314de7e2 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 8 Jan 2021 16:48:02 -0800 Subject: selftests: mptcp: add the MP_PRIO testcases This patch added the MP_PRIO testcases: Add a new argument bkup for run_tests and do_transfer, it can be set as "backup" or "nobackup", the default value is "". Add a new function chk_prio_nr to check the MP_PRIO related MIB counters. The output looks like this: 29 single subflow, backup syn[ ok ] - synack[ ok ] - ack[ ok ] ptx[ ok ] - prx [ ok ] 30 single address, backup syn[ ok ] - synack[ ok ] - ack[ ok ] add[ ok ] - echo [ ok ] ptx[ ok ] - prx [ ok ] Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 72 ++++++++++++++++++++++++- 1 file changed, 71 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 9aa9624cff97..f74cd993b168 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -212,6 +212,7 @@ do_transfer() rm_nr_ns1="$7" rm_nr_ns2="$8" speed="$9" + bkup="${10}" port=$((10000+$TEST_COUNT)) TEST_COUNT=$((TEST_COUNT+1)) @@ -297,6 +298,18 @@ do_transfer() fi fi + if [ ! -z $bkup ]; then + sleep 1 + for netns in "$ns1" "$ns2"; do + dump=(`ip netns exec $netns ./pm_nl_ctl dump`) + if [ ${#dump[@]} -gt 0 ]; then + addr=${dump[${#dump[@]} - 1]} + backup="ip netns exec $netns ./pm_nl_ctl set $addr flags $bkup" + $backup + fi + done + fi + wait $cpid retc=$? wait $spid @@ -358,6 +371,7 @@ run_tests() rm_nr_ns1="${5:-0}" rm_nr_ns2="${6:-0}" speed="${7:-fast}" + bkup="${8:-""}" lret=0 oldin="" @@ -372,7 +386,7 @@ run_tests() fi do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} \ - ${test_linkfail} ${rm_nr_ns1} ${rm_nr_ns2} ${speed} + ${test_linkfail} ${rm_nr_ns1} ${rm_nr_ns2} ${speed} ${bkup} lret=$? if [ "$test_linkfail" -eq 1 ];then @@ -509,6 +523,43 @@ chk_rm_nr() fi } +chk_prio_nr() +{ + local mp_prio_nr_tx=$1 + local mp_prio_nr_rx=$2 + local count + local dump_stats + + printf "%-39s %s" " " "ptx" + count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMPPrioTx | awk '{print $2}'` + [ -z "$count" ] && count=0 + if [ "$count" != "$mp_prio_nr_tx" ]; then + echo "[fail] got $count MP_PRIO[s] TX expected $mp_prio_nr_tx" + ret=1 + dump_stats=1 + else + echo -n "[ ok ]" + fi + + echo -n " - prx " + count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMPPrioRx | awk '{print $2}'` + [ -z "$count" ] && count=0 + if [ "$count" != "$mp_prio_nr_rx" ]; then + echo "[fail] got $count MP_PRIO[s] RX expected $mp_prio_nr_rx" + ret=1 + dump_stats=1 + else + echo "[ ok ]" + fi + + if [ "${dump_stats}" = 1 ]; then + echo Server ns stats + ip netns exec $ns1 nstat -as | grep MPTcp + echo Client ns stats + ip netns exec $ns2 nstat -as | grep MPTcp + fi +} + sin=$(mktemp) sout=$(mktemp) cin=$(mktemp) @@ -739,6 +790,25 @@ chk_join_nr "remove subflow and signal IPv6" 2 2 2 chk_add_nr 1 1 chk_rm_nr 1 1 +# single subflow, backup +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow,backup +run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow nobackup +chk_join_nr "single subflow, backup" 1 1 1 +chk_prio_nr 0 1 + +# single address, backup +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 1 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal +ip netns exec $ns2 ./pm_nl_ctl limits 1 1 +run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow backup +chk_join_nr "single address, backup" 1 1 1 +chk_add_nr 1 1 +chk_prio_nr 1 0 + # single subflow, syncookies reset_with_cookies ip netns exec $ns1 ./pm_nl_ctl limits 0 1 -- cgit v1.2.3-58-ga151