diff options
author | Jakub Kicinski <kuba@kernel.org> | 2022-04-28 13:01:50 -0700 |
---|---|---|
committer | Jakub Kicinski <kuba@kernel.org> | 2022-04-28 13:02:01 -0700 |
commit | 0e55546b189fc5f1ce5149445d7df083f26d4f25 (patch) | |
tree | 0ed58feac725c81dd27a4533bea24c328898e877 /net | |
parent | f3412b3879b4f7c4313b186b03940d4791345534 (diff) | |
parent | 249aca0d3d631660aa3583c6a3559b75b6e971b4 (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
include/linux/netdevice.h
net/core/dev.c
6510ea973d8d ("net: Use this_cpu_inc() to increment net->core_stats")
794c24e9921f ("net-core: rx_otherhost_dropped to core_stats")
https://lore.kernel.org/all/20220428111903.5f4304e0@canb.auug.org.au/
drivers/net/wan/cosa.c
d48fea8401cf ("net: cosa: fix error check return value of register_chrdev()")
89fbca3307d4 ("net: wan: remove support for COSA and SRP synchronous serial boards")
https://lore.kernel.org/all/20220428112130.1f689e5e@canb.auug.org.au/
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'net')
32 files changed, 368 insertions, 170 deletions
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 84312c836549..fe803bee419a 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -670,7 +670,7 @@ static void le_conn_timeout(struct work_struct *work) /* Disable LE Advertising */ le_disable_advertising(hdev); hci_dev_lock(hdev); - hci_le_conn_failed(conn, HCI_ERROR_ADVERTISING_TIMEOUT); + hci_conn_failed(conn, HCI_ERROR_ADVERTISING_TIMEOUT); hci_dev_unlock(hdev); return; } @@ -873,7 +873,7 @@ struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src, uint8_t src_type) EXPORT_SYMBOL(hci_get_route); /* This function requires the caller holds hdev->lock */ -void hci_le_conn_failed(struct hci_conn *conn, u8 status) +static void hci_le_conn_failed(struct hci_conn *conn, u8 status) { struct hci_dev *hdev = conn->hdev; struct hci_conn_params *params; @@ -886,8 +886,6 @@ void hci_le_conn_failed(struct hci_conn *conn, u8 status) params->conn = NULL; } - conn->state = BT_CLOSED; - /* If the status indicates successful cancellation of * the attempt (i.e. Unknown Connection Id) there's no point of * notifying failure since we'll go back to keep trying to @@ -899,10 +897,6 @@ void hci_le_conn_failed(struct hci_conn *conn, u8 status) mgmt_connect_failed(hdev, &conn->dst, conn->type, conn->dst_type, status); - hci_connect_cfm(conn, status); - - hci_conn_del(conn); - /* Since we may have temporarily stopped the background scanning in * favor of connection establishment, we should restart it. */ @@ -914,6 +908,28 @@ void hci_le_conn_failed(struct hci_conn *conn, u8 status) hci_enable_advertising(hdev); } +/* This function requires the caller holds hdev->lock */ +void hci_conn_failed(struct hci_conn *conn, u8 status) +{ + struct hci_dev *hdev = conn->hdev; + + bt_dev_dbg(hdev, "status 0x%2.2x", status); + + switch (conn->type) { + case LE_LINK: + hci_le_conn_failed(conn, status); + break; + case ACL_LINK: + mgmt_connect_failed(hdev, &conn->dst, conn->type, + conn->dst_type, status); + break; + } + + conn->state = BT_CLOSED; + hci_connect_cfm(conn, status); + hci_conn_del(conn); +} + static void create_le_conn_complete(struct hci_dev *hdev, void *data, int err) { struct hci_conn *conn = data; diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index abaabfae19cc..66451661283c 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -2834,7 +2834,7 @@ static void hci_cs_le_create_conn(struct hci_dev *hdev, u8 status) bt_dev_dbg(hdev, "status 0x%2.2x", status); /* All connection failure handling is taken care of by the - * hci_le_conn_failed function which is triggered by the HCI + * hci_conn_failed function which is triggered by the HCI * request completion callbacks used for connecting. */ if (status) @@ -2859,7 +2859,7 @@ static void hci_cs_le_ext_create_conn(struct hci_dev *hdev, u8 status) bt_dev_dbg(hdev, "status 0x%2.2x", status); /* All connection failure handling is taken care of by the - * hci_le_conn_failed function which is triggered by the HCI + * hci_conn_failed function which is triggered by the HCI * request completion callbacks used for connecting. */ if (status) @@ -3067,18 +3067,20 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data, { struct hci_ev_conn_complete *ev = data; struct hci_conn *conn; + u8 status = ev->status; - if (__le16_to_cpu(ev->handle) > HCI_CONN_HANDLE_MAX) { - bt_dev_err(hdev, "Ignoring HCI_Connection_Complete for invalid handle"); - return; - } - - bt_dev_dbg(hdev, "status 0x%2.2x", ev->status); + bt_dev_dbg(hdev, "status 0x%2.2x", status); hci_dev_lock(hdev); conn = hci_conn_hash_lookup_ba(hdev, ev->link_type, &ev->bdaddr); if (!conn) { + /* In case of error status and there is no connection pending + * just unlock as there is nothing to cleanup. + */ + if (ev->status) + goto unlock; + /* Connection may not exist if auto-connected. Check the bredr * allowlist to see if this device is allowed to auto connect. * If link is an ACL type, create a connection class @@ -3122,8 +3124,14 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data, goto unlock; } - if (!ev->status) { + if (!status) { conn->handle = __le16_to_cpu(ev->handle); + if (conn->handle > HCI_CONN_HANDLE_MAX) { + bt_dev_err(hdev, "Invalid handle: 0x%4.4x > 0x%4.4x", + conn->handle, HCI_CONN_HANDLE_MAX); + status = HCI_ERROR_INVALID_PARAMETERS; + goto done; + } if (conn->type == ACL_LINK) { conn->state = BT_CONFIG; @@ -3164,19 +3172,14 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data, hci_send_cmd(hdev, HCI_OP_CHANGE_CONN_PTYPE, sizeof(cp), &cp); } - } else { - conn->state = BT_CLOSED; - if (conn->type == ACL_LINK) - mgmt_connect_failed(hdev, &conn->dst, conn->type, - conn->dst_type, ev->status); } if (conn->type == ACL_LINK) hci_sco_setup(conn, ev->status); - if (ev->status) { - hci_connect_cfm(conn, ev->status); - hci_conn_del(conn); +done: + if (status) { + hci_conn_failed(conn, status); } else if (ev->link_type == SCO_LINK) { switch (conn->setting & SCO_AIRMODE_MASK) { case SCO_AIRMODE_CVSD: @@ -3185,7 +3188,7 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data, break; } - hci_connect_cfm(conn, ev->status); + hci_connect_cfm(conn, status); } unlock: @@ -4676,6 +4679,7 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev, void *data, { struct hci_ev_sync_conn_complete *ev = data; struct hci_conn *conn; + u8 status = ev->status; switch (ev->link_type) { case SCO_LINK: @@ -4690,12 +4694,7 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev, void *data, return; } - if (__le16_to_cpu(ev->handle) > HCI_CONN_HANDLE_MAX) { - bt_dev_err(hdev, "Ignoring HCI_Sync_Conn_Complete for invalid handle"); - return; - } - - bt_dev_dbg(hdev, "status 0x%2.2x", ev->status); + bt_dev_dbg(hdev, "status 0x%2.2x", status); hci_dev_lock(hdev); @@ -4729,9 +4728,17 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev, void *data, goto unlock; } - switch (ev->status) { + switch (status) { case 0x00: conn->handle = __le16_to_cpu(ev->handle); + if (conn->handle > HCI_CONN_HANDLE_MAX) { + bt_dev_err(hdev, "Invalid handle: 0x%4.4x > 0x%4.4x", + conn->handle, HCI_CONN_HANDLE_MAX); + status = HCI_ERROR_INVALID_PARAMETERS; + conn->state = BT_CLOSED; + break; + } + conn->state = BT_CONNECTED; conn->type = ev->link_type; @@ -4775,8 +4782,8 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev, void *data, } } - hci_connect_cfm(conn, ev->status); - if (ev->status) + hci_connect_cfm(conn, status); + if (status) hci_conn_del(conn); unlock: @@ -5527,11 +5534,6 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status, struct smp_irk *irk; u8 addr_type; - if (handle > HCI_CONN_HANDLE_MAX) { - bt_dev_err(hdev, "Ignoring HCI_LE_Connection_Complete for invalid handle"); - return; - } - hci_dev_lock(hdev); /* All controllers implicitly stop advertising in the event of a @@ -5541,6 +5543,12 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status, conn = hci_lookup_le_connect(hdev); if (!conn) { + /* In case of error status and there is no connection pending + * just unlock as there is nothing to cleanup. + */ + if (status) + goto unlock; + conn = hci_conn_add(hdev, LE_LINK, bdaddr, role); if (!conn) { bt_dev_err(hdev, "no memory for new connection"); @@ -5603,8 +5611,14 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status, conn->dst_type = ev_bdaddr_type(hdev, conn->dst_type, NULL); + if (handle > HCI_CONN_HANDLE_MAX) { + bt_dev_err(hdev, "Invalid handle: 0x%4.4x > 0x%4.4x", handle, + HCI_CONN_HANDLE_MAX); + status = HCI_ERROR_INVALID_PARAMETERS; + } + if (status) { - hci_le_conn_failed(conn, status); + hci_conn_failed(conn, status); goto unlock; } diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 8f4c5698913d..13600bf120b0 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -4408,12 +4408,21 @@ static int hci_reject_conn_sync(struct hci_dev *hdev, struct hci_conn *conn, static int hci_abort_conn_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 reason) { + int err; + switch (conn->state) { case BT_CONNECTED: case BT_CONFIG: return hci_disconnect_sync(hdev, conn, reason); case BT_CONNECT: - return hci_connect_cancel_sync(hdev, conn); + err = hci_connect_cancel_sync(hdev, conn); + /* Cleanup hci_conn object if it cannot be cancelled as it + * likelly means the controller and host stack are out of sync. + */ + if (err) + hci_conn_failed(conn, err); + + return err; case BT_CONNECT2: return hci_reject_conn_sync(hdev, conn, reason); default: diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 7a1579c91432..8d54fef9a568 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -108,6 +108,7 @@ struct xdp_test_data { struct page_pool *pp; struct xdp_frame **frames; struct sk_buff **skbs; + struct xdp_mem_info mem; u32 batch_size; u32 frame_cnt; }; @@ -147,7 +148,6 @@ static void xdp_test_run_init_page(struct page *page, void *arg) static int xdp_test_run_setup(struct xdp_test_data *xdp, struct xdp_buff *orig_ctx) { - struct xdp_mem_info mem = {}; struct page_pool *pp; int err = -ENOMEM; struct page_pool_params pp_params = { @@ -174,7 +174,7 @@ static int xdp_test_run_setup(struct xdp_test_data *xdp, struct xdp_buff *orig_c } /* will copy 'mem.id' into pp->xdp_mem_id */ - err = xdp_reg_mem_model(&mem, MEM_TYPE_PAGE_POOL, pp); + err = xdp_reg_mem_model(&xdp->mem, MEM_TYPE_PAGE_POOL, pp); if (err) goto err_mmodel; @@ -202,6 +202,7 @@ err_skbs: static void xdp_test_run_teardown(struct xdp_test_data *xdp) { + xdp_unreg_mem_model(&xdp->mem); page_pool_destroy(xdp->pp); kfree(xdp->frames); kfree(xdp->skbs); diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c index 81400e0b26ac..8f3d76c751dd 100644 --- a/net/bridge/br_switchdev.c +++ b/net/bridge/br_switchdev.c @@ -354,6 +354,8 @@ static int br_switchdev_vlan_attr_replay(struct net_device *br_dev, attr.orig_dev = br_dev; vg = br_vlan_group(br); + if (!vg) + return 0; list_for_each_entry(v, &vg->vlan_list, vlist) { if (v->msti) { diff --git a/net/core/dev.c b/net/core/dev.c index e09cd202fc57..7a61690d3137 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -10358,7 +10358,7 @@ void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64, } EXPORT_SYMBOL(netdev_stats_to_stats64); -struct net_device_core_stats *netdev_core_stats_alloc(struct net_device *dev) +struct net_device_core_stats __percpu *netdev_core_stats_alloc(struct net_device *dev) { struct net_device_core_stats __percpu *p; @@ -10369,11 +10369,7 @@ struct net_device_core_stats *netdev_core_stats_alloc(struct net_device *dev) free_percpu(p); /* This READ_ONCE() pairs with the cmpxchg() above */ - p = READ_ONCE(dev->core_stats); - if (!p) - return NULL; - - return this_cpu_ptr(p); + return READ_ONCE(dev->core_stats); } EXPORT_SYMBOL(netdev_core_stats_alloc); @@ -10410,10 +10406,10 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev, for_each_possible_cpu(i) { core_stats = per_cpu_ptr(p, i); - storage->rx_dropped += local_read(&core_stats->rx_dropped); - storage->tx_dropped += local_read(&core_stats->tx_dropped); - storage->rx_nohandler += local_read(&core_stats->rx_nohandler); - storage->rx_otherhost_dropped += local_read(&core_stats->rx_otherhost_dropped); + storage->rx_dropped += READ_ONCE(core_stats->rx_dropped); + storage->tx_dropped += READ_ONCE(core_stats->tx_dropped); + storage->rx_nohandler += READ_ONCE(core_stats->rx_nohandler); + storage->rx_otherhost_dropped += READ_ONCE(core_stats->rx_otherhost_dropped); } } return storage; diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c index 349480ef68a5..8b6b5e72b217 100644 --- a/net/core/lwt_bpf.c +++ b/net/core/lwt_bpf.c @@ -159,10 +159,8 @@ static int bpf_output(struct net *net, struct sock *sk, struct sk_buff *skb) return dst->lwtstate->orig_output(net, sk, skb); } -static int xmit_check_hhlen(struct sk_buff *skb) +static int xmit_check_hhlen(struct sk_buff *skb, int hh_len) { - int hh_len = skb_dst(skb)->dev->hard_header_len; - if (skb_headroom(skb) < hh_len) { int nhead = HH_DATA_ALIGN(hh_len - skb_headroom(skb)); @@ -274,6 +272,7 @@ static int bpf_xmit(struct sk_buff *skb) bpf = bpf_lwt_lwtunnel(dst->lwtstate); if (bpf->xmit.prog) { + int hh_len = dst->dev->hard_header_len; __be16 proto = skb->protocol; int ret; @@ -291,7 +290,7 @@ static int bpf_xmit(struct sk_buff *skb) /* If the header was expanded, headroom might be too * small for L2 header to come, expand as needed. */ - ret = xmit_check_hhlen(skb); + ret = xmit_check_hhlen(skb, hh_len); if (unlikely(ret)) return ret; diff --git a/net/dsa/port.c b/net/dsa/port.c index 8856ed6afd05..48e5a309ca5c 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -1652,8 +1652,10 @@ int dsa_port_link_register_of(struct dsa_port *dp) if (ds->ops->phylink_mac_link_down) ds->ops->phylink_mac_link_down(ds, port, MLO_AN_FIXED, PHY_INTERFACE_MODE_NA); + of_node_put(phy_np); return dsa_port_phylink_register(dp); } + of_node_put(phy_np); return 0; } diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 63da683d4660..5ee0aced9410 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -285,7 +285,7 @@ static void dsa_port_manage_cpu_flood(struct dsa_port *dp) if (other_dp->slave->flags & IFF_ALLMULTI) flags.val |= BR_MCAST_FLOOD; if (other_dp->slave->flags & IFF_PROMISC) - flags.val |= BR_FLOOD; + flags.val |= BR_FLOOD | BR_MCAST_FLOOD; } err = dsa_port_pre_bridge_flags(dp, flags, NULL); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 365caebf51ab..aacee9dd771b 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -459,14 +459,12 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev, __be16 proto) { struct ip_tunnel *tunnel = netdev_priv(dev); - - if (tunnel->parms.o_flags & TUNNEL_SEQ) - tunnel->o_seqno++; + __be16 flags = tunnel->parms.o_flags; /* Push GRE header. */ gre_build_header(skb, tunnel->tun_hlen, - tunnel->parms.o_flags, proto, tunnel->parms.o_key, - htonl(tunnel->o_seqno)); + flags, proto, tunnel->parms.o_key, + (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0); ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol); } @@ -504,7 +502,7 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev, (TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ); gre_build_header(skb, tunnel_hlen, flags, proto, tunnel_id_to_key32(tun_info->key.tun_id), - (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0); + (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0); ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen); @@ -581,7 +579,7 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev) } gre_build_header(skb, 8, TUNNEL_SEQ, - proto, 0, htonl(tunnel->o_seqno++)); + proto, 0, htonl(atomic_fetch_inc(&tunnel->o_seqno))); ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen); diff --git a/net/ipv4/netfilter/nf_flow_table_ipv4.c b/net/ipv4/netfilter/nf_flow_table_ipv4.c deleted file mode 100644 index e69de29bb2d1..000000000000 --- a/net/ipv4/netfilter/nf_flow_table_ipv4.c +++ /dev/null diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 2cb3b852d148..f33c31dd7366 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -281,6 +281,7 @@ bool cookie_ecn_ok(const struct tcp_options_received *tcp_opt, EXPORT_SYMBOL(cookie_ecn_ok); struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops, + const struct tcp_request_sock_ops *af_ops, struct sock *sk, struct sk_buff *skb) { @@ -297,6 +298,10 @@ struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops, return NULL; treq = tcp_rsk(req); + + /* treq->af_specific might be used to perform TCP_MD5 lookup */ + treq->af_specific = af_ops; + treq->syn_tos = TCP_SKB_CB(skb)->ip_dsfield; #if IS_ENABLED(CONFIG_MPTCP) treq->is_mptcp = sk_is_mptcp(sk); @@ -364,7 +369,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) goto out; ret = NULL; - req = cookie_tcp_reqsk_alloc(&tcp_request_sock_ops, sk, skb); + req = cookie_tcp_reqsk_alloc(&tcp_request_sock_ops, + &tcp_request_sock_ipv4_ops, sk, skb); if (!req) goto out; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index daff631b9486..cc3de8dc5797 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3867,7 +3867,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) tcp_process_tlp_ack(sk, ack, flag); if (tcp_ack_is_dubious(sk, flag)) { - if (!(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP))) { + if (!(flag & (FLAG_SND_UNA_ADVANCED | + FLAG_NOT_DUP | FLAG_DSACKING_ACK))) { num_dupack = 1; /* Consider if pure acks were aggregated in tcp_add_backlog() */ if (!(flag & FLAG_DATA)) @@ -5450,7 +5451,17 @@ static void tcp_new_space(struct sock *sk) INDIRECT_CALL_1(sk->sk_write_space, sk_stream_write_space, sk); } -static void tcp_check_space(struct sock *sk) +/* Caller made space either from: + * 1) Freeing skbs in rtx queues (after tp->snd_una has advanced) + * 2) Sent skbs from output queue (and thus advancing tp->snd_nxt) + * + * We might be able to generate EPOLLOUT to the application if: + * 1) Space consumed in output/rtx queues is below sk->sk_sndbuf/2 + * 2) notsent amount (tp->write_seq - tp->snd_nxt) became + * small enough that tcp_stream_memory_free() decides it + * is time to generate EPOLLOUT. + */ +void tcp_check_space(struct sock *sk) { /* pairs with tcp_poll() */ smp_mb(); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 6366df7aaf2a..6854bb1fb32b 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -531,7 +531,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->tsoffset = treq->ts_off; #ifdef CONFIG_TCP_MD5SIG newtp->md5sig_info = NULL; /*XXX*/ - if (newtp->af_specific->md5_lookup(sk, newsk)) + if (treq->af_specific->req_md5_lookup(sk, req_to_sk(req))) newtp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED; #endif if (skb->len >= TCP_MSS_DEFAULT + newtp->tcp_header_len) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index c221f3bce975..5f91a9536e00 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -82,6 +82,7 @@ static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb) NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT, tcp_skb_pcount(skb)); + tcp_check_space(sk); } /* SND.NXT, if window was not shrunk or the amount of shrunk was less than one diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c index 617b8187c03d..a8f6d9d06f2e 100644 --- a/net/ipv4/tcp_rate.c +++ b/net/ipv4/tcp_rate.c @@ -74,27 +74,32 @@ void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb) * * If an ACK (s)acks multiple skbs (e.g., stretched-acks), this function is * called multiple times. We favor the information from the most recently - * sent skb, i.e., the skb with the highest prior_delivered count. + * sent skb, i.e., the skb with the most recently sent time and the highest + * sequence. */ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, struct rate_sample *rs) { struct tcp_sock *tp = tcp_sk(sk); struct tcp_skb_cb *scb = TCP_SKB_CB(skb); + u64 tx_tstamp; if (!scb->tx.delivered_mstamp) return; + tx_tstamp = tcp_skb_timestamp_us(skb); if (!rs->prior_delivered || - after(scb->tx.delivered, rs->prior_delivered)) { + tcp_skb_sent_after(tx_tstamp, tp->first_tx_mstamp, + scb->end_seq, rs->last_end_seq)) { rs->prior_delivered_ce = scb->tx.delivered_ce; rs->prior_delivered = scb->tx.delivered; rs->prior_mstamp = scb->tx.delivered_mstamp; rs->is_app_limited = scb->tx.is_app_limited; rs->is_retrans = scb->sacked & TCPCB_RETRANS; + rs->last_end_seq = scb->end_seq; /* Record send time of most recently ACKed packet: */ - tp->first_tx_mstamp = tcp_skb_timestamp_us(skb); + tp->first_tx_mstamp = tx_tstamp; /* Find the duration of the "send phase" of this window: */ rs->interval_us = tcp_stamp_us_delta(tp->first_tx_mstamp, scb->tx.first_tx_mstamp); diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 976236736146..5136959b3dc5 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -724,6 +724,7 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb, { struct ip6_tnl *tunnel = netdev_priv(dev); __be16 protocol; + __be16 flags; if (dev->type == ARPHRD_ETHER) IPCB(skb)->flags = 0; @@ -739,7 +740,6 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb, if (tunnel->parms.collect_md) { struct ip_tunnel_info *tun_info; const struct ip_tunnel_key *key; - __be16 flags; int tun_hlen; tun_info = skb_tunnel_info_txcheck(skb); @@ -766,19 +766,19 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb, gre_build_header(skb, tun_hlen, flags, protocol, tunnel_id_to_key32(tun_info->key.tun_id), - (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) + (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0); } else { - if (tunnel->parms.o_flags & TUNNEL_SEQ) - tunnel->o_seqno++; - if (skb_cow_head(skb, dev->needed_headroom ?: tunnel->hlen)) return -ENOMEM; - gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags, + flags = tunnel->parms.o_flags; + + gre_build_header(skb, tunnel->tun_hlen, flags, protocol, tunnel->parms.o_key, - htonl(tunnel->o_seqno)); + (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) + : 0); } return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu, @@ -1056,7 +1056,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, /* Push GRE header. */ proto = (t->parms.erspan_ver == 1) ? htons(ETH_P_ERSPAN) : htons(ETH_P_ERSPAN2); - gre_build_header(skb, 8, TUNNEL_SEQ, proto, 0, htonl(t->o_seqno++)); + gre_build_header(skb, 8, TUNNEL_SEQ, proto, 0, htonl(atomic_fetch_inc(&t->o_seqno))); /* TooBig packet may have updated dst->dev's mtu */ if (!t->parms.collect_md && dst && dst_mtu(dst) > dst->dev->mtu) diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 1da332450d98..8ce60ab89015 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -24,14 +24,13 @@ int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff { const struct ipv6hdr *iph = ipv6_hdr(skb); struct sock *sk = sk_to_full_sk(sk_partial); + struct net_device *dev = skb_dst(skb)->dev; struct flow_keys flkeys; unsigned int hh_len; struct dst_entry *dst; int strict = (ipv6_addr_type(&iph->daddr) & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)); struct flowi6 fl6 = { - .flowi6_oif = sk && sk->sk_bound_dev_if ? sk->sk_bound_dev_if : - strict ? skb_dst(skb)->dev->ifindex : 0, .flowi6_mark = skb->mark, .flowi6_uid = sock_net_uid(net, sk), .daddr = iph->daddr, @@ -39,6 +38,13 @@ int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff }; int err; + if (sk && sk->sk_bound_dev_if) + fl6.flowi6_oif = sk->sk_bound_dev_if; + else if (strict) + fl6.flowi6_oif = dev->ifindex; + else + fl6.flowi6_oif = l3mdev_master_ifindex(dev); + fib6_rules_early_flow_dissect(net, skb, &fl6, &flkeys); dst = ip6_route_output(net, sk, &fl6); err = dst->error; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index d1b61d00368e..9cc123f000fb 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -170,7 +170,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) goto out; ret = NULL; - req = cookie_tcp_reqsk_alloc(&tcp6_request_sock_ops, sk, skb); + req = cookie_tcp_reqsk_alloc(&tcp6_request_sock_ops, + &tcp_request_sock_ipv6_ops, sk, skb); if (!req) goto out; diff --git a/net/mctp/device.c b/net/mctp/device.c index f49be882e98e..99a3bda8852f 100644 --- a/net/mctp/device.c +++ b/net/mctp/device.c @@ -313,6 +313,7 @@ void mctp_dev_hold(struct mctp_dev *mdev) void mctp_dev_put(struct mctp_dev *mdev) { if (mdev && refcount_dec_and_test(&mdev->refs)) { + kfree(mdev->addrs); dev_put(mdev->dev); kfree_rcu(mdev, rcu); } @@ -441,7 +442,6 @@ static void mctp_unregister(struct net_device *dev) mctp_route_remove_dev(mdev); mctp_neigh_remove_dev(mdev); - kfree(mdev->addrs); mctp_dev_put(mdev); } diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 2c467c422dc6..fb67f1ca2495 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -1495,7 +1495,7 @@ int __init ip_vs_conn_init(void) pr_info("Connection hash table configured " "(size=%d, memory=%ldKbytes)\n", ip_vs_conn_tab_size, - (long)(ip_vs_conn_tab_size*sizeof(struct list_head))/1024); + (long)(ip_vs_conn_tab_size*sizeof(*ip_vs_conn_tab))/1024); IP_VS_DBG(0, "Each connection entry needs %zd bytes at least\n", sizeof(struct ip_vs_conn)); diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 8ec55cd72572..204a5cdff5b1 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -556,24 +556,14 @@ static bool tcp_in_window(struct nf_conn *ct, } } - } else if (((state->state == TCP_CONNTRACK_SYN_SENT - && dir == IP_CT_DIR_ORIGINAL) - || (state->state == TCP_CONNTRACK_SYN_RECV - && dir == IP_CT_DIR_REPLY)) - && after(end, sender->td_end)) { + } else if (tcph->syn && + after(end, sender->td_end) && + (state->state == TCP_CONNTRACK_SYN_SENT || + state->state == TCP_CONNTRACK_SYN_RECV)) { /* * RFC 793: "if a TCP is reinitialized ... then it need * not wait at all; it must only be sure to use sequence * numbers larger than those recently used." - */ - sender->td_end = - sender->td_maxend = end; - sender->td_maxwin = (win == 0 ? 1 : win); - - tcp_options(skb, dataoff, tcph, sender); - } else if (tcph->syn && dir == IP_CT_DIR_REPLY && - state->state == TCP_CONNTRACK_SYN_SENT) { - /* Retransmitted syn-ack, or syn (simultaneous open). * * Re-init state for this direction, just like for the first * syn(-ack) reply, it might differ in seq, ack or tcp options. @@ -581,7 +571,8 @@ static bool tcp_in_window(struct nf_conn *ct, tcp_init_sender(sender, receiver, skb, dataoff, tcph, end, win); - if (!tcph->ack) + + if (dir == IP_CT_DIR_REPLY && !tcph->ack) return true; } diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 3e1afd10a9b6..55aa55b252b2 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -823,7 +823,7 @@ static struct ctl_table nf_ct_sysctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, -#if IS_ENABLED(CONFIG_NFT_FLOW_OFFLOAD) +#if IS_ENABLED(CONFIG_NF_FLOW_TABLE) [NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD] = { .procname = "nf_flowtable_udp_timeout", .maxlen = sizeof(unsigned int), diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index d600a566da32..7325bee7d144 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -349,7 +349,11 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, *ext = &rbe->ext; return -EEXIST; } else { - p = &parent->rb_left; + overlap = false; + if (nft_rbtree_interval_end(rbe)) + p = &parent->rb_left; + else + p = &parent->rb_right; } } diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c index 6d9e8e0a3a7d..05ae5a338b6f 100644 --- a/net/netfilter/nft_socket.c +++ b/net/netfilter/nft_socket.c @@ -54,6 +54,32 @@ nft_sock_get_eval_cgroupv2(u32 *dest, struct sock *sk, const struct nft_pktinfo } #endif +static struct sock *nft_socket_do_lookup(const struct nft_pktinfo *pkt) +{ + const struct net_device *indev = nft_in(pkt); + const struct sk_buff *skb = pkt->skb; + struct sock *sk = NULL; + + if (!indev) + return NULL; + + switch (nft_pf(pkt)) { + case NFPROTO_IPV4: + sk = nf_sk_lookup_slow_v4(nft_net(pkt), skb, indev); + break; +#if IS_ENABLED(CONFIG_NF_TABLES_IPV6) + case NFPROTO_IPV6: + sk = nf_sk_lookup_slow_v6(nft_net(pkt), skb, indev); + break; +#endif + default: + WARN_ON_ONCE(1); + break; + } + + return sk; +} + static void nft_socket_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) @@ -67,20 +93,7 @@ static void nft_socket_eval(const struct nft_expr *expr, sk = NULL; if (!sk) - switch(nft_pf(pkt)) { - case NFPROTO_IPV4: - sk = nf_sk_lookup_slow_v4(nft_net(pkt), skb, nft_in(pkt)); - break; -#if IS_ENABLED(CONFIG_NF_TABLES_IPV6) - case NFPROTO_IPV6: - sk = nf_sk_lookup_slow_v6(nft_net(pkt), skb, nft_in(pkt)); - break; -#endif - default: - WARN_ON_ONCE(1); - regs->verdict.code = NFT_BREAK; - return; - } + sk = nft_socket_do_lookup(pkt); if (!sk) { regs->verdict.code = NFT_BREAK; @@ -224,6 +237,16 @@ static bool nft_socket_reduce(struct nft_regs_track *track, return nft_expr_reduce_bitwise(track, expr); } +static int nft_socket_validate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data) +{ + return nft_chain_validate_hooks(ctx->chain, + (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_LOCAL_OUT)); +} + static struct nft_expr_type nft_socket_type; static const struct nft_expr_ops nft_socket_ops = { .type = &nft_socket_type, @@ -231,6 +254,7 @@ static const struct nft_expr_ops nft_socket_ops = { .eval = nft_socket_eval, .init = nft_socket_init, .dump = nft_socket_dump, + .validate = nft_socket_validate, .reduce = nft_socket_reduce, }; diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index b3815b568e8e..463c4a58d2c3 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -458,6 +458,10 @@ void sctp_generate_reconf_event(struct timer_list *t) goto out_unlock; } + /* This happens when the response arrives after the timer is triggered. */ + if (!asoc->strreset_chunk) + goto out_unlock; + error = sctp_do_sm(net, SCTP_EVENT_T_TIMEOUT, SCTP_ST_TIMEOUT(SCTP_EVENT_TIMEOUT_RECONF), asoc->state, asoc->ep, asoc, diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index fc7b6eb22143..fce16b9d6e1a 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -243,11 +243,27 @@ struct proto smc_proto6 = { }; EXPORT_SYMBOL_GPL(smc_proto6); +static void smc_fback_restore_callbacks(struct smc_sock *smc) +{ + struct sock *clcsk = smc->clcsock->sk; + + write_lock_bh(&clcsk->sk_callback_lock); + clcsk->sk_user_data = NULL; + + smc_clcsock_restore_cb(&clcsk->sk_state_change, &smc->clcsk_state_change); + smc_clcsock_restore_cb(&clcsk->sk_data_ready, &smc->clcsk_data_ready); + smc_clcsock_restore_cb(&clcsk->sk_write_space, &smc->clcsk_write_space); + smc_clcsock_restore_cb(&clcsk->sk_error_report, &smc->clcsk_error_report); + + write_unlock_bh(&clcsk->sk_callback_lock); +} + static void smc_restore_fallback_changes(struct smc_sock *smc) { if (smc->clcsock->file) { /* non-accepted sockets have no file yet */ smc->clcsock->file->private_data = smc->sk.sk_socket; smc->clcsock->file = NULL; + smc_fback_restore_callbacks(smc); } } @@ -373,6 +389,7 @@ static struct sock *smc_sock_alloc(struct net *net, struct socket *sock, sk->sk_prot->hash(sk); sk_refcnt_debug_inc(sk); mutex_init(&smc->clcsock_release_lock); + smc_init_saved_callbacks(smc); return sk; } @@ -744,47 +761,73 @@ out: static void smc_fback_state_change(struct sock *clcsk) { - struct smc_sock *smc = - smc_clcsock_user_data(clcsk); + struct smc_sock *smc; - if (!smc) - return; - smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_state_change); + read_lock_bh(&clcsk->sk_callback_lock); + smc = smc_clcsock_user_data(clcsk); + if (smc) + smc_fback_forward_wakeup(smc, clcsk, + smc->clcsk_state_change); + read_unlock_bh(&clcsk->sk_callback_lock); } static void smc_fback_data_ready(struct sock *clcsk) { - struct smc_sock *smc = - smc_clcsock_user_data(clcsk); + struct smc_sock *smc; - if (!smc) - return; - smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_data_ready); + read_lock_bh(&clcsk->sk_callback_lock); + smc = smc_clcsock_user_data(clcsk); + if (smc) + smc_fback_forward_wakeup(smc, clcsk, + smc->clcsk_data_ready); + read_unlock_bh(&clcsk->sk_callback_lock); } static void smc_fback_write_space(struct sock *clcsk) { - struct smc_sock *smc = - smc_clcsock_user_data(clcsk); + struct smc_sock *smc; - if (!smc) - return; - smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_write_space); + read_lock_bh(&clcsk->sk_callback_lock); + smc = smc_clcsock_user_data(clcsk); + if (smc) + smc_fback_forward_wakeup(smc, clcsk, + smc->clcsk_write_space); + read_unlock_bh(&clcsk->sk_callback_lock); } static void smc_fback_error_report(struct sock *clcsk) { - struct smc_sock *smc = - smc_clcsock_user_data(clcsk); + struct smc_sock *smc; - if (!smc) - return; - smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_error_report); + read_lock_bh(&clcsk->sk_callback_lock); + smc = smc_clcsock_user_data(clcsk); + if (smc) + smc_fback_forward_wakeup(smc, clcsk, + smc->clcsk_error_report); + read_unlock_bh(&clcsk->sk_callback_lock); +} + +static void smc_fback_replace_callbacks(struct smc_sock *smc) +{ + struct sock *clcsk = smc->clcsock->sk; + + write_lock_bh(&clcsk->sk_callback_lock); + clcsk->sk_user_data = (void *)((uintptr_t)smc | SK_USER_DATA_NOCOPY); + + smc_clcsock_replace_cb(&clcsk->sk_state_change, smc_fback_state_change, + &smc->clcsk_state_change); + smc_clcsock_replace_cb(&clcsk->sk_data_ready, smc_fback_data_ready, + &smc->clcsk_data_ready); + smc_clcsock_replace_cb(&clcsk->sk_write_space, smc_fback_write_space, + &smc->clcsk_write_space); + smc_clcsock_replace_cb(&clcsk->sk_error_report, smc_fback_error_report, + &smc->clcsk_error_report); + + write_unlock_bh(&clcsk->sk_callback_lock); } static int smc_switch_to_fallback(struct smc_sock *smc, int reason_code) { - struct sock *clcsk; int rc = 0; mutex_lock(&smc->clcsock_release_lock); @@ -792,10 +835,7 @@ static int smc_switch_to_fallback(struct smc_sock *smc, int reason_code) rc = -EBADF; goto out; } - clcsk = smc->clcsock->sk; - if (smc->use_fallback) - goto out; smc->use_fallback = true; smc->fallback_rsn = reason_code; smc_stat_fallback(smc); @@ -810,18 +850,7 @@ static int smc_switch_to_fallback(struct smc_sock *smc, int reason_code) * in smc sk->sk_wq and they should be woken up * as clcsock's wait queue is woken up. */ - smc->clcsk_state_change = clcsk->sk_state_change; - smc->clcsk_data_ready = clcsk->sk_data_ready; - smc->clcsk_write_space = clcsk->sk_write_space; - smc->clcsk_error_report = clcsk->sk_error_report; - - clcsk->sk_state_change = smc_fback_state_change; - clcsk->sk_data_ready = smc_fback_data_ready; - clcsk->sk_write_space = smc_fback_write_space; - clcsk->sk_error_report = smc_fback_error_report; - - smc->clcsock->sk->sk_user_data = - (void *)((uintptr_t)smc | SK_USER_DATA_NOCOPY); + smc_fback_replace_callbacks(smc); } out: mutex_unlock(&smc->clcsock_release_lock); @@ -1475,6 +1504,8 @@ static void smc_connect_work(struct work_struct *work) smc->sk.sk_state = SMC_CLOSED; if (rc == -EPIPE || rc == -EAGAIN) smc->sk.sk_err = EPIPE; + else if (rc == -ECONNREFUSED) + smc->sk.sk_err = ECONNREFUSED; else if (signal_pending(current)) smc->sk.sk_err = -sock_intr_errno(timeo); sock_put(&smc->sk); /* passive closing */ @@ -1594,6 +1625,19 @@ static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc) * function; switch it back to the original sk_data_ready function */ new_clcsock->sk->sk_data_ready = lsmc->clcsk_data_ready; + + /* if new clcsock has also inherited the fallback-specific callback + * functions, switch them back to the original ones. + */ + if (lsmc->use_fallback) { + if (lsmc->clcsk_state_change) + new_clcsock->sk->sk_state_change = lsmc->clcsk_state_change; + if (lsmc->clcsk_write_space) + new_clcsock->sk->sk_write_space = lsmc->clcsk_write_space; + if (lsmc->clcsk_error_report) + new_clcsock->sk->sk_error_report = lsmc->clcsk_error_report; + } + (*new_smc)->clcsock = new_clcsock; out: return rc; @@ -2353,17 +2397,20 @@ out: static void smc_clcsock_data_ready(struct sock *listen_clcsock) { - struct smc_sock *lsmc = - smc_clcsock_user_data(listen_clcsock); + struct smc_sock *lsmc; + read_lock_bh(&listen_clcsock->sk_callback_lock); + lsmc = smc_clcsock_user_data(listen_clcsock); if (!lsmc) - return; + goto out; lsmc->clcsk_data_ready(listen_clcsock); if (lsmc->sk.sk_state == SMC_LISTEN) { sock_hold(&lsmc->sk); /* sock_put in smc_tcp_listen_work() */ if (!queue_work(smc_tcp_ls_wq, &lsmc->tcp_listen_work)) sock_put(&lsmc->sk); } +out: + read_unlock_bh(&listen_clcsock->sk_callback_lock); } static int smc_listen(struct socket *sock, int backlog) @@ -2395,10 +2442,12 @@ static int smc_listen(struct socket *sock, int backlog) /* save original sk_data_ready function and establish * smc-specific sk_data_ready function */ - smc->clcsk_data_ready = smc->clcsock->sk->sk_data_ready; - smc->clcsock->sk->sk_data_ready = smc_clcsock_data_ready; + write_lock_bh(&smc->clcsock->sk->sk_callback_lock); smc->clcsock->sk->sk_user_data = (void *)((uintptr_t)smc | SK_USER_DATA_NOCOPY); + smc_clcsock_replace_cb(&smc->clcsock->sk->sk_data_ready, + smc_clcsock_data_ready, &smc->clcsk_data_ready); + write_unlock_bh(&smc->clcsock->sk->sk_callback_lock); /* save original ops */ smc->ori_af_ops = inet_csk(smc->clcsock->sk)->icsk_af_ops; @@ -2413,7 +2462,11 @@ static int smc_listen(struct socket *sock, int backlog) rc = kernel_listen(smc->clcsock, backlog); if (rc) { - smc->clcsock->sk->sk_data_ready = smc->clcsk_data_ready; + write_lock_bh(&smc->clcsock->sk->sk_callback_lock); + smc_clcsock_restore_cb(&smc->clcsock->sk->sk_data_ready, + &smc->clcsk_data_ready); + smc->clcsock->sk->sk_user_data = NULL; + write_unlock_bh(&smc->clcsock->sk->sk_callback_lock); goto out; } sk->sk_max_ack_backlog = backlog; diff --git a/net/smc/smc.h b/net/smc/smc.h index ea0620529ebe..5ed765ea0c73 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -288,12 +288,41 @@ static inline struct smc_sock *smc_sk(const struct sock *sk) return (struct smc_sock *)sk; } +static inline void smc_init_saved_callbacks(struct smc_sock *smc) +{ + smc->clcsk_state_change = NULL; + smc->clcsk_data_ready = NULL; + smc->clcsk_write_space = NULL; + smc->clcsk_error_report = NULL; +} + static inline struct smc_sock *smc_clcsock_user_data(const struct sock *clcsk) { return (struct smc_sock *) ((uintptr_t)clcsk->sk_user_data & ~SK_USER_DATA_NOCOPY); } +/* save target_cb in saved_cb, and replace target_cb with new_cb */ +static inline void smc_clcsock_replace_cb(void (**target_cb)(struct sock *), + void (*new_cb)(struct sock *), + void (**saved_cb)(struct sock *)) +{ + /* only save once */ + if (!*saved_cb) + *saved_cb = *target_cb; + *target_cb = new_cb; +} + +/* restore target_cb to saved_cb, and reset saved_cb to NULL */ +static inline void smc_clcsock_restore_cb(void (**target_cb)(struct sock *), + void (**saved_cb)(struct sock *)) +{ + if (!*saved_cb) + return; + *target_cb = *saved_cb; + *saved_cb = NULL; +} + extern struct workqueue_struct *smc_hs_wq; /* wq for handshake work */ extern struct workqueue_struct *smc_close_wq; /* wq for close work */ diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c index 676cb2333d3c..31db7438857c 100644 --- a/net/smc/smc_close.c +++ b/net/smc/smc_close.c @@ -214,8 +214,11 @@ again: sk->sk_state = SMC_CLOSED; sk->sk_state_change(sk); /* wake up accept */ if (smc->clcsock && smc->clcsock->sk) { - smc->clcsock->sk->sk_data_ready = smc->clcsk_data_ready; + write_lock_bh(&smc->clcsock->sk->sk_callback_lock); + smc_clcsock_restore_cb(&smc->clcsock->sk->sk_data_ready, + &smc->clcsk_data_ready); smc->clcsock->sk->sk_user_data = NULL; + write_unlock_bh(&smc->clcsock->sk->sk_callback_lock); rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR); } smc_close_cleanup_listen(sk); diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 5e4bd8ba48d3..b12f81a2b44c 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -483,11 +483,13 @@ handle_error: copy = min_t(size_t, size, (pfrag->size - pfrag->offset)); copy = min_t(size_t, copy, (max_open_record_len - record->len)); - rc = tls_device_copy_data(page_address(pfrag->page) + - pfrag->offset, copy, msg_iter); - if (rc) - goto handle_error; - tls_append_frag(record, pfrag, copy); + if (copy) { + rc = tls_device_copy_data(page_address(pfrag->page) + + pfrag->offset, copy, msg_iter); + if (rc) + goto handle_error; + tls_append_frag(record, pfrag, copy); + } size -= copy; if (!size) { diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 040c73345b7c..e0a4526ab66b 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -639,7 +639,7 @@ static int __xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len if (sk_can_busy_loop(sk)) sk_busy_loop(sk, 1); /* only support non-blocking sockets */ - if (xsk_no_wakeup(sk)) + if (xs->zc && xsk_no_wakeup(sk)) return 0; pool = xs->pool; @@ -967,6 +967,19 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) xp_get_pool(umem_xs->pool); xs->pool = umem_xs->pool; + + /* If underlying shared umem was created without Tx + * ring, allocate Tx descs array that Tx batching API + * utilizes + */ + if (xs->tx && !xs->pool->tx_descs) { + err = xp_alloc_tx_descs(xs->pool, xs); + if (err) { + xp_put_pool(xs->pool); + sockfd_put(sock); + goto out_unlock; + } + } } xdp_get_umem(umem_xs->umem); diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index af040ffa14ff..87bdd71c7bb6 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -42,6 +42,16 @@ void xp_destroy(struct xsk_buff_pool *pool) kvfree(pool); } +int xp_alloc_tx_descs(struct xsk_buff_pool *pool, struct xdp_sock *xs) +{ + pool->tx_descs = kvcalloc(xs->tx->nentries, sizeof(*pool->tx_descs), + GFP_KERNEL); + if (!pool->tx_descs) + return -ENOMEM; + + return 0; +} + struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs, struct xdp_umem *umem) { @@ -59,11 +69,9 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs, if (!pool->heads) goto out; - if (xs->tx) { - pool->tx_descs = kcalloc(xs->tx->nentries, sizeof(*pool->tx_descs), GFP_KERNEL); - if (!pool->tx_descs) + if (xs->tx) + if (xp_alloc_tx_descs(pool, xs)) goto out; - } pool->chunk_mask = ~((u64)umem->chunk_size - 1); pool->addrs_cnt = umem->size; |