| author | David S. Miller <davem@davemloft.net> | 2020-07-11 00:46:00 -0700 |
|---|---|---|
| committer | David S. Miller <davem@davemloft.net> | 2020-07-11 00:46:00 -0700 |
| commit | 71930d61025e7d0254f3c682cb1b5242e0499cf3 (patch) | |
| tree | 43b642f15d33ae6ba08a31ddf2d50e2915868a5e /net | |
| parent | a594920f8747fa032c784c3660d6cd5a8ab291f8 (diff) | |
| parent | 1df0d8960499e58963fd6c8ac75e544f2b417b29 (diff) | |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
All conflicts seemed rather trivial, with some guidance from
Saeed Mahameed on the tc_ct.c one.
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
89 files changed, 608 insertions(+), 445 deletions(-)
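For context on a pattern that recurs throughout the sched/ and filter.c hunks below: the many conversions from tc_skb_protocol(skb) and skb->protocol to skb_protocol(skb, skip_vlan) come from the VLAN header-access fixes pulled in via net (the helper itself lives outside the net/ directory, so it is not shown in this diffstat). A minimal sketch of the helper, approximated from include/linux/skbuff.h of this series — the exact body in the tree may differ:

```c
/* Sketch of skb_protocol(), the helper the hunks below convert to.
 * With skip_vlan == true it walks past any VLAN tags and returns the
 * inner L3 protocol; with skip_vlan == false it reports the protocol
 * of the outermost header, including an accelerated VLAN tag if one
 * is present.
 */
static inline __be16 skb_protocol(const struct sk_buff *skb, bool skip_vlan)
{
	if (!skip_vlan)
		/* VLAN acceleration strips the VLAN header from skb->data */
		return skb_vlan_tag_present(skb) ? skb->vlan_proto :
						   skb->protocol;

	return __vlan_get_protocol(skb);
}
```

Callers that go on to parse the L3 header (sch_cake, sch_dsmark, act_ct, ...) pass skip_vlan = true; callers matching the on-wire protocol, such as __tcf_classify() and fl_classify(), pass false.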
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index c8d6a07e23c5..3dd7c972677b 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -503,11 +503,10 @@ static void vlan_dev_set_lockdep_one(struct net_device *dev, lockdep_set_class(&txq->_xmit_lock, &vlan_netdev_xmit_lock_key); } -static void vlan_dev_set_lockdep_class(struct net_device *dev, int subclass) +static void vlan_dev_set_lockdep_class(struct net_device *dev) { - lockdep_set_class_and_subclass(&dev->addr_list_lock, - &vlan_netdev_addr_lock_key, - subclass); + lockdep_set_class(&dev->addr_list_lock, + &vlan_netdev_addr_lock_key); netdev_for_each_tx_queue(dev, vlan_dev_set_lockdep_one, NULL); } @@ -601,7 +600,7 @@ static int vlan_dev_init(struct net_device *dev) SET_NETDEV_DEVTYPE(dev, &vlan_type); - vlan_dev_set_lockdep_class(dev, dev->lower_level); + vlan_dev_set_lockdep_class(dev); vlan->vlan_pcpu_stats = netdev_alloc_pcpu_stats(struct vlan_pcpu_stats); if (!vlan->vlan_pcpu_stats) diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index bfd4ccd80847..b03c469cd01f 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -147,6 +147,20 @@ int noinline bpf_fentry_test6(u64 a, void *b, short c, int d, void *e, u64 f) return a + (long)b + c + d + (long)e + f; } +struct bpf_fentry_test_t { + struct bpf_fentry_test_t *a; +}; + +int noinline bpf_fentry_test7(struct bpf_fentry_test_t *arg) +{ + return (long)arg; +} + +int noinline bpf_fentry_test8(struct bpf_fentry_test_t *arg) +{ + return (long)arg->a; +} + int noinline bpf_modify_return_test(int a, int *b) { *b += 1; @@ -185,6 +199,7 @@ int bpf_prog_test_run_tracing(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr) { + struct bpf_fentry_test_t arg = {}; u16 side_effect = 0, ret = 0; int b = 2, err = -EFAULT; u32 retval = 0; @@ -197,7 +212,9 @@ int bpf_prog_test_run_tracing(struct bpf_prog *prog, bpf_fentry_test3(4, 5, 6) != 15 || bpf_fentry_test4((void *)7, 8, 9, 10) != 34 || bpf_fentry_test5(11, (void *)12, 13, 14, 15) != 65 || - bpf_fentry_test6(16, (void *)17, 18, 19, (void *)20, 21) != 111) + bpf_fentry_test6(16, (void *)17, 18, 19, (void *)20, 21) != 111 || + bpf_fentry_test7((struct bpf_fentry_test_t *)0) != 0 || + bpf_fentry_test8(&arg) != 0) goto out; break; case BPF_MODIFY_RETURN: diff --git a/net/bpfilter/bpfilter_kern.c b/net/bpfilter/bpfilter_kern.c index c0f0990f30b6..1905e01c3aa9 100644 --- a/net/bpfilter/bpfilter_kern.c +++ b/net/bpfilter/bpfilter_kern.c @@ -50,7 +50,7 @@ static int __bpfilter_process_sockopt(struct sock *sk, int optname, req.len = optlen; if (!bpfilter_ops.info.pid) goto out; - n = __kernel_write(bpfilter_ops.info.pipe_to_umh, &req, sizeof(req), + n = kernel_write(bpfilter_ops.info.pipe_to_umh, &req, sizeof(req), &pos); if (n != sizeof(req)) { pr_err("write fail %zd\n", n); diff --git a/net/bridge/br_mrp.c b/net/bridge/br_mrp.c index 779e1eb75443..90592af9db61 100644 --- a/net/bridge/br_mrp.c +++ b/net/bridge/br_mrp.c @@ -86,7 +86,7 @@ static struct sk_buff *br_mrp_skb_alloc(struct net_bridge_port *p, { struct ethhdr *eth_hdr; struct sk_buff *skb; - u16 *version; + __be16 *version; skb = dev_alloc_skb(MRP_MAX_FRAME_LENGTH); if (!skb) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 83490bf73a13..4c4a93abde68 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1007,7 +1007,7 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br, nsrcs_offset = len + offsetof(struct mld2_grec, grec_nsrcs); if (skb_transport_offset(skb) + 
ipv6_transport_len(skb) < - nsrcs_offset + sizeof(_nsrcs)) + nsrcs_offset + sizeof(__nsrcs)) return -EINVAL; _nsrcs = skb_header_pointer(skb, nsrcs_offset, diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 65d2c163a24a..a6f348bea29a 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -434,7 +434,7 @@ struct net_bridge { struct hlist_head fdb_list; #if IS_ENABLED(CONFIG_BRIDGE_MRP) - struct list_head __rcu mrp_list; + struct list_head mrp_list; #endif }; diff --git a/net/bridge/br_private_mrp.h b/net/bridge/br_private_mrp.h index 33b255e38ffe..315eb37d89f0 100644 --- a/net/bridge/br_private_mrp.h +++ b/net/bridge/br_private_mrp.h @@ -8,7 +8,7 @@ struct br_mrp { /* list of mrp instances */ - struct list_head __rcu list; + struct list_head list; struct net_bridge_port __rcu *p_port; struct net_bridge_port __rcu *s_port; diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c index 6393ba930097..54cd568e7c2f 100644 --- a/net/core/dev_addr_lists.c +++ b/net/core/dev_addr_lists.c @@ -690,6 +690,15 @@ void dev_uc_unsync(struct net_device *to, struct net_device *from) if (to->addr_len != from->addr_len) return; + /* netif_addr_lock_bh() uses lockdep subclass 0, this is okay for two + * reasons: + * 1) This is always called without any addr_list_lock, so as the + * outermost one here, it must be 0. + * 2) This is called by some callers after unlinking the upper device, + * so the dev->lower_level becomes 1 again. + * Therefore, the subclass for 'from' is 0, for 'to' is either 1 or + * larger. + */ netif_addr_lock_bh(from); netif_addr_lock_nested(to); __hw_addr_unsync(&to->uc, &from->uc, to->addr_len); @@ -911,6 +920,7 @@ void dev_mc_unsync(struct net_device *to, struct net_device *from) if (to->addr_len != from->addr_len) return; + /* See the above comments inside dev_uc_unsync(). 
*/ netif_addr_lock_bh(from); netif_addr_lock_nested(to); __hw_addr_unsync(&to->mc, &from->mc, to->addr_len); diff --git a/net/core/filter.c b/net/core/filter.c index c5e696e6c315..0f22f5ad8e11 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5889,12 +5889,16 @@ BPF_CALL_1(bpf_skb_ecn_set_ce, struct sk_buff *, skb) { unsigned int iphdr_len; - if (skb->protocol == cpu_to_be16(ETH_P_IP)) + switch (skb_protocol(skb, true)) { + case cpu_to_be16(ETH_P_IP): iphdr_len = sizeof(struct iphdr); - else if (skb->protocol == cpu_to_be16(ETH_P_IPV6)) + break; + case cpu_to_be16(ETH_P_IPV6): iphdr_len = sizeof(struct ipv6hdr); - else + break; + default: return 0; + } if (skb_headlen(skb) < iphdr_len) return 0; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index d02df0b6d0d9..142a8824f0a8 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -70,10 +70,10 @@ void skb_flow_dissector_init(struct flow_dissector *flow_dissector, EXPORT_SYMBOL(skb_flow_dissector_init); #ifdef CONFIG_BPF_SYSCALL -int flow_dissector_bpf_prog_attach(struct net *net, struct bpf_prog *prog) +int flow_dissector_bpf_prog_attach_check(struct net *net, + struct bpf_prog *prog) { enum netns_bpf_attach_type type = NETNS_BPF_FLOW_DISSECTOR; - struct bpf_prog *attached; if (net == &init_net) { /* BPF flow dissector in the root namespace overrides @@ -86,26 +86,17 @@ int flow_dissector_bpf_prog_attach(struct net *net, struct bpf_prog *prog) for_each_net(ns) { if (ns == &init_net) continue; - if (rcu_access_pointer(ns->bpf.progs[type])) + if (rcu_access_pointer(ns->bpf.run_array[type])) return -EEXIST; } } else { /* Make sure root flow dissector is not attached * when attaching to the non-root namespace. */ - if (rcu_access_pointer(init_net.bpf.progs[type])) + if (rcu_access_pointer(init_net.bpf.run_array[type])) return -EEXIST; } - attached = rcu_dereference_protected(net->bpf.progs[type], - lockdep_is_held(&netns_bpf_mutex)); - if (attached == prog) - /* The same program cannot be attached twice */ - return -EINVAL; - - rcu_assign_pointer(net->bpf.progs[type], prog); - if (attached) - bpf_prog_put(attached); return 0; } #endif /* CONFIG_BPF_SYSCALL */ @@ -903,7 +894,6 @@ bool __skb_flow_dissect(const struct net *net, struct flow_dissector_key_addrs *key_addrs; struct flow_dissector_key_tags *key_tags; struct flow_dissector_key_vlan *key_vlan; - struct bpf_prog *attached = NULL; enum flow_dissect_ret fdret; enum flow_dissector_key_id dissector_vlan = FLOW_DISSECTOR_KEY_MAX; bool mpls_el = false; @@ -960,14 +950,14 @@ bool __skb_flow_dissect(const struct net *net, WARN_ON_ONCE(!net); if (net) { enum netns_bpf_attach_type type = NETNS_BPF_FLOW_DISSECTOR; + struct bpf_prog_array *run_array; rcu_read_lock(); - attached = rcu_dereference(init_net.bpf.progs[type]); - - if (!attached) - attached = rcu_dereference(net->bpf.progs[type]); + run_array = rcu_dereference(init_net.bpf.run_array[type]); + if (!run_array) + run_array = rcu_dereference(net->bpf.run_array[type]); - if (attached) { + if (run_array) { struct bpf_flow_keys flow_keys; struct bpf_flow_dissector ctx = { .flow_keys = &flow_keys, @@ -975,6 +965,7 @@ bool __skb_flow_dissect(const struct net *net, .data_end = data + hlen, }; __be16 n_proto = proto; + struct bpf_prog *prog; if (skb) { ctx.skb = skb; @@ -985,7 +976,8 @@ bool __skb_flow_dissect(const struct net *net, n_proto = skb->protocol; } - ret = bpf_flow_dissect(attached, &ctx, n_proto, nhoff, + prog = READ_ONCE(run_array->items[0].prog); + ret = bpf_flow_dissect(prog, &ctx, 
n_proto, nhoff, hlen, flags); __skb_flow_bpf_to_target(&flow_keys, flow_dissector, target_container); diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 351afbf6bfba..6a32a1fd34f8 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -683,7 +683,7 @@ static struct sk_psock *sk_psock_from_strp(struct strparser *strp) return container_of(parser, struct sk_psock, parser); } -static void sk_psock_skb_redirect(struct sk_psock *psock, struct sk_buff *skb) +static void sk_psock_skb_redirect(struct sk_buff *skb) { struct sk_psock *psock_other; struct sock *sk_other; @@ -715,12 +715,11 @@ static void sk_psock_skb_redirect(struct sk_psock *psock, struct sk_buff *skb) } } -static void sk_psock_tls_verdict_apply(struct sk_psock *psock, - struct sk_buff *skb, int verdict) +static void sk_psock_tls_verdict_apply(struct sk_buff *skb, int verdict) { switch (verdict) { case __SK_REDIRECT: - sk_psock_skb_redirect(psock, skb); + sk_psock_skb_redirect(skb); break; case __SK_PASS: case __SK_DROP: @@ -741,8 +740,8 @@ int sk_psock_tls_strp_read(struct sk_psock *psock, struct sk_buff *skb) ret = sk_psock_bpf_run(psock, prog, skb); ret = sk_psock_map_verd(ret, tcp_skb_bpf_redirect_fetch(skb)); } + sk_psock_tls_verdict_apply(skb, ret); rcu_read_unlock(); - sk_psock_tls_verdict_apply(psock, skb, ret); return ret; } EXPORT_SYMBOL_GPL(sk_psock_tls_strp_read); @@ -770,7 +769,7 @@ static void sk_psock_verdict_apply(struct sk_psock *psock, } goto out_free; case __SK_REDIRECT: - sk_psock_skb_redirect(psock, skb); + sk_psock_skb_redirect(skb); break; case __SK_DROP: /* fall-through */ @@ -782,11 +781,18 @@ out_free: static void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb) { - struct sk_psock *psock = sk_psock_from_strp(strp); + struct sk_psock *psock; struct bpf_prog *prog; int ret = __SK_DROP; + struct sock *sk; rcu_read_lock(); + sk = strp->sk; + psock = sk_psock(sk); + if (unlikely(!psock)) { + kfree_skb(skb); + goto out; + } prog = READ_ONCE(psock->progs.skb_verdict); if (likely(prog)) { skb_orphan(skb); @@ -794,8 +800,9 @@ static void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb) ret = sk_psock_bpf_run(psock, prog, skb); ret = sk_psock_map_verd(ret, tcp_skb_bpf_redirect_fetch(skb)); } - rcu_read_unlock(); sk_psock_verdict_apply(psock, skb, ret); +out: + rcu_read_unlock(); } static int sk_psock_strp_read_done(struct strparser *strp, int err) diff --git a/net/core/sock.c b/net/core/sock.c index de26fe4ea19f..11d6f77dd562 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1917,7 +1917,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) /* sk->sk_memcg will be populated at accept() time */ newsk->sk_memcg = NULL; - cgroup_sk_alloc(&newsk->sk_cgrp_data); + cgroup_sk_clone(&newsk->sk_cgrp_data); rcu_read_lock(); filter = rcu_dereference(sk->sk_filter); diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 4c1123c749bb..119f52a99dc1 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -70,11 +70,49 @@ int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog) struct fd f; int ret; + if (attr->attach_flags || attr->replace_bpf_fd) + return -EINVAL; + f = fdget(ufd); map = __bpf_map_get(f); if (IS_ERR(map)) return PTR_ERR(map); - ret = sock_map_prog_update(map, prog, attr->attach_type); + ret = sock_map_prog_update(map, prog, NULL, attr->attach_type); + fdput(f); + return ret; +} + +int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype) +{ + u32 ufd = attr->target_fd; + struct bpf_prog *prog; + 
struct bpf_map *map; + struct fd f; + int ret; + + if (attr->attach_flags || attr->replace_bpf_fd) + return -EINVAL; + + f = fdget(ufd); + map = __bpf_map_get(f); + if (IS_ERR(map)) + return PTR_ERR(map); + + prog = bpf_prog_get(attr->attach_bpf_fd); + if (IS_ERR(prog)) { + ret = PTR_ERR(prog); + goto put_map; + } + + if (prog->type != ptype) { + ret = -EINVAL; + goto put_prog; + } + + ret = sock_map_prog_update(map, NULL, prog, attr->attach_type); +put_prog: + bpf_prog_put(prog); +put_map: fdput(f); return ret; } @@ -1209,27 +1247,32 @@ static struct sk_psock_progs *sock_map_progs(struct bpf_map *map) } int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, - u32 which) + struct bpf_prog *old, u32 which) { struct sk_psock_progs *progs = sock_map_progs(map); + struct bpf_prog **pprog; if (!progs) return -EOPNOTSUPP; switch (which) { case BPF_SK_MSG_VERDICT: - psock_set_prog(&progs->msg_parser, prog); + pprog = &progs->msg_parser; break; case BPF_SK_SKB_STREAM_PARSER: - psock_set_prog(&progs->skb_parser, prog); + pprog = &progs->skb_parser; break; case BPF_SK_SKB_STREAM_VERDICT: - psock_set_prog(&progs->skb_verdict, prog); + pprog = &progs->skb_verdict; break; default: return -EOPNOTSUPP; } + if (old) + return psock_replace_prog(pprog, prog, old); + + psock_set_prog(pprog, prog); return 0; } diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index f93f8ace6c56..6ada114bbcca 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -274,7 +274,7 @@ static int proc_dointvec_minmax_bpf_enable(struct ctl_table *table, int write, ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); if (write && !ret) { if (jit_enable < 2 || - (jit_enable == 2 && bpf_dump_raw_ok())) { + (jit_enable == 2 && bpf_dump_raw_ok(current_cred()))) { *(int *)table->data = jit_enable; if (jit_enable == 2) pr_warn("bpf_jit_enable = 2 was set! 
NEVER use this in production, only for JIT debugging!\n"); diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index fb9d096faaa4..5c2072765be7 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -382,10 +382,17 @@ err_dev: } static int ethnl_default_dump_one(struct sk_buff *skb, struct net_device *dev, - const struct ethnl_dump_ctx *ctx) + const struct ethnl_dump_ctx *ctx, + struct netlink_callback *cb) { + void *ehdr; int ret; + ehdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, + &ethtool_genl_family, 0, ctx->ops->reply_cmd); + if (!ehdr) + return -EMSGSIZE; + ethnl_init_reply_data(ctx->reply_data, ctx->ops, dev); rtnl_lock(); ret = ctx->ops->prepare_data(ctx->req_info, ctx->reply_data, NULL); @@ -401,6 +408,10 @@ out: if (ctx->ops->cleanup_data) ctx->ops->cleanup_data(ctx->reply_data); ctx->reply_data->dev = NULL; + if (ret < 0) + genlmsg_cancel(skb, ehdr); + else + genlmsg_end(skb, ehdr); return ret; } @@ -417,7 +428,6 @@ static int ethnl_default_dumpit(struct sk_buff *skb, int s_idx = ctx->pos_idx; int h, idx = 0; int ret = 0; - void *ehdr; rtnl_lock(); for (h = ctx->pos_hash; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { @@ -437,26 +447,15 @@ restart_chain: dev_hold(dev); rtnl_unlock(); - ehdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - &ethtool_genl_family, 0, - ctx->ops->reply_cmd); - if (!ehdr) { - dev_put(dev); - ret = -EMSGSIZE; - goto out; - } - ret = ethnl_default_dump_one(skb, dev, ctx); + ret = ethnl_default_dump_one(skb, dev, ctx, cb); dev_put(dev); if (ret < 0) { - genlmsg_cancel(skb, ehdr); if (ret == -EOPNOTSUPP) goto lock_and_cont; if (likely(skb->len)) ret = skb->len; goto out; } - genlmsg_end(skb, ehdr); lock_and_cont: rtnl_lock(); if (net->dev_base_seq != seq) { diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index 1032b83d7047..8a927b647829 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -415,6 +415,7 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], unsigned char multicast_spec, u8 protocol_version, struct netlink_ext_ack *extack) { + bool unregister = false; struct hsr_priv *hsr; int res; @@ -466,25 +467,27 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], if (res) goto err_unregister; + unregister = true; + res = hsr_add_port(hsr, slave[0], HSR_PT_SLAVE_A, extack); if (res) - goto err_add_slaves; + goto err_unregister; res = hsr_add_port(hsr, slave[1], HSR_PT_SLAVE_B, extack); if (res) - goto err_add_slaves; + goto err_unregister; hsr_debugfs_init(hsr, hsr_dev); mod_timer(&hsr->prune_timer, jiffies + msecs_to_jiffies(PRUNE_PERIOD)); return 0; -err_add_slaves: - unregister_netdevice(hsr_dev); err_unregister: hsr_del_ports(hsr); err_add_master: hsr_del_self_node(hsr); + if (unregister) + unregister_netdevice(hsr_dev); return res; } diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 956a806649f7..e30515f89802 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -427,7 +427,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) ipcm_init(&ipc); inet->tos = ip_hdr(skb)->tos; - sk->sk_mark = mark; + ipc.sockc.mark = mark; daddr = ipc.addr = ip_hdr(skb)->saddr; saddr = fib_compute_spec_dst(skb); @@ -710,10 +710,10 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, icmp_param.skb = skb_in; icmp_param.offset = skb_network_offset(skb_in); inet_sk(sk)->tos = tos; - sk->sk_mark = mark; ipcm_init(&ipc); ipc.addr = iph->saddr; ipc.opt = &icmp_param.replyopts.opt; + ipc.sockc.mark =
mark; rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos, mark, type, code, &icmp_param); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index d946356187ed..61f802d5350c 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1708,7 +1708,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, sk->sk_protocol = ip_hdr(skb)->protocol; sk->sk_bound_dev_if = arg->bound_dev_if; sk->sk_sndbuf = sysctl_wmem_default; - sk->sk_mark = fl4.flowi4_mark; + ipc.sockc.mark = fl4.flowi4_mark; err = ip_append_data(sk, &fl4, ip_reply_glue_bits, arg->iov->iov_base, len, 0, &ipc, &rt, MSG_DONTWAIT); if (unlikely(err)) { diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 181b7a2a0247..f8b419e2475c 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -844,3 +844,21 @@ void ip_tunnel_unneed_metadata(void) static_branch_dec(&ip_tunnel_metadata_cnt); } EXPORT_SYMBOL_GPL(ip_tunnel_unneed_metadata); + +/* Returns either the correct skb->protocol value, or 0 if invalid. */ +__be16 ip_tunnel_parse_protocol(const struct sk_buff *skb) +{ + if (skb_network_header(skb) >= skb->head && + (skb_network_header(skb) + sizeof(struct iphdr)) <= skb_tail_pointer(skb) && + ip_hdr(skb)->version == 4) + return htons(ETH_P_IP); + if (skb_network_header(skb) >= skb->head && + (skb_network_header(skb) + sizeof(struct ipv6hdr)) <= skb_tail_pointer(skb) && + ipv6_hdr(skb)->version == 6) + return htons(ETH_P_IPV6); + return 0; +} +EXPORT_SYMBOL(ip_tunnel_parse_protocol); + +const struct header_ops ip_tunnel_header_ops = { .parse_protocol = ip_tunnel_parse_protocol }; +EXPORT_SYMBOL(ip_tunnel_header_ops); diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 1d9c8cff5ac3..460ca1099e8a 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -441,6 +441,7 @@ static const struct net_device_ops vti_netdev_ops = { static void vti_tunnel_setup(struct net_device *dev) { dev->netdev_ops = &vti_netdev_ops; + dev->header_ops = &ip_tunnel_header_ops; dev->type = ARPHRD_TUNNEL; ip_tunnel_setup(dev, vti_net_id); } diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 40fea52c8277..75d35e76bec2 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -361,6 +361,7 @@ static const struct net_device_ops ipip_netdev_ops = { static void ipip_tunnel_setup(struct net_device *dev) { dev->netdev_ops = &ipip_netdev_ops; + dev->header_ops = &ip_tunnel_header_ops; dev->type = ARPHRD_TUNNEL; dev->flags = IFF_NOARP; diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 535427292194..df6fbefe44d4 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -786,6 +786,9 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) inet_sk_flowi_flags(sk), faddr, saddr, 0, 0, sk->sk_uid); + fl4.fl4_icmp_type = user_icmph.type; + fl4.fl4_icmp_code = user_icmph.code; + security_sk_classify_flow(sk, flowi4_to_flowi(&fl4)); rt = ip_route_output_flow(net, &fl4, sk); if (IS_ERR(rt)) { diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 1d7076b78e63..a01efa062f6b 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2027,7 +2027,7 @@ int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr, const struct sk_buff *hint) { struct in_device *in_dev = __in_dev_get_rcu(dev); - struct rtable *rt = (struct rtable *)hint; + struct rtable *rt = skb_rtable(hint); struct net *net = dev_net(dev); int err = -EINVAL; u32 tag = 0; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index de36c91d32ea..254b6a4cc95b 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2691,6 +2691,9 @@ 
int tcp_disconnect(struct sock *sk, int flags) tp->window_clamp = 0; tp->delivered = 0; tp->delivered_ce = 0; + if (icsk->icsk_ca_ops->release) + icsk->icsk_ca_ops->release(sk); + memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv)); tcp_set_ca_state(sk, TCP_CA_Open); tp->is_sack_reneg = 0; tcp_clear_retrans(tp); @@ -3246,10 +3249,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, #ifdef CONFIG_TCP_MD5SIG case TCP_MD5SIG: case TCP_MD5SIG_EXT: - if ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) - err = tp->af_specific->md5_parse(sk, optname, optval, optlen); - else - err = -EINVAL; + err = tp->af_specific->md5_parse(sk, optname, optval, optlen); break; #endif case TCP_USER_TIMEOUT: @@ -4033,11 +4033,14 @@ EXPORT_SYMBOL(tcp_md5_hash_skb_data); int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, const struct tcp_md5sig_key *key) { + u8 keylen = READ_ONCE(key->keylen); /* paired with WRITE_ONCE() in tcp_md5_do_add */ struct scatterlist sg; - sg_init_one(&sg, key->key, key->keylen); - ahash_request_set_crypt(hp->md5_req, &sg, NULL, key->keylen); - return crypto_ahash_update(hp->md5_req); + sg_init_one(&sg, key->key, keylen); + ahash_request_set_crypt(hp->md5_req, &sg, NULL, keylen); + + /* We use data_race() because tcp_md5_do_add() might change key->key under us */ + return data_race(crypto_ahash_update(hp->md5_req)); } EXPORT_SYMBOL(tcp_md5_hash_key); diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 3172e31987be..62878cf26d9c 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -197,7 +197,7 @@ static void tcp_reinit_congestion_control(struct sock *sk, icsk->icsk_ca_setsockopt = 1; memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv)); - if (sk->sk_state != TCP_CLOSE) + if (!((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) tcp_init_congestion_control(sk); } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index dc77309ea15b..61c808864f6b 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4604,6 +4604,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFODROP); + sk->sk_data_ready(sk); tcp_drop(sk, skb); return; } @@ -4850,6 +4851,7 @@ queue_and_out: sk_forced_mem_schedule(sk, skb->truesize); else if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVQDROP); + sk->sk_data_ready(sk); goto drop; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index ea0df9fd7618..116c11a0aaed 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1111,9 +1111,21 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen, l3index); if (key) { - /* Pre-existing entry - just update that one. */ - memcpy(key->key, newkey, newkeylen); - key->keylen = newkeylen; + /* Pre-existing entry - just update that one. + * Note that the key might be used concurrently. + * data_race() is telling kcsan that we do not care of + * key mismatches, since changing MD5 key on live flows + * can lead to packet drops. + */ + data_race(memcpy(key->key, newkey, newkeylen)); + + /* Pairs with READ_ONCE() in tcp_md5_hash_key(). + * Also note that a reader could catch new key->keylen value + * but old key->key[], this is the reason we use __GFP_ZERO + * at sock_kmalloc() time below these lines. 
+ */ + WRITE_ONCE(key->keylen, newkeylen); + return 0; } @@ -1129,7 +1141,7 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, rcu_assign_pointer(tp->md5sig_info, md5sig); } - key = sock_kmalloc(sk, sizeof(*key), gfp); + key = sock_kmalloc(sk, sizeof(*key), gfp | __GFP_ZERO); if (!key) return -ENOMEM; if (!tcp_alloc_md5sig_pool()) { diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 04b70fe31fa2..2d563efcee0d 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -700,7 +700,8 @@ static unsigned int tcp_synack_options(const struct sock *sk, unsigned int mss, struct sk_buff *skb, struct tcp_out_options *opts, const struct tcp_md5sig_key *md5, - struct tcp_fastopen_cookie *foc) + struct tcp_fastopen_cookie *foc, + enum tcp_synack_type synack_type) { struct inet_request_sock *ireq = inet_rsk(req); unsigned int remaining = MAX_TCP_OPTION_SPACE; @@ -715,7 +716,8 @@ static unsigned int tcp_synack_options(const struct sock *sk, * rather than TS in order to fit in better with old, * buggy kernels, but that was deemed to be unnecessary. */ - ireq->tstamp_ok &= !ireq->sack_ok; + if (synack_type != TCP_SYNACK_COOKIE) + ireq->tstamp_ok &= !ireq->sack_ok; } #endif @@ -3402,7 +3404,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, #endif skb_set_hash(skb, tcp_rsk(req)->txhash, PKT_HASH_TYPE_L4); tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, md5, - foc) + sizeof(*th); + foc, synack_type) + sizeof(*th); skb_push(skb, tcp_header_size); skb_reset_transport_header(skb); diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 91e0f2fd2523..a4e4912ad607 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -566,7 +566,6 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL); security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); - sk->sk_mark = mark; np = inet6_sk(sk); if (!icmpv6_xrlim_allow(sk, type, &fl6)) @@ -583,6 +582,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, fl6.flowi6_oif = np->ucast_oif; ipcm6_init_sk(&ipc6, np); + ipc6.sockc.mark = mark; fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel); dst = icmpv6_route_lookup(net, skb, sk, &fl6); @@ -752,7 +752,6 @@ static void icmpv6_echo_reply(struct sk_buff *skb) sk = icmpv6_xmit_lock(net); if (!sk) goto out_bh_enable; - sk->sk_mark = mark; np = inet6_sk(sk); if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) @@ -780,6 +779,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) ipcm6_init_sk(&ipc6, np); ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb)); + ipc6.sockc.mark = mark; if (ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr), diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 821d96c720b9..a18c378ca5f4 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1846,6 +1846,7 @@ static const struct net_device_ops ip6_tnl_netdev_ops = { static void ip6_tnl_dev_setup(struct net_device *dev) { dev->netdev_ops = &ip6_tnl_netdev_ops; + dev->header_ops = &ip_tunnel_header_ops; dev->needs_free_netdev = true; dev->priv_destructor = ip6_dev_free; diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 1147f647b9a0..0d964160a9dd 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -905,6 +905,7 @@ static const struct net_device_ops vti6_netdev_ops = { static void vti6_dev_setup(struct net_device *dev) { dev->netdev_ops = &vti6_netdev_ops; + 
dev->header_ops = &ip_tunnel_header_ops; dev->needs_free_netdev = true; dev->priv_destructor = vti6_dev_free; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 5852039ca9cf..427b81cbc164 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -431,9 +431,12 @@ void fib6_select_path(const struct net *net, struct fib6_result *res, struct fib6_info *sibling, *next_sibling; struct fib6_info *match = res->f6i; - if ((!match->fib6_nsiblings && !match->nh) || have_oif_match) + if (!match->nh && (!match->fib6_nsiblings || have_oif_match)) goto out; + if (match->nh && have_oif_match && res->nh) + return; + /* We might have already computed the hash for ICMPv6 errors. In such * case it will always be non-zero. Otherwise now is the time to do it. */ @@ -3402,7 +3405,7 @@ static bool fib6_is_reject(u32 flags, struct net_device *dev, int addr_type) if ((flags & RTF_REJECT) || (dev && (dev->flags & IFF_LOOPBACK) && !(addr_type & IPV6_ADDR_LOOPBACK) && - !(flags & RTF_LOCAL))) + !(flags & (RTF_ANYCAST | RTF_LOCAL)))) return true; return false; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 1fbb4dfbb191..5e2c34c0ac97 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1421,6 +1421,7 @@ static void ipip6_tunnel_setup(struct net_device *dev) int t_hlen = tunnel->hlen + sizeof(struct iphdr); dev->netdev_ops = &ipip6_netdev_ops; + dev->header_ops = &ip_tunnel_header_ops; dev->needs_free_netdev = true; dev->priv_destructor = ipip6_dev_free; diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 6d7ef78c88af..6434d17e6e8e 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1028,6 +1028,7 @@ static void l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, /* Queue the packet to IP for output */ skb->ignore_df = 1; + skb_dst_drop(skb); #if IS_ENABLED(CONFIG_IPV6) if (l2tp_sk_is_v6(tunnel->sock)) error = inet6_csk_xmit(tunnel->sock, skb, NULL); @@ -1099,10 +1100,6 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len goto out_unlock; } - /* Get routing info from the tunnel socket */ - skb_dst_drop(skb); - skb_dst_set(skb, sk_dst_check(sk, 0)); - inet = inet_sk(sk); fl = &inet->cork.fl; switch (tunnel->encap) { diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 54fb8d452a7b..6e53e43c1907 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -273,6 +273,10 @@ static int llc_ui_autobind(struct socket *sock, struct sockaddr_llc *addr) if (!sock_flag(sk, SOCK_ZAPPED)) goto out; + if (!addr->sllc_arphrd) + addr->sllc_arphrd = ARPHRD_ETHER; + if (addr->sllc_arphrd != ARPHRD_ETHER) + goto out; rc = -ENODEV; if (sk->sk_bound_dev_if) { llc->dev = dev_get_by_index(&init_net, sk->sk_bound_dev_if); @@ -328,7 +332,9 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) if (unlikely(!sock_flag(sk, SOCK_ZAPPED) || addrlen != sizeof(*addr))) goto out; rc = -EAFNOSUPPORT; - if (unlikely(addr->sllc_family != AF_LLC)) + if (!addr->sllc_arphrd) + addr->sllc_arphrd = ARPHRD_ETHER; + if (unlikely(addr->sllc_family != AF_LLC || addr->sllc_arphrd != ARPHRD_ETHER)) goto out; dprintk("%s: binding %02X\n", __func__, addr->sllc_sap); rc = -ENODEV; @@ -336,8 +342,6 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) if (sk->sk_bound_dev_if) { llc->dev = dev_get_by_index_rcu(&init_net, sk->sk_bound_dev_if); if (llc->dev) { - if (!addr->sllc_arphrd) - addr->sllc_arphrd = llc->dev->type; if (is_zero_ether_addr(addr->sllc_mac)) memcpy(addr->sllc_mac, llc->dev->dev_addr, IFHWADDRLEN); diff --git 
a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index aa5150929996..02cde0fd08fe 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -1105,11 +1105,8 @@ void mesh_path_start_discovery(struct ieee80211_sub_if_data *sdata) ttl, lifetime, 0, ifmsh->preq_id++, sdata); spin_lock_bh(&mpath->state_lock); - if (mpath->flags & MESH_PATH_DELETED) { - spin_unlock_bh(&mpath->state_lock); - goto enddiscovery; - } - mod_timer(&mpath->timer, jiffies + mpath->discovery_timeout); + if (!(mpath->flags & MESH_PATH_DELETED)) + mod_timer(&mpath->timer, jiffies + mpath->discovery_timeout); spin_unlock_bh(&mpath->state_lock); enddiscovery: diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index a88ab6fb16f2..5c5af4b5fc08 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2396,6 +2396,7 @@ static int ieee80211_802_1x_port_control(struct ieee80211_rx_data *rx) static int ieee80211_drop_unencrypted(struct ieee80211_rx_data *rx, __le16 fc) { + struct ieee80211_hdr *hdr = (void *)rx->skb->data; struct sk_buff *skb = rx->skb; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); @@ -2406,6 +2407,31 @@ static int ieee80211_drop_unencrypted(struct ieee80211_rx_data *rx, __le16 fc) if (status->flag & RX_FLAG_DECRYPTED) return 0; + /* check mesh EAPOL frames first */ + if (unlikely(rx->sta && ieee80211_vif_is_mesh(&rx->sdata->vif) && + ieee80211_is_data(fc))) { + struct ieee80211s_hdr *mesh_hdr; + u16 hdr_len = ieee80211_hdrlen(fc); + u16 ethertype_offset; + __be16 ethertype; + + if (!ether_addr_equal(hdr->addr1, rx->sdata->vif.addr)) + goto drop_check; + + /* make sure fixed part of mesh header is there, also checks skb len */ + if (!pskb_may_pull(rx->skb, hdr_len + 6)) + goto drop_check; + + mesh_hdr = (struct ieee80211s_hdr *)(skb->data + hdr_len); + ethertype_offset = hdr_len + ieee80211_get_mesh_hdrlen(mesh_hdr) + + sizeof(rfc1042_header); + + if (skb_copy_bits(rx->skb, ethertype_offset, &ethertype, 2) == 0 && + ethertype == rx->sdata->control_port_protocol) + return 0; + } + +drop_check: /* Drop unencrypted frames if key is set. 
*/ if (unlikely(!ieee80211_has_protected(fc) && !ieee80211_is_any_nullfunc(fc) && diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 7b1bacac39c6..cbc40b358ba2 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -639,11 +639,23 @@ static void ieee80211_report_ack_skb(struct ieee80211_local *local, u64 cookie = IEEE80211_SKB_CB(skb)->ack.cookie; struct ieee80211_sub_if_data *sdata; struct ieee80211_hdr *hdr = (void *)skb->data; + __be16 ethertype = 0; + + if (skb->len >= ETH_HLEN && skb->protocol == cpu_to_be16(ETH_P_802_3)) + skb_copy_bits(skb, 2 * ETH_ALEN, &ethertype, ETH_TLEN); rcu_read_lock(); sdata = ieee80211_sdata_from_skb(local, skb); if (sdata) { - if (ieee80211_is_any_nullfunc(hdr->frame_control)) + if (ethertype == sdata->control_port_protocol || + ethertype == cpu_to_be16(ETH_P_PREAUTH)) + cfg80211_control_port_tx_status(&sdata->wdev, + cookie, + skb->data, + skb->len, + acked, + GFP_ATOMIC); + else if (ieee80211_is_any_nullfunc(hdr->frame_control)) cfg80211_probe_status(sdata->dev, hdr->addr1, cookie, acked, info->status.ack_signal, @@ -654,12 +666,8 @@ static void ieee80211_report_ack_skb(struct ieee80211_local *local, skb->data, skb->len, acked, GFP_ATOMIC); else - cfg80211_control_port_tx_status(&sdata->wdev, - cookie, - skb->data, - skb->len, - acked, - GFP_ATOMIC); + pr_warn("Unknown status report in ack skb\n"); + } rcu_read_unlock(); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index e9ce658141f5..1a2941e5244f 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -3996,6 +3996,9 @@ void __ieee80211_subif_start_xmit(struct sk_buff *skb, skb_list_walk_safe(skb, skb, next) { skb_mark_not_on_list(skb); + if (skb->protocol == sdata->control_port_protocol) + ctrl_flags |= IEEE80211_TX_CTRL_SKIP_MPATH_LOOKUP; + skb = ieee80211_build_hdr(sdata, skb, info_flags, sta, ctrl_flags, cookie); if (IS_ERR(skb)) { @@ -4206,7 +4209,7 @@ static void ieee80211_8023_xmit(struct ieee80211_sub_if_data *sdata, (!sta || !test_sta_flag(sta, WLAN_STA_TDLS_PEER))) ra = sdata->u.mgd.bssid; - if (!is_valid_ether_addr(ra)) + if (is_zero_ether_addr(ra)) goto out_free; multicast = is_multicast_ether_addr(ra); @@ -5371,7 +5374,8 @@ int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev, return -EINVAL; if (proto == sdata->control_port_protocol) - ctrl_flags |= IEEE80211_TX_CTRL_PORT_CTRL_PROTO; + ctrl_flags |= IEEE80211_TX_CTRL_PORT_CTRL_PROTO | + IEEE80211_TX_CTRL_SKIP_MPATH_LOOKUP; if (unencrypted) flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; diff --git a/net/mptcp/options.c b/net/mptcp/options.c index b96d3660562f..19707c07efc1 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -449,9 +449,9 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb, } static void mptcp_write_data_fin(struct mptcp_subflow_context *subflow, - struct mptcp_ext *ext) + struct sk_buff *skb, struct mptcp_ext *ext) { - if (!ext->use_map) { + if (!ext->use_map || !skb->len) { /* RFC6824 requires a DSS mapping with specific values * if DATA_FIN is set but no data payload is mapped */ @@ -503,7 +503,7 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, opts->ext_copy = *mpext; if (skb && tcp_fin && subflow->data_fin_tx_enable) - mptcp_write_data_fin(subflow, &opts->ext_copy); + mptcp_write_data_fin(subflow, skb, &opts->ext_copy); ret = true; } diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c index 486959f70cf3..a8ce04a4bb72 100644 --- 
a/net/netfilter/ipset/ip_set_bitmap_ip.c +++ b/net/netfilter/ipset/ip_set_bitmap_ip.c @@ -326,7 +326,7 @@ bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[], set->variant = &bitmap_ip; if (!init_map_ip(set, map, first_ip, last_ip, elements, hosts, netmask)) { - kfree(map); + ip_set_free(map); return -ENOMEM; } if (tb[IPSET_ATTR_TIMEOUT]) { diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index 2310a316e0af..2c625e0f49ec 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -363,7 +363,7 @@ bitmap_ipmac_create(struct net *net, struct ip_set *set, struct nlattr *tb[], map->memsize = BITS_TO_LONGS(elements) * sizeof(unsigned long); set->variant = &bitmap_ipmac; if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) { - kfree(map); + ip_set_free(map); return -ENOMEM; } if (tb[IPSET_ATTR_TIMEOUT]) { diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c index e56ced66f202..7138e080def4 100644 --- a/net/netfilter/ipset/ip_set_bitmap_port.c +++ b/net/netfilter/ipset/ip_set_bitmap_port.c @@ -274,7 +274,7 @@ bitmap_port_create(struct net *net, struct ip_set *set, struct nlattr *tb[], map->memsize = BITS_TO_LONGS(elements) * sizeof(unsigned long); set->variant = &bitmap_port; if (!init_map_port(set, map, first_port, last_port)) { - kfree(map); + ip_set_free(map); return -ENOMEM; } if (tb[IPSET_ATTR_TIMEOUT]) { diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 1ee43752d6d3..521e970be402 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -682,7 +682,7 @@ retry: } t->hregion = ip_set_alloc(ahash_sizeof_regions(htable_bits)); if (!t->hregion) { - kfree(t); + ip_set_free(t); ret = -ENOMEM; goto out; } @@ -1533,7 +1533,7 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set, } t->hregion = ip_set_alloc(ahash_sizeof_regions(hbits)); if (!t->hregion) { - kfree(t); + ip_set_free(t); kfree(h); return -ENOMEM; } diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 79cd9dde457b..f33d72c5b06e 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -2158,6 +2158,8 @@ static int nf_conntrack_update(struct net *net, struct sk_buff *skb) err = __nf_conntrack_update(net, skb, ct, ctinfo); if (err < 0) return err; + + ct = nf_ct_get(skb, &ctinfo); } return nf_confirm_cthelper(skb, ct, ctinfo); diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 55ee680e9db1..9395ee8a868d 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -351,22 +351,11 @@ int genl_register_family(struct genl_family *family) start = end = GENL_ID_VFS_DQUOT; } - if (family->maxattr && !family->parallel_ops) { - family->attrbuf = kmalloc_array(family->maxattr + 1, - sizeof(struct nlattr *), - GFP_KERNEL); - if (family->attrbuf == NULL) { - err = -ENOMEM; - goto errout_locked; - } - } else - family->attrbuf = NULL; - family->id = idr_alloc_cyclic(&genl_fam_idr, family, start, end + 1, GFP_KERNEL); if (family->id < 0) { err = family->id; - goto errout_free; + goto errout_locked; } err = genl_validate_assign_mc_groups(family); @@ -385,8 +374,6 @@ int genl_register_family(struct genl_family *family) errout_remove: idr_remove(&genl_fam_idr, family->id); -errout_free: - kfree(family->attrbuf); errout_locked: genl_unlock_all(); return err; @@ -419,8 +406,6 @@ int 
genl_unregister_family(const struct genl_family *family) atomic_read(&genl_sk_destructing_cnt) == 0); genl_unlock(); - kfree(family->attrbuf); - genl_ctrl_event(CTRL_CMD_DELFAMILY, family, NULL, 0); return 0; @@ -485,30 +470,23 @@ genl_family_rcv_msg_attrs_parse(const struct genl_family *family, if (!family->maxattr) return NULL; - if (family->parallel_ops) { - attrbuf = kmalloc_array(family->maxattr + 1, - sizeof(struct nlattr *), GFP_KERNEL); - if (!attrbuf) - return ERR_PTR(-ENOMEM); - } else { - attrbuf = family->attrbuf; - } + attrbuf = kmalloc_array(family->maxattr + 1, + sizeof(struct nlattr *), GFP_KERNEL); + if (!attrbuf) + return ERR_PTR(-ENOMEM); err = __nlmsg_parse(nlh, hdrlen, attrbuf, family->maxattr, family->policy, validate, extack); if (err) { - if (family->parallel_ops) - kfree(attrbuf); + kfree(attrbuf); return ERR_PTR(err); } return attrbuf; } -static void genl_family_rcv_msg_attrs_free(const struct genl_family *family, - struct nlattr **attrbuf) +static void genl_family_rcv_msg_attrs_free(struct nlattr **attrbuf) { - if (family->parallel_ops) - kfree(attrbuf); + kfree(attrbuf); } struct genl_start_context { @@ -542,7 +520,7 @@ static int genl_start(struct netlink_callback *cb) no_attrs: info = genl_dumpit_info_alloc(); if (!info) { - genl_family_rcv_msg_attrs_free(ctx->family, attrs); + genl_family_rcv_msg_attrs_free(attrs); return -ENOMEM; } info->family = ctx->family; @@ -559,7 +537,7 @@ no_attrs: } if (rc) { - genl_family_rcv_msg_attrs_free(info->family, info->attrs); + genl_family_rcv_msg_attrs_free(info->attrs); genl_dumpit_info_free(info); cb->data = NULL; } @@ -588,7 +566,7 @@ static int genl_lock_done(struct netlink_callback *cb) rc = ops->done(cb); genl_unlock(); } - genl_family_rcv_msg_attrs_free(info->family, info->attrs); + genl_family_rcv_msg_attrs_free(info->attrs); genl_dumpit_info_free(info); return rc; } @@ -601,7 +579,7 @@ static int genl_parallel_done(struct netlink_callback *cb) if (ops->done) rc = ops->done(cb); - genl_family_rcv_msg_attrs_free(info->family, info->attrs); + genl_family_rcv_msg_attrs_free(info->attrs); genl_dumpit_info_free(info); return rc; } @@ -694,7 +672,7 @@ static int genl_family_rcv_msg_doit(const struct genl_family *family, family->post_doit(ops, skb, &info); out: - genl_family_rcv_msg_attrs_free(family, attrbuf); + genl_family_rcv_msg_attrs_free(attrbuf); return err; } @@ -1166,60 +1144,11 @@ static struct genl_family genl_ctrl __ro_after_init = { .netnsok = true, }; -static int genl_bind(struct net *net, int group) -{ - struct genl_family *f; - int err = -ENOENT; - unsigned int id; - - down_read(&cb_lock); - - idr_for_each_entry(&genl_fam_idr, f, id) { - if (group >= f->mcgrp_offset && - group < f->mcgrp_offset + f->n_mcgrps) { - int fam_grp = group - f->mcgrp_offset; - - if (!f->netnsok && net != &init_net) - err = -ENOENT; - else if (f->mcast_bind) - err = f->mcast_bind(net, fam_grp); - else - err = 0; - break; - } - } - up_read(&cb_lock); - - return err; -} - -static void genl_unbind(struct net *net, int group) -{ - struct genl_family *f; - unsigned int id; - - down_read(&cb_lock); - - idr_for_each_entry(&genl_fam_idr, f, id) { - if (group >= f->mcgrp_offset && - group < f->mcgrp_offset + f->n_mcgrps) { - int fam_grp = group - f->mcgrp_offset; - - if (f->mcast_unbind) - f->mcast_unbind(net, fam_grp); - break; - } - } - up_read(&cb_lock); -} - static int __net_init genl_pernet_init(struct net *net) { struct netlink_kernel_cfg cfg = { .input = genl_rcv, .flags = NL_CFG_F_NONROOT_RECV, - .bind = genl_bind, - .unbind = 
genl_unbind, }; /* we'll bump the group number right afterwards */ diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index 2d8d6131bc5f..24a8c3c6da0d 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -166,6 +166,7 @@ static void __qrtr_node_release(struct kref *kref) { struct qrtr_node *node = container_of(kref, struct qrtr_node, ref); struct radix_tree_iter iter; + struct qrtr_tx_flow *flow; unsigned long flags; void __rcu **slot; @@ -181,8 +182,9 @@ static void __qrtr_node_release(struct kref *kref) /* Free tx flow counters */ radix_tree_for_each_slot(slot, &node->qrtr_tx_flow, &iter, 0) { + flow = *slot; radix_tree_iter_delete(&node->qrtr_tx_flow, &iter, slot); - kfree(*slot); + kfree(flow); } kfree(node); } @@ -427,7 +429,7 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len) unsigned int ver; size_t hdrlen; - if (len & 3) + if (len == 0 || len & 3) return -EINVAL; skb = netdev_alloc_skb(NULL, len); @@ -441,6 +443,8 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len) switch (ver) { case QRTR_PROTO_VER_1: + if (len < sizeof(*v1)) + goto err; v1 = data; hdrlen = sizeof(*v1); @@ -454,6 +458,8 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len) size = le32_to_cpu(v1->size); break; case QRTR_PROTO_VER_2: + if (len < sizeof(*v2)) + goto err; v2 = data; hdrlen = sizeof(*v2) + v2->optlen; diff --git a/net/rds/connection.c b/net/rds/connection.c index ed7f2133acc2..f2fcab182095 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -905,6 +905,17 @@ void rds_conn_path_connect_if_down(struct rds_conn_path *cp) } EXPORT_SYMBOL_GPL(rds_conn_path_connect_if_down); +/* Check connectivity of all paths + */ +void rds_check_all_paths(struct rds_connection *conn) +{ + int i = 0; + + do { + rds_conn_path_connect_if_down(&conn->c_path[i]); + } while (++i < conn->c_npaths); +} + void rds_conn_connect_if_down(struct rds_connection *conn) { WARN_ON(conn->c_trans->t_mp_capable); diff --git a/net/rds/rds.h b/net/rds/rds.h index 6019b0c004a9..106e862996b9 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -778,6 +778,7 @@ void rds_conn_drop(struct rds_connection *conn); void rds_conn_path_drop(struct rds_conn_path *cpath, bool destroy); void rds_conn_connect_if_down(struct rds_connection *conn); void rds_conn_path_connect_if_down(struct rds_conn_path *cp); +void rds_check_all_paths(struct rds_connection *conn); void rds_for_each_conn_info(struct socket *sock, unsigned int len, struct rds_info_iterator *iter, struct rds_info_lengths *lens, @@ -823,6 +824,12 @@ rds_conn_path_up(struct rds_conn_path *cp) } static inline int +rds_conn_path_down(struct rds_conn_path *cp) +{ + return atomic_read(&cp->cp_state) == RDS_CONN_DOWN; +} + +static inline int rds_conn_up(struct rds_connection *conn) { WARN_ON(conn->c_trans->t_mp_capable); diff --git a/net/rds/send.c b/net/rds/send.c index 68e2bdb08fd0..9a529a01cdc6 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -1340,7 +1340,8 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) goto out; } - rds_conn_path_connect_if_down(cpath); + if (rds_conn_path_down(cpath)) + rds_check_all_paths(conn); ret = rds_cong_wait(conn->c_fcong, dport, nonblock, rs); if (ret) { diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index 43a243081e7d..f901421b0634 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -43,17 +43,20 @@ static int tcf_connmark_act(struct sk_buff *skb, const struct tc_action *a, tcf_lastuse_update(&ca->tcf_tm); 
bstats_update(&ca->tcf_bstats, skb); - if (skb->protocol == htons(ETH_P_IP)) { + switch (skb_protocol(skb, true)) { + case htons(ETH_P_IP): if (skb->len < sizeof(struct iphdr)) goto out; proto = NFPROTO_IPV4; - } else if (skb->protocol == htons(ETH_P_IPV6)) { + break; + case htons(ETH_P_IPV6): if (skb->len < sizeof(struct ipv6hdr)) goto out; proto = NFPROTO_IPV6; - } else { + break; + default: goto out; } diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 9035355e867f..f5826e457679 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -587,7 +587,7 @@ static int tcf_csum_act(struct sk_buff *skb, const struct tc_action *a, goto drop; update_flags = params->update_flags; - protocol = tc_skb_protocol(skb); + protocol = skb_protocol(skb, false); again: switch (protocol) { case cpu_to_be16(ETH_P_IP): diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index fadfd6b0033b..3893e03454db 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -624,7 +624,7 @@ static u8 tcf_ct_skb_nf_family(struct sk_buff *skb) { u8 family = NFPROTO_UNSPEC; - switch (skb->protocol) { + switch (skb_protocol(skb, true)) { case htons(ETH_P_IP): family = NFPROTO_IPV4; break; @@ -748,6 +748,7 @@ static int ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct, const struct nf_nat_range2 *range, enum nf_nat_manip_type maniptype) { + __be16 proto = skb_protocol(skb, true); int hooknum, err = NF_ACCEPT; /* See HOOK2MANIP(). */ @@ -759,14 +760,13 @@ static int ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct, switch (ctinfo) { case IP_CT_RELATED: case IP_CT_RELATED_REPLY: - if (skb->protocol == htons(ETH_P_IP) && + if (proto == htons(ETH_P_IP) && ip_hdr(skb)->protocol == IPPROTO_ICMP) { if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo, hooknum)) err = NF_DROP; goto out; - } else if (IS_ENABLED(CONFIG_IPV6) && - skb->protocol == htons(ETH_P_IPV6)) { + } else if (IS_ENABLED(CONFIG_IPV6) && proto == htons(ETH_P_IPV6)) { __be16 frag_off; u8 nexthdr = ipv6_hdr(skb)->nexthdr; int hdrlen = ipv6_skip_exthdr(skb, @@ -925,6 +925,8 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, force = p->ct_action & TCA_CT_ACT_FORCE; tmpl = p->tmpl; + tcf_lastuse_update(&c->tcf_tm); + if (clear) { ct = nf_ct_get(skb, &ctinfo); if (ct) { @@ -1550,4 +1552,3 @@ MODULE_AUTHOR("Yossi Kuperman <yossiku@mellanox.com>"); MODULE_AUTHOR("Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>"); MODULE_DESCRIPTION("Connection tracking action"); MODULE_LICENSE("GPL v2"); - diff --git a/net/sched/act_ctinfo.c b/net/sched/act_ctinfo.c index 19649623493b..b5042f3ea079 100644 --- a/net/sched/act_ctinfo.c +++ b/net/sched/act_ctinfo.c @@ -96,19 +96,22 @@ static int tcf_ctinfo_act(struct sk_buff *skb, const struct tc_action *a, action = READ_ONCE(ca->tcf_action); wlen = skb_network_offset(skb); - if (tc_skb_protocol(skb) == htons(ETH_P_IP)) { + switch (skb_protocol(skb, true)) { + case htons(ETH_P_IP): wlen += sizeof(struct iphdr); if (!pskb_may_pull(skb, wlen)) goto out; proto = NFPROTO_IPV4; - } else if (tc_skb_protocol(skb) == htons(ETH_P_IPV6)) { + break; + case htons(ETH_P_IPV6): wlen += sizeof(struct ipv6hdr); if (!pskb_may_pull(skb, wlen)) goto out; proto = NFPROTO_IPV6; - } else { + break; + default: goto out; } diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c index be3f215cd027..8118e2640979 100644 --- a/net/sched/act_mpls.c +++ b/net/sched/act_mpls.c @@ -82,7 +82,7 @@ static int tcf_mpls_act(struct sk_buff *skb, const struct tc_action *a, goto drop; break; case TCA_MPLS_ACT_PUSH: - new_lse = 
tcf_mpls_get_lse(NULL, p, !eth_p_mpls(skb->protocol)); + new_lse = tcf_mpls_get_lse(NULL, p, !eth_p_mpls(skb_protocol(skb, true))); if (skb_mpls_push(skb, new_lse, p->tcfm_proto, mac_len, skb->dev && skb->dev->type == ARPHRD_ETHER)) goto drop; diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index 361b863e0634..d0652386c6e2 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -41,7 +41,7 @@ static int tcf_skbedit_act(struct sk_buff *skb, const struct tc_action *a, if (params->flags & SKBEDIT_F_INHERITDSFIELD) { int wlen = skb_network_offset(skb); - switch (tc_skb_protocol(skb)) { + switch (skb_protocol(skb, true)) { case htons(ETH_P_IP): wlen += sizeof(struct iphdr); if (!pskb_may_pull(skb, wlen)) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index e9e119ea6813..e617f3e27ec0 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1538,7 +1538,7 @@ static inline int __tcf_classify(struct sk_buff *skb, reclassify: #endif for (; tp; tp = rcu_dereference_bh(tp->next)) { - __be16 protocol = tc_skb_protocol(skb); + __be16 protocol = skb_protocol(skb, false); int err; if (tp->protocol != protocol && diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 80ae7b9fa90a..ab53a93b2f2b 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -80,7 +80,7 @@ static u32 flow_get_dst(const struct sk_buff *skb, const struct flow_keys *flow) if (dst) return ntohl(dst); - return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb); + return addr_fold(skb_dst(skb)) ^ (__force u16)skb_protocol(skb, true); } static u32 flow_get_proto(const struct sk_buff *skb, @@ -104,7 +104,7 @@ static u32 flow_get_proto_dst(const struct sk_buff *skb, if (flow->ports.ports) return ntohs(flow->ports.dst); - return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb); + return addr_fold(skb_dst(skb)) ^ (__force u16)skb_protocol(skb, true); } static u32 flow_get_iif(const struct sk_buff *skb) @@ -151,7 +151,7 @@ static u32 flow_get_nfct(const struct sk_buff *skb) static u32 flow_get_nfct_src(const struct sk_buff *skb, const struct flow_keys *flow) { - switch (tc_skb_protocol(skb)) { + switch (skb_protocol(skb, true)) { case htons(ETH_P_IP): return ntohl(CTTUPLE(skb, src.u3.ip)); case htons(ETH_P_IPV6): @@ -164,7 +164,7 @@ fallback: static u32 flow_get_nfct_dst(const struct sk_buff *skb, const struct flow_keys *flow) { - switch (tc_skb_protocol(skb)) { + switch (skb_protocol(skb, true)) { case htons(ETH_P_IP): return ntohl(CTTUPLE(skb, dst.u3.ip)); case htons(ETH_P_IPV6): diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 391971672d54..acd8e05c2ba5 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -313,7 +313,7 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp, /* skb_flow_dissect() does not set n_proto in case an unknown * protocol, so do it rather here. 
*/ - skb_key.basic.n_proto = skb->protocol; + skb_key.basic.n_proto = skb_protocol(skb, false); skb_flow_dissect_tunnel_info(skb, &mask->dissector, &skb_key); skb_flow_dissect_ct(skb, &mask->dissector, &skb_key, fl_ct_info_to_flower_map, diff --git a/net/sched/em_ipset.c b/net/sched/em_ipset.c index df00566d327d..c95cf86fb431 100644 --- a/net/sched/em_ipset.c +++ b/net/sched/em_ipset.c @@ -59,7 +59,7 @@ static int em_ipset_match(struct sk_buff *skb, struct tcf_ematch *em, }; int ret, network_offset; - switch (tc_skb_protocol(skb)) { + switch (skb_protocol(skb, true)) { case htons(ETH_P_IP): state.pf = NFPROTO_IPV4; if (!pskb_network_may_pull(skb, sizeof(struct iphdr))) diff --git a/net/sched/em_ipt.c b/net/sched/em_ipt.c index 18755d29fd15..3650117da47f 100644 --- a/net/sched/em_ipt.c +++ b/net/sched/em_ipt.c @@ -212,7 +212,7 @@ static int em_ipt_match(struct sk_buff *skb, struct tcf_ematch *em, struct nf_hook_state state; int ret; - switch (tc_skb_protocol(skb)) { + switch (skb_protocol(skb, true)) { case htons(ETH_P_IP): if (!pskb_network_may_pull(skb, sizeof(struct iphdr))) return 0; diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index d99966a55c84..46254968d390 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -195,7 +195,7 @@ META_COLLECTOR(int_priority) META_COLLECTOR(int_protocol) { /* Let userspace take care of the byte ordering */ - dst->value = tc_skb_protocol(skb); + dst->value = skb_protocol(skb, false); } META_COLLECTOR(int_pkttype) diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index fb6b16c4e46d..1d5e422d9be2 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -553,16 +553,16 @@ static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt, if (!p->link.q) p->link.q = &noop_qdisc; pr_debug("atm_tc_init: link (%p) qdisc %p\n", &p->link, p->link.q); + p->link.vcc = NULL; + p->link.sock = NULL; + p->link.common.classid = sch->handle; + p->link.ref = 1; err = tcf_block_get(&p->link.block, &p->link.filter_list, sch, extack); if (err) return err; - p->link.vcc = NULL; - p->link.sock = NULL; - p->link.common.classid = sch->handle; - p->link.ref = 1; tasklet_init(&p->task, sch_atm_dequeue, (unsigned long)sch); return 0; } diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index e9c502dd29a2..b3cdcd86cbfd 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -592,7 +592,7 @@ static bool cake_update_flowkeys(struct flow_keys *keys, bool rev = !skb->_nfct, upd = false; __be32 ip; - if (tc_skb_protocol(skb) != htons(ETH_P_IP)) + if (skb_protocol(skb, true) != htons(ETH_P_IP)) return false; if (!nf_ct_get_tuple_skb(&tuple, skb)) @@ -1557,7 +1557,7 @@ static u8 cake_handle_diffserv(struct sk_buff *skb, bool wash) u16 *buf, buf_; u8 dscp; - switch (tc_skb_protocol(skb)) { + switch (skb_protocol(skb, true)) { case htons(ETH_P_IP): buf = skb_header_pointer(skb, offset, sizeof(buf_), &buf_); if (unlikely(!buf)) diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index fbe49fffcdbb..76a9c4f277f2 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -210,7 +210,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *ro if (p->set_tc_index) { int wlen = skb_network_offset(skb); - switch (tc_skb_protocol(skb)) { + switch (skb_protocol(skb, true)) { case htons(ETH_P_IP): wlen += sizeof(struct iphdr); if (!pskb_may_pull(skb, wlen) || @@ -303,7 +303,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch) index = skb->tc_index & (p->indices - 1); pr_debug("index %d->%d\n", skb->tc_index, 
index); - switch (tc_skb_protocol(skb)) { + switch (skb_protocol(skb, true)) { case htons(ETH_P_IP): ipv4_change_dsfield(ip_hdr(skb), p->mv[index].mask, p->mv[index].value); @@ -320,7 +320,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch) */ if (p->mv[index].mask != 0xff || p->mv[index].value) pr_warn("%s: unsupported protocol %d\n", - __func__, ntohs(tc_skb_protocol(skb))); + __func__, ntohs(skb_protocol(skb, true))); break; } diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 511964653476..b586eec2eaeb 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -239,7 +239,7 @@ __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, char haddr[MAX_ADDR_LEN]; neigh_ha_snapshot(haddr, n, dev); - err = dev_hard_header(skb, dev, ntohs(tc_skb_protocol(skb)), + err = dev_hard_header(skb, dev, ntohs(skb_protocol(skb, false)), haddr, NULL, skb->len); if (err < 0) diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index d5627df24215..779f4142a11d 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -27,6 +27,7 @@ #define SMCR_CLC_ACCEPT_CONFIRM_LEN 68 #define SMCD_CLC_ACCEPT_CONFIRM_LEN 48 +#define SMC_CLC_RECV_BUF_LEN 100 /* eye catcher "SMCR" EBCDIC for CLC messages */ static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'}; @@ -36,7 +37,7 @@ static const char SMCD_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xc4'}; /* check if received message has a correct header length and contains valid * heading and trailing eyecatchers */ -static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm) +static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm, bool check_trl) { struct smc_clc_msg_proposal_prefix *pclc_prfx; struct smc_clc_msg_accept_confirm *clc; @@ -49,12 +50,9 @@ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm) return false; switch (clcm->type) { case SMC_CLC_PROPOSAL: - if (clcm->path != SMC_TYPE_R && clcm->path != SMC_TYPE_D && - clcm->path != SMC_TYPE_B) - return false; pclc = (struct smc_clc_msg_proposal *)clcm; pclc_prfx = smc_clc_proposal_get_prefix(pclc); - if (ntohs(pclc->hdr.length) != + if (ntohs(pclc->hdr.length) < sizeof(*pclc) + ntohs(pclc->iparea_offset) + sizeof(*pclc_prfx) + pclc_prfx->ipv6_prefixes_cnt * @@ -86,7 +84,8 @@ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm) default: return false; } - if (memcmp(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)) && + if (check_trl && + memcmp(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)) && memcmp(trl->eyecatcher, SMCD_EYECATCHER, sizeof(SMCD_EYECATCHER))) return false; return true; @@ -276,7 +275,8 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, struct msghdr msg = {NULL, 0}; int reason_code = 0; struct kvec vec = {buf, buflen}; - int len, datlen; + int len, datlen, recvlen; + bool check_trl = true; int krflags; /* peek the first few bytes to determine length of data to receive @@ -320,10 +320,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, } datlen = ntohs(clcm->length); if ((len < sizeof(struct smc_clc_msg_hdr)) || - (datlen > buflen) || - (clcm->version != SMC_CLC_V1) || - (clcm->path != SMC_TYPE_R && clcm->path != SMC_TYPE_D && - clcm->path != SMC_TYPE_B) || + (clcm->version < SMC_CLC_V1) || ((clcm->type != SMC_CLC_DECLINE) && (clcm->type != expected_type))) { smc->sk.sk_err = EPROTO; @@ -331,16 +328,38 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, goto out; } + if (clcm->type == SMC_CLC_PROPOSAL && clcm->path == SMC_TYPE_N) + reason_code = 
SMC_CLC_DECL_VERSMISMAT; /* just V2 offered */ + /* receive the complete CLC message */ memset(&msg, 0, sizeof(struct msghdr)); - iov_iter_kvec(&msg.msg_iter, READ, &vec, 1, datlen); + if (datlen > buflen) { + check_trl = false; + recvlen = buflen; + } else { + recvlen = datlen; + } + iov_iter_kvec(&msg.msg_iter, READ, &vec, 1, recvlen); krflags = MSG_WAITALL; len = sock_recvmsg(smc->clcsock, &msg, krflags); - if (len < datlen || !smc_clc_msg_hdr_valid(clcm)) { + if (len < recvlen || !smc_clc_msg_hdr_valid(clcm, check_trl)) { smc->sk.sk_err = EPROTO; reason_code = -EPROTO; goto out; } + datlen -= len; + while (datlen) { + u8 tmp[SMC_CLC_RECV_BUF_LEN]; + + vec.iov_base = &tmp; + vec.iov_len = SMC_CLC_RECV_BUF_LEN; + /* receive remaining proposal message */ + recvlen = datlen > SMC_CLC_RECV_BUF_LEN ? + SMC_CLC_RECV_BUF_LEN : datlen; + iov_iter_kvec(&msg.msg_iter, READ, &vec, 1, recvlen); + len = sock_recvmsg(smc->clcsock, &msg, krflags); + datlen -= len; + } if (clcm->type == SMC_CLC_DECLINE) { struct smc_clc_msg_decline *dclc; diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h index 465876701b75..76c2b150d040 100644 --- a/net/smc/smc_clc.h +++ b/net/smc/smc_clc.h @@ -25,6 +25,7 @@ #define SMC_CLC_V1 0x1 /* SMC version */ #define SMC_TYPE_R 0 /* SMC-R only */ #define SMC_TYPE_D 1 /* SMC-D only */ +#define SMC_TYPE_N 2 /* neither SMC-R nor SMC-D */ #define SMC_TYPE_B 3 /* SMC-R and SMC-D */ #define CLC_WAIT_TIME (6 * HZ) /* max. wait time on clcsock */ #define CLC_WAIT_TIME_SHORT HZ /* short wait time on clcsock */ @@ -46,6 +47,7 @@ #define SMC_CLC_DECL_ISMVLANERR 0x03090000 /* err to reg vlan id on ism dev */ #define SMC_CLC_DECL_NOACTLINK 0x030a0000 /* no active smc-r link in lgr */ #define SMC_CLC_DECL_NOSRVLINK 0x030b0000 /* SMC-R link from srv not found */ +#define SMC_CLC_DECL_VERSMISMAT 0x030c0000 /* SMC version mismatch */ #define SMC_CLC_DECL_SYNCERR 0x04000000 /* synchronization error */ #define SMC_CLC_DECL_PEERDECL 0x05000000 /* peer declined during handshake */ #define SMC_CLC_DECL_INTERR 0x09990000 /* internal error */ diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 7964a21e5e6f..f69d205b3e11 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -15,6 +15,7 @@ #include <linux/workqueue.h> #include <linux/wait.h> #include <linux/reboot.h> +#include <linux/mutex.h> #include <net/tcp.h> #include <net/sock.h> #include <rdma/ib_verbs.h> @@ -247,7 +248,8 @@ static void smcr_lgr_link_deactivate_all(struct smc_link_group *lgr) if (smc_link_usable(lnk)) lnk->state = SMC_LNK_INACTIVE; } - wake_up_interruptible_all(&lgr->llc_waiter); + wake_up_all(&lgr->llc_msg_waiter); + wake_up_all(&lgr->llc_flow_waiter); } static void smc_lgr_free(struct smc_link_group *lgr); @@ -1130,18 +1132,19 @@ static void smcr_link_up(struct smc_link_group *lgr, return; if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) { /* some other llc task is ongoing */ - wait_event_interruptible_timeout(lgr->llc_waiter, - (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE), + wait_event_timeout(lgr->llc_flow_waiter, + (list_empty(&lgr->list) || + lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE), SMC_LLC_WAIT_TIME); } - if (list_empty(&lgr->list) || - !smc_ib_port_active(smcibdev, ibport)) - return; /* lgr or device no longer active */ - link = smc_llc_usable_link(lgr); - if (!link) - return; - smc_llc_send_add_link(link, smcibdev->mac[ibport - 1], gid, - NULL, SMC_LLC_REQ); + /* lgr or device no longer active? 
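The smc_clc_wait_msg() rework above stops rejecting CLC messages larger than the caller's buffer: it receives what fits (disabling the trailer check, since the trailer is now out of reach), then drains the tail in SMC_CLC_RECV_BUF_LEN chunks so the TCP byte stream stays aligned for the next CLC message. A hedged userspace sketch of the same drain loop, with recv() standing in for sock_recvmsg() and an explicit error check added (the names are hypothetical):

    #include <sys/socket.h>
    #include <sys/types.h>

    static int drain_remainder(int fd, size_t datlen)
    {
            char tmp[100];          /* mirrors SMC_CLC_RECV_BUF_LEN */

            while (datlen) {
                    size_t want = datlen < sizeof(tmp) ? datlen : sizeof(tmp);
                    ssize_t got = recv(fd, tmp, want, MSG_WAITALL);

                    if (got <= 0)
                            return -1;      /* error or peer closed */
                    datlen -= (size_t)got;
            }
            return 0;
    }

Discarding rather than failing lets a V1 implementation tolerate the larger proposals a V2 peer may send.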
*/ + if (!list_empty(&lgr->list) && + smc_ib_port_active(smcibdev, ibport)) + link = smc_llc_usable_link(lgr); + if (link) + smc_llc_send_add_link(link, smcibdev->mac[ibport - 1], + gid, NULL, SMC_LLC_REQ); + wake_up(&lgr->llc_flow_waiter); /* wake up next waiter */ } } @@ -1195,13 +1198,17 @@ static void smcr_link_down(struct smc_link *lnk) if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) { /* another llc task is ongoing */ mutex_unlock(&lgr->llc_conf_mutex); - wait_event_interruptible_timeout(lgr->llc_waiter, - (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE), + wait_event_timeout(lgr->llc_flow_waiter, + (list_empty(&lgr->list) || + lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE), SMC_LLC_WAIT_TIME); mutex_lock(&lgr->llc_conf_mutex); } - smc_llc_send_delete_link(to_lnk, del_link_id, SMC_LLC_REQ, true, - SMC_LLC_DEL_LOST_PATH); + if (!list_empty(&lgr->list)) + smc_llc_send_delete_link(to_lnk, del_link_id, + SMC_LLC_REQ, true, + SMC_LLC_DEL_LOST_PATH); + wake_up(&lgr->llc_flow_waiter); /* wake up next waiter */ } } @@ -1262,7 +1269,7 @@ static void smc_link_down_work(struct work_struct *work) if (list_empty(&lgr->list)) return; - wake_up_interruptible_all(&lgr->llc_waiter); + wake_up_all(&lgr->llc_msg_waiter); mutex_lock(&lgr->llc_conf_mutex); smcr_link_down(link); mutex_unlock(&lgr->llc_conf_mutex); @@ -1955,20 +1962,20 @@ static void smc_core_going_away(void) struct smc_ib_device *smcibdev; struct smcd_dev *smcd; - spin_lock(&smc_ib_devices.lock); + mutex_lock(&smc_ib_devices.mutex); list_for_each_entry(smcibdev, &smc_ib_devices.list, list) { int i; for (i = 0; i < SMC_MAX_PORTS; i++) set_bit(i, smcibdev->ports_going_away); } - spin_unlock(&smc_ib_devices.lock); + mutex_unlock(&smc_ib_devices.mutex); - spin_lock(&smcd_dev_list.lock); + mutex_lock(&smcd_dev_list.mutex); list_for_each_entry(smcd, &smcd_dev_list.list, list) { smcd->going_away = 1; } - spin_unlock(&smcd_dev_list.lock); + mutex_unlock(&smcd_dev_list.mutex); } /* Clean up all SMC link groups */ @@ -1980,10 +1987,10 @@ static void smc_lgrs_shutdown(void) smc_smcr_terminate_all(NULL); - spin_lock(&smcd_dev_list.lock); + mutex_lock(&smcd_dev_list.mutex); list_for_each_entry(smcd, &smcd_dev_list.list, list) smc_smcd_terminate_all(smcd); - spin_unlock(&smcd_dev_list.lock); + mutex_unlock(&smcd_dev_list.mutex); } static int smc_core_reboot_event(struct notifier_block *this, diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 86d160f0d187..c3ff512fd891 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -262,8 +262,10 @@ struct smc_link_group { struct work_struct llc_del_link_work; struct work_struct llc_event_work; /* llc event worker */ - wait_queue_head_t llc_waiter; + wait_queue_head_t llc_flow_waiter; /* w4 next llc event */ + wait_queue_head_t llc_msg_waiter; + /* w4 next llc msg */ struct smc_llc_flow llc_flow_lcl; /* llc local control field */ struct smc_llc_flow llc_flow_rmt; diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index 562a52d01ad1..7637fdebbb78 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -16,6 +16,7 @@ #include <linux/workqueue.h> #include <linux/scatterlist.h> #include <linux/wait.h> +#include <linux/mutex.h> #include <rdma/ib_verbs.h> #include <rdma/ib_cache.h> @@ -33,7 +34,7 @@ #define SMC_QP_RNR_RETRY 7 /* 7: infinite */ struct smc_ib_devices smc_ib_devices = { /* smc-registered ib devices */ - .lock = __SPIN_LOCK_UNLOCKED(smc_ib_devices.lock), + .mutex = __MUTEX_INITIALIZER(smc_ib_devices.mutex), .list = LIST_HEAD_INIT(smc_ib_devices.list), }; @@ -565,9 +566,9 @@ static int 
smc_ib_add_dev(struct ib_device *ibdev) INIT_WORK(&smcibdev->port_event_work, smc_ib_port_event_work); atomic_set(&smcibdev->lnk_cnt, 0); init_waitqueue_head(&smcibdev->lnks_deleted); - spin_lock(&smc_ib_devices.lock); + mutex_lock(&smc_ib_devices.mutex); list_add_tail(&smcibdev->list, &smc_ib_devices.list); - spin_unlock(&smc_ib_devices.lock); + mutex_unlock(&smc_ib_devices.mutex); ib_set_client_data(ibdev, &smc_ib_client, smcibdev); INIT_IB_EVENT_HANDLER(&smcibdev->event_handler, smcibdev->ibdev, smc_ib_global_event_handler); @@ -602,9 +603,9 @@ static void smc_ib_remove_dev(struct ib_device *ibdev, void *client_data) { struct smc_ib_device *smcibdev = client_data; - spin_lock(&smc_ib_devices.lock); + mutex_lock(&smc_ib_devices.mutex); list_del_init(&smcibdev->list); /* remove from smc_ib_devices */ - spin_unlock(&smc_ib_devices.lock); + mutex_unlock(&smc_ib_devices.mutex); pr_warn_ratelimited("smc: removing ib device %s\n", smcibdev->ibdev->name); smc_smcr_terminate_all(smcibdev); diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h index e6a696ae15f3..ae6776e1e726 100644 --- a/net/smc/smc_ib.h +++ b/net/smc/smc_ib.h @@ -14,6 +14,7 @@ #include <linux/interrupt.h> #include <linux/if_ether.h> +#include <linux/mutex.h> #include <linux/wait.h> #include <rdma/ib_verbs.h> #include <net/smc.h> @@ -25,7 +26,7 @@ struct smc_ib_devices { /* list of smc ib devices definition */ struct list_head list; - spinlock_t lock; /* protects list of smc ib devices */ + struct mutex mutex; /* protects list of smc ib devices */ }; extern struct smc_ib_devices smc_ib_devices; /* list of smc ib devices */ diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c index 91f85fc09fb8..998c525de785 100644 --- a/net/smc/smc_ism.c +++ b/net/smc/smc_ism.c @@ -7,6 +7,7 @@ */ #include <linux/spinlock.h> +#include <linux/mutex.h> #include <linux/slab.h> #include <asm/page.h> @@ -17,7 +18,7 @@ struct smcd_dev_list smcd_dev_list = { .list = LIST_HEAD_INIT(smcd_dev_list.list), - .lock = __SPIN_LOCK_UNLOCKED(smcd_dev_list.lock) + .mutex = __MUTEX_INITIALIZER(smcd_dev_list.mutex) }; /* Test if an ISM communication is possible. 
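The smc_ib.c and smc_ism.c hunks here (and the smc_pnet.c ones further down) convert the guards of smc_ib_devices and smcd_dev_list from spinlocks to mutexes, since the sections walking these lists may sleep. A minimal sketch of the resulting pattern, with a hypothetical dev_list standing in for either list:

    #include <linux/list.h>
    #include <linux/mutex.h>

    struct dev_list {                       /* hypothetical stand-in */
            struct list_head list;
            struct mutex mutex;             /* protects list */
    };

    static struct dev_list devices = {
            .list  = LIST_HEAD_INIT(devices.list),
            .mutex = __MUTEX_INITIALIZER(devices.mutex),
    };

    static void dev_register(struct list_head *entry)
    {
            mutex_lock(&devices.mutex);     /* sleeping is allowed here */
            list_add_tail(entry, &devices.list);
            mutex_unlock(&devices.mutex);
    }

The trade-off is that the lock may now only be taken from process context, which all of these call sites satisfy.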
*/ @@ -317,9 +318,9 @@ EXPORT_SYMBOL_GPL(smcd_alloc_dev); int smcd_register_dev(struct smcd_dev *smcd) { - spin_lock(&smcd_dev_list.lock); + mutex_lock(&smcd_dev_list.mutex); list_add_tail(&smcd->list, &smcd_dev_list.list); - spin_unlock(&smcd_dev_list.lock); + mutex_unlock(&smcd_dev_list.mutex); pr_warn_ratelimited("smc: adding smcd device %s with pnetid %.16s%s\n", dev_name(&smcd->dev), smcd->pnetid, @@ -333,9 +334,9 @@ void smcd_unregister_dev(struct smcd_dev *smcd) { pr_warn_ratelimited("smc: removing smcd device %s\n", dev_name(&smcd->dev)); - spin_lock(&smcd_dev_list.lock); + mutex_lock(&smcd_dev_list.mutex); list_del_init(&smcd->list); - spin_unlock(&smcd_dev_list.lock); + mutex_unlock(&smcd_dev_list.mutex); smcd->going_away = 1; smc_smcd_terminate_all(smcd); flush_workqueue(smcd->event_wq); diff --git a/net/smc/smc_ism.h b/net/smc/smc_ism.h index 4da946cbfa29..81cc4537efd3 100644 --- a/net/smc/smc_ism.h +++ b/net/smc/smc_ism.h @@ -10,12 +10,13 @@ #define SMCD_ISM_H #include <linux/uio.h> +#include <linux/mutex.h> #include "smc.h" struct smcd_dev_list { /* List of SMCD devices */ struct list_head list; - spinlock_t lock; /* Protects list of devices */ + struct mutex mutex; /* Protects list of devices */ }; extern struct smcd_dev_list smcd_dev_list; /* list of smcd devices */ diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index 391237b601fe..c1a038689c63 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -186,6 +186,26 @@ static inline void smc_llc_flow_qentry_set(struct smc_llc_flow *flow, flow->qentry = qentry; } +static void smc_llc_flow_parallel(struct smc_link_group *lgr, u8 flow_type, + struct smc_llc_qentry *qentry) +{ + u8 msg_type = qentry->msg.raw.hdr.common.type; + + if ((msg_type == SMC_LLC_ADD_LINK || msg_type == SMC_LLC_DELETE_LINK) && + flow_type != msg_type && !lgr->delayed_event) { + lgr->delayed_event = qentry; + return; + } + /* drop parallel or already-in-progress llc requests */ + if (flow_type != msg_type) + pr_warn_once("smc: SMC-R lg %*phN dropped parallel " + "LLC msg: msg %d flow %d role %d\n", + SMC_LGR_ID_SIZE, &lgr->id, + qentry->msg.raw.hdr.common.type, + flow_type, lgr->role); + kfree(qentry); +} + /* try to start a new llc flow, initiated by an incoming llc msg */ static bool smc_llc_flow_start(struct smc_llc_flow *flow, struct smc_llc_qentry *qentry) @@ -195,14 +215,7 @@ static bool smc_llc_flow_start(struct smc_llc_flow *flow, spin_lock_bh(&lgr->llc_flow_lock); if (flow->type) { /* a flow is already active */ - if ((qentry->msg.raw.hdr.common.type == SMC_LLC_ADD_LINK || - qentry->msg.raw.hdr.common.type == SMC_LLC_DELETE_LINK) && - !lgr->delayed_event) { - lgr->delayed_event = qentry; - } else { - /* forget this llc request */ - kfree(qentry); - } + smc_llc_flow_parallel(lgr, flow->type, qentry); spin_unlock_bh(&lgr->llc_flow_lock); return false; } @@ -222,8 +235,8 @@ static bool smc_llc_flow_start(struct smc_llc_flow *flow, } if (qentry == lgr->delayed_event) lgr->delayed_event = NULL; - spin_unlock_bh(&lgr->llc_flow_lock); smc_llc_flow_qentry_set(flow, qentry); + spin_unlock_bh(&lgr->llc_flow_lock); return true; } @@ -251,11 +264,11 @@ again: return 0; } spin_unlock_bh(&lgr->llc_flow_lock); - rc = wait_event_interruptible_timeout(lgr->llc_waiter, - (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE && - (lgr->llc_flow_rmt.type == SMC_LLC_FLOW_NONE || - lgr->llc_flow_rmt.type == allowed_remote)), - SMC_LLC_WAIT_TIME); + rc = wait_event_timeout(lgr->llc_flow_waiter, (list_empty(&lgr->list) || + (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE && + 
(lgr->llc_flow_rmt.type == SMC_LLC_FLOW_NONE || + lgr->llc_flow_rmt.type == allowed_remote))), + SMC_LLC_WAIT_TIME * 10); if (!rc) return -ETIMEDOUT; goto again; @@ -272,7 +285,7 @@ void smc_llc_flow_stop(struct smc_link_group *lgr, struct smc_llc_flow *flow) flow == &lgr->llc_flow_lcl) schedule_work(&lgr->llc_event_work); else - wake_up_interruptible(&lgr->llc_waiter); + wake_up(&lgr->llc_flow_waiter); } /* lnk is optional and used for early wakeup when link goes down, useful in @@ -283,26 +296,32 @@ struct smc_llc_qentry *smc_llc_wait(struct smc_link_group *lgr, int time_out, u8 exp_msg) { struct smc_llc_flow *flow = &lgr->llc_flow_lcl; + u8 rcv_msg; - wait_event_interruptible_timeout(lgr->llc_waiter, - (flow->qentry || - (lnk && !smc_link_usable(lnk)) || - list_empty(&lgr->list)), - time_out); + wait_event_timeout(lgr->llc_msg_waiter, + (flow->qentry || + (lnk && !smc_link_usable(lnk)) || + list_empty(&lgr->list)), + time_out); if (!flow->qentry || (lnk && !smc_link_usable(lnk)) || list_empty(&lgr->list)) { smc_llc_flow_qentry_del(flow); goto out; } - if (exp_msg && flow->qentry->msg.raw.hdr.common.type != exp_msg) { + rcv_msg = flow->qentry->msg.raw.hdr.common.type; + if (exp_msg && rcv_msg != exp_msg) { if (exp_msg == SMC_LLC_ADD_LINK && - flow->qentry->msg.raw.hdr.common.type == - SMC_LLC_DELETE_LINK) { + rcv_msg == SMC_LLC_DELETE_LINK) { /* flow_start will delay the unexpected msg */ smc_llc_flow_start(&lgr->llc_flow_lcl, smc_llc_flow_qentry_clr(flow)); return NULL; } + pr_warn_once("smc: SMC-R lg %*phN dropped unexpected LLC msg: " + "msg %d exp %d flow %d role %d flags %x\n", + SMC_LGR_ID_SIZE, &lgr->id, rcv_msg, exp_msg, + flow->type, lgr->role, + flow->qentry->msg.raw.hdr.flags); smc_llc_flow_qentry_del(flow); } out: @@ -1222,8 +1241,8 @@ static void smc_llc_process_cli_delete_link(struct smc_link_group *lgr) smc_llc_send_message(lnk, &qentry->msg); /* response */ if (smc_link_downing(&lnk_del->state)) { - smc_switch_conns(lgr, lnk_del, false); - smc_wr_tx_wait_no_pending_sends(lnk_del); + if (smc_switch_conns(lgr, lnk_del, false)) + smc_wr_tx_wait_no_pending_sends(lnk_del); } smcr_link_clear(lnk_del, true); @@ -1297,8 +1316,8 @@ static void smc_llc_process_srv_delete_link(struct smc_link_group *lgr) goto out; /* asymmetric link already deleted */ if (smc_link_downing(&lnk_del->state)) { - smc_switch_conns(lgr, lnk_del, false); - smc_wr_tx_wait_no_pending_sends(lnk_del); + if (smc_switch_conns(lgr, lnk_del, false)) + smc_wr_tx_wait_no_pending_sends(lnk_del); } if (!list_empty(&lgr->list)) { /* qentry is either a request from peer (send it back to @@ -1459,7 +1478,7 @@ static void smc_llc_event_handler(struct smc_llc_qentry *qentry) /* a flow is waiting for this message */ smc_llc_flow_qentry_set(&lgr->llc_flow_lcl, qentry); - wake_up_interruptible(&lgr->llc_waiter); + wake_up(&lgr->llc_msg_waiter); } else if (smc_llc_flow_start(&lgr->llc_flow_lcl, qentry)) { schedule_work(&lgr->llc_add_link_work); @@ -1474,7 +1493,7 @@ static void smc_llc_event_handler(struct smc_llc_qentry *qentry) if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) { /* a flow is waiting for this message */ smc_llc_flow_qentry_set(&lgr->llc_flow_lcl, qentry); - wake_up_interruptible(&lgr->llc_waiter); + wake_up(&lgr->llc_msg_waiter); return; } break; @@ -1485,7 +1504,7 @@ static void smc_llc_event_handler(struct smc_llc_qentry *qentry) /* DEL LINK REQ during ADD LINK SEQ */ smc_llc_flow_qentry_set(&lgr->llc_flow_lcl, qentry); - wake_up_interruptible(&lgr->llc_waiter); + wake_up(&lgr->llc_msg_waiter); } else if 
(smc_llc_flow_start(&lgr->llc_flow_lcl, qentry)) { schedule_work(&lgr->llc_del_link_work); @@ -1496,7 +1515,7 @@ static void smc_llc_event_handler(struct smc_llc_qentry *qentry) /* DEL LINK REQ during ADD LINK SEQ */ smc_llc_flow_qentry_set(&lgr->llc_flow_lcl, qentry); - wake_up_interruptible(&lgr->llc_waiter); + wake_up(&lgr->llc_msg_waiter); } else if (smc_llc_flow_start(&lgr->llc_flow_lcl, qentry)) { schedule_work(&lgr->llc_del_link_work); @@ -1581,7 +1600,7 @@ static void smc_llc_rx_response(struct smc_link *link, case SMC_LLC_DELETE_RKEY: /* assign responses to the local flow, we requested them */ smc_llc_flow_qentry_set(&link->lgr->llc_flow_lcl, qentry); - wake_up_interruptible(&link->lgr->llc_waiter); + wake_up(&link->lgr->llc_msg_waiter); return; case SMC_LLC_CONFIRM_RKEY_CONT: /* not used because max links is 3 */ @@ -1616,7 +1635,7 @@ static void smc_llc_enqueue(struct smc_link *link, union smc_llc_msg *llc) spin_lock_irqsave(&lgr->llc_event_q_lock, flags); list_add_tail(&qentry->list, &lgr->llc_event_q); spin_unlock_irqrestore(&lgr->llc_event_q_lock, flags); - schedule_work(&link->lgr->llc_event_work); + schedule_work(&lgr->llc_event_work); } /* copy received msg and add it to the event queue */ @@ -1677,7 +1696,8 @@ void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc) INIT_LIST_HEAD(&lgr->llc_event_q); spin_lock_init(&lgr->llc_event_q_lock); spin_lock_init(&lgr->llc_flow_lock); - init_waitqueue_head(&lgr->llc_waiter); + init_waitqueue_head(&lgr->llc_flow_waiter); + init_waitqueue_head(&lgr->llc_msg_waiter); mutex_init(&lgr->llc_conf_mutex); lgr->llc_testlink_time = net->ipv4.sysctl_tcp_keepalive_time; } @@ -1686,7 +1706,8 @@ void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc) void smc_llc_lgr_clear(struct smc_link_group *lgr) { smc_llc_event_flush(lgr); - wake_up_interruptible_all(&lgr->llc_waiter); + wake_up_all(&lgr->llc_flow_waiter); + wake_up_all(&lgr->llc_msg_waiter); cancel_work_sync(&lgr->llc_event_work); cancel_work_sync(&lgr->llc_add_link_work); cancel_work_sync(&lgr->llc_del_link_work); diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 014d91b9778e..30e5fac7034e 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -12,6 +12,7 @@ #include <linux/module.h> #include <linux/list.h> #include <linux/ctype.h> +#include <linux/mutex.h> #include <net/netlink.h> #include <net/genetlink.h> @@ -129,7 +130,7 @@ static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name) return rc; /* remove ib devices */ - spin_lock(&smc_ib_devices.lock); + mutex_lock(&smc_ib_devices.mutex); list_for_each_entry(ibdev, &smc_ib_devices.list, list) { for (ibport = 0; ibport < SMC_MAX_PORTS; ibport++) { if (ibdev->pnetid_by_user[ibport] && @@ -149,9 +150,9 @@ static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name) } } } - spin_unlock(&smc_ib_devices.lock); + mutex_unlock(&smc_ib_devices.mutex); /* remove smcd devices */ - spin_lock(&smcd_dev_list.lock); + mutex_lock(&smcd_dev_list.mutex); list_for_each_entry(smcd_dev, &smcd_dev_list.list, list) { if (smcd_dev->pnetid_by_user && (!pnet_name || @@ -165,7 +166,7 @@ static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name) rc = 0; } } - spin_unlock(&smcd_dev_list.lock); + mutex_unlock(&smcd_dev_list.mutex); return rc; } @@ -240,14 +241,14 @@ static bool smc_pnet_apply_ib(struct smc_ib_device *ib_dev, u8 ib_port, u8 pnet_null[SMC_MAX_PNETID_LEN] = {0}; bool applied = false; - spin_lock(&smc_ib_devices.lock); + mutex_lock(&smc_ib_devices.mutex); if 
(smc_pnet_match(ib_dev->pnetid[ib_port - 1], pnet_null)) { memcpy(ib_dev->pnetid[ib_port - 1], pnet_name, SMC_MAX_PNETID_LEN); ib_dev->pnetid_by_user[ib_port - 1] = true; applied = true; } - spin_unlock(&smc_ib_devices.lock); + mutex_unlock(&smc_ib_devices.mutex); return applied; } @@ -258,13 +259,13 @@ static bool smc_pnet_apply_smcd(struct smcd_dev *smcd_dev, char *pnet_name) u8 pnet_null[SMC_MAX_PNETID_LEN] = {0}; bool applied = false; - spin_lock(&smcd_dev_list.lock); + mutex_lock(&smcd_dev_list.mutex); if (smc_pnet_match(smcd_dev->pnetid, pnet_null)) { memcpy(smcd_dev->pnetid, pnet_name, SMC_MAX_PNETID_LEN); smcd_dev->pnetid_by_user = true; applied = true; } - spin_unlock(&smcd_dev_list.lock); + mutex_unlock(&smcd_dev_list.mutex); return applied; } @@ -300,7 +301,7 @@ static struct smc_ib_device *smc_pnet_find_ib(char *ib_name) { struct smc_ib_device *ibdev; - spin_lock(&smc_ib_devices.lock); + mutex_lock(&smc_ib_devices.mutex); list_for_each_entry(ibdev, &smc_ib_devices.list, list) { if (!strncmp(ibdev->ibdev->name, ib_name, sizeof(ibdev->ibdev->name)) || @@ -311,7 +312,7 @@ static struct smc_ib_device *smc_pnet_find_ib(char *ib_name) } ibdev = NULL; out: - spin_unlock(&smc_ib_devices.lock); + mutex_unlock(&smc_ib_devices.mutex); return ibdev; } @@ -320,7 +321,7 @@ static struct smcd_dev *smc_pnet_find_smcd(char *smcd_name) { struct smcd_dev *smcd_dev; - spin_lock(&smcd_dev_list.lock); + mutex_lock(&smcd_dev_list.mutex); list_for_each_entry(smcd_dev, &smcd_dev_list.list, list) { if (!strncmp(dev_name(&smcd_dev->dev), smcd_name, IB_DEVICE_NAME_MAX - 1)) @@ -328,7 +329,7 @@ static struct smcd_dev *smc_pnet_find_smcd(char *smcd_name) } smcd_dev = NULL; out: - spin_unlock(&smcd_dev_list.lock); + mutex_unlock(&smcd_dev_list.mutex); return smcd_dev; } @@ -825,7 +826,7 @@ static void _smc_pnet_find_roce_by_pnetid(u8 *pnet_id, int i; ini->ib_dev = NULL; - spin_lock(&smc_ib_devices.lock); + mutex_lock(&smc_ib_devices.mutex); list_for_each_entry(ibdev, &smc_ib_devices.list, list) { if (ibdev == known_dev) continue; @@ -844,7 +845,7 @@ static void _smc_pnet_find_roce_by_pnetid(u8 *pnet_id, } } out: - spin_unlock(&smc_ib_devices.lock); + mutex_unlock(&smc_ib_devices.mutex); } /* find alternate roce device with same pnet_id and vlan_id */ @@ -863,7 +864,7 @@ static void smc_pnet_find_rdma_dev(struct net_device *netdev, { struct smc_ib_device *ibdev; - spin_lock(&smc_ib_devices.lock); + mutex_lock(&smc_ib_devices.mutex); list_for_each_entry(ibdev, &smc_ib_devices.list, list) { struct net_device *ndev; int i; @@ -888,7 +889,7 @@ static void smc_pnet_find_rdma_dev(struct net_device *netdev, } } } - spin_unlock(&smc_ib_devices.lock); + mutex_unlock(&smc_ib_devices.mutex); } /* Determine the corresponding IB device port based on the hardware PNETID. 
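smc_pnet_apply_ib() and smc_pnet_apply_smcd() above follow a claim-if-unset discipline: a user-supplied pnetid is copied into a device slot only while that slot still holds all zeroes, under the list mutex so two racing writers cannot both succeed. A distilled, hypothetical version of the shared logic:

    #include <linux/mutex.h>
    #include <linux/string.h>
    #include <linux/types.h>

    #define PNETID_LEN 16                   /* mirrors SMC_MAX_PNETID_LEN */

    static bool pnetid_apply(u8 *slot, const u8 *pnet_name, struct mutex *lock)
    {
            static const u8 pnet_null[PNETID_LEN];  /* all zeroes */
            bool applied = false;

            mutex_lock(lock);
            if (!memcmp(slot, pnet_null, PNETID_LEN)) {
                    /* slot unclaimed: take it atomically w.r.t. the lock */
                    memcpy(slot, pnet_name, PNETID_LEN);
                    applied = true;
            }
            mutex_unlock(lock);
            return applied;
    }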
@@ -924,7 +925,7 @@ static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev, smc_pnet_find_ndev_pnetid_by_table(ndev, ndev_pnetid)) return; /* pnetid could not be determined */ - spin_lock(&smcd_dev_list.lock); + mutex_lock(&smcd_dev_list.mutex); list_for_each_entry(ismdev, &smcd_dev_list.list, list) { if (smc_pnet_match(ismdev->pnetid, ndev_pnetid) && !ismdev->going_away) { @@ -932,7 +933,7 @@ static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev, break; } } - spin_unlock(&smcd_dev_list.lock); + mutex_unlock(&smcd_dev_list.mutex); } /* PNET table analysis for a given sock: diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index 7239ba9b99dc..1e23cdd41eb1 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -169,6 +169,8 @@ void smc_wr_tx_cq_handler(struct ib_cq *ib_cq, void *cq_context) static inline int smc_wr_tx_get_free_slot_index(struct smc_link *link, u32 *idx) { *idx = link->wr_tx_cnt; + if (!smc_link_usable(link)) + return -ENOLINK; for_each_clear_bit(*idx, link->wr_tx_mask, link->wr_tx_cnt) { if (!test_and_set_bit(*idx, link->wr_tx_mask)) return 0; @@ -560,15 +562,15 @@ void smc_wr_free_link(struct smc_link *lnk) { struct ib_device *ibdev; + if (!lnk->smcibdev) + return; + ibdev = lnk->smcibdev->ibdev; + if (smc_wr_tx_wait_no_pending_sends(lnk)) memset(lnk->wr_tx_mask, 0, BITS_TO_LONGS(SMC_WR_BUF_CNT) * sizeof(*lnk->wr_tx_mask)); - if (!lnk->smcibdev) - return; - ibdev = lnk->smcibdev->ibdev; - if (lnk->wr_rx_dma_addr) { ib_dma_unmap_single(ibdev, lnk->wr_rx_dma_addr, SMC_WR_BUF_SIZE * lnk->wr_rx_cnt, diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 39e14d5edaf1..e9d0953522f0 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -1317,6 +1317,7 @@ rpc_gssd_dummy_populate(struct dentry *root, struct rpc_pipe *pipe_data) q.len = strlen(gssd_dummy_clnt_dir[0].name); clnt_dentry = d_hash_and_lookup(gssd_dentry, &q); if (!clnt_dentry) { + __rpc_depopulate(gssd_dentry, gssd_dummy_clnt_dir, 0, 1); pipe_dentry = ERR_PTR(-ENOENT); goto out; } diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 5c4ec9386f81..c537272f9c7e 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -44,6 +44,7 @@ #include <net/tcp.h> #include <net/tcp_states.h> #include <linux/uaccess.h> +#include <linux/highmem.h> #include <asm/ioctls.h> #include <linux/sunrpc/types.h> diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 6f7d82fb1eb0..be11d672b5b9 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -1118,6 +1118,7 @@ xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf, base = 0; } else { base -= buf->head[0].iov_len; + subbuf->head[0].iov_base = buf->head[0].iov_base; subbuf->head[0].iov_len = 0; } @@ -1130,6 +1131,8 @@ xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf, base = 0; } else { base -= buf->page_len; + subbuf->pages = buf->pages; + subbuf->page_base = 0; subbuf->page_len = 0; } @@ -1141,6 +1144,7 @@ xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf, base = 0; } else { base -= buf->tail[0].iov_len; + subbuf->tail[0].iov_base = buf->tail[0].iov_base; subbuf->tail[0].iov_len = 0; } diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index ef997880e17a..b647562a26dd 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -367,7 +367,7 @@ static void frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc) trace_xprtrdma_wc_fastreg(wc, frwr); /* The MR will get recycled when the associated req is retransmitted */ - rpcrdma_flush_disconnect(cq, wc); + 
rpcrdma_flush_disconnect(cq->cq_context, wc); } /** @@ -452,7 +452,7 @@ static void frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc) trace_xprtrdma_wc_li(wc, frwr); __frwr_release_mr(wc, mr); - rpcrdma_flush_disconnect(cq, wc); + rpcrdma_flush_disconnect(cq->cq_context, wc); } /** @@ -474,7 +474,7 @@ static void frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc) __frwr_release_mr(wc, mr); complete(&frwr->fr_linv_done); - rpcrdma_flush_disconnect(cq, wc); + rpcrdma_flush_disconnect(cq->cq_context, wc); } /** @@ -582,7 +582,7 @@ static void frwr_wc_localinv_done(struct ib_cq *cq, struct ib_wc *wc) smp_rmb(); rpcrdma_complete_rqst(rep); - rpcrdma_flush_disconnect(cq, wc); + rpcrdma_flush_disconnect(cq->cq_context, wc); } /** diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 2081c8fbfa48..935bbef2f7be 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -1349,8 +1349,7 @@ rpcrdma_decode_error(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep, be32_to_cpup(p), be32_to_cpu(rep->rr_xid)); } - r_xprt->rx_stats.bad_reply_count++; - return -EREMOTEIO; + return -EIO; } /* Perform XID lookup, reconstruction of the RPC reply, and @@ -1387,13 +1386,11 @@ out: spin_unlock(&xprt->queue_lock); return; -/* If the incoming reply terminated a pending RPC, the next - * RPC call will post a replacement receive buffer as it is - * being marshaled. - */ out_badheader: trace_xprtrdma_reply_hdr(rep); r_xprt->rx_stats.bad_reply_count++; + rqst->rq_task->tk_status = status; + status = 0; goto out; } diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 0c4af7f5e241..14165b673b20 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -242,7 +242,7 @@ xprt_rdma_connect_worker(struct work_struct *work) rc = rpcrdma_xprt_connect(r_xprt); xprt_clear_connecting(xprt); - if (r_xprt->rx_ep && r_xprt->rx_ep->re_connect_status > 0) { + if (!rc) { xprt->connect_cookie++; xprt->stat.connect_count++; xprt->stat.connect_time += (long)jiffies - diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 2ae348377806..2198c8ec8dff 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -84,7 +84,8 @@ static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep); static void rpcrdma_reps_unmap(struct rpcrdma_xprt *r_xprt); static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt); static void rpcrdma_mrs_destroy(struct rpcrdma_xprt *r_xprt); -static int rpcrdma_ep_destroy(struct rpcrdma_ep *ep); +static void rpcrdma_ep_get(struct rpcrdma_ep *ep); +static int rpcrdma_ep_put(struct rpcrdma_ep *ep); static struct rpcrdma_regbuf * rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction, gfp_t flags); @@ -97,7 +98,8 @@ static void rpcrdma_regbuf_free(struct rpcrdma_regbuf *rb); */ static void rpcrdma_xprt_drain(struct rpcrdma_xprt *r_xprt) { - struct rdma_cm_id *id = r_xprt->rx_ep->re_id; + struct rpcrdma_ep *ep = r_xprt->rx_ep; + struct rdma_cm_id *id = ep->re_id; /* Flush Receives, then wait for deferred Reply work * to complete. @@ -108,6 +110,8 @@ static void rpcrdma_xprt_drain(struct rpcrdma_xprt *r_xprt) * local invalidations. */ ib_drain_sq(id->qp); + + rpcrdma_ep_put(ep); } /** @@ -126,23 +130,27 @@ static void rpcrdma_qp_event_handler(struct ib_event *event, void *context) trace_xprtrdma_qp_event(ep, event); } +/* Ensure xprt_force_disconnect() is invoked exactly once when a + * connection is closed or lost. 
(The important thing is it needs + * to be invoked "at least" once). + */ +static void rpcrdma_force_disconnect(struct rpcrdma_ep *ep) +{ + if (atomic_add_unless(&ep->re_force_disconnect, 1, 1)) + xprt_force_disconnect(ep->re_xprt); +} + /** * rpcrdma_flush_disconnect - Disconnect on flushed completion - * @cq: completion queue + * @r_xprt: transport to disconnect * @wc: work completion entry * * Must be called in process context. */ -void rpcrdma_flush_disconnect(struct ib_cq *cq, struct ib_wc *wc) +void rpcrdma_flush_disconnect(struct rpcrdma_xprt *r_xprt, struct ib_wc *wc) { - struct rpcrdma_xprt *r_xprt = cq->cq_context; - struct rpc_xprt *xprt = &r_xprt->rx_xprt; - - if (wc->status != IB_WC_SUCCESS && - r_xprt->rx_ep->re_connect_status == 1) { - r_xprt->rx_ep->re_connect_status = -ECONNABORTED; - xprt_force_disconnect(xprt); - } + if (wc->status != IB_WC_SUCCESS) + rpcrdma_force_disconnect(r_xprt->rx_ep); } /** @@ -156,11 +164,12 @@ static void rpcrdma_wc_send(struct ib_cq *cq, struct ib_wc *wc) struct ib_cqe *cqe = wc->wr_cqe; struct rpcrdma_sendctx *sc = container_of(cqe, struct rpcrdma_sendctx, sc_cqe); + struct rpcrdma_xprt *r_xprt = cq->cq_context; /* WARNING: Only wr_cqe and status are reliable at this point */ trace_xprtrdma_wc_send(sc, wc); - rpcrdma_sendctx_put_locked((struct rpcrdma_xprt *)cq->cq_context, sc); - rpcrdma_flush_disconnect(cq, wc); + rpcrdma_sendctx_put_locked(r_xprt, sc); + rpcrdma_flush_disconnect(r_xprt, wc); } /** @@ -195,7 +204,7 @@ static void rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) return; out_flushed: - rpcrdma_flush_disconnect(cq, wc); + rpcrdma_flush_disconnect(r_xprt, wc); rpcrdma_rep_destroy(rep); } @@ -239,7 +248,6 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) { struct sockaddr *sap = (struct sockaddr *)&id->route.addr.dst_addr; struct rpcrdma_ep *ep = id->context; - struct rpc_xprt *xprt = ep->re_xprt; might_sleep(); @@ -263,10 +271,9 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) /* fall through */ case RDMA_CM_EVENT_ADDR_CHANGE: ep->re_connect_status = -ENODEV; - xprt_force_disconnect(xprt); goto disconnected; case RDMA_CM_EVENT_ESTABLISHED: - kref_get(&ep->re_kref); + rpcrdma_ep_get(ep); ep->re_connect_status = 1; rpcrdma_update_cm_private(ep, &event->param.conn); trace_xprtrdma_inline_thresh(ep); @@ -288,8 +295,8 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) case RDMA_CM_EVENT_DISCONNECTED: ep->re_connect_status = -ECONNABORTED; disconnected: - xprt_force_disconnect(xprt); - return rpcrdma_ep_destroy(ep); + rpcrdma_force_disconnect(ep); + return rpcrdma_ep_put(ep); default: break; } @@ -345,7 +352,7 @@ out: return ERR_PTR(rc); } -static void rpcrdma_ep_put(struct kref *kref) +static void rpcrdma_ep_destroy(struct kref *kref) { struct rpcrdma_ep *ep = container_of(kref, struct rpcrdma_ep, re_kref); @@ -369,13 +376,18 @@ static void rpcrdma_ep_put(struct kref *kref) module_put(THIS_MODULE); } +static noinline void rpcrdma_ep_get(struct rpcrdma_ep *ep) +{ + kref_get(&ep->re_kref); +} + /* Returns: * %0 if @ep still has a positive kref count, or * %1 if @ep was destroyed successfully. 
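The new rpcrdma_force_disconnect() above uses atomic_add_unless() as a once-only latch, so racing completion handlers and CM events funnel into a single xprt_force_disconnect() call per connection. A standalone illustration of the mechanism (names hypothetical):

    #include <linux/atomic.h>
    #include <linux/types.h>

    static atomic_t latch = ATOMIC_INIT(0);

    /* atomic_add_unless(&v, 1, 1) increments v only while v != 1 and
     * returns true iff it did, so of N racing callers exactly one
     * observes true -- a once-only latch until the counter is reset. */
    static bool disconnect_once(void)
    {
            return atomic_add_unless(&latch, 1, 1);
    }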
*/ -static int rpcrdma_ep_destroy(struct rpcrdma_ep *ep) +static noinline int rpcrdma_ep_put(struct rpcrdma_ep *ep) { - return kref_put(&ep->re_kref, rpcrdma_ep_put); + return kref_put(&ep->re_kref, rpcrdma_ep_destroy); } static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt) @@ -492,7 +504,7 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt) return 0; out_destroy: - rpcrdma_ep_destroy(ep); + rpcrdma_ep_put(ep); rdma_destroy_id(id); out_free: kfree(ep); @@ -519,10 +531,13 @@ retry: return rc; ep = r_xprt->rx_ep; - ep->re_connect_status = 0; xprt_clear_connected(xprt); - rpcrdma_reset_cwnd(r_xprt); + + /* Bump the ep's reference count while there are + * outstanding Receives. + */ + rpcrdma_ep_get(ep); rpcrdma_post_recvs(r_xprt, true); rc = rpcrdma_sendctxs_create(r_xprt); @@ -552,8 +567,6 @@ retry: rpcrdma_mrs_create(r_xprt); out: - if (rc) - ep->re_connect_status = rc; trace_xprtrdma_connect(r_xprt, rc); return rc; } @@ -587,7 +600,7 @@ void rpcrdma_xprt_disconnect(struct rpcrdma_xprt *r_xprt) rpcrdma_mrs_destroy(r_xprt); rpcrdma_sendctxs_destroy(r_xprt); - if (rpcrdma_ep_destroy(ep)) + if (rpcrdma_ep_put(ep)) rdma_destroy_id(id); r_xprt->rx_ep = NULL; diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 0a16fdb09b2c..43974ef39a50 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -82,6 +82,7 @@ struct rpcrdma_ep { unsigned int re_max_inline_recv; int re_async_rc; int re_connect_status; + atomic_t re_force_disconnect; struct ib_qp_init_attr re_attr; wait_queue_head_t re_connect_wait; struct rpc_xprt *re_xprt; @@ -446,7 +447,7 @@ extern unsigned int xprt_rdma_memreg_strategy; /* * Endpoint calls - xprtrdma/verbs.c */ -void rpcrdma_flush_disconnect(struct ib_cq *cq, struct ib_wc *wc); +void rpcrdma_flush_disconnect(struct rpcrdma_xprt *r_xprt, struct ib_wc *wc); int rpcrdma_xprt_connect(struct rpcrdma_xprt *r_xprt); void rpcrdma_xprt_disconnect(struct rpcrdma_xprt *r_xprt); diff --git a/net/tipc/link.c b/net/tipc/link.c index 1c579357ccdf..f1d9c33dae72 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -921,6 +921,21 @@ static void link_prepare_wakeup(struct tipc_link *l) } +/** + * tipc_link_set_skb_retransmit_time - set the time at which retransmission of + * the given skb should be next attempted + * @skb: skb to set a future retransmission time for + * @l: link the skb will be transmitted on + */ +static void tipc_link_set_skb_retransmit_time(struct sk_buff *skb, + struct tipc_link *l) +{ + if (link_is_bc_sndlink(l)) + TIPC_SKB_CB(skb)->nxt_retr = TIPC_BC_RETR_LIM; + else + TIPC_SKB_CB(skb)->nxt_retr = TIPC_UC_RETR_TIME; +} + void tipc_link_reset(struct tipc_link *l) { struct sk_buff_head list; @@ -1036,9 +1051,7 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list, return -ENOBUFS; } __skb_queue_tail(transmq, skb); - /* next retransmit attempt */ - if (link_is_bc_sndlink(l)) - TIPC_SKB_CB(skb)->nxt_retr = TIPC_BC_RETR_LIM; + tipc_link_set_skb_retransmit_time(skb, l); __skb_queue_tail(xmitq, _skb); TIPC_SKB_CB(skb)->ackers = l->ackers; l->rcv_unacked = 0; @@ -1139,9 +1152,7 @@ static void tipc_link_advance_backlog(struct tipc_link *l, if (unlikely(skb == l->backlog[imp].target_bskb)) l->backlog[imp].target_bskb = NULL; __skb_queue_tail(&l->transmq, skb); - /* next retransmit attempt */ - if (link_is_bc_sndlink(l)) - TIPC_SKB_CB(skb)->nxt_retr = TIPC_BC_RETR_LIM; + tipc_link_set_skb_retransmit_time(skb, l); __skb_queue_tail(xmitq, _skb); TIPC_SKB_CB(skb)->ackers = l->ackers; @@ -1584,8 
+1595,7 @@ release: /* retransmit skb if unrestricted*/ if (time_before(jiffies, TIPC_SKB_CB(skb)->nxt_retr)) continue; - TIPC_SKB_CB(skb)->nxt_retr = (is_uc) ? - TIPC_UC_RETR_TIME : TIPC_BC_RETR_LIM; + tipc_link_set_skb_retransmit_time(skb, l); _skb = pskb_copy(skb, GFP_ATOMIC); if (!_skb) continue; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 263ae395ad44..0e07fb8585fb 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -5016,7 +5016,8 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) err = nl80211_parse_he_obss_pd( info->attrs[NL80211_ATTR_HE_OBSS_PD], ¶ms.he_obss_pd); - goto out; + if (err) + goto out; } if (info->attrs[NL80211_ATTR_HE_BSS_COLOR]) { @@ -5024,7 +5025,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) info->attrs[NL80211_ATTR_HE_BSS_COLOR], ¶ms.he_bss_color); if (err) - return err; + goto out; } nl80211_calculate_ap_params(¶ms); diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index 540ed75e4482..08b80669f649 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -2,9 +2,6 @@ #include <net/xsk_buff_pool.h> #include <net/xdp_sock.h> -#include <linux/dma-direct.h> -#include <linux/dma-noncoherent.h> -#include <linux/swiotlb.h> #include "xsk_queue.h" @@ -55,7 +52,6 @@ struct xsk_buff_pool *xp_create(struct page **pages, u32 nr_pages, u32 chunks, pool->free_heads_cnt = chunks; pool->headroom = headroom; pool->chunk_size = chunk_size; - pool->cheap_dma = true; pool->unaligned = unaligned; pool->frame_len = chunk_size - headroom - XDP_PACKET_HEADROOM; INIT_LIST_HEAD(&pool->free_list); @@ -125,48 +121,6 @@ static void xp_check_dma_contiguity(struct xsk_buff_pool *pool) } } -static bool __maybe_unused xp_check_swiotlb_dma(struct xsk_buff_pool *pool) -{ -#if defined(CONFIG_SWIOTLB) - phys_addr_t paddr; - u32 i; - - for (i = 0; i < pool->dma_pages_cnt; i++) { - paddr = dma_to_phys(pool->dev, pool->dma_pages[i]); - if (is_swiotlb_buffer(paddr)) - return false; - } -#endif - return true; -} - -static bool xp_check_cheap_dma(struct xsk_buff_pool *pool) -{ -#if defined(CONFIG_HAS_DMA) - const struct dma_map_ops *ops = get_dma_ops(pool->dev); - - if (ops) { - return !ops->sync_single_for_cpu && - !ops->sync_single_for_device; - } - - if (!dma_is_direct(ops)) - return false; - - if (!xp_check_swiotlb_dma(pool)) - return false; - - if (!dev_is_dma_coherent(pool->dev)) { -#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \ - defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) || \ - defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) - return false; -#endif - } -#endif - return true; -} - int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev, unsigned long attrs, struct page **pages, u32 nr_pages) { @@ -180,6 +134,7 @@ int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev, pool->dev = dev; pool->dma_pages_cnt = nr_pages; + pool->dma_need_sync = false; for (i = 0; i < pool->dma_pages_cnt; i++) { dma = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE, @@ -188,14 +143,13 @@ int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev, xp_dma_unmap(pool, attrs); return -ENOMEM; } + if (dma_need_sync(dev, dma)) + pool->dma_need_sync = true; pool->dma_pages[i] = dma; } if (pool->unaligned) xp_check_dma_contiguity(pool); - - pool->dev = dev; - pool->cheap_dma = xp_check_cheap_dma(pool); return 0; } EXPORT_SYMBOL(xp_dma_map); @@ -280,7 +234,7 @@ struct xdp_buff *xp_alloc(struct xsk_buff_pool *pool) xskb->xdp.data = xskb->xdp.data_hard_start + XDP_PACKET_HEADROOM; 
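In the xsk_buff_pool.c hunks, the open-coded cheap_dma probing of swiotlb and dma_map_ops internals is dropped in favour of asking the DMA core directly: dma_need_sync() is checked once per page at map time, and the flag it sets is what the xp_alloc() fast path tests just below. A sketch of that probe-at-map pattern, assuming a pared-down pool structure:

    #include <linux/dma-mapping.h>
    #include <linux/mm.h>

    struct pool_sketch {                    /* hypothetical, pared-down pool */
            struct device *dev;
            dma_addr_t *dma_pages;
            bool dma_need_sync;
    };

    static int pool_map(struct pool_sketch *pool, struct page **pages, u32 n)
    {
            u32 i;

            pool->dma_need_sync = false;
            for (i = 0; i < n; i++) {
                    dma_addr_t dma = dma_map_page(pool->dev, pages[i], 0,
                                                  PAGE_SIZE, DMA_BIDIRECTIONAL);
                    if (dma_mapping_error(pool->dev, dma))
                            return -ENOMEM;
                    /* let the DMA core decide whether syncs are needed,
                     * instead of guessing from swiotlb/coherence details */
                    if (dma_need_sync(pool->dev, dma))
                            pool->dma_need_sync = true;
                    pool->dma_pages[i] = dma;
            }
            return 0;
    }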
xskb->xdp.data_meta = xskb->xdp.data; - if (!pool->cheap_dma) { + if (pool->dma_need_sync) { dma_sync_single_range_for_device(pool->dev, xskb->dma, 0, pool->frame_len, DMA_BIDIRECTIONAL); diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c index c407ecbc5d46..b615729812e5 100644 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface.c @@ -37,6 +37,7 @@ #include <net/ip.h> #include <net/ipv6.h> #include <net/ip6_route.h> +#include <net/ip_tunnels.h> #include <net/addrconf.h> #include <net/xfrm.h> #include <net/net_namespace.h> @@ -581,6 +582,7 @@ static const struct net_device_ops xfrmi_netdev_ops = { static void xfrmi_dev_setup(struct net_device *dev) { dev->netdev_ops = &xfrmi_netdev_ops; + dev->header_ops = &ip_tunnel_header_ops; dev->type = ARPHRD_NONE; dev->mtu = ETH_DATA_LEN; dev->min_mtu = ETH_MIN_MTU;
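The final xfrm_interface.c hunk wires dev->header_ops to ip_tunnel_header_ops. An xfrm interface is an ARPHRD_NONE device carrying bare IP packets, so without header_ops consumers such as packet sockets have no link-layer header from which to derive the protocol; the tunnel header_ops supplies a parse_protocol hook that classifies on the IP version nibble. A sketch in the spirit of that hook (not the verbatim ip_tunnel_parse_protocol() helper):

    #include <linux/if_ether.h>
    #include <linux/skbuff.h>

    static __be16 parse_ip_version(const struct sk_buff *skb)
    {
            /* assumes at least the first header byte is linear */
            switch (skb->data[0] >> 4) {    /* IP version nibble */
            case 4:
                    return htons(ETH_P_IP);
            case 6:
                    return htons(ETH_P_IPV6);
            default:
                    return 0;               /* unknown payload */
            }
    }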