From be8c827f50a0bcd56361b31ada11dc0a3c2fd240 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 29 Mar 2020 22:50:06 +0200 Subject: mac80211: fix authentication with iwlwifi/mvm The original patch didn't copy the ieee80211_is_data() condition because on most drivers the management frames don't go through this path. However, they do on iwlwifi/mvm, so we do need to keep the condition here. Cc: stable@vger.kernel.org Fixes: ce2e1ca70307 ("mac80211: Check port authorization in the ieee80211_tx_dequeue() case") Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- net/mac80211/tx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index d9cca6dbd870..efe4c1fc68e5 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -3610,7 +3610,8 @@ begin: * Drop unicast frames to unauthorised stations unless they are * EAPOL frames from the local station. */ - if (unlikely(!ieee80211_vif_is_mesh(&tx.sdata->vif) && + if (unlikely(ieee80211_is_data(hdr->frame_control) && + !ieee80211_vif_is_mesh(&tx.sdata->vif) && tx.sdata->vif.type != NL80211_IFTYPE_OCB && !is_multicast_ether_addr(hdr->addr1) && !test_sta_flag(tx.sta, WLAN_STA_AUTHORIZED) && -- cgit v1.2.3-58-ga151 From fbe4e0c1b298b4665ee6915266c9d6c5b934ef4a Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Wed, 25 Mar 2020 18:01:00 -0400 Subject: ipv4: fix a RCU-list lock in fib_triestat_seq_show fib_triestat_seq_show() calls hlist_for_each_entry_rcu(tb, head, tb_hlist) without rcu_read_lock() will trigger a warning, net/ipv4/fib_trie.c:2579 RCU-list traversed in non-reader section!! 
other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 1 lock held by proc01/115277: #0: c0000014507acf00 (&p->lock){+.+.}-{3:3}, at: seq_read+0x58/0x670 Call Trace: dump_stack+0xf4/0x164 (unreliable) lockdep_rcu_suspicious+0x140/0x164 fib_triestat_seq_show+0x750/0x880 seq_read+0x1a0/0x670 proc_reg_read+0x10c/0x1b0 __vfs_read+0x3c/0x70 vfs_read+0xac/0x170 ksys_read+0x7c/0x140 system_call+0x5c/0x68 Fix it by adding a pair of rcu_read_lock/unlock() and use cond_resched_rcu() to avoid the situation where walking of a large number of items may prevent scheduling for a long time. Signed-off-by: Qian Cai Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index ff0c24371e33..3be0affbabd3 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -2577,6 +2577,7 @@ static int fib_triestat_seq_show(struct seq_file *seq, void *v) " %zd bytes, size of tnode: %zd bytes.\n", LEAF_SIZE, TNODE_SIZE(0)); + rcu_read_lock(); for (h = 0; h < FIB_TABLE_HASHSZ; h++) { struct hlist_head *head = &net->ipv4.fib_table_hash[h]; struct fib_table *tb; @@ -2596,7 +2597,9 @@ static int fib_triestat_seq_show(struct seq_file *seq, void *v) trie_show_usage(seq, t->stats); #endif } + cond_resched_rcu(); } + rcu_read_unlock(); return 0; } -- cgit v1.2.3-58-ga151 From 5c3e82fe159622e46e91458c1a6509c321a62820 Mon Sep 17 00:00:00 2001 From: Qiujun Huang Date: Fri, 27 Mar 2020 11:07:51 +0800 Subject: sctp: fix refcount bug in sctp_wfree We should iterate over the datamsgs to move all chunks(skbs) to newsk. 
The following case cause the bug: for the trouble SKB, it was in outq->transmitted list sctp_outq_sack sctp_check_transmitted SKB was moved to outq->sacked list then throw away the sack queue SKB was deleted from outq->sacked (but it was held by datamsg at sctp_datamsg_to_asoc So, sctp_wfree was not called here) then migrate happened sctp_for_each_tx_datachunk( sctp_clear_owner_w); sctp_assoc_migrate(); sctp_for_each_tx_datachunk( sctp_set_owner_w); SKB was not in the outq, and was not changed to newsk finally __sctp_outq_teardown sctp_chunk_put (for another skb) sctp_datamsg_put __kfree_skb(msg->frag_list) sctp_wfree (for SKB) SKB->sk was still oldsk (skb->sk != asoc->base.sk). Reported-and-tested-by: syzbot+cea71eec5d6de256d54d@syzkaller.appspotmail.com Signed-off-by: Qiujun Huang Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/socket.c | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 1b56fc440606..757740115e93 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -147,29 +147,44 @@ static void sctp_clear_owner_w(struct sctp_chunk *chunk) skb_orphan(chunk->skb); } +#define traverse_and_process() \ +do { \ + msg = chunk->msg; \ + if (msg == prev_msg) \ + continue; \ + list_for_each_entry(c, &msg->chunks, frag_list) { \ + if ((clear && asoc->base.sk == c->skb->sk) || \ + (!clear && asoc->base.sk != c->skb->sk)) \ + cb(c); \ + } \ + prev_msg = msg; \ +} while (0) + static void sctp_for_each_tx_datachunk(struct sctp_association *asoc, + bool clear, void (*cb)(struct sctp_chunk *)) { + struct sctp_datamsg *msg, *prev_msg = NULL; struct sctp_outq *q = &asoc->outqueue; + struct sctp_chunk *chunk, *c; struct sctp_transport *t; - struct sctp_chunk *chunk; list_for_each_entry(t, &asoc->peer.transport_addr_list, transports) list_for_each_entry(chunk, &t->transmitted, transmitted_list) - cb(chunk); + traverse_and_process(); 
list_for_each_entry(chunk, &q->retransmit, transmitted_list) - cb(chunk); + traverse_and_process(); list_for_each_entry(chunk, &q->sacked, transmitted_list) - cb(chunk); + traverse_and_process(); list_for_each_entry(chunk, &q->abandoned, transmitted_list) - cb(chunk); + traverse_and_process(); list_for_each_entry(chunk, &q->out_chunk_list, list) - cb(chunk); + traverse_and_process(); } static void sctp_for_each_rx_skb(struct sctp_association *asoc, struct sock *sk, @@ -9574,9 +9589,9 @@ static int sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, * paths won't try to lock it and then oldsk. */ lock_sock_nested(newsk, SINGLE_DEPTH_NESTING); - sctp_for_each_tx_datachunk(assoc, sctp_clear_owner_w); + sctp_for_each_tx_datachunk(assoc, true, sctp_clear_owner_w); sctp_assoc_migrate(assoc, newsk); - sctp_for_each_tx_datachunk(assoc, sctp_set_owner_w); + sctp_for_each_tx_datachunk(assoc, false, sctp_set_owner_w); /* If the association on the newsk is already closed before accept() * is called, set RCV_SHUTDOWN flag. -- cgit v1.2.3-58-ga151 From 582eea230536a6f104097dd46205822005d5fe3a Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Thu, 26 Mar 2020 20:47:46 -0300 Subject: sctp: fix possibly using a bad saddr with a given dst Under certain circumstances, depending on the order of addresses on the interfaces, it could be that sctp_v[46]_get_dst() would return a dst with a mismatched struct flowi. For example, if when walking through the bind addresses and the first one is not a match, it saves the dst as a fallback (added in 410f03831c07), but not the flowi. Then if the next one is also not a match, the previous dst will be returned but with the flowi information for the 2nd address, which is wrong. The fix is to use a locally stored flowi that can be used for such attempts, and copy it to the parameter only in case it is a possible match, together with the corresponding dst entry. The patch updates IPv6 code mostly just to be in sync. 
Even though the issue is also present there, the fallback is not expected to work with IPv6. Fixes: 410f03831c07 ("sctp: add routing output fallback") Reported-by: Jin Meng Signed-off-by: Marcelo Ricardo Leitner Tested-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/ipv6.c | 20 ++++++++++++++------ net/sctp/protocol.c | 28 +++++++++++++++++++--------- 2 files changed, 33 insertions(+), 15 deletions(-) diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index bc734cfaa29e..c87af430107a 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -228,7 +228,8 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, { struct sctp_association *asoc = t->asoc; struct dst_entry *dst = NULL; - struct flowi6 *fl6 = &fl->u.ip6; + struct flowi _fl; + struct flowi6 *fl6 = &_fl.u.ip6; struct sctp_bind_addr *bp; struct ipv6_pinfo *np = inet6_sk(sk); struct sctp_sockaddr_entry *laddr; @@ -238,7 +239,7 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, enum sctp_scope scope; __u8 matchlen = 0; - memset(fl6, 0, sizeof(struct flowi6)); + memset(&_fl, 0, sizeof(_fl)); fl6->daddr = daddr->v6.sin6_addr; fl6->fl6_dport = daddr->v6.sin6_port; fl6->flowi6_proto = IPPROTO_SCTP; @@ -276,8 +277,11 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, rcu_read_unlock(); dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_p); - if (!asoc || saddr) + if (!asoc || saddr) { + t->dst = dst; + memcpy(fl, &_fl, sizeof(_fl)); goto out; + } bp = &asoc->base.bind_addr; scope = sctp_scope(daddr); @@ -300,6 +304,8 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, if ((laddr->a.sa.sa_family == AF_INET6) && (sctp_v6_cmp_addr(&dst_saddr, &laddr->a))) { rcu_read_unlock(); + t->dst = dst; + memcpy(fl, &_fl, sizeof(_fl)); goto out; } } @@ -338,6 +344,8 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, if (!IS_ERR_OR_NULL(dst)) dst_release(dst); dst = bdst; + t->dst = 
dst; + memcpy(fl, &_fl, sizeof(_fl)); break; } @@ -351,6 +359,8 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, dst_release(dst); dst = bdst; matchlen = bmatchlen; + t->dst = dst; + memcpy(fl, &_fl, sizeof(_fl)); } rcu_read_unlock(); @@ -359,14 +369,12 @@ out: struct rt6_info *rt; rt = (struct rt6_info *)dst; - t->dst = dst; t->dst_cookie = rt6_get_cookie(rt); pr_debug("rt6_dst:%pI6/%d rt6_src:%pI6\n", &rt->rt6i_dst.addr, rt->rt6i_dst.plen, - &fl6->saddr); + &fl->u.ip6.saddr); } else { t->dst = NULL; - pr_debug("no route\n"); } } diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 78af2fcf90cc..092d1afdee0d 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -409,7 +409,8 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr, { struct sctp_association *asoc = t->asoc; struct rtable *rt; - struct flowi4 *fl4 = &fl->u.ip4; + struct flowi _fl; + struct flowi4 *fl4 = &_fl.u.ip4; struct sctp_bind_addr *bp; struct sctp_sockaddr_entry *laddr; struct dst_entry *dst = NULL; @@ -419,7 +420,7 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr, if (t->dscp & SCTP_DSCP_SET_MASK) tos = t->dscp & SCTP_DSCP_VAL_MASK; - memset(fl4, 0x0, sizeof(struct flowi4)); + memset(&_fl, 0x0, sizeof(_fl)); fl4->daddr = daddr->v4.sin_addr.s_addr; fl4->fl4_dport = daddr->v4.sin_port; fl4->flowi4_proto = IPPROTO_SCTP; @@ -438,8 +439,11 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr, &fl4->saddr); rt = ip_route_output_key(sock_net(sk), fl4); - if (!IS_ERR(rt)) + if (!IS_ERR(rt)) { dst = &rt->dst; + t->dst = dst; + memcpy(fl, &_fl, sizeof(_fl)); + } /* If there is no association or if a source address is passed, no * more validation is required. 
@@ -502,27 +506,33 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr, odev = __ip_dev_find(sock_net(sk), laddr->a.v4.sin_addr.s_addr, false); if (!odev || odev->ifindex != fl4->flowi4_oif) { - if (!dst) + if (!dst) { dst = &rt->dst; - else + t->dst = dst; + memcpy(fl, &_fl, sizeof(_fl)); + } else { dst_release(&rt->dst); + } continue; } dst_release(dst); dst = &rt->dst; + t->dst = dst; + memcpy(fl, &_fl, sizeof(_fl)); break; } out_unlock: rcu_read_unlock(); out: - t->dst = dst; - if (dst) + if (dst) { pr_debug("rt_dst:%pI4, rt_src:%pI4\n", - &fl4->daddr, &fl4->saddr); - else + &fl->u.ip4.daddr, &fl->u.ip4.saddr); + } else { + t->dst = NULL; pr_debug("no route\n"); + } } /* For v4, the source address is cached in the route entry(dst). So no need -- cgit v1.2.3-58-ga151 From 25629fdaff2ff509dd0b3f5ff93d70a75e79e0a1 Mon Sep 17 00:00:00 2001 From: William Dauchy Date: Fri, 27 Mar 2020 19:56:39 +0100 Subject: net, ip_tunnel: fix interface lookup with no key When creating a new ipip interface with no local/remote configuration, the lookup is done with TUNNEL_NO_KEY flag, making it impossible to match the new interface (only possible match being fallback or metadata case interface); e.g: `ip link add tunl1 type ipip dev eth0` To fix this case, add a flag check before the key comparison so that we can match an interface with no local/remote config; it also avoids breaking possible userland tools relying on TUNNEL_NO_KEY flag and uninitialised key. Context on my side: I'm creating an extra ipip interface attached to the physical one, and moving it to a dedicated namespace. Fixes: c54419321455 ("GRE: Refactor GRE tunneling code.") Signed-off-by: William Dauchy Signed-off-by: Nicolas Dichtel Signed-off-by: David S. 
Miller --- net/ipv4/ip_tunnel.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 74e1d964a615..cd4b84310d92 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -142,11 +142,8 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, cand = t; } - if (flags & TUNNEL_NO_KEY) - goto skip_key_lookup; - hlist_for_each_entry_rcu(t, head, hash_node) { - if (t->parms.i_key != key || + if ((!(flags & TUNNEL_NO_KEY) && t->parms.i_key != key) || t->parms.iph.saddr != 0 || t->parms.iph.daddr != 0 || !(t->dev->flags & IFF_UP)) @@ -158,7 +155,6 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, cand = t; } -skip_key_lookup: if (cand) return cand; -- cgit v1.2.3-58-ga151 From bde1b56f898ca8112912d7b36e55e1543b3be0cf Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 30 Mar 2020 23:31:45 +0800 Subject: udp: initialize is_flist with 0 in udp_gro_receive Without NAPI_GRO_CB(skb)->is_flist initialized, when the dev doesn't support NETIF_F_GRO_FRAGLIST, is_flist can still be set and fraglist will be used in udp_gro_receive(). So fix it by initializing is_flist with 0 in udp_gro_receive. Fixes: 9fd1ff5d2ac7 ("udp: Support UDP fraglist GRO/GSO.") Signed-off-by: Xin Long Acked-by: Steffen Klassert Signed-off-by: David S. Miller --- net/ipv4/udp_offload.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 1a98583a79f4..e67a66fbf27b 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -453,6 +453,7 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb, unsigned int off = skb_gro_offset(skb); int flush = 1; + NAPI_GRO_CB(skb)->is_flist = 0; if (skb->dev->features & NETIF_F_GRO_FRAGLIST) NAPI_GRO_CB(skb)->is_flist = sk ? 
!udp_sk(sk)->gro_enabled: 1; -- cgit v1.2.3-58-ga151 From cf673ed0e057a2dd68d930c6d7e30d53c70c5789 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 30 Mar 2020 18:51:29 +0200 Subject: net: fix fraglist segmentation reference count leak Xin Long says: On udp rx path udp_rcv_segment() may do segment where the frag skbs will get the header copied from the head skb in skb_segment_list() by calling __copy_skb_header(), which could overwrite the frag skbs' extensions by __skb_ext_copy() and cause a leak. This issue was found after loading esp_offload where a sec path ext is set in the skb. Fix this by discarding head state of the fraglist skb before replacing its contents. Fixes: 3a1296a38d0cf62 ("net: Support GRO/GSO fraglist chaining.") Cc: Steffen Klassert Reported-by: Xiumei Mu Tested-by: Xin Long Signed-off-by: Florian Westphal Acked-by: Steffen Klassert Signed-off-by: David S. Miller --- net/core/skbuff.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index e1101a4f90a6..bea447f38dcc 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3668,6 +3668,7 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb, skb_push(nskb, -skb_network_offset(nskb) + offset); + skb_release_head_state(nskb); __copy_skb_header(nskb, skb); skb_headers_offset_update(nskb, skb_headroom(nskb) - skb_headroom(skb)); -- cgit v1.2.3-58-ga151 From 16deaef205b6da832f88a98770c55c8b85aaadfa Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Sat, 28 Mar 2020 15:09:55 +0800 Subject: net: hns3: drop the WQ_MEM_RECLAIM flag when allocating WQ The WQ in hns3 driver is allocated with WQ_MEM_RECLAIM flag in order to guarantee forward progress, which may cause hns3' WQ_MEM_RECLAIM WQ flushing infiniband' !WQ_MEM_RECLAIM WQ warning: [11246.200168] hns3 0000:bd:00.1: Reset done, hclge driver initialization finished. 
[11246.209979] hns3 0000:bd:00.1 eth7: net open [11246.227608] ------------[ cut here ]------------ [11246.237370] workqueue: WQ_MEM_RECLAIM hclge:hclge_service_task [hclge] is flushing !WQ_MEM_RECLAIM infiniband:0x0 [11246.237391] WARNING: CPU: 50 PID: 2279 at ./kernel/workqueue.c:2605 check_flush_dependency+0xcc/0x140 [11246.260412] Modules linked in: hclgevf hns_roce_hw_v2 rdma_test(O) hns3 xt_CHECKSUM iptable_mangle xt_conntrack ipt_REJECT nf_reject_ipv4 ebtable_filter ebtables ip6table_filter ip6_tables iptable_filter bpfilter vfio_iommu_type1 vfio_pci vfio_virqfd vfio ib_isert iscsi_target_mod ib_ipoib ib_umad rpcrdma ib_iser libiscsi scsi_transport_iscsi aes_ce_blk crypto_simd cryptd aes_ce_cipher sunrpc nls_iso8859_1 crct10dif_ce ghash_ce sha2_ce sha256_arm64 sha1_ce joydev input_leds hid_generic usbkbd usbmouse sbsa_gwdt usbhid usb_storage hid ses hclge hisi_zip hisi_hpre hisi_sec2 hnae3 hisi_qm ahci hisi_trng_v2 evbug uacce rng_core gpio_dwapb autofs4 hisi_sas_v3_hw megaraid_sas hisi_sas_main libsas scsi_transport_sas [last unloaded: hns_roce_hw_v2] [11246.325742] CPU: 50 PID: 2279 Comm: kworker/50:0 Kdump: loaded Tainted: G O 5.4.0-rc4+ #1 [11246.335181] Hardware name: Huawei TaiShan 200 (Model 2280)/BC82AMDD, BIOS 2280-V2 CS V3.B140.01 12/18/2019 [11246.344802] Workqueue: hclge hclge_service_task [hclge] [11246.350007] pstate: 60c00009 (nZCv daif +PAN +UAO) [11246.354779] pc : check_flush_dependency+0xcc/0x140 [11246.359549] lr : check_flush_dependency+0xcc/0x140 [11246.364317] sp : ffff800268a73990 [11246.367618] x29: ffff800268a73990 x28: 0000000000000001 [11246.372907] x27: ffffcbe4f5868000 x26: ffffcbe4f5541000 [11246.378196] x25: 00000000000000b8 x24: ffff002fdd0ff868 [11246.383483] x23: ffff002fdd0ff800 x22: ffff2027401ba600 [11246.388770] x21: 0000000000000000 x20: ffff002fdd0ff800 [11246.394059] x19: ffff202719293b00 x18: ffffcbe4f5541948 [11246.399347] x17: 000000006f8ad8dd x16: 0000000000000002 [11246.404634] x15: ffff8002e8a734f7 x14: 
6c66207369205d65 [11246.409922] x13: 676c63685b206b73 x12: 61745f6563697672 [11246.415208] x11: 65735f65676c6368 x10: 3a65676c6368204d [11246.420494] x9 : 49414c4345525f4d x8 : 6e6162696e69666e [11246.425782] x7 : 69204d49414c4345 x6 : ffffcbe4f5765145 [11246.431068] x5 : 0000000000000000 x4 : 0000000000000000 [11246.436355] x3 : 0000000000000030 x2 : 00000000ffffffff [11246.441642] x1 : 3349eb1ac5310100 x0 : 0000000000000000 [11246.446928] Call trace: [11246.449363] check_flush_dependency+0xcc/0x140 [11246.453785] flush_workqueue+0x110/0x410 [11246.457691] ib_cache_cleanup_one+0x54/0x468 [11246.461943] __ib_unregister_device+0x70/0xa8 [11246.466279] ib_unregister_device+0x2c/0x40 [11246.470455] hns_roce_exit+0x34/0x198 [hns_roce_hw_v2] [11246.475571] __hns_roce_hw_v2_uninit_instance.isra.56+0x3c/0x58 [hns_roce_hw_v2] [11246.482934] hns_roce_hw_v2_reset_notify+0xd8/0x210 [hns_roce_hw_v2] [11246.489261] hclge_notify_roce_client+0x84/0xe0 [hclge] [11246.494464] hclge_reset_rebuild+0x60/0x730 [hclge] [11246.499320] hclge_reset_service_task+0x400/0x5a0 [hclge] [11246.504695] hclge_service_task+0x54/0x698 [hclge] [11246.509464] process_one_work+0x15c/0x458 [11246.513454] worker_thread+0x144/0x520 [11246.517186] kthread+0xfc/0x128 [11246.520314] ret_from_fork+0x10/0x18 [11246.523873] ---[ end trace eb980723699c2585 ]--- [11246.528710] hns3 0000:bd:00.2: Func clear success after reset. [11246.528747] hns3 0000:bd:00.0: Func clear success after reset. [11246.907710] hns3 0000:bd:00.1 eth7: link up According to [1] and [2]: There seems to be no specific guidance about how to handle the forward progress guarantee of network device's WQ yet, and other network device's WQ seem to be marked with WQ_MEM_RECLAIM without a clear reason. So this patch removes the WQ_MEM_RECLAIM flag when allocating WQ to avoid the above warning. 1. https://www.spinics.net/lists/netdev/msg631646.html 2. 
https://www.spinics.net/lists/netdev/msg632097.html Fixes: 0ea68902256e ("net: hns3: allocate WQ with WQ_MEM_RECLAIM flag") Signed-off-by: Yunsheng Lin Signed-off-by: Huazhong Tan Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 2 +- drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index d3b0cd74ecd2..b3518070306b 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -10666,7 +10666,7 @@ static int hclge_init(void) { pr_info("%s is initializing\n", HCLGE_NAME); - hclge_wq = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0, HCLGE_NAME); + hclge_wq = alloc_workqueue("%s", 0, 0, HCLGE_NAME); if (!hclge_wq) { pr_err("%s: failed to create workqueue\n", HCLGE_NAME); return -ENOMEM; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 0510d85a7f6a..3c58f0bbaebf 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -3188,7 +3188,7 @@ static int hclgevf_init(void) { pr_info("%s is initializing\n", HCLGEVF_NAME); - hclgevf_wq = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0, HCLGEVF_NAME); + hclgevf_wq = alloc_workqueue("%s", 0, 0, HCLGEVF_NAME); if (!hclgevf_wq) { pr_err("%s: failed to create workqueue\n", HCLGEVF_NAME); return -ENOMEM; -- cgit v1.2.3-58-ga151 From 74ef402e134b5ebe4bcfbcdd55d2e7278a724620 Mon Sep 17 00:00:00 2001 From: Huazhong Tan Date: Sat, 28 Mar 2020 15:09:56 +0800 Subject: net: hns3: fix for fraglist SKB headlen not handling correctly When the fraglist SKB headlen is larger than zero, current code still handle the fraglist SKB linear data as frag data, which may cause TX error. 
This patch adds a new DESC_TYPE_FRAGLIST_SKB type to handle the mapping and unmapping of the fraglist SKB linear data buffer. Fixes: 8ae10cfb5089 ("net: hns3: support tx-scatter-gather-fraglist feature") Signed-off-by: Yunsheng Lin Signed-off-by: Huazhong Tan Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hnae3.h | 1 + drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 18 +++++++++++++----- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index a3e4081b84ba..5587605d6deb 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -78,6 +78,7 @@ enum hns_desc_type { DESC_TYPE_SKB, + DESC_TYPE_FRAGLIST_SKB, DESC_TYPE_PAGE, }; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index a7f40aa1a0ea..6936384ee896 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -1106,6 +1106,10 @@ static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv, if (unlikely(ret < 0)) return ret; + dma = dma_map_single(dev, skb->data, size, DMA_TO_DEVICE); + } else if (type == DESC_TYPE_FRAGLIST_SKB) { + struct sk_buff *skb = (struct sk_buff *)priv; + dma = dma_map_single(dev, skb->data, size, DMA_TO_DEVICE); } else { frag = (skb_frag_t *)priv; @@ -1144,8 +1148,9 @@ static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv, /* The txbd's baseinfo of DESC_TYPE_PAGE & DESC_TYPE_SKB */ desc_cb->priv = priv; desc_cb->dma = dma + HNS3_MAX_BD_SIZE * k; - desc_cb->type = (type == DESC_TYPE_SKB && !k) ? - DESC_TYPE_SKB : DESC_TYPE_PAGE; + desc_cb->type = ((type == DESC_TYPE_FRAGLIST_SKB || + type == DESC_TYPE_SKB) && !k) ? 
+ type : DESC_TYPE_PAGE; /* now, fill the descriptor */ desc->addr = cpu_to_le64(dma + HNS3_MAX_BD_SIZE * k); @@ -1354,7 +1359,9 @@ static void hns3_clear_desc(struct hns3_enet_ring *ring, int next_to_use_orig) ring_ptr_move_bw(ring, next_to_use); /* unmap the descriptor dma address */ - if (ring->desc_cb[ring->next_to_use].type == DESC_TYPE_SKB) + if (ring->desc_cb[ring->next_to_use].type == DESC_TYPE_SKB || + ring->desc_cb[ring->next_to_use].type == + DESC_TYPE_FRAGLIST_SKB) dma_unmap_single(dev, ring->desc_cb[ring->next_to_use].dma, ring->desc_cb[ring->next_to_use].length, @@ -1447,7 +1454,8 @@ netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev) goto out; skb_walk_frags(skb, frag_skb) { - ret = hns3_fill_skb_to_desc(ring, frag_skb, DESC_TYPE_PAGE); + ret = hns3_fill_skb_to_desc(ring, frag_skb, + DESC_TYPE_FRAGLIST_SKB); if (unlikely(ret < 0)) goto fill_err; @@ -2356,7 +2364,7 @@ static int hns3_map_buffer(struct hns3_enet_ring *ring, struct hns3_desc_cb *cb) static void hns3_unmap_buffer(struct hns3_enet_ring *ring, struct hns3_desc_cb *cb) { - if (cb->type == DESC_TYPE_SKB) + if (cb->type == DESC_TYPE_SKB || cb->type == DESC_TYPE_FRAGLIST_SKB) dma_unmap_single(ring_to_dev(ring), cb->dma, cb->length, ring_to_dma_dir(ring)); else if (cb->length) -- cgit v1.2.3-58-ga151 From 944de4847af1544856486afa6a99c946c28eda57 Mon Sep 17 00:00:00 2001 From: Guojia Liao Date: Sat, 28 Mar 2020 15:09:57 +0800 Subject: net: hns3: fix RSS config lost after VF reset. Currently, the VF's RSS configuration is reset to the default after a VF reset, and the user's configuration is lost. To fix it, this patch separates hclgevf_rss_init_hw() into two parts: one sets up the default RSS configuration and is called only when the driver loads; the other configures the hardware and is called when the driver loads or resets. Fixes: d97b30721301 ("net: hns3: Add RSS tuples support for VF") Signed-off-by: Guojia Liao Signed-off-by: Huazhong Tan Signed-off-by: David S. 
Miller --- .../ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 52 ++++++++++++---------- 1 file changed, 28 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 3c58f0bbaebf..768240f5dc4a 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -2124,50 +2124,51 @@ static int hclgevf_config_gro(struct hclgevf_dev *hdev, bool en) return ret; } -static int hclgevf_rss_init_hw(struct hclgevf_dev *hdev) +static void hclgevf_rss_init_cfg(struct hclgevf_dev *hdev) { struct hclgevf_rss_cfg *rss_cfg = &hdev->rss_cfg; - int ret; + struct hclgevf_rss_tuple_cfg *tuple_sets; u32 i; + rss_cfg->hash_algo = HCLGEVF_RSS_HASH_ALGO_TOEPLITZ; rss_cfg->rss_size = hdev->nic.kinfo.rss_size; - + tuple_sets = &rss_cfg->rss_tuple_sets; if (hdev->pdev->revision >= 0x21) { rss_cfg->hash_algo = HCLGEVF_RSS_HASH_ALGO_SIMPLE; memcpy(rss_cfg->rss_hash_key, hclgevf_hash_key, HCLGEVF_RSS_KEY_SIZE); + tuple_sets->ipv4_tcp_en = HCLGEVF_RSS_INPUT_TUPLE_OTHER; + tuple_sets->ipv4_udp_en = HCLGEVF_RSS_INPUT_TUPLE_OTHER; + tuple_sets->ipv4_sctp_en = HCLGEVF_RSS_INPUT_TUPLE_SCTP; + tuple_sets->ipv4_fragment_en = HCLGEVF_RSS_INPUT_TUPLE_OTHER; + tuple_sets->ipv6_tcp_en = HCLGEVF_RSS_INPUT_TUPLE_OTHER; + tuple_sets->ipv6_udp_en = HCLGEVF_RSS_INPUT_TUPLE_OTHER; + tuple_sets->ipv6_sctp_en = HCLGEVF_RSS_INPUT_TUPLE_SCTP; + tuple_sets->ipv6_fragment_en = HCLGEVF_RSS_INPUT_TUPLE_OTHER; + } + + /* Initialize RSS indirect table */ + for (i = 0; i < HCLGEVF_RSS_IND_TBL_SIZE; i++) + rss_cfg->rss_indirection_tbl[i] = i % rss_cfg->rss_size; +} + +static int hclgevf_rss_init_hw(struct hclgevf_dev *hdev) +{ + struct hclgevf_rss_cfg *rss_cfg = &hdev->rss_cfg; + int ret; + + if (hdev->pdev->revision >= 0x21) { ret = hclgevf_set_rss_algo_key(hdev, rss_cfg->hash_algo, rss_cfg->rss_hash_key); if (ret) return ret; - 
rss_cfg->rss_tuple_sets.ipv4_tcp_en = - HCLGEVF_RSS_INPUT_TUPLE_OTHER; - rss_cfg->rss_tuple_sets.ipv4_udp_en = - HCLGEVF_RSS_INPUT_TUPLE_OTHER; - rss_cfg->rss_tuple_sets.ipv4_sctp_en = - HCLGEVF_RSS_INPUT_TUPLE_SCTP; - rss_cfg->rss_tuple_sets.ipv4_fragment_en = - HCLGEVF_RSS_INPUT_TUPLE_OTHER; - rss_cfg->rss_tuple_sets.ipv6_tcp_en = - HCLGEVF_RSS_INPUT_TUPLE_OTHER; - rss_cfg->rss_tuple_sets.ipv6_udp_en = - HCLGEVF_RSS_INPUT_TUPLE_OTHER; - rss_cfg->rss_tuple_sets.ipv6_sctp_en = - HCLGEVF_RSS_INPUT_TUPLE_SCTP; - rss_cfg->rss_tuple_sets.ipv6_fragment_en = - HCLGEVF_RSS_INPUT_TUPLE_OTHER; - ret = hclgevf_set_rss_input_tuple(hdev, rss_cfg); if (ret) return ret; } - /* Initialize RSS indirect table */ - for (i = 0; i < HCLGEVF_RSS_IND_TBL_SIZE; i++) - rss_cfg->rss_indirection_tbl[i] = i % rss_cfg->rss_size; - ret = hclgevf_set_rss_indir_table(hdev); if (ret) return ret; @@ -2764,6 +2765,7 @@ static int hclgevf_init_hdev(struct hclgevf_dev *hdev) goto err_config; /* Initialize RSS for this VF */ + hclgevf_rss_init_cfg(hdev); ret = hclgevf_rss_init_hw(hdev); if (ret) { dev_err(&hdev->pdev->dev, @@ -2936,6 +2938,8 @@ static int hclgevf_set_channels(struct hnae3_handle *handle, u32 new_tqps_num, for (i = 0; i < HCLGEVF_RSS_IND_TBL_SIZE; i++) rss_indir[i] = i % kinfo->rss_size; + hdev->rss_cfg.rss_size = kinfo->rss_size; + ret = hclgevf_set_rss(handle, rss_indir, NULL, 0); if (ret) dev_err(&hdev->pdev->dev, "set rss indir table fail, ret=%d\n", -- cgit v1.2.3-58-ga151 From a9775bb64aa725ea2f915cb5dfc32c1bac8a7175 Mon Sep 17 00:00:00 2001 From: Guangbin Huang Date: Sat, 28 Mar 2020 15:09:58 +0800 Subject: net: hns3: fix set and get link ksettings issue When device is not open, the service task which update the port information per second is not running. In this case, the port capabilities, including speed ability, autoneg ability, media type, may be incorrect. Then get/set link ksetting may fail. 
This patch fixes it by updating the port information before getting/setting link ksettings when the device is not open, and by starting the timer task immediately (setting the delay time to 0) when the device opens. Fixes: 46a3df9f9718 ("net: hns3: Add HNS3 Acceleration Engine & Compatibility Layer Support") Signed-off-by: Guangbin Huang Signed-off-by: Huazhong Tan Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index b3518070306b..0e03c3af5de5 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -6765,7 +6765,7 @@ static void hclge_set_timer_task(struct hnae3_handle *handle, bool enable) struct hclge_dev *hdev = vport->back; if (enable) { - hclge_task_schedule(hdev, round_jiffies_relative(HZ)); + hclge_task_schedule(hdev, 0); } else { /* Set the DOWN flag here to disable link updating */ set_bit(HCLGE_STATE_DOWN, &hdev->state); @@ -8979,6 +8979,12 @@ static void hclge_get_media_type(struct hnae3_handle *handle, u8 *media_type, struct hclge_vport *vport = hclge_get_vport(handle); struct hclge_dev *hdev = vport->back; + /* When nic is down, the service task is not running, doesn't update + * the port information per second. Query the port information before + * return the media type, ensure getting the correct media information. + */ + hclge_update_port_info(hdev); + if (media_type) *media_type = hdev->hw.mac.media_type; -- cgit v1.2.3-58-ga151