Diffstat (limited to 'net')
387 files changed, 20363 insertions, 12835 deletions
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 90cc2bdd4064..61bf2a06e85d 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -9,7 +9,7 @@ bool vlan_do_receive(struct sk_buff **skbp) { struct sk_buff *skb = *skbp; __be16 vlan_proto = skb->vlan_proto; - u16 vlan_id = vlan_tx_tag_get_id(skb); + u16 vlan_id = skb_vlan_tag_get_id(skb); struct net_device *vlan_dev; struct vlan_pcpu_stats *rx_stats; diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c index 8ac8a5cc2143..c92b52f37d38 100644 --- a/net/8021q/vlan_netlink.c +++ b/net/8021q/vlan_netlink.c @@ -238,6 +238,13 @@ nla_put_failure: return -EMSGSIZE; } +static struct net *vlan_get_link_net(const struct net_device *dev) +{ + struct net_device *real_dev = vlan_dev_priv(dev)->real_dev; + + return dev_net(real_dev); +} + struct rtnl_link_ops vlan_link_ops __read_mostly = { .kind = "vlan", .maxtype = IFLA_VLAN_MAX, @@ -250,6 +257,7 @@ struct rtnl_link_ops vlan_link_ops __read_mostly = { .dellink = unregister_vlan_dev, .get_size = vlan_get_size, .fill_info = vlan_fill_info, + .get_link_net = vlan_get_link_net, }; int __init vlan_netlink_init(void) diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index daa749c8b3fb..d8e376a5f0f1 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -524,6 +524,12 @@ static int p9_virtio_probe(struct virtio_device *vdev) int err; struct virtio_chan *chan; + if (!vdev->config->get) { + dev_err(&vdev->dev, "%s failure: config access disabled\n", + __func__); + return -EINVAL; + } + chan = kmalloc(sizeof(struct virtio_chan), GFP_KERNEL); if (!chan) { pr_err("Failed to allocate virtio 9P channel\n"); diff --git a/net/batman-adv/Kconfig b/net/batman-adv/Kconfig index 11660a3aab5a..c6fc8f756c9a 100644 --- a/net/batman-adv/Kconfig +++ b/net/batman-adv/Kconfig @@ -62,6 +62,7 @@ config BATMAN_ADV_MCAST config BATMAN_ADV_DEBUG bool "B.A.T.M.A.N. debugging" depends on BATMAN_ADV + depends on DEBUG_FS help This is an option for use by developers; most people should say N here. 
This enables compilation of support for diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 1e8053976e83..00e00e09b000 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -26,9 +26,8 @@ #include "bat_algo.h" #include "network-coding.h" - /** - * batadv_dup_status - duplicate status + * enum batadv_dup_status - duplicate status * @BATADV_NO_DUP: the packet is a duplicate * @BATADV_ORIG_DUP: OGM is a duplicate in the originator (but not for the * neighbor) @@ -517,7 +516,7 @@ out: * @bat_priv: the bat priv with all the soft interface information * @packet_len: (total) length of the OGM * @send_time: timestamp (jiffies) when the packet is to be sent - * @direktlink: true if this is a direct link packet + * @directlink: true if this is a direct link packet * @if_incoming: interface where the packet was received * @if_outgoing: interface for which the retransmission should be considered * @forw_packet: the forwarded packet which should be checked @@ -879,7 +878,7 @@ batadv_iv_ogm_slide_own_bcast_window(struct batadv_hard_iface *hard_iface) hlist_for_each_entry_rcu(orig_node, head, hash_entry) { spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock); word_index = hard_iface->if_num * BATADV_NUM_WORDS; - word = &(orig_node->bat_iv.bcast_own[word_index]); + word = &orig_node->bat_iv.bcast_own[word_index]; batadv_bit_get_packet(bat_priv, word, 1, 0); if_num = hard_iface->if_num; @@ -1362,10 +1361,10 @@ out: return ret; } - /** * batadv_iv_ogm_process_per_outif - process a batman iv OGM for an outgoing if * @skb: the skb containing the OGM + * @ogm_offset: offset from skb->data to start of ogm header * @orig_node: the (cached) orig node for the originator of this OGM * @if_incoming: the interface where this packet was received * @if_outgoing: the interface for which the packet should be considered @@ -1664,7 +1663,7 @@ static void batadv_iv_ogm_process(const struct sk_buff *skb, int ogm_offset, offset = if_num * BATADV_NUM_WORDS; spin_lock_bh(&orig_neigh_node->bat_iv.ogm_cnt_lock); - word = &(orig_neigh_node->bat_iv.bcast_own[offset]); + word = &orig_neigh_node->bat_iv.bcast_own[offset]; bit_pos = if_incoming_seqno - 2; bit_pos -= ntohl(ogm_packet->seqno); batadv_set_bit(word, bit_pos); @@ -1902,10 +1901,10 @@ out: * batadv_iv_ogm_neigh_is_eob - check if neigh1 is equally good or better than * neigh2 from the metric prospective * @neigh1: the first neighbor object of the comparison - * @if_outgoing: outgoing interface for the first neighbor + * @if_outgoing1: outgoing interface for the first neighbor * @neigh2: the second neighbor object of the comparison * @if_outgoing2: outgoing interface for the second neighbor - + * * Returns true if the metric via neigh1 is equally good or better than * the metric via neigh2, false otherwise. */ diff --git a/net/batman-adv/bitarray.c b/net/batman-adv/bitarray.c index 9586750022f5..e3da07a64026 100644 --- a/net/batman-adv/bitarray.c +++ b/net/batman-adv/bitarray.c @@ -29,7 +29,6 @@ static void batadv_bitmap_shift_left(unsigned long *seq_bits, int32_t n) bitmap_shift_left(seq_bits, seq_bits, n, BATADV_TQ_LOCAL_WINDOW_SIZE); } - /* receive and process one packet within the sequence number window. 
* * returns: diff --git a/net/batman-adv/bitarray.h b/net/batman-adv/bitarray.h index cc2407351d36..2acaafe60188 100644 --- a/net/batman-adv/bitarray.h +++ b/net/batman-adv/bitarray.h @@ -29,8 +29,7 @@ static inline int batadv_test_bit(const unsigned long *seq_bits, diff = last_seqno - curr_seqno; if (diff < 0 || diff >= BATADV_TQ_LOCAL_WINDOW_SIZE) return 0; - else - return test_bit(diff, seq_bits) != 0; + return test_bit(diff, seq_bits) != 0; } /* turn corresponding bit on, so we can remember that we got the packet */ diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index a957c8140721..ac4b96eccade 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -69,7 +69,6 @@ static inline uint32_t batadv_choose_backbone_gw(const void *data, return hash % size; } - /* compares address and vid of two backbone gws */ static int batadv_compare_backbone_gw(const struct hlist_node *node, const void *data2) @@ -245,14 +244,14 @@ batadv_bla_del_backbone_claims(struct batadv_bla_backbone_gw *backbone_gw) spin_unlock_bh(list_lock); } - /* all claims gone, intialize CRC */ + /* all claims gone, initialize CRC */ backbone_gw->crc = BATADV_BLA_CRC_INIT; } /** * batadv_bla_send_claim - sends a claim frame according to the provided info * @bat_priv: the bat priv with all the soft interface information - * @orig: the mac address to be announced within the claim + * @mac: the mac address to be announced within the claim * @vid: the VLAN ID * @claimtype: the type of the claim (CLAIM, UNCLAIM, ANNOUNCE, ...) */ @@ -364,6 +363,7 @@ out: * @bat_priv: the bat priv with all the soft interface information * @orig: the mac address of the originator * @vid: the VLAN ID + * @own_backbone: set if the requested backbone is local * * searches for the backbone gw or creates a new one if it could not * be found. @@ -454,6 +454,7 @@ batadv_bla_update_own_backbone_gw(struct batadv_priv *bat_priv, /** * batadv_bla_answer_request - answer a bla request by sending own claims * @bat_priv: the bat priv with all the soft interface information + * @primary_if: interface where the request came on * @vid: the vid where the request came on * * Repeat all of our own claims, and finally send an ANNOUNCE frame @@ -660,7 +661,6 @@ static int batadv_handle_announce(struct batadv_priv *bat_priv, if (unlikely(!backbone_gw)) return 1; - /* handle as ANNOUNCE frame */ backbone_gw->lasttime = jiffies; crc = ntohs(*((__be16 *)(&an_addr[4]))); @@ -775,6 +775,7 @@ static int batadv_handle_claim(struct batadv_priv *bat_priv, /** * batadv_check_claim_group * @bat_priv: the bat priv with all the soft interface information + * @primary_if: the primary interface of this batman interface * @hw_src: the Hardware source in the ARP Header * @hw_dst: the Hardware destination in the ARP Header * @ethhdr: pointer to the Ethernet header of the claim frame @@ -846,10 +847,10 @@ static int batadv_check_claim_group(struct batadv_priv *bat_priv, return 2; } - /** * batadv_bla_process_claim * @bat_priv: the bat priv with all the soft interface information + * @primary_if: the primary hard interface of this batman soft interface * @skb: the frame to be checked * * Check if this is a claim frame, and process it accordingly. @@ -1327,7 +1328,7 @@ int batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv, goto out; } /* not found, add a new entry (overwrite the oldest entry) - * and allow it, its the first occurence. + * and allow it, its the first occurrence. 
*/ curr = (bat_priv->bla.bcast_duplist_curr + BATADV_DUPLIST_SIZE - 1); curr %= BATADV_DUPLIST_SIZE; @@ -1343,8 +1344,6 @@ out: return ret; } - - /** * batadv_bla_is_backbone_gw_orig * @bat_priv: the bat priv with all the soft interface information @@ -1386,7 +1385,6 @@ bool batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, uint8_t *orig, return false; } - /** * batadv_bla_is_backbone_gw * @skb: the frame to be checked @@ -1476,7 +1474,6 @@ int batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, if (!atomic_read(&bat_priv->bridge_loop_avoidance)) goto allow; - if (unlikely(atomic_read(&bat_priv->bla.num_requests))) /* don't allow broadcasts while requests are in flight */ if (is_multicast_ether_addr(ethhdr->h_dest) && is_bcast) diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c index a12e25efaf6f..a4972874c056 100644 --- a/net/batman-adv/debugfs.c +++ b/net/batman-adv/debugfs.c @@ -233,7 +233,6 @@ static int batadv_debug_log_setup(struct batadv_priv *bat_priv) static void batadv_debug_log_cleanup(struct batadv_priv *bat_priv) { - return; } #endif @@ -405,6 +404,7 @@ struct batadv_debuginfo batadv_hardif_debuginfo_##_name = { \ .release = single_release, \ }, \ } + static BATADV_HARDIF_DEBUGINFO(originators, S_IRUGO, batadv_originators_hardif_open); diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index b5981113c9a7..aad022dd15df 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -1100,6 +1100,7 @@ void batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv, batadv_dat_send_data(bat_priv, skb, ip_src, BATADV_P_DAT_DHT_PUT); batadv_dat_send_data(bat_priv, skb, ip_dst, BATADV_P_DAT_DHT_PUT); } + /** * batadv_dat_snoop_incoming_arp_reply - snoop the ARP reply and fill the local * DAT storage only diff --git a/net/batman-adv/distributed-arp-table.h b/net/batman-adv/distributed-arp-table.h index d76e1d06c5b5..2fe0764c64be 100644 --- a/net/batman-adv/distributed-arp-table.h +++ b/net/batman-adv/distributed-arp-table.h @@ -25,9 +25,7 @@ #include <linux/if_arp.h> -/** - * BATADV_DAT_ADDR_MAX - maximum address value in the DHT space - */ +/* BATADV_DAT_ADDR_MAX - maximum address value in the DHT space */ #define BATADV_DAT_ADDR_MAX ((batadv_dat_addr_t)~(batadv_dat_addr_t)0) void batadv_dat_status_update(struct net_device *net_dev); diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index fc1835c6bb40..3d1dcaa3e8b5 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -23,7 +23,6 @@ #include "hard-interface.h" #include "soft-interface.h" - /** * batadv_frag_clear_chain - delete entries in the fragment buffer chain * @head: head of chain with entries. @@ -251,7 +250,7 @@ batadv_frag_merge_packets(struct hlist_head *chain, struct sk_buff *skb) kfree(entry); /* Make room for the rest of the fragments. 
*/ - if (pskb_expand_head(skb_out, 0, size - skb->len, GFP_ATOMIC) < 0) { + if (pskb_expand_head(skb_out, 0, size - skb_out->len, GFP_ATOMIC) < 0) { kfree_skb(skb_out); skb_out = NULL; goto free; @@ -434,7 +433,7 @@ bool batadv_frag_send_packet(struct sk_buff *skb, * fragments larger than BATADV_FRAG_MAX_FRAG_SIZE */ mtu = min_t(unsigned, mtu, BATADV_FRAG_MAX_FRAG_SIZE); - max_fragment_size = (mtu - header_size - ETH_HLEN); + max_fragment_size = mtu - header_size; max_packet_size = max_fragment_size * BATADV_FRAG_MAX_FRAGMENTS; /* Don't even try to fragment, if we need more than 16 fragments */ diff --git a/net/batman-adv/fragmentation.h b/net/batman-adv/fragmentation.h index 5d7a0e66a22b..d848cf6676a2 100644 --- a/net/batman-adv/fragmentation.h +++ b/net/batman-adv/fragmentation.h @@ -41,8 +41,7 @@ batadv_frag_check_entry(struct batadv_frag_table_entry *frags_entry) if (!hlist_empty(&frags_entry->head) && batadv_has_timed_out(frags_entry->timestamp, BATADV_FRAG_TIMEOUT)) return true; - else - return false; + return false; } #endif /* _NET_BATMAN_ADV_FRAGMENTATION_H_ */ diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index 90cff585b37d..27649e85f3f6 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -775,6 +775,7 @@ batadv_gw_dhcp_recipient_get(struct sk_buff *skb, unsigned int *header_len, return ret; } + /** * batadv_gw_out_of_range - check if the dhcp request destination is the best gw * @bat_priv: the bat priv with all the soft interface information @@ -810,7 +811,7 @@ bool batadv_gw_out_of_range(struct batadv_priv *bat_priv, goto out; gw_node = batadv_gw_node_get(bat_priv, orig_dst_node); - if (!gw_node->bandwidth_down == 0) + if (!gw_node) goto out; switch (atomic_read(&bat_priv->gw_mode)) { diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index d1183e882167..12fc77bef23f 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -41,7 +41,6 @@ #include "network-coding.h" #include "fragmentation.h" - /* List manipulations on hardif_list have to be rtnl_lock()'ed, * list traversals just rcu-locked */ @@ -403,6 +402,9 @@ int batadv_batman_skb_recv(struct sk_buff *skb, struct net_device *dev, goto err_free; } + /* reset control block to avoid left overs from previous users */ + memset(skb->cb, 0, sizeof(struct batadv_skb_cb)); + /* all receive handlers return whether they received or reused * the supplied skb. if not, we have to free the skb. 
*/ @@ -651,7 +653,7 @@ static struct batadv_tvlv_handler /** * batadv_tvlv_container_free_ref - decrement the tvlv container refcounter and * possibly free it - * @tvlv_handler: the tvlv container to free + * @tvlv: the tvlv container to free */ static void batadv_tvlv_container_free_ref(struct batadv_tvlv_container *tvlv) { @@ -796,11 +798,11 @@ void batadv_tvlv_container_register(struct batadv_priv *bat_priv, } /** - * batadv_tvlv_realloc_packet_buff - reallocate packet buffer to accomodate + * batadv_tvlv_realloc_packet_buff - reallocate packet buffer to accommodate * requested packet size * @packet_buff: packet buffer * @packet_buff_len: packet buffer size - * @packet_min_len: requested packet minimum size + * @min_packet_len: requested packet minimum size * @additional_packet_len: requested additional packet size on top of minimum * size * diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index a1fcd884f0b1..4d2318829a34 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -24,7 +24,7 @@ #define BATADV_DRIVER_DEVICE "batman-adv" #ifndef BATADV_SOURCE_VERSION -#define BATADV_SOURCE_VERSION "2014.4.0" +#define BATADV_SOURCE_VERSION "2015.0" #endif /* B.A.T.M.A.N. parameters */ @@ -92,9 +92,8 @@ /* numbers of originator to contact for any PUT/GET DHT operation */ #define BATADV_DAT_CANDIDATES_NUM 3 -/** - * BATADV_TQ_SIMILARITY_THRESHOLD - TQ points that a secondary metric can differ - * at most from the primary one in order to be still considered acceptable +/* BATADV_TQ_SIMILARITY_THRESHOLD - TQ points that a secondary metric can differ + * at most from the primary one in order to be still considered acceptable */ #define BATADV_TQ_SIMILARITY_THRESHOLD 50 @@ -313,10 +312,10 @@ static inline bool batadv_has_timed_out(unsigned long timestamp, * - when adding 128 - it is neither a predecessor nor a successor, * - after adding more than 127 to the starting value - it is a successor */ -#define batadv_seq_before(x, y) ({typeof(x) _d1 = (x); \ - typeof(y) _d2 = (y); \ - typeof(x) _dummy = (_d1 - _d2); \ - (void) (&_d1 == &_d2); \ +#define batadv_seq_before(x, y) ({typeof(x)_d1 = (x); \ + typeof(y)_d2 = (y); \ + typeof(x)_dummy = (_d1 - _d2); \ + (void)(&_d1 == &_d2); \ _dummy > batadv_smallest_signed_int(_dummy); }) #define batadv_seq_after(x, y) batadv_seq_before(y, x) diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index ab6bb2af1d45..b24e4bb64fb5 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -685,11 +685,13 @@ static void batadv_mcast_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv, if (orig_initialized) atomic_dec(&bat_priv->mcast.num_disabled); orig->capabilities |= BATADV_ORIG_CAPA_HAS_MCAST; - /* If mcast support is being switched off increase the disabled - * mcast node counter. + /* If mcast support is being switched off or if this is an initial + * OGM without mcast support then increase the disabled mcast + * node counter. 
*/ } else if (!orig_mcast_enabled && - orig->capabilities & BATADV_ORIG_CAPA_HAS_MCAST) { + (orig->capabilities & BATADV_ORIG_CAPA_HAS_MCAST || + !orig_initialized)) { atomic_inc(&bat_priv->mcast.num_disabled); orig->capabilities &= ~BATADV_ORIG_CAPA_HAS_MCAST; } @@ -738,7 +740,8 @@ void batadv_mcast_purge_orig(struct batadv_orig_node *orig) { struct batadv_priv *bat_priv = orig->bat_priv; - if (!(orig->capabilities & BATADV_ORIG_CAPA_HAS_MCAST)) + if (!(orig->capabilities & BATADV_ORIG_CAPA_HAS_MCAST) && + orig->capa_initialized & BATADV_ORIG_CAPA_HAS_MCAST) atomic_dec(&bat_priv->mcast.num_disabled); batadv_mcast_want_unsnoop_update(bat_priv, orig, BATADV_NO_FLAGS); diff --git a/net/batman-adv/multicast.h b/net/batman-adv/multicast.h index 73b5d45819c1..3a44ebdb43cb 100644 --- a/net/batman-adv/multicast.h +++ b/net/batman-adv/multicast.h @@ -50,7 +50,6 @@ void batadv_mcast_purge_orig(struct batadv_orig_node *orig_node); static inline void batadv_mcast_mla_update(struct batadv_priv *bat_priv) { - return; } static inline enum batadv_forw_mode @@ -67,12 +66,10 @@ static inline int batadv_mcast_init(struct batadv_priv *bat_priv) static inline void batadv_mcast_free(struct batadv_priv *bat_priv) { - return; } static inline void batadv_mcast_purge_orig(struct batadv_orig_node *orig_node) { - return; } #endif /* CONFIG_BATMAN_ADV_MCAST */ diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index 8d04d174669e..127cc4d7380a 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -133,7 +133,7 @@ int batadv_nc_mesh_init(struct batadv_priv *bat_priv) if (!bat_priv->nc.decoding_hash) goto err; - batadv_hash_set_lock_class(bat_priv->nc.coding_hash, + batadv_hash_set_lock_class(bat_priv->nc.decoding_hash, &batadv_nc_decoding_hash_lock_class_key); INIT_DELAYED_WORK(&bat_priv->nc.work, batadv_nc_worker); @@ -1212,8 +1212,7 @@ static bool batadv_nc_skb_coding_possible(struct sk_buff *skb, { if (BATADV_SKB_CB(skb)->decoded && !batadv_compare_eth(dst, src)) return false; - else - return true; + return true; } /** diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 6a484514cd3e..90e805aba379 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -570,9 +570,6 @@ static void batadv_orig_node_free_rcu(struct rcu_head *rcu) batadv_frag_purge_orig(orig_node, NULL); - batadv_tt_global_del_orig(orig_node->bat_priv, orig_node, -1, - "originator timed out"); - if (orig_node->bat_priv->bat_algo_ops->bat_orig_free) orig_node->bat_priv->bat_algo_ops->bat_orig_free(orig_node); @@ -678,6 +675,7 @@ struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv, atomic_set(&orig_node->last_ttvn, 0); orig_node->tt_buff = NULL; orig_node->tt_buff_len = 0; + orig_node->last_seen = jiffies; reset_time = jiffies - 1 - msecs_to_jiffies(BATADV_RESET_PROTECTION_MS); orig_node->bcast_seqno_reset = reset_time; #ifdef CONFIG_BATMAN_ADV_MCAST @@ -799,7 +797,6 @@ batadv_purge_orig_ifinfo(struct batadv_priv *bat_priv, return ifinfo_purged; } - /** * batadv_purge_orig_neighbors - purges neighbors from originator * @bat_priv: the bat priv with all the soft interface information @@ -977,6 +974,9 @@ static void _batadv_purge_orig(struct batadv_priv *bat_priv) if (batadv_purge_orig_node(bat_priv, orig_node)) { batadv_gw_node_delete(bat_priv, orig_node); hlist_del_rcu(&orig_node->hash_entry); + batadv_tt_global_del_orig(orig_node->bat_priv, + orig_node, -1, + "originator timed out"); batadv_orig_node_free_ref(orig_node); 
continue; } diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h index db3a9ed734cb..aa4a43696295 100644 --- a/net/batman-adv/originator.h +++ b/net/batman-adv/originator.h @@ -70,7 +70,6 @@ batadv_orig_node_vlan_get(struct batadv_orig_node *orig_node, unsigned short vid); void batadv_orig_node_vlan_free_ref(struct batadv_orig_node_vlan *orig_vlan); - /* hashfunction to choose an entry in a hash table of given size * hash algorithm from http://en.wikipedia.org/wiki/Hash_table */ diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h index 34e096d2dce1..b81fbbf21a63 100644 --- a/net/batman-adv/packet.h +++ b/net/batman-adv/packet.h @@ -198,6 +198,7 @@ struct batadv_bla_claim_dst { uint8_t type; /* bla_claimframe */ __be16 group; /* group id */ }; + #pragma pack() /** @@ -376,7 +377,7 @@ struct batadv_frag_packet { uint8_t reserved:4; uint8_t no:4; #else -#error "unknown bitfield endianess" +#error "unknown bitfield endianness" #endif uint8_t dest[ETH_ALEN]; uint8_t orig[ETH_ALEN]; @@ -452,7 +453,7 @@ struct batadv_coded_packet { * @src: address of the source * @dst: address of the destination * @tvlv_len: length of tvlv data following the unicast tvlv header - * @align: 2 bytes to align the header to a 4 byte boundry + * @align: 2 bytes to align the header to a 4 byte boundary */ struct batadv_unicast_tvlv_packet { uint8_t packet_type; diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 35f76f2f7824..da83982bf974 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -292,7 +292,6 @@ out: return ret; } - int batadv_recv_icmp_packet(struct sk_buff *skb, struct batadv_hard_iface *recv_if) { @@ -443,11 +442,13 @@ batadv_find_router(struct batadv_priv *bat_priv, router = batadv_orig_router_get(orig_node, recv_if); + if (!router) + return router; + /* only consider bonding for recv_if == BATADV_IF_DEFAULT (first hop) * and if activated. */ - if (recv_if == BATADV_IF_DEFAULT || !atomic_read(&bat_priv->bonding) || - !router) + if (!(recv_if == BATADV_IF_DEFAULT && atomic_read(&bat_priv->bonding))) return router; /* bonding: loop through the list of possible routers found @@ -455,7 +456,7 @@ batadv_find_router(struct batadv_priv *bat_priv, * the last chosen bonding candidate (next_candidate). If no such * router is found, use the first candidate found (the previously * chosen bonding candidate might have been the last one in the list). - * If this can't be found either, return the previously choosen + * If this can't be found either, return the previously chosen * router - obviously there are no other candidates. 
*/ rcu_read_lock(); diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 5467955eb27c..5ec31d7de24f 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -36,7 +36,6 @@ #include "bridge_loop_avoidance.h" #include "network-coding.h" - static int batadv_get_settings(struct net_device *dev, struct ethtool_cmd *cmd); static void batadv_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info); diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c index f40cb0436eba..a75dc12f96f8 100644 --- a/net/batman-adv/sysfs.c +++ b/net/batman-adv/sysfs.c @@ -151,7 +151,6 @@ ssize_t batadv_show_##_name(struct kobject *kobj, \ static BATADV_ATTR(_name, _mode, batadv_show_##_name, \ batadv_store_##_name) - #define BATADV_ATTR_SIF_STORE_UINT(_name, _min, _max, _post_func) \ ssize_t batadv_store_##_name(struct kobject *kobj, \ struct attribute *attr, char *buff, \ diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 5f59e7f899a0..07b263a437d1 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -1780,7 +1780,6 @@ static void batadv_tt_global_del(struct batadv_priv *bat_priv, batadv_tt_global_del_roaming(bat_priv, tt_global_entry, orig_node, message); - out: if (tt_global_entry) batadv_tt_global_entry_free_ref(tt_global_entry); @@ -2769,9 +2768,8 @@ static bool batadv_send_tt_response(struct batadv_priv *bat_priv, { if (batadv_is_my_mac(bat_priv, req_dst)) return batadv_send_my_tt_response(bat_priv, tt_data, req_src); - else - return batadv_send_other_tt_response(bat_priv, tt_data, - req_src, req_dst); + return batadv_send_other_tt_response(bat_priv, tt_data, req_src, + req_dst); } static void _batadv_tt_update_changes(struct batadv_priv *bat_priv, @@ -2854,7 +2852,7 @@ static void batadv_tt_update_changes(struct batadv_priv *bat_priv, /** * batadv_is_my_client - check if a client is served by the local node * @bat_priv: the bat priv with all the soft interface information - * @addr: the mac adress of the client to check + * @addr: the mac address of the client to check * @vid: VLAN identifier * * Returns true if the client is served by this node, false otherwise. 
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 8854c05622a9..9398c3fb4174 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -199,7 +199,6 @@ struct batadv_orig_bat_iv { /** * struct batadv_orig_node - structure for orig_list maintaining nodes of mesh * @orig: originator ethernet address - * @primary_addr: hosts primary interface address * @ifinfo_list: list for routers per outgoing interface * @last_bonding_candidate: pointer to last ifinfo of last used router * @batadv_dat_addr_t: address of the orig node in the distributed hash @@ -244,7 +243,6 @@ struct batadv_orig_bat_iv { */ struct batadv_orig_node { uint8_t orig[ETH_ALEN]; - uint8_t primary_addr[ETH_ALEN]; struct hlist_head ifinfo_list; struct batadv_orig_ifinfo *last_bonding_candidate; #ifdef CONFIG_BATMAN_ADV_DAT @@ -970,7 +968,7 @@ struct batadv_tt_orig_list_entry { }; /** - * struct batadv_tt_change_node - structure for tt changes occured + * struct batadv_tt_change_node - structure for tt changes occurred * @list: list node for batadv_priv_tt::changes_list * @change: holds the actual translation table diff data */ diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c index 76617be1e797..1742b849fcff 100644 --- a/net/bluetooth/6lowpan.c +++ b/net/bluetooth/6lowpan.c @@ -31,7 +31,7 @@ #define VERSION "0.1" -static struct dentry *lowpan_psm_debugfs; +static struct dentry *lowpan_enable_debugfs; static struct dentry *lowpan_control_debugfs; #define IFACE_NAME_TEMPLATE "bt%d" @@ -55,11 +55,7 @@ struct skb_cb { static LIST_HEAD(bt_6lowpan_devices); static DEFINE_SPINLOCK(devices_lock); -/* If psm is set to 0 (default value), then 6lowpan is disabled. - * Other values are used to indicate a Protocol Service Multiplexer - * value for 6lowpan. - */ -static u16 psm_6lowpan; +static bool enable_6lowpan; /* We are listening incoming connections via this channel */ @@ -390,7 +386,6 @@ static int recv_pkt(struct sk_buff *skb, struct net_device *dev, drop: dev->stats.rx_dropped++; - kfree_skb(skb); return NET_RX_DROP; } @@ -762,7 +757,7 @@ static bool is_bt_6lowpan(struct hci_conn *hcon) if (hcon->type != LE_LINK) return false; - if (!psm_6lowpan) + if (!enable_6lowpan) return false; return true; @@ -1086,7 +1081,7 @@ static int bt_6lowpan_connect(bdaddr_t *addr, u8 dst_type) if (!pchan) return -EINVAL; - err = l2cap_chan_connect(pchan, cpu_to_le16(psm_6lowpan), 0, + err = l2cap_chan_connect(pchan, cpu_to_le16(L2CAP_PSM_IPSP), 0, addr, dst_type); BT_DBG("chan %p err %d", pchan, err); @@ -1119,7 +1114,7 @@ static struct l2cap_chan *bt_6lowpan_listen(void) struct l2cap_chan *pchan; int err; - if (psm_6lowpan == 0) + if (!enable_6lowpan) return NULL; pchan = chan_get(); @@ -1131,10 +1126,9 @@ static struct l2cap_chan *bt_6lowpan_listen(void) atomic_set(&pchan->nesting, L2CAP_NESTING_PARENT); - BT_DBG("psm 0x%04x chan %p src type %d", psm_6lowpan, pchan, - pchan->src_type); + BT_DBG("chan %p src type %d", pchan, pchan->src_type); - err = l2cap_add_psm(pchan, addr, cpu_to_le16(psm_6lowpan)); + err = l2cap_add_psm(pchan, addr, cpu_to_le16(L2CAP_PSM_IPSP)); if (err) { l2cap_chan_put(pchan); BT_ERR("psm cannot be added err %d", err); @@ -1220,22 +1214,23 @@ static void disconnect_all_peers(void) spin_unlock(&devices_lock); } -struct set_psm { +struct set_enable { struct work_struct work; - u16 psm; + bool flag; }; -static void do_psm_set(struct work_struct *work) +static void do_enable_set(struct work_struct *work) { - struct set_psm *set_psm = container_of(work, struct set_psm, work); + struct set_enable 
*set_enable = container_of(work, + struct set_enable, work); - if (set_psm->psm == 0 || psm_6lowpan != set_psm->psm) + if (!set_enable->flag || enable_6lowpan != set_enable->flag) /* Disconnect existing connections if 6lowpan is - * disabled (psm = 0), or if psm changes. + * disabled */ disconnect_all_peers(); - psm_6lowpan = set_psm->psm; + enable_6lowpan = set_enable->flag; if (listen_chan) { l2cap_chan_close(listen_chan, 0); @@ -1244,33 +1239,33 @@ static void do_psm_set(struct work_struct *work) listen_chan = bt_6lowpan_listen(); - kfree(set_psm); + kfree(set_enable); } -static int lowpan_psm_set(void *data, u64 val) +static int lowpan_enable_set(void *data, u64 val) { - struct set_psm *set_psm; + struct set_enable *set_enable; - set_psm = kzalloc(sizeof(*set_psm), GFP_KERNEL); - if (!set_psm) + set_enable = kzalloc(sizeof(*set_enable), GFP_KERNEL); + if (!set_enable) return -ENOMEM; - set_psm->psm = val; - INIT_WORK(&set_psm->work, do_psm_set); + set_enable->flag = !!val; + INIT_WORK(&set_enable->work, do_enable_set); - schedule_work(&set_psm->work); + schedule_work(&set_enable->work); return 0; } -static int lowpan_psm_get(void *data, u64 *val) +static int lowpan_enable_get(void *data, u64 *val) { - *val = psm_6lowpan; + *val = enable_6lowpan; return 0; } -DEFINE_SIMPLE_ATTRIBUTE(lowpan_psm_fops, lowpan_psm_get, - lowpan_psm_set, "%llu\n"); +DEFINE_SIMPLE_ATTRIBUTE(lowpan_enable_fops, lowpan_enable_get, + lowpan_enable_set, "%llu\n"); static ssize_t lowpan_control_write(struct file *fp, const char __user *user_buffer, @@ -1440,9 +1435,9 @@ static struct notifier_block bt_6lowpan_dev_notifier = { static int __init bt_6lowpan_init(void) { - lowpan_psm_debugfs = debugfs_create_file("6lowpan_psm", 0644, - bt_debugfs, NULL, - &lowpan_psm_fops); + lowpan_enable_debugfs = debugfs_create_file("6lowpan_enable", 0644, + bt_debugfs, NULL, + &lowpan_enable_fops); lowpan_control_debugfs = debugfs_create_file("6lowpan_control", 0644, bt_debugfs, NULL, &lowpan_control_fops); @@ -1452,7 +1447,7 @@ static int __init bt_6lowpan_init(void) static void __exit bt_6lowpan_exit(void) { - debugfs_remove(lowpan_psm_debugfs); + debugfs_remove(lowpan_enable_debugfs); debugfs_remove(lowpan_control_debugfs); if (listen_chan) { diff --git a/net/bluetooth/Kconfig b/net/bluetooth/Kconfig index 29bcafc41adf..7de74635a110 100644 --- a/net/bluetooth/Kconfig +++ b/net/bluetooth/Kconfig @@ -64,4 +64,31 @@ config BT_6LOWPAN help IPv6 compression over Bluetooth Low Energy. +config BT_SELFTEST + bool "Bluetooth self testing support" + depends on BT && DEBUG_KERNEL + help + Run self tests when initializing the Bluetooth subsystem. This + is a developer option and can cause significant delay when booting + the system. + + When the Bluetooth subsystem is built as module, then the test + cases are run first thing at module load time. When the Bluetooth + subsystem is compiled into the kernel image, then the test cases + are run late in the initcall hierarchy. + +config BT_SELFTEST_ECDH + bool "ECDH test cases" + depends on BT_LE && BT_SELFTEST + help + Run test cases for ECDH cryptographic functionality used by the + Bluetooth Low Energy Secure Connections feature. + +config BT_SELFTEST_SMP + bool "SMP test cases" + depends on BT_LE && BT_SELFTEST + help + Run test cases for SMP cryptographic functionality, including both + legacy SMP as well as the Secure Connections features. 
+ source "drivers/bluetooth/Kconfig" diff --git a/net/bluetooth/Makefile b/net/bluetooth/Makefile index a5432a6a0ae6..8e96e3072266 100644 --- a/net/bluetooth/Makefile +++ b/net/bluetooth/Makefile @@ -13,6 +13,8 @@ bluetooth_6lowpan-y := 6lowpan.o bluetooth-y := af_bluetooth.o hci_core.o hci_conn.o hci_event.o mgmt.o \ hci_sock.o hci_sysfs.o l2cap_core.o l2cap_sock.o smp.o sco.o lib.o \ - a2mp.o amp.o ecc.o + a2mp.o amp.o ecc.o hci_request.o hci_debugfs.o + +bluetooth-$(CONFIG_BT_SELFTEST) += selftest.o subdir-ccflags-y += -D__CHECK_ENDIAN__ diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 012e3b03589d..ce22e0cfa923 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -31,6 +31,8 @@ #include <net/bluetooth/bluetooth.h> #include <linux/proc_fs.h> +#include "selftest.h" + #define VERSION "2.20" /* Bluetooth sockets */ @@ -716,6 +718,10 @@ static int __init bt_init(void) BT_INFO("Core ver %s", VERSION); + err = bt_selftest(); + if (err < 0) + return err; + bt_debugfs = debugfs_create_dir("bluetooth", NULL); err = bt_sysfs_init(); diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c index 85bcc21e84d2..05f57e491ccb 100644 --- a/net/bluetooth/bnep/core.c +++ b/net/bluetooth/bnep/core.c @@ -511,13 +511,12 @@ static int bnep_session(void *arg) static struct device *bnep_get_device(struct bnep_session *session) { - struct hci_conn *conn; + struct l2cap_conn *conn = l2cap_pi(session->sock->sk)->chan->conn; - conn = l2cap_pi(session->sock->sk)->chan->conn->hcon; - if (!conn) + if (!conn || !conn->hcon) return NULL; - return &conn->dev; + return &conn->hcon->dev; } static struct device_type bnep_type = { @@ -533,6 +532,9 @@ int bnep_add_connection(struct bnep_connadd_req *req, struct socket *sock) BT_DBG(""); + if (!l2cap_is_socket(sock)) + return -EBADFD; + baswap((void *) dst, &l2cap_pi(sock->sk)->chan->dst); baswap((void *) src, &l2cap_pi(sock->sk)->chan->src); diff --git a/net/bluetooth/cmtp/capi.c b/net/bluetooth/cmtp/capi.c index 1ca8a87a0787..75bd2c42e3e7 100644 --- a/net/bluetooth/cmtp/capi.c +++ b/net/bluetooth/cmtp/capi.c @@ -253,8 +253,6 @@ static void cmtp_recv_interopmsg(struct cmtp_session *session, struct sk_buff *s if (skb->len < CAPI_MSG_BASELEN + 15) break; - controller = CAPIMSG_U32(skb->data, CAPI_MSG_BASELEN + 10); - if (!info && ctrl) { int len = min_t(uint, CAPI_MANUFACTURER_LEN, skb->data[CAPI_MSG_BASELEN + 14]); @@ -270,8 +268,6 @@ static void cmtp_recv_interopmsg(struct cmtp_session *session, struct sk_buff *s if (skb->len < CAPI_MSG_BASELEN + 32) break; - controller = CAPIMSG_U32(skb->data, CAPI_MSG_BASELEN + 12); - if (!info && ctrl) { ctrl->version.majorversion = CAPIMSG_U32(skb->data, CAPI_MSG_BASELEN + 16); ctrl->version.minorversion = CAPIMSG_U32(skb->data, CAPI_MSG_BASELEN + 20); @@ -285,8 +281,6 @@ static void cmtp_recv_interopmsg(struct cmtp_session *session, struct sk_buff *s if (skb->len < CAPI_MSG_BASELEN + 17) break; - controller = CAPIMSG_U32(skb->data, CAPI_MSG_BASELEN + 12); - if (!info && ctrl) { int len = min_t(uint, CAPI_SERIAL_LEN, skb->data[CAPI_MSG_BASELEN + 16]); diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c index 67fe5e84e68f..278a194e6af4 100644 --- a/net/bluetooth/cmtp/core.c +++ b/net/bluetooth/cmtp/core.c @@ -334,6 +334,9 @@ int cmtp_add_connection(struct cmtp_connadd_req *req, struct socket *sock) BT_DBG(""); + if (!l2cap_is_socket(sock)) + return -EBADFD; + session = kzalloc(sizeof(struct cmtp_session), GFP_KERNEL); if (!session) return -ENOMEM; diff --git 
a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index fe18825cc8a4..c9b8fa544785 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -25,11 +25,13 @@ /* Bluetooth HCI connection handling. */ #include <linux/export.h> +#include <linux/debugfs.h> #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> #include <net/bluetooth/l2cap.h> +#include "hci_request.h" #include "smp.h" #include "a2mp.h" @@ -546,6 +548,8 @@ int hci_conn_del(struct hci_conn *conn) hci_conn_del_sysfs(conn); + debugfs_remove_recursive(conn->debugfs); + if (test_bit(HCI_CONN_PARAM_REMOVAL_PEND, &conn->flags)) hci_conn_params_del(conn->hdev, &conn->dst, conn->dst_type); @@ -629,7 +633,7 @@ void hci_le_conn_failed(struct hci_conn *conn, u8 status) mgmt_reenable_advertising(hdev); } -static void create_le_conn_complete(struct hci_dev *hdev, u8 status) +static void create_le_conn_complete(struct hci_dev *hdev, u8 status, u16 opcode) { struct hci_conn *conn; @@ -1080,21 +1084,6 @@ int hci_conn_check_secure(struct hci_conn *conn, __u8 sec_level) } EXPORT_SYMBOL(hci_conn_check_secure); -/* Change link key */ -int hci_conn_change_link_key(struct hci_conn *conn) -{ - BT_DBG("hcon %p", conn); - - if (!test_and_set_bit(HCI_CONN_AUTH_PEND, &conn->flags)) { - struct hci_cp_change_conn_link_key cp; - cp.handle = cpu_to_le16(conn->handle); - hci_send_cmd(conn->hdev, HCI_OP_CHANGE_CONN_LINK_KEY, - sizeof(cp), &cp); - } - - return 0; -} - /* Switch role */ int hci_conn_switch_role(struct hci_conn *conn, __u8 role) { diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 5dcacf9607e4..3322d3f4c85a 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -37,6 +37,8 @@ #include <net/bluetooth/l2cap.h> #include <net/bluetooth/mgmt.h> +#include "hci_request.h" +#include "hci_debugfs.h" #include "smp.h" static void hci_rx_work(struct work_struct *work); @@ -137,941 +139,9 @@ static const struct file_operations dut_mode_fops = { .llseek = default_llseek, }; -static int features_show(struct seq_file *f, void *ptr) -{ - struct hci_dev *hdev = f->private; - u8 p; - - hci_dev_lock(hdev); - for (p = 0; p < HCI_MAX_PAGES && p <= hdev->max_page; p++) { - seq_printf(f, "%2u: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x " - "0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", p, - hdev->features[p][0], hdev->features[p][1], - hdev->features[p][2], hdev->features[p][3], - hdev->features[p][4], hdev->features[p][5], - hdev->features[p][6], hdev->features[p][7]); - } - if (lmp_le_capable(hdev)) - seq_printf(f, "LE: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x " - "0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", - hdev->le_features[0], hdev->le_features[1], - hdev->le_features[2], hdev->le_features[3], - hdev->le_features[4], hdev->le_features[5], - hdev->le_features[6], hdev->le_features[7]); - hci_dev_unlock(hdev); - - return 0; -} - -static int features_open(struct inode *inode, struct file *file) -{ - return single_open(file, features_show, inode->i_private); -} - -static const struct file_operations features_fops = { - .open = features_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int blacklist_show(struct seq_file *f, void *p) -{ - struct hci_dev *hdev = f->private; - struct bdaddr_list *b; - - hci_dev_lock(hdev); - list_for_each_entry(b, &hdev->blacklist, list) - seq_printf(f, "%pMR (type %u)\n", &b->bdaddr, b->bdaddr_type); - hci_dev_unlock(hdev); - - return 0; -} - -static int blacklist_open(struct inode *inode, struct file *file) -{ - return single_open(file, blacklist_show, 
inode->i_private); -} - -static const struct file_operations blacklist_fops = { - .open = blacklist_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int uuids_show(struct seq_file *f, void *p) -{ - struct hci_dev *hdev = f->private; - struct bt_uuid *uuid; - - hci_dev_lock(hdev); - list_for_each_entry(uuid, &hdev->uuids, list) { - u8 i, val[16]; - - /* The Bluetooth UUID values are stored in big endian, - * but with reversed byte order. So convert them into - * the right order for the %pUb modifier. - */ - for (i = 0; i < 16; i++) - val[i] = uuid->uuid[15 - i]; - - seq_printf(f, "%pUb\n", val); - } - hci_dev_unlock(hdev); - - return 0; -} - -static int uuids_open(struct inode *inode, struct file *file) -{ - return single_open(file, uuids_show, inode->i_private); -} - -static const struct file_operations uuids_fops = { - .open = uuids_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int inquiry_cache_show(struct seq_file *f, void *p) -{ - struct hci_dev *hdev = f->private; - struct discovery_state *cache = &hdev->discovery; - struct inquiry_entry *e; - - hci_dev_lock(hdev); - - list_for_each_entry(e, &cache->all, all) { - struct inquiry_data *data = &e->data; - seq_printf(f, "%pMR %d %d %d 0x%.2x%.2x%.2x 0x%.4x %d %d %u\n", - &data->bdaddr, - data->pscan_rep_mode, data->pscan_period_mode, - data->pscan_mode, data->dev_class[2], - data->dev_class[1], data->dev_class[0], - __le16_to_cpu(data->clock_offset), - data->rssi, data->ssp_mode, e->timestamp); - } - - hci_dev_unlock(hdev); - - return 0; -} - -static int inquiry_cache_open(struct inode *inode, struct file *file) -{ - return single_open(file, inquiry_cache_show, inode->i_private); -} - -static const struct file_operations inquiry_cache_fops = { - .open = inquiry_cache_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int link_keys_show(struct seq_file *f, void *ptr) -{ - struct hci_dev *hdev = f->private; - struct link_key *key; - - rcu_read_lock(); - list_for_each_entry_rcu(key, &hdev->link_keys, list) - seq_printf(f, "%pMR %u %*phN %u\n", &key->bdaddr, key->type, - HCI_LINK_KEY_SIZE, key->val, key->pin_len); - rcu_read_unlock(); - - return 0; -} - -static int link_keys_open(struct inode *inode, struct file *file) -{ - return single_open(file, link_keys_show, inode->i_private); -} - -static const struct file_operations link_keys_fops = { - .open = link_keys_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int dev_class_show(struct seq_file *f, void *ptr) -{ - struct hci_dev *hdev = f->private; - - hci_dev_lock(hdev); - seq_printf(f, "0x%.2x%.2x%.2x\n", hdev->dev_class[2], - hdev->dev_class[1], hdev->dev_class[0]); - hci_dev_unlock(hdev); - - return 0; -} - -static int dev_class_open(struct inode *inode, struct file *file) -{ - return single_open(file, dev_class_show, inode->i_private); -} - -static const struct file_operations dev_class_fops = { - .open = dev_class_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int voice_setting_get(void *data, u64 *val) -{ - struct hci_dev *hdev = data; - - hci_dev_lock(hdev); - *val = hdev->voice_setting; - hci_dev_unlock(hdev); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(voice_setting_fops, voice_setting_get, - NULL, "0x%4.4llx\n"); - -static int auto_accept_delay_set(void *data, u64 val) -{ - struct hci_dev *hdev = data; - - hci_dev_lock(hdev); - hdev->auto_accept_delay = val; - 
hci_dev_unlock(hdev); - - return 0; -} - -static int auto_accept_delay_get(void *data, u64 *val) -{ - struct hci_dev *hdev = data; - - hci_dev_lock(hdev); - *val = hdev->auto_accept_delay; - hci_dev_unlock(hdev); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(auto_accept_delay_fops, auto_accept_delay_get, - auto_accept_delay_set, "%llu\n"); - -static ssize_t force_sc_support_read(struct file *file, char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct hci_dev *hdev = file->private_data; - char buf[3]; - - buf[0] = test_bit(HCI_FORCE_SC, &hdev->dbg_flags) ? 'Y': 'N'; - buf[1] = '\n'; - buf[2] = '\0'; - return simple_read_from_buffer(user_buf, count, ppos, buf, 2); -} - -static ssize_t force_sc_support_write(struct file *file, - const char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct hci_dev *hdev = file->private_data; - char buf[32]; - size_t buf_size = min(count, (sizeof(buf)-1)); - bool enable; - - if (test_bit(HCI_UP, &hdev->flags)) - return -EBUSY; - - if (copy_from_user(buf, user_buf, buf_size)) - return -EFAULT; - - buf[buf_size] = '\0'; - if (strtobool(buf, &enable)) - return -EINVAL; - - if (enable == test_bit(HCI_FORCE_SC, &hdev->dbg_flags)) - return -EALREADY; - - change_bit(HCI_FORCE_SC, &hdev->dbg_flags); - - return count; -} - -static const struct file_operations force_sc_support_fops = { - .open = simple_open, - .read = force_sc_support_read, - .write = force_sc_support_write, - .llseek = default_llseek, -}; - -static ssize_t force_lesc_support_read(struct file *file, char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct hci_dev *hdev = file->private_data; - char buf[3]; - - buf[0] = test_bit(HCI_FORCE_LESC, &hdev->dbg_flags) ? 'Y': 'N'; - buf[1] = '\n'; - buf[2] = '\0'; - return simple_read_from_buffer(user_buf, count, ppos, buf, 2); -} - -static ssize_t force_lesc_support_write(struct file *file, - const char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct hci_dev *hdev = file->private_data; - char buf[32]; - size_t buf_size = min(count, (sizeof(buf)-1)); - bool enable; - - if (copy_from_user(buf, user_buf, buf_size)) - return -EFAULT; - - buf[buf_size] = '\0'; - if (strtobool(buf, &enable)) - return -EINVAL; - - if (enable == test_bit(HCI_FORCE_LESC, &hdev->dbg_flags)) - return -EALREADY; - - change_bit(HCI_FORCE_LESC, &hdev->dbg_flags); - - return count; -} - -static const struct file_operations force_lesc_support_fops = { - .open = simple_open, - .read = force_lesc_support_read, - .write = force_lesc_support_write, - .llseek = default_llseek, -}; - -static ssize_t sc_only_mode_read(struct file *file, char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct hci_dev *hdev = file->private_data; - char buf[3]; - - buf[0] = test_bit(HCI_SC_ONLY, &hdev->dev_flags) ? 
'Y': 'N'; - buf[1] = '\n'; - buf[2] = '\0'; - return simple_read_from_buffer(user_buf, count, ppos, buf, 2); -} - -static const struct file_operations sc_only_mode_fops = { - .open = simple_open, - .read = sc_only_mode_read, - .llseek = default_llseek, -}; - -static int idle_timeout_set(void *data, u64 val) -{ - struct hci_dev *hdev = data; - - if (val != 0 && (val < 500 || val > 3600000)) - return -EINVAL; - - hci_dev_lock(hdev); - hdev->idle_timeout = val; - hci_dev_unlock(hdev); - - return 0; -} - -static int idle_timeout_get(void *data, u64 *val) -{ - struct hci_dev *hdev = data; - - hci_dev_lock(hdev); - *val = hdev->idle_timeout; - hci_dev_unlock(hdev); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(idle_timeout_fops, idle_timeout_get, - idle_timeout_set, "%llu\n"); - -static int rpa_timeout_set(void *data, u64 val) -{ - struct hci_dev *hdev = data; - - /* Require the RPA timeout to be at least 30 seconds and at most - * 24 hours. - */ - if (val < 30 || val > (60 * 60 * 24)) - return -EINVAL; - - hci_dev_lock(hdev); - hdev->rpa_timeout = val; - hci_dev_unlock(hdev); - - return 0; -} - -static int rpa_timeout_get(void *data, u64 *val) -{ - struct hci_dev *hdev = data; - - hci_dev_lock(hdev); - *val = hdev->rpa_timeout; - hci_dev_unlock(hdev); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(rpa_timeout_fops, rpa_timeout_get, - rpa_timeout_set, "%llu\n"); - -static int sniff_min_interval_set(void *data, u64 val) -{ - struct hci_dev *hdev = data; - - if (val == 0 || val % 2 || val > hdev->sniff_max_interval) - return -EINVAL; - - hci_dev_lock(hdev); - hdev->sniff_min_interval = val; - hci_dev_unlock(hdev); - - return 0; -} - -static int sniff_min_interval_get(void *data, u64 *val) -{ - struct hci_dev *hdev = data; - - hci_dev_lock(hdev); - *val = hdev->sniff_min_interval; - hci_dev_unlock(hdev); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(sniff_min_interval_fops, sniff_min_interval_get, - sniff_min_interval_set, "%llu\n"); - -static int sniff_max_interval_set(void *data, u64 val) -{ - struct hci_dev *hdev = data; - - if (val == 0 || val % 2 || val < hdev->sniff_min_interval) - return -EINVAL; - - hci_dev_lock(hdev); - hdev->sniff_max_interval = val; - hci_dev_unlock(hdev); - - return 0; -} - -static int sniff_max_interval_get(void *data, u64 *val) -{ - struct hci_dev *hdev = data; - - hci_dev_lock(hdev); - *val = hdev->sniff_max_interval; - hci_dev_unlock(hdev); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(sniff_max_interval_fops, sniff_max_interval_get, - sniff_max_interval_set, "%llu\n"); - -static int conn_info_min_age_set(void *data, u64 val) -{ - struct hci_dev *hdev = data; - - if (val == 0 || val > hdev->conn_info_max_age) - return -EINVAL; - - hci_dev_lock(hdev); - hdev->conn_info_min_age = val; - hci_dev_unlock(hdev); - - return 0; -} - -static int conn_info_min_age_get(void *data, u64 *val) -{ - struct hci_dev *hdev = data; - - hci_dev_lock(hdev); - *val = hdev->conn_info_min_age; - hci_dev_unlock(hdev); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(conn_info_min_age_fops, conn_info_min_age_get, - conn_info_min_age_set, "%llu\n"); - -static int conn_info_max_age_set(void *data, u64 val) -{ - struct hci_dev *hdev = data; - - if (val == 0 || val < hdev->conn_info_min_age) - return -EINVAL; - - hci_dev_lock(hdev); - hdev->conn_info_max_age = val; - hci_dev_unlock(hdev); - - return 0; -} - -static int conn_info_max_age_get(void *data, u64 *val) -{ - struct hci_dev *hdev = data; - - hci_dev_lock(hdev); - *val = hdev->conn_info_max_age; - hci_dev_unlock(hdev); - - return 0; -} - 
-DEFINE_SIMPLE_ATTRIBUTE(conn_info_max_age_fops, conn_info_max_age_get, - conn_info_max_age_set, "%llu\n"); - -static int identity_show(struct seq_file *f, void *p) -{ - struct hci_dev *hdev = f->private; - bdaddr_t addr; - u8 addr_type; - - hci_dev_lock(hdev); - - hci_copy_identity_address(hdev, &addr, &addr_type); - - seq_printf(f, "%pMR (type %u) %*phN %pMR\n", &addr, addr_type, - 16, hdev->irk, &hdev->rpa); - - hci_dev_unlock(hdev); - - return 0; -} - -static int identity_open(struct inode *inode, struct file *file) -{ - return single_open(file, identity_show, inode->i_private); -} - -static const struct file_operations identity_fops = { - .open = identity_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int random_address_show(struct seq_file *f, void *p) -{ - struct hci_dev *hdev = f->private; - - hci_dev_lock(hdev); - seq_printf(f, "%pMR\n", &hdev->random_addr); - hci_dev_unlock(hdev); - - return 0; -} - -static int random_address_open(struct inode *inode, struct file *file) -{ - return single_open(file, random_address_show, inode->i_private); -} - -static const struct file_operations random_address_fops = { - .open = random_address_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int static_address_show(struct seq_file *f, void *p) -{ - struct hci_dev *hdev = f->private; - - hci_dev_lock(hdev); - seq_printf(f, "%pMR\n", &hdev->static_addr); - hci_dev_unlock(hdev); - - return 0; -} - -static int static_address_open(struct inode *inode, struct file *file) -{ - return single_open(file, static_address_show, inode->i_private); -} - -static const struct file_operations static_address_fops = { - .open = static_address_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static ssize_t force_static_address_read(struct file *file, - char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct hci_dev *hdev = file->private_data; - char buf[3]; - - buf[0] = test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags) ? 
'Y': 'N'; - buf[1] = '\n'; - buf[2] = '\0'; - return simple_read_from_buffer(user_buf, count, ppos, buf, 2); -} - -static ssize_t force_static_address_write(struct file *file, - const char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct hci_dev *hdev = file->private_data; - char buf[32]; - size_t buf_size = min(count, (sizeof(buf)-1)); - bool enable; - - if (test_bit(HCI_UP, &hdev->flags)) - return -EBUSY; - - if (copy_from_user(buf, user_buf, buf_size)) - return -EFAULT; - - buf[buf_size] = '\0'; - if (strtobool(buf, &enable)) - return -EINVAL; - - if (enable == test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags)) - return -EALREADY; - - change_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags); - - return count; -} - -static const struct file_operations force_static_address_fops = { - .open = simple_open, - .read = force_static_address_read, - .write = force_static_address_write, - .llseek = default_llseek, -}; - -static int white_list_show(struct seq_file *f, void *ptr) -{ - struct hci_dev *hdev = f->private; - struct bdaddr_list *b; - - hci_dev_lock(hdev); - list_for_each_entry(b, &hdev->le_white_list, list) - seq_printf(f, "%pMR (type %u)\n", &b->bdaddr, b->bdaddr_type); - hci_dev_unlock(hdev); - - return 0; -} - -static int white_list_open(struct inode *inode, struct file *file) -{ - return single_open(file, white_list_show, inode->i_private); -} - -static const struct file_operations white_list_fops = { - .open = white_list_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int identity_resolving_keys_show(struct seq_file *f, void *ptr) -{ - struct hci_dev *hdev = f->private; - struct smp_irk *irk; - - rcu_read_lock(); - list_for_each_entry_rcu(irk, &hdev->identity_resolving_keys, list) { - seq_printf(f, "%pMR (type %u) %*phN %pMR\n", - &irk->bdaddr, irk->addr_type, - 16, irk->val, &irk->rpa); - } - rcu_read_unlock(); - - return 0; -} - -static int identity_resolving_keys_open(struct inode *inode, struct file *file) -{ - return single_open(file, identity_resolving_keys_show, - inode->i_private); -} - -static const struct file_operations identity_resolving_keys_fops = { - .open = identity_resolving_keys_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int long_term_keys_show(struct seq_file *f, void *ptr) -{ - struct hci_dev *hdev = f->private; - struct smp_ltk *ltk; - - rcu_read_lock(); - list_for_each_entry_rcu(ltk, &hdev->long_term_keys, list) - seq_printf(f, "%pMR (type %u) %u 0x%02x %u %.4x %.16llx %*phN\n", - &ltk->bdaddr, ltk->bdaddr_type, ltk->authenticated, - ltk->type, ltk->enc_size, __le16_to_cpu(ltk->ediv), - __le64_to_cpu(ltk->rand), 16, ltk->val); - rcu_read_unlock(); - - return 0; -} - -static int long_term_keys_open(struct inode *inode, struct file *file) -{ - return single_open(file, long_term_keys_show, inode->i_private); -} - -static const struct file_operations long_term_keys_fops = { - .open = long_term_keys_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int conn_min_interval_set(void *data, u64 val) -{ - struct hci_dev *hdev = data; - - if (val < 0x0006 || val > 0x0c80 || val > hdev->le_conn_max_interval) - return -EINVAL; - - hci_dev_lock(hdev); - hdev->le_conn_min_interval = val; - hci_dev_unlock(hdev); - - return 0; -} - -static int conn_min_interval_get(void *data, u64 *val) -{ - struct hci_dev *hdev = data; - - hci_dev_lock(hdev); - *val = hdev->le_conn_min_interval; - hci_dev_unlock(hdev); - - return 0; -} -
-DEFINE_SIMPLE_ATTRIBUTE(conn_min_interval_fops, conn_min_interval_get, - conn_min_interval_set, "%llu\n"); - -static int conn_max_interval_set(void *data, u64 val) -{ - struct hci_dev *hdev = data; - - if (val < 0x0006 || val > 0x0c80 || val < hdev->le_conn_min_interval) - return -EINVAL; - - hci_dev_lock(hdev); - hdev->le_conn_max_interval = val; - hci_dev_unlock(hdev); - - return 0; -} - -static int conn_max_interval_get(void *data, u64 *val) -{ - struct hci_dev *hdev = data; - - hci_dev_lock(hdev); - *val = hdev->le_conn_max_interval; - hci_dev_unlock(hdev); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(conn_max_interval_fops, conn_max_interval_get, - conn_max_interval_set, "%llu\n"); - -static int conn_latency_set(void *data, u64 val) -{ - struct hci_dev *hdev = data; - - if (val > 0x01f3) - return -EINVAL; - - hci_dev_lock(hdev); - hdev->le_conn_latency = val; - hci_dev_unlock(hdev); - - return 0; -} - -static int conn_latency_get(void *data, u64 *val) -{ - struct hci_dev *hdev = data; - - hci_dev_lock(hdev); - *val = hdev->le_conn_latency; - hci_dev_unlock(hdev); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(conn_latency_fops, conn_latency_get, - conn_latency_set, "%llu\n"); - -static int supervision_timeout_set(void *data, u64 val) -{ - struct hci_dev *hdev = data; - - if (val < 0x000a || val > 0x0c80) - return -EINVAL; - - hci_dev_lock(hdev); - hdev->le_supv_timeout = val; - hci_dev_unlock(hdev); - - return 0; -} - -static int supervision_timeout_get(void *data, u64 *val) -{ - struct hci_dev *hdev = data; - - hci_dev_lock(hdev); - *val = hdev->le_supv_timeout; - hci_dev_unlock(hdev); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(supervision_timeout_fops, supervision_timeout_get, - supervision_timeout_set, "%llu\n"); - -static int adv_channel_map_set(void *data, u64 val) -{ - struct hci_dev *hdev = data; - - if (val < 0x01 || val > 0x07) - return -EINVAL; - - hci_dev_lock(hdev); - hdev->le_adv_channel_map = val; - hci_dev_unlock(hdev); - - return 0; -} - -static int adv_channel_map_get(void *data, u64 *val) -{ - struct hci_dev *hdev = data; - - hci_dev_lock(hdev); - *val = hdev->le_adv_channel_map; - hci_dev_unlock(hdev); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(adv_channel_map_fops, adv_channel_map_get, - adv_channel_map_set, "%llu\n"); - -static int adv_min_interval_set(void *data, u64 val) -{ - struct hci_dev *hdev = data; - - if (val < 0x0020 || val > 0x4000 || val > hdev->le_adv_max_interval) - return -EINVAL; - - hci_dev_lock(hdev); - hdev->le_adv_min_interval = val; - hci_dev_unlock(hdev); - - return 0; -} - -static int adv_min_interval_get(void *data, u64 *val) -{ - struct hci_dev *hdev = data; - - hci_dev_lock(hdev); - *val = hdev->le_adv_min_interval; - hci_dev_unlock(hdev); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(adv_min_interval_fops, adv_min_interval_get, - adv_min_interval_set, "%llu\n"); - -static int adv_max_interval_set(void *data, u64 val) -{ - struct hci_dev *hdev = data; - - if (val < 0x0020 || val > 0x4000 || val < hdev->le_adv_min_interval) - return -EINVAL; - - hci_dev_lock(hdev); - hdev->le_adv_max_interval = val; - hci_dev_unlock(hdev); - - return 0; -} - -static int adv_max_interval_get(void *data, u64 *val) -{ - struct hci_dev *hdev = data; - - hci_dev_lock(hdev); - *val = hdev->le_adv_max_interval; - hci_dev_unlock(hdev); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(adv_max_interval_fops, adv_max_interval_get, - adv_max_interval_set, "%llu\n"); - -static int device_list_show(struct seq_file *f, void *ptr) -{ - struct hci_dev *hdev = f->private; - struct 
hci_conn_params *p; - struct bdaddr_list *b; - - hci_dev_lock(hdev); - list_for_each_entry(b, &hdev->whitelist, list) - seq_printf(f, "%pMR (type %u)\n", &b->bdaddr, b->bdaddr_type); - list_for_each_entry(p, &hdev->le_conn_params, list) { - seq_printf(f, "%pMR (type %u) %u\n", &p->addr, p->addr_type, - p->auto_connect); - } - hci_dev_unlock(hdev); - - return 0; -} - -static int device_list_open(struct inode *inode, struct file *file) -{ - return single_open(file, device_list_show, inode->i_private); -} - -static const struct file_operations device_list_fops = { - .open = device_list_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - /* ---- HCI requests ---- */ -static void hci_req_sync_complete(struct hci_dev *hdev, u8 result) +static void hci_req_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode) { BT_DBG("%s result 0x%2.2x", hdev->name, result); @@ -1427,43 +497,6 @@ static void le_setup(struct hci_request *req) set_bit(HCI_LE_ENABLED, &hdev->dev_flags); } -static u8 hci_get_inquiry_mode(struct hci_dev *hdev) -{ - if (lmp_ext_inq_capable(hdev)) - return 0x02; - - if (lmp_inq_rssi_capable(hdev)) - return 0x01; - - if (hdev->manufacturer == 11 && hdev->hci_rev == 0x00 && - hdev->lmp_subver == 0x0757) - return 0x01; - - if (hdev->manufacturer == 15) { - if (hdev->hci_rev == 0x03 && hdev->lmp_subver == 0x6963) - return 0x01; - if (hdev->hci_rev == 0x09 && hdev->lmp_subver == 0x6963) - return 0x01; - if (hdev->hci_rev == 0x00 && hdev->lmp_subver == 0x6965) - return 0x01; - } - - if (hdev->manufacturer == 31 && hdev->hci_rev == 0x2005 && - hdev->lmp_subver == 0x1805) - return 0x01; - - return 0x00; -} - -static void hci_setup_inquiry_mode(struct hci_request *req) -{ - u8 mode; - - mode = hci_get_inquiry_mode(req->hdev); - - hci_req_add(req, HCI_OP_WRITE_INQUIRY_MODE, 1, &mode); -} - static void hci_setup_event_mask(struct hci_request *req) { struct hci_dev *hdev = req->hdev; @@ -1553,10 +586,16 @@ static void hci_init2_req(struct hci_request *req, unsigned long opt) if (lmp_le_capable(hdev)) le_setup(req); - /* AVM Berlin (31), aka "BlueFRITZ!", doesn't support the read - * local supported commands HCI command. + /* All Bluetooth 1.2 and later controllers should support the + * HCI command for reading the local supported commands. + * + * Unfortunately some controllers indicate Bluetooth 1.2 support, + * but do not have support for this command. If that is the case, + * the driver can quirk the behavior and skip reading the local + * supported commands. */ - if (hdev->manufacturer != 31 && hdev->hci_ver > BLUETOOTH_VER_1_1) + if (hdev->hci_ver > BLUETOOTH_VER_1_1 && + !test_bit(HCI_QUIRK_BROKEN_LOCAL_COMMANDS, &hdev->quirks)) hci_req_add(req, HCI_OP_READ_LOCAL_COMMANDS, 0, NULL); if (lmp_ssp_capable(hdev)) { @@ -1570,6 +609,7 @@ static void hci_init2_req(struct hci_request *req, unsigned long opt) if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags)) { u8 mode = 0x01; + hci_req_add(req, HCI_OP_WRITE_SSP_MODE, sizeof(mode), &mode); } else { @@ -1582,8 +622,18 @@ static void hci_init2_req(struct hci_request *req, unsigned long opt) } } - if (lmp_inq_rssi_capable(hdev)) - hci_setup_inquiry_mode(req); + if (lmp_inq_rssi_capable(hdev) || + test_bit(HCI_QUIRK_FIXUP_INQUIRY_MODE, &hdev->quirks)) { + u8 mode; + + /* If Extended Inquiry Result events are supported, then + * they are clearly preferred over Inquiry Result with RSSI + * events. + */ + mode = lmp_ext_inq_capable(hdev) ? 
0x02 : 0x01; + + hci_req_add(req, HCI_OP_WRITE_INQUIRY_MODE, 1, &mode); + } if (lmp_inq_tx_pwr_capable(hdev)) hci_req_add(req, HCI_OP_READ_INQ_RSP_TX_POWER, 0, NULL); @@ -1682,27 +732,12 @@ static void hci_init3_req(struct hci_request *req, unsigned long opt) hci_setup_event_mask(req); - /* Some Broadcom based Bluetooth controllers do not support the - * Delete Stored Link Key command. They are clearly indicating its - * absence in the bit mask of supported commands. - * - * Check the supported commands and only if the command is marked - * as supported send it. If not supported assume that the controller - * does not have actual support for stored link keys which makes this - * command redundant anyway. - * - * Some controllers indicate that they support handling deleting - * stored link keys, but they don't. The quirk lets a driver - * just disable this command. - */ - if (hdev->commands[6] & 0x80 && - !test_bit(HCI_QUIRK_BROKEN_STORED_LINK_KEY, &hdev->quirks)) { - struct hci_cp_delete_stored_link_key cp; + if (hdev->commands[6] & 0x20) { + struct hci_cp_read_stored_link_key cp; bacpy(&cp.bdaddr, BDADDR_ANY); - cp.delete_all = 0x01; - hci_req_add(req, HCI_OP_DELETE_STORED_LINK_KEY, - sizeof(cp), &cp); + cp.read_all = 0x01; + hci_req_add(req, HCI_OP_READ_STORED_LINK_KEY, sizeof(cp), &cp); } if (hdev->commands[5] & 0x10) @@ -1735,6 +770,12 @@ static void hci_init3_req(struct hci_request *req, unsigned long opt) * Parameter Request */ + /* If the controller supports the Data Length Extension + * feature, enable the corresponding event. + */ + if (hdev->le_features[0] & HCI_LE_DATA_LEN_EXT) + events[0] |= 0x40; /* LE Data Length Change */ + /* If the controller supports Extended Scanner Filter * Policies, enable the corresponding event. */ @@ -1765,6 +806,14 @@ static void hci_init3_req(struct hci_request *req, unsigned long opt) hci_req_add(req, HCI_OP_LE_READ_ADV_TX_POWER, 0, NULL); } + if (hdev->le_features[0] & HCI_LE_DATA_LEN_EXT) { + /* Read LE Maximum Data Length */ + hci_req_add(req, HCI_OP_LE_READ_MAX_DATA_LEN, 0, NULL); + + /* Read LE Suggested Default Data Length */ + hci_req_add(req, HCI_OP_LE_READ_DEF_DATA_LEN, 0, NULL); + } + hci_set_le_support(req); } @@ -1782,6 +831,29 @@ static void hci_init4_req(struct hci_request *req, unsigned long opt) { struct hci_dev *hdev = req->hdev; + /* Some Broadcom based Bluetooth controllers do not support the + * Delete Stored Link Key command. They are clearly indicating its + * absence in the bit mask of supported commands. + * + * Check the supported commands and only if the command is marked + * as supported send it. If not supported assume that the controller + * does not have actual support for stored link keys which makes this + * command redundant anyway. + * + * Some controllers indicate that they support handling deleting + * stored link keys, but they don't. The quirk lets a driver + * just disable this command. 
+ */ + if (hdev->commands[6] & 0x80 && + !test_bit(HCI_QUIRK_BROKEN_STORED_LINK_KEY, &hdev->quirks)) { + struct hci_cp_delete_stored_link_key cp; + + bacpy(&cp.bdaddr, BDADDR_ANY); + cp.delete_all = 0x01; + hci_req_add(req, HCI_OP_DELETE_STORED_LINK_KEY, + sizeof(cp), &cp); + } + /* Set event mask page 2 if the HCI command for it is supported */ if (hdev->commands[22] & 0x04) hci_set_event_mask_page_2(req); @@ -1799,8 +871,10 @@ static void hci_init4_req(struct hci_request *req, unsigned long opt) hci_req_add(req, HCI_OP_READ_SYNC_TRAIN_PARAMS, 0, NULL); /* Enable Secure Connections if supported and configured */ - if (bredr_sc_enabled(hdev)) { + if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags) && + bredr_sc_enabled(hdev)) { u8 support = 0x01; + hci_req_add(req, HCI_OP_WRITE_SC_SUPPORT, sizeof(support), &support); } @@ -1841,110 +915,29 @@ static int __hci_init(struct hci_dev *hdev) if (err < 0) return err; - /* Only create debugfs entries during the initial setup - * phase and not every time the controller gets powered on. + /* This function is only called when the controller is actually in + * configured state. When the controller is marked as unconfigured, + * this initialization procedure is not run. + * + * It means that it is possible that a controller runs through its + * setup phase and then discovers missing settings. If that is the + * case, then this function will not be called. It then will only + * be called during the config phase. + * + * So only when in setup phase or config phase, create the debugfs + * entries and register the SMP channels. */ - if (!test_bit(HCI_SETUP, &hdev->dev_flags)) + if (!test_bit(HCI_SETUP, &hdev->dev_flags) && + !test_bit(HCI_CONFIG, &hdev->dev_flags)) return 0; - debugfs_create_file("features", 0444, hdev->debugfs, hdev, - &features_fops); - debugfs_create_u16("manufacturer", 0444, hdev->debugfs, - &hdev->manufacturer); - debugfs_create_u8("hci_version", 0444, hdev->debugfs, &hdev->hci_ver); - debugfs_create_u16("hci_revision", 0444, hdev->debugfs, &hdev->hci_rev); - debugfs_create_file("device_list", 0444, hdev->debugfs, hdev, - &device_list_fops); - debugfs_create_file("blacklist", 0444, hdev->debugfs, hdev, - &blacklist_fops); - debugfs_create_file("uuids", 0444, hdev->debugfs, hdev, &uuids_fops); - - debugfs_create_file("conn_info_min_age", 0644, hdev->debugfs, hdev, - &conn_info_min_age_fops); - debugfs_create_file("conn_info_max_age", 0644, hdev->debugfs, hdev, - &conn_info_max_age_fops); - - if (lmp_bredr_capable(hdev)) { - debugfs_create_file("inquiry_cache", 0444, hdev->debugfs, - hdev, &inquiry_cache_fops); - debugfs_create_file("link_keys", 0400, hdev->debugfs, - hdev, &link_keys_fops); - debugfs_create_file("dev_class", 0444, hdev->debugfs, - hdev, &dev_class_fops); - debugfs_create_file("voice_setting", 0444, hdev->debugfs, - hdev, &voice_setting_fops); - } + hci_debugfs_create_common(hdev); - if (lmp_ssp_capable(hdev)) { - debugfs_create_file("auto_accept_delay", 0644, hdev->debugfs, - hdev, &auto_accept_delay_fops); - debugfs_create_file("force_sc_support", 0644, hdev->debugfs, - hdev, &force_sc_support_fops); - debugfs_create_file("sc_only_mode", 0444, hdev->debugfs, - hdev, &sc_only_mode_fops); - if (lmp_le_capable(hdev)) - debugfs_create_file("force_lesc_support", 0644, - hdev->debugfs, hdev, - &force_lesc_support_fops); - } - - if (lmp_sniff_capable(hdev)) { - debugfs_create_file("idle_timeout", 0644, hdev->debugfs, - hdev, &idle_timeout_fops); - debugfs_create_file("sniff_min_interval", 0644, hdev->debugfs, - hdev, 
&sniff_min_interval_fops); - debugfs_create_file("sniff_max_interval", 0644, hdev->debugfs, - hdev, &sniff_max_interval_fops); - } + if (lmp_bredr_capable(hdev)) + hci_debugfs_create_bredr(hdev); - if (lmp_le_capable(hdev)) { - debugfs_create_file("identity", 0400, hdev->debugfs, - hdev, &identity_fops); - debugfs_create_file("rpa_timeout", 0644, hdev->debugfs, - hdev, &rpa_timeout_fops); - debugfs_create_file("random_address", 0444, hdev->debugfs, - hdev, &random_address_fops); - debugfs_create_file("static_address", 0444, hdev->debugfs, - hdev, &static_address_fops); - - /* For controllers with a public address, provide a debug - * option to force the usage of the configured static - * address. By default the public address is used. - */ - if (bacmp(&hdev->bdaddr, BDADDR_ANY)) - debugfs_create_file("force_static_address", 0644, - hdev->debugfs, hdev, - &force_static_address_fops); - - debugfs_create_u8("white_list_size", 0444, hdev->debugfs, - &hdev->le_white_list_size); - debugfs_create_file("white_list", 0444, hdev->debugfs, hdev, - &white_list_fops); - debugfs_create_file("identity_resolving_keys", 0400, - hdev->debugfs, hdev, - &identity_resolving_keys_fops); - debugfs_create_file("long_term_keys", 0400, hdev->debugfs, - hdev, &long_term_keys_fops); - debugfs_create_file("conn_min_interval", 0644, hdev->debugfs, - hdev, &conn_min_interval_fops); - debugfs_create_file("conn_max_interval", 0644, hdev->debugfs, - hdev, &conn_max_interval_fops); - debugfs_create_file("conn_latency", 0644, hdev->debugfs, - hdev, &conn_latency_fops); - debugfs_create_file("supervision_timeout", 0644, hdev->debugfs, - hdev, &supervision_timeout_fops); - debugfs_create_file("adv_channel_map", 0644, hdev->debugfs, - hdev, &adv_channel_map_fops); - debugfs_create_file("adv_min_interval", 0644, hdev->debugfs, - hdev, &adv_min_interval_fops); - debugfs_create_file("adv_max_interval", 0644, hdev->debugfs, - hdev, &adv_max_interval_fops); - debugfs_create_u16("discov_interleaved_timeout", 0644, - hdev->debugfs, - &hdev->discov_interleaved_timeout); - - smp_register(hdev); - } + if (lmp_le_capable(hdev)) + hci_debugfs_create_le(hdev); return 0; } @@ -2624,6 +1617,7 @@ static int hci_dev_do_close(struct hci_dev *hdev) cancel_delayed_work(&hdev->service_cache); cancel_delayed_work_sync(&hdev->le_scan_disable); + cancel_delayed_work_sync(&hdev->le_scan_restart); if (test_bit(HCI_MGMT, &hdev->dev_flags)) cancel_delayed_work_sync(&hdev->rpa_expired); @@ -2635,6 +1629,8 @@ static int hci_dev_do_close(struct hci_dev *hdev) hci_dev_lock(hdev); + hci_discovery_set_state(hdev, DISCOVERY_STOPPED); + if (!test_and_clear_bit(HCI_AUTO_OFF, &hdev->dev_flags)) { if (hdev->dev_type == HCI_BREDR) mgmt_powered(hdev, 0); @@ -2645,6 +1641,8 @@ static int hci_dev_do_close(struct hci_dev *hdev) hci_conn_hash_flush(hdev); hci_dev_unlock(hdev); + smp_unregister(hdev); + hci_notify(hdev, HCI_DEV_DOWN); if (hdev->flush) @@ -2724,32 +1722,14 @@ done: return err; } -int hci_dev_reset(__u16 dev) +static int hci_dev_do_reset(struct hci_dev *hdev) { - struct hci_dev *hdev; - int ret = 0; + int ret; - hdev = hci_dev_get(dev); - if (!hdev) - return -ENODEV; + BT_DBG("%s %p", hdev->name, hdev); hci_req_lock(hdev); - if (!test_bit(HCI_UP, &hdev->flags)) { - ret = -ENETDOWN; - goto done; - } - - if (test_bit(HCI_USER_CHANNEL, &hdev->dev_flags)) { - ret = -EBUSY; - goto done; - } - - if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags)) { - ret = -EOPNOTSUPP; - goto done; - } - /* Drop queues */ skb_queue_purge(&hdev->rx_q); skb_queue_purge(&hdev->cmd_q); 
@@ -2772,12 +1752,41 @@ int hci_dev_reset(__u16 dev) ret = __hci_req_sync(hdev, hci_reset_req, 0, HCI_INIT_TIMEOUT); -done: hci_req_unlock(hdev); - hci_dev_put(hdev); return ret; } +int hci_dev_reset(__u16 dev) +{ + struct hci_dev *hdev; + int err; + + hdev = hci_dev_get(dev); + if (!hdev) + return -ENODEV; + + if (!test_bit(HCI_UP, &hdev->flags)) { + err = -ENETDOWN; + goto done; + } + + if (test_bit(HCI_USER_CHANNEL, &hdev->dev_flags)) { + err = -EBUSY; + goto done; + } + + if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags)) { + err = -EOPNOTSUPP; + goto done; + } + + err = hci_dev_do_reset(hdev); + +done: + hci_dev_put(hdev); + return err; +} + int hci_dev_reset_stat(__u16 dev) { struct hci_dev *hdev; @@ -3143,6 +2152,24 @@ static void hci_power_off(struct work_struct *work) hci_dev_do_close(hdev); } +static void hci_error_reset(struct work_struct *work) +{ + struct hci_dev *hdev = container_of(work, struct hci_dev, error_reset); + + BT_DBG("%s", hdev->name); + + if (hdev->hw_error) + hdev->hw_error(hdev, hdev->hw_error_code); + else + BT_ERR("%s hardware error 0x%2.2x", hdev->name, + hdev->hw_error_code); + + if (hci_dev_do_close(hdev)) + return; + + hci_dev_do_open(hdev); +} + static void hci_discov_off(struct work_struct *work) { struct hci_dev *hdev; @@ -3555,9 +2582,15 @@ int hci_add_remote_oob_data(struct hci_dev *hdev, bdaddr_t *bdaddr, if (hash192 && rand192) { memcpy(data->hash192, hash192, sizeof(data->hash192)); memcpy(data->rand192, rand192, sizeof(data->rand192)); + if (hash256 && rand256) + data->present = 0x03; } else { memset(data->hash192, 0, sizeof(data->hash192)); memset(data->rand192, 0, sizeof(data->rand192)); + if (hash256 && rand256) + data->present = 0x02; + else + data->present = 0x00; } if (hash256 && rand256) { @@ -3566,6 +2599,8 @@ int hci_add_remote_oob_data(struct hci_dev *hdev, bdaddr_t *bdaddr, } else { memset(data->hash256, 0, sizeof(data->hash256)); memset(data->rand256, 0, sizeof(data->rand256)); + if (hash192 && rand192) + data->present = 0x01; } BT_DBG("%s for %pMR", hdev->name, bdaddr); @@ -3659,23 +2694,6 @@ struct hci_conn_params *hci_conn_params_lookup(struct hci_dev *hdev, return NULL; } -static bool is_connected(struct hci_dev *hdev, bdaddr_t *addr, u8 type) -{ - struct hci_conn *conn; - - conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, addr); - if (!conn) - return false; - - if (conn->dst_type != type) - return false; - - if (conn->state != BT_CONNECTED) - return false; - - return true; -} - /* This function requires the caller holds hdev->lock */ struct hci_conn_params *hci_pend_le_action_lookup(struct list_head *list, bdaddr_t *addr, u8 addr_type) @@ -3731,47 +2749,6 @@ struct hci_conn_params *hci_conn_params_add(struct hci_dev *hdev, return params; } -/* This function requires the caller holds hdev->lock */ -int hci_conn_params_set(struct hci_dev *hdev, bdaddr_t *addr, u8 addr_type, - u8 auto_connect) -{ - struct hci_conn_params *params; - - params = hci_conn_params_add(hdev, addr, addr_type); - if (!params) - return -EIO; - - if (params->auto_connect == auto_connect) - return 0; - - list_del_init(¶ms->action); - - switch (auto_connect) { - case HCI_AUTO_CONN_DISABLED: - case HCI_AUTO_CONN_LINK_LOSS: - hci_update_background_scan(hdev); - break; - case HCI_AUTO_CONN_REPORT: - list_add(¶ms->action, &hdev->pend_le_reports); - hci_update_background_scan(hdev); - break; - case HCI_AUTO_CONN_DIRECT: - case HCI_AUTO_CONN_ALWAYS: - if (!is_connected(hdev, addr, addr_type)) { - list_add(¶ms->action, &hdev->pend_le_conns); - 
hci_update_background_scan(hdev); - } - break; - } - - params->auto_connect = auto_connect; - - BT_DBG("addr %pMR (type %u) auto_connect %u", addr, addr_type, - auto_connect); - - return 0; -} - static void hci_conn_params_free(struct hci_conn_params *params) { if (params->conn) { @@ -3828,7 +2805,7 @@ void hci_conn_params_clear_all(struct hci_dev *hdev) BT_DBG("All LE connection parameters were removed"); } -static void inquiry_complete(struct hci_dev *hdev, u8 status) +static void inquiry_complete(struct hci_dev *hdev, u8 status, u16 opcode) { if (status) { BT_ERR("Failed to start inquiry: status %d", status); @@ -3840,7 +2817,8 @@ static void inquiry_complete(struct hci_dev *hdev, u8 status) } } -static void le_scan_disable_work_complete(struct hci_dev *hdev, u8 status) +static void le_scan_disable_work_complete(struct hci_dev *hdev, u8 status, + u16 opcode) { /* General inquiry access code (GIAC) */ u8 lap[3] = { 0x33, 0x8b, 0x9e }; @@ -3853,6 +2831,8 @@ static void le_scan_disable_work_complete(struct hci_dev *hdev, u8 status) return; } + hdev->discovery.scan_start = 0; + switch (hdev->discovery.type) { case DISCOV_TYPE_LE: hci_dev_lock(hdev); @@ -3892,6 +2872,8 @@ static void le_scan_disable_work(struct work_struct *work) BT_DBG("%s", hdev->name); + cancel_delayed_work_sync(&hdev->le_scan_restart); + hci_req_init(&req, hdev); hci_req_add_le_scan_disable(&req); @@ -3901,110 +2883,72 @@ static void le_scan_disable_work(struct work_struct *work) BT_ERR("Disable LE scanning request failed: err %d", err); } -static void set_random_addr(struct hci_request *req, bdaddr_t *rpa) +static void le_scan_restart_work_complete(struct hci_dev *hdev, u8 status, + u16 opcode) { - struct hci_dev *hdev = req->hdev; + unsigned long timeout, duration, scan_start, now; - /* If we're advertising or initiating an LE connection we can't - * go ahead and change the random address at this time. This is - * because the eventual initiator address used for the - * subsequently created connection will be undefined (some - * controllers use the new address and others the one we had - * when the operation started). - * - * In this kind of scenario skip the update and let the random - * address be updated at the next cycle. - */ - if (test_bit(HCI_LE_ADV, &hdev->dev_flags) || - hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT)) { - BT_DBG("Deferring random address update"); - set_bit(HCI_RPA_EXPIRED, &hdev->dev_flags); + BT_DBG("%s", hdev->name); + + if (status) { + BT_ERR("Failed to restart LE scan: status %d", status); return; } - hci_req_add(req, HCI_OP_LE_SET_RANDOM_ADDR, 6, rpa); -} - -int hci_update_random_address(struct hci_request *req, bool require_privacy, - u8 *own_addr_type) -{ - struct hci_dev *hdev = req->hdev; - int err; + if (!test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks) || + !hdev->discovery.scan_start) + return; - /* If privacy is enabled use a resolvable private address. If - * current RPA has expired or there is something else than - * the current RPA in use, then generate a new one. + /* When the scan was started, hdev->le_scan_disable has been queued + * after duration from scan_start. During scan restart this job + * has been canceled, and we need to queue it again after proper + * timeout, to make sure that scan does not run indefinitely. 
*/ - if (test_bit(HCI_PRIVACY, &hdev->dev_flags)) { - int to; - - *own_addr_type = ADDR_LE_DEV_RANDOM; - - if (!test_and_clear_bit(HCI_RPA_EXPIRED, &hdev->dev_flags) && - !bacmp(&hdev->random_addr, &hdev->rpa)) - return 0; - - err = smp_generate_rpa(hdev, hdev->irk, &hdev->rpa); - if (err < 0) { - BT_ERR("%s failed to generate new RPA", hdev->name); - return err; - } - - set_random_addr(req, &hdev->rpa); - - to = msecs_to_jiffies(hdev->rpa_timeout * 1000); - queue_delayed_work(hdev->workqueue, &hdev->rpa_expired, to); + duration = hdev->discovery.scan_duration; + scan_start = hdev->discovery.scan_start; + now = jiffies; + if (now - scan_start <= duration) { + int elapsed; + + if (now >= scan_start) + elapsed = now - scan_start; + else + elapsed = ULONG_MAX - scan_start + now; - return 0; + timeout = duration - elapsed; + } else { + timeout = 0; } + queue_delayed_work(hdev->workqueue, + &hdev->le_scan_disable, timeout); +} - /* In case of required privacy without resolvable private address, - * use an non-resolvable private address. This is useful for active - * scanning and non-connectable advertising. - */ - if (require_privacy) { - bdaddr_t nrpa; +static void le_scan_restart_work(struct work_struct *work) +{ + struct hci_dev *hdev = container_of(work, struct hci_dev, + le_scan_restart.work); + struct hci_request req; + struct hci_cp_le_set_scan_enable cp; + int err; - while (true) { - /* The non-resolvable private address is generated - * from random six bytes with the two most significant - * bits cleared. - */ - get_random_bytes(&nrpa, 6); - nrpa.b[5] &= 0x3f; + BT_DBG("%s", hdev->name); - /* The non-resolvable private address shall not be - * equal to the public address. - */ - if (bacmp(&hdev->bdaddr, &nrpa)) - break; - } + /* If controller is not scanning we are done. */ + if (!test_bit(HCI_LE_SCAN, &hdev->dev_flags)) + return; - *own_addr_type = ADDR_LE_DEV_RANDOM; - set_random_addr(req, &nrpa); - return 0; - } + hci_req_init(&req, hdev); - /* If forcing static address is in use or there is no public - * address use the static address as random address (but skip - * the HCI command if the current random address is already the - * static one. - */ - if (test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags) || - !bacmp(&hdev->bdaddr, BDADDR_ANY)) { - *own_addr_type = ADDR_LE_DEV_RANDOM; - if (bacmp(&hdev->static_addr, &hdev->random_addr)) - hci_req_add(req, HCI_OP_LE_SET_RANDOM_ADDR, 6, - &hdev->static_addr); - return 0; - } + hci_req_add_le_scan_disable(&req); - /* Neither privacy nor static address is being used so use a - * public address. - */ - *own_addr_type = ADDR_LE_DEV_PUBLIC; + memset(&cp, 0, sizeof(cp)); + cp.enable = LE_SCAN_ENABLE; + cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE; + hci_req_add(&req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp); - return 0; + err = hci_req_run(&req, le_scan_restart_work_complete); + if (err) + BT_ERR("Restart LE scan request failed: err %d", err); } /* Copy the Identity Address of the controller. @@ -4015,12 +2959,18 @@ int hci_update_random_address(struct hci_request *req, bool require_privacy, * * For debugging purposes it is possible to force controllers with a * public address to use the static random address instead. + * + * In case BR/EDR has been disabled on a dual-mode controller and + * userspace has configured a static address, then that address + * becomes the identity address instead of the public BR/EDR address. 
*/ void hci_copy_identity_address(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 *bdaddr_type) { if (test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags) || - !bacmp(&hdev->bdaddr, BDADDR_ANY)) { + !bacmp(&hdev->bdaddr, BDADDR_ANY) || + (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags) && + bacmp(&hdev->static_addr, BDADDR_ANY))) { bacpy(bdaddr, &hdev->static_addr); *bdaddr_type = ADDR_LE_DEV_RANDOM; } else { @@ -4059,6 +3009,12 @@ struct hci_dev *hci_alloc_dev(void) hdev->le_conn_max_interval = 0x0038; hdev->le_conn_latency = 0x0000; hdev->le_supv_timeout = 0x002a; + hdev->le_def_tx_len = 0x001b; + hdev->le_def_tx_time = 0x0148; + hdev->le_max_tx_len = 0x001b; + hdev->le_max_tx_time = 0x0148; + hdev->le_max_rx_len = 0x001b; + hdev->le_max_rx_time = 0x0148; hdev->rpa_timeout = HCI_DEFAULT_RPA_TIMEOUT; hdev->discov_interleaved_timeout = DISCOV_INTERLEAVED_TIMEOUT; @@ -4086,10 +3042,12 @@ struct hci_dev *hci_alloc_dev(void) INIT_WORK(&hdev->cmd_work, hci_cmd_work); INIT_WORK(&hdev->tx_work, hci_tx_work); INIT_WORK(&hdev->power_on, hci_power_on); + INIT_WORK(&hdev->error_reset, hci_error_reset); INIT_DELAYED_WORK(&hdev->power_off, hci_power_off); INIT_DELAYED_WORK(&hdev->discov_off, hci_discov_off); INIT_DELAYED_WORK(&hdev->le_scan_disable, le_scan_disable_work); + INIT_DELAYED_WORK(&hdev->le_scan_restart, le_scan_restart_work); skb_queue_head_init(&hdev->rx_q); skb_queue_head_init(&hdev->cmd_q); @@ -4259,8 +3217,6 @@ void hci_unregister_dev(struct hci_dev *hdev) rfkill_destroy(hdev->rfkill); } - smp_unregister(hdev); - device_del(&hdev->dev); debugfs_remove_recursive(hdev->debugfs); @@ -4539,76 +3495,11 @@ static void hci_send_frame(struct hci_dev *hdev, struct sk_buff *skb) } } -void hci_req_init(struct hci_request *req, struct hci_dev *hdev) -{ - skb_queue_head_init(&req->cmd_q); - req->hdev = hdev; - req->err = 0; -} - -int hci_req_run(struct hci_request *req, hci_req_complete_t complete) -{ - struct hci_dev *hdev = req->hdev; - struct sk_buff *skb; - unsigned long flags; - - BT_DBG("length %u", skb_queue_len(&req->cmd_q)); - - /* If an error occurred during request building, remove all HCI - * commands queued on the HCI request queue. 
- */ - if (req->err) { - skb_queue_purge(&req->cmd_q); - return req->err; - } - - /* Do not allow empty requests */ - if (skb_queue_empty(&req->cmd_q)) - return -ENODATA; - - skb = skb_peek_tail(&req->cmd_q); - bt_cb(skb)->req.complete = complete; - - spin_lock_irqsave(&hdev->cmd_q.lock, flags); - skb_queue_splice_tail(&req->cmd_q, &hdev->cmd_q); - spin_unlock_irqrestore(&hdev->cmd_q.lock, flags); - - queue_work(hdev->workqueue, &hdev->cmd_work); - - return 0; -} - bool hci_req_pending(struct hci_dev *hdev) { return (hdev->req_status == HCI_REQ_PEND); } -static struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, - u32 plen, const void *param) -{ - int len = HCI_COMMAND_HDR_SIZE + plen; - struct hci_command_hdr *hdr; - struct sk_buff *skb; - - skb = bt_skb_alloc(len, GFP_ATOMIC); - if (!skb) - return NULL; - - hdr = (struct hci_command_hdr *) skb_put(skb, HCI_COMMAND_HDR_SIZE); - hdr->opcode = cpu_to_le16(opcode); - hdr->plen = plen; - - if (plen) - memcpy(skb_put(skb, plen), param, plen); - - BT_DBG("skb len %d", skb->len); - - bt_cb(skb)->pkt_type = HCI_COMMAND_PKT; - bt_cb(skb)->opcode = opcode; - - return skb; -} - /* Send HCI command */ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, const void *param) @@ -4634,43 +3525,6 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, return 0; } -/* Queue a command to an asynchronous HCI request */ -void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen, - const void *param, u8 event) -{ - struct hci_dev *hdev = req->hdev; - struct sk_buff *skb; - - BT_DBG("%s opcode 0x%4.4x plen %d", hdev->name, opcode, plen); - - /* If an error occurred during request building, there is no point in - * queueing the HCI command. We can simply return. - */ - if (req->err) - return; - - skb = hci_prepare_cmd(hdev, opcode, plen, param); - if (!skb) { - BT_ERR("%s no memory for command (opcode 0x%4.4x)", - hdev->name, opcode); - req->err = -ENOMEM; - return; - } - - if (skb_queue_empty(&req->cmd_q)) - bt_cb(skb)->req.start = true; - - bt_cb(skb)->req.event = event; - - skb_queue_tail(&req->cmd_q, skb); -} - -void hci_req_add(struct hci_request *req, u16 opcode, u32 plen, - const void *param) -{ - hci_req_add_ev(req, opcode, plen, param, 0); -} - /* Get data from the previously sent command */ void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 opcode) { @@ -5429,7 +4283,7 @@ void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status) call_complete: if (req_complete) - req_complete(hdev, status); + req_complete(hdev, status, status ? 
opcode : HCI_OP_NOP); } static void hci_rx_work(struct work_struct *work) @@ -5518,302 +4372,3 @@ static void hci_cmd_work(struct work_struct *work) } } } - -void hci_req_add_le_scan_disable(struct hci_request *req) -{ - struct hci_cp_le_set_scan_enable cp; - - memset(&cp, 0, sizeof(cp)); - cp.enable = LE_SCAN_DISABLE; - hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp); -} - -static void add_to_white_list(struct hci_request *req, - struct hci_conn_params *params) -{ - struct hci_cp_le_add_to_white_list cp; - - cp.bdaddr_type = params->addr_type; - bacpy(&cp.bdaddr, ¶ms->addr); - - hci_req_add(req, HCI_OP_LE_ADD_TO_WHITE_LIST, sizeof(cp), &cp); -} - -static u8 update_white_list(struct hci_request *req) -{ - struct hci_dev *hdev = req->hdev; - struct hci_conn_params *params; - struct bdaddr_list *b; - uint8_t white_list_entries = 0; - - /* Go through the current white list programmed into the - * controller one by one and check if that address is still - * in the list of pending connections or list of devices to - * report. If not present in either list, then queue the - * command to remove it from the controller. - */ - list_for_each_entry(b, &hdev->le_white_list, list) { - struct hci_cp_le_del_from_white_list cp; - - if (hci_pend_le_action_lookup(&hdev->pend_le_conns, - &b->bdaddr, b->bdaddr_type) || - hci_pend_le_action_lookup(&hdev->pend_le_reports, - &b->bdaddr, b->bdaddr_type)) { - white_list_entries++; - continue; - } - - cp.bdaddr_type = b->bdaddr_type; - bacpy(&cp.bdaddr, &b->bdaddr); - - hci_req_add(req, HCI_OP_LE_DEL_FROM_WHITE_LIST, - sizeof(cp), &cp); - } - - /* Since all no longer valid white list entries have been - * removed, walk through the list of pending connections - * and ensure that any new device gets programmed into - * the controller. - * - * If the list of the devices is larger than the list of - * available white list entries in the controller, then - * just abort and return filer policy value to not use the - * white list. - */ - list_for_each_entry(params, &hdev->pend_le_conns, action) { - if (hci_bdaddr_list_lookup(&hdev->le_white_list, - ¶ms->addr, params->addr_type)) - continue; - - if (white_list_entries >= hdev->le_white_list_size) { - /* Select filter policy to accept all advertising */ - return 0x00; - } - - if (hci_find_irk_by_addr(hdev, ¶ms->addr, - params->addr_type)) { - /* White list can not be used with RPAs */ - return 0x00; - } - - white_list_entries++; - add_to_white_list(req, params); - } - - /* After adding all new pending connections, walk through - * the list of pending reports and also add these to the - * white list if there is still space. - */ - list_for_each_entry(params, &hdev->pend_le_reports, action) { - if (hci_bdaddr_list_lookup(&hdev->le_white_list, - ¶ms->addr, params->addr_type)) - continue; - - if (white_list_entries >= hdev->le_white_list_size) { - /* Select filter policy to accept all advertising */ - return 0x00; - } - - if (hci_find_irk_by_addr(hdev, ¶ms->addr, - params->addr_type)) { - /* White list can not be used with RPAs */ - return 0x00; - } - - white_list_entries++; - add_to_white_list(req, params); - } - - /* Select filter policy to use white list */ - return 0x01; -} - -void hci_req_add_le_passive_scan(struct hci_request *req) -{ - struct hci_cp_le_set_scan_param param_cp; - struct hci_cp_le_set_scan_enable enable_cp; - struct hci_dev *hdev = req->hdev; - u8 own_addr_type; - u8 filter_policy; - - /* Set require_privacy to false since no SCAN_REQ are send - * during passive scanning. 
Not using an non-resolvable address - * here is important so that peer devices using direct - * advertising with our address will be correctly reported - * by the controller. - */ - if (hci_update_random_address(req, false, &own_addr_type)) - return; - - /* Adding or removing entries from the white list must - * happen before enabling scanning. The controller does - * not allow white list modification while scanning. - */ - filter_policy = update_white_list(req); - - /* When the controller is using random resolvable addresses and - * with that having LE privacy enabled, then controllers with - * Extended Scanner Filter Policies support can now enable support - * for handling directed advertising. - * - * So instead of using filter polices 0x00 (no whitelist) - * and 0x01 (whitelist enabled) use the new filter policies - * 0x02 (no whitelist) and 0x03 (whitelist enabled). - */ - if (test_bit(HCI_PRIVACY, &hdev->dev_flags) && - (hdev->le_features[0] & HCI_LE_EXT_SCAN_POLICY)) - filter_policy |= 0x02; - - memset(¶m_cp, 0, sizeof(param_cp)); - param_cp.type = LE_SCAN_PASSIVE; - param_cp.interval = cpu_to_le16(hdev->le_scan_interval); - param_cp.window = cpu_to_le16(hdev->le_scan_window); - param_cp.own_address_type = own_addr_type; - param_cp.filter_policy = filter_policy; - hci_req_add(req, HCI_OP_LE_SET_SCAN_PARAM, sizeof(param_cp), - ¶m_cp); - - memset(&enable_cp, 0, sizeof(enable_cp)); - enable_cp.enable = LE_SCAN_ENABLE; - enable_cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE; - hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(enable_cp), - &enable_cp); -} - -static void update_background_scan_complete(struct hci_dev *hdev, u8 status) -{ - if (status) - BT_DBG("HCI request failed to update background scanning: " - "status 0x%2.2x", status); -} - -/* This function controls the background scanning based on hdev->pend_le_conns - * list. If there are pending LE connection we start the background scanning, - * otherwise we stop it. - * - * This function requires the caller holds hdev->lock. - */ -void hci_update_background_scan(struct hci_dev *hdev) -{ - struct hci_request req; - struct hci_conn *conn; - int err; - - if (!test_bit(HCI_UP, &hdev->flags) || - test_bit(HCI_INIT, &hdev->flags) || - test_bit(HCI_SETUP, &hdev->dev_flags) || - test_bit(HCI_CONFIG, &hdev->dev_flags) || - test_bit(HCI_AUTO_OFF, &hdev->dev_flags) || - test_bit(HCI_UNREGISTER, &hdev->dev_flags)) - return; - - /* No point in doing scanning if LE support hasn't been enabled */ - if (!test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) - return; - - /* If discovery is active don't interfere with it */ - if (hdev->discovery.state != DISCOVERY_STOPPED) - return; - - /* Reset RSSI and UUID filters when starting background scanning - * since these filters are meant for service discovery only. - * - * The Start Discovery and Start Service Discovery operations - * ensure to set proper values for RSSI threshold and UUID - * filter list. So it is safe to just reset them here. - */ - hci_discovery_filter_clear(hdev); - - hci_req_init(&req, hdev); - - if (list_empty(&hdev->pend_le_conns) && - list_empty(&hdev->pend_le_reports)) { - /* If there is no pending LE connections or devices - * to be scanned for, we should stop the background - * scanning. - */ - - /* If controller is not scanning we are done. 
*/ - if (!test_bit(HCI_LE_SCAN, &hdev->dev_flags)) - return; - - hci_req_add_le_scan_disable(&req); - - BT_DBG("%s stopping background scanning", hdev->name); - } else { - /* If there is at least one pending LE connection, we should - * keep the background scan running. - */ - - /* If controller is connecting, we should not start scanning - * since some controllers are not able to scan and connect at - * the same time. - */ - conn = hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT); - if (conn) - return; - - /* If controller is currently scanning, we stop it to ensure we - * don't miss any advertising (due to duplicates filter). - */ - if (test_bit(HCI_LE_SCAN, &hdev->dev_flags)) - hci_req_add_le_scan_disable(&req); - - hci_req_add_le_passive_scan(&req); - - BT_DBG("%s starting background scanning", hdev->name); - } - - err = hci_req_run(&req, update_background_scan_complete); - if (err) - BT_ERR("Failed to run HCI request: err %d", err); -} - -static bool disconnected_whitelist_entries(struct hci_dev *hdev) -{ - struct bdaddr_list *b; - - list_for_each_entry(b, &hdev->whitelist, list) { - struct hci_conn *conn; - - conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &b->bdaddr); - if (!conn) - return true; - - if (conn->state != BT_CONNECTED && conn->state != BT_CONFIG) - return true; - } - - return false; -} - -void hci_update_page_scan(struct hci_dev *hdev, struct hci_request *req) -{ - u8 scan; - - if (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) - return; - - if (!hdev_is_powered(hdev)) - return; - - if (mgmt_powering_down(hdev)) - return; - - if (test_bit(HCI_CONNECTABLE, &hdev->dev_flags) || - disconnected_whitelist_entries(hdev)) - scan = SCAN_PAGE; - else - scan = SCAN_DISABLED; - - if (test_bit(HCI_PSCAN, &hdev->flags) == !!(scan & SCAN_PAGE)) - return; - - if (test_bit(HCI_DISCOVERABLE, &hdev->dev_flags)) - scan |= SCAN_INQUIRY; - - if (req) - hci_req_add(req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); - else - hci_send_cmd(hdev, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); -} diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c new file mode 100644 index 000000000000..65261e5d4b84 --- /dev/null +++ b/net/bluetooth/hci_debugfs.c @@ -0,0 +1,1056 @@ +/* + BlueZ - Bluetooth protocol stack for Linux + + Copyright (C) 2014 Intel Corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License version 2 as + published by the Free Software Foundation; + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. + IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY + CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, + COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS + SOFTWARE IS DISCLAIMED. 
+*/ + +#include <linux/debugfs.h> + +#include <net/bluetooth/bluetooth.h> +#include <net/bluetooth/hci_core.h> + +#include "hci_debugfs.h" + +static int features_show(struct seq_file *f, void *ptr) +{ + struct hci_dev *hdev = f->private; + u8 p; + + hci_dev_lock(hdev); + for (p = 0; p < HCI_MAX_PAGES && p <= hdev->max_page; p++) { + seq_printf(f, "%2u: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x " + "0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", p, + hdev->features[p][0], hdev->features[p][1], + hdev->features[p][2], hdev->features[p][3], + hdev->features[p][4], hdev->features[p][5], + hdev->features[p][6], hdev->features[p][7]); + } + if (lmp_le_capable(hdev)) + seq_printf(f, "LE: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x " + "0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", + hdev->le_features[0], hdev->le_features[1], + hdev->le_features[2], hdev->le_features[3], + hdev->le_features[4], hdev->le_features[5], + hdev->le_features[6], hdev->le_features[7]); + hci_dev_unlock(hdev); + + return 0; +} + +static int features_open(struct inode *inode, struct file *file) +{ + return single_open(file, features_show, inode->i_private); +} + +static const struct file_operations features_fops = { + .open = features_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int device_list_show(struct seq_file *f, void *ptr) +{ + struct hci_dev *hdev = f->private; + struct hci_conn_params *p; + struct bdaddr_list *b; + + hci_dev_lock(hdev); + list_for_each_entry(b, &hdev->whitelist, list) + seq_printf(f, "%pMR (type %u)\n", &b->bdaddr, b->bdaddr_type); + list_for_each_entry(p, &hdev->le_conn_params, list) { + seq_printf(f, "%pMR (type %u) %u\n", &p->addr, p->addr_type, + p->auto_connect); + } + hci_dev_unlock(hdev); + + return 0; +} + +static int device_list_open(struct inode *inode, struct file *file) +{ + return single_open(file, device_list_show, inode->i_private); +} + +static const struct file_operations device_list_fops = { + .open = device_list_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int blacklist_show(struct seq_file *f, void *p) +{ + struct hci_dev *hdev = f->private; + struct bdaddr_list *b; + + hci_dev_lock(hdev); + list_for_each_entry(b, &hdev->blacklist, list) + seq_printf(f, "%pMR (type %u)\n", &b->bdaddr, b->bdaddr_type); + hci_dev_unlock(hdev); + + return 0; +} + +static int blacklist_open(struct inode *inode, struct file *file) +{ + return single_open(file, blacklist_show, inode->i_private); +} + +static const struct file_operations blacklist_fops = { + .open = blacklist_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int uuids_show(struct seq_file *f, void *p) +{ + struct hci_dev *hdev = f->private; + struct bt_uuid *uuid; + + hci_dev_lock(hdev); + list_for_each_entry(uuid, &hdev->uuids, list) { + u8 i, val[16]; + + /* The Bluetooth UUID values are stored in big endian, + * but with reversed byte order. So convert them into + * the right order for the %pUb modifier. 
+ */ + for (i = 0; i < 16; i++) + val[i] = uuid->uuid[15 - i]; + + seq_printf(f, "%pUb\n", val); + } + hci_dev_unlock(hdev); + + return 0; +} + +static int uuids_open(struct inode *inode, struct file *file) +{ + return single_open(file, uuids_show, inode->i_private); +} + +static const struct file_operations uuids_fops = { + .open = uuids_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int remote_oob_show(struct seq_file *f, void *ptr) +{ + struct hci_dev *hdev = f->private; + struct oob_data *data; + + hci_dev_lock(hdev); + list_for_each_entry(data, &hdev->remote_oob_data, list) { + seq_printf(f, "%pMR (type %u) %u %*phN %*phN %*phN %*phN\n", + &data->bdaddr, data->bdaddr_type, data->present, + 16, data->hash192, 16, data->rand192, + 16, data->hash256, 19, data->rand256); + } + hci_dev_unlock(hdev); + + return 0; +} + +static int remote_oob_open(struct inode *inode, struct file *file) +{ + return single_open(file, remote_oob_show, inode->i_private); +} + +static const struct file_operations remote_oob_fops = { + .open = remote_oob_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int conn_info_min_age_set(void *data, u64 val) +{ + struct hci_dev *hdev = data; + + if (val == 0 || val > hdev->conn_info_max_age) + return -EINVAL; + + hci_dev_lock(hdev); + hdev->conn_info_min_age = val; + hci_dev_unlock(hdev); + + return 0; +} + +static int conn_info_min_age_get(void *data, u64 *val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + *val = hdev->conn_info_min_age; + hci_dev_unlock(hdev); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(conn_info_min_age_fops, conn_info_min_age_get, + conn_info_min_age_set, "%llu\n"); + +static int conn_info_max_age_set(void *data, u64 val) +{ + struct hci_dev *hdev = data; + + if (val == 0 || val < hdev->conn_info_min_age) + return -EINVAL; + + hci_dev_lock(hdev); + hdev->conn_info_max_age = val; + hci_dev_unlock(hdev); + + return 0; +} + +static int conn_info_max_age_get(void *data, u64 *val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + *val = hdev->conn_info_max_age; + hci_dev_unlock(hdev); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(conn_info_max_age_fops, conn_info_max_age_get, + conn_info_max_age_set, "%llu\n"); + +static ssize_t use_debug_keys_read(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct hci_dev *hdev = file->private_data; + char buf[3]; + + buf[0] = test_bit(HCI_USE_DEBUG_KEYS, &hdev->dev_flags) ? 'Y': 'N'; + buf[1] = '\n'; + buf[2] = '\0'; + return simple_read_from_buffer(user_buf, count, ppos, buf, 2); +} + +static const struct file_operations use_debug_keys_fops = { + .open = simple_open, + .read = use_debug_keys_read, + .llseek = default_llseek, +}; + +static ssize_t sc_only_mode_read(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct hci_dev *hdev = file->private_data; + char buf[3]; + + buf[0] = test_bit(HCI_SC_ONLY, &hdev->dev_flags) ? 
'Y': 'N'; + buf[1] = '\n'; + buf[2] = '\0'; + return simple_read_from_buffer(user_buf, count, ppos, buf, 2); +} + +static const struct file_operations sc_only_mode_fops = { + .open = simple_open, + .read = sc_only_mode_read, + .llseek = default_llseek, +}; + +void hci_debugfs_create_common(struct hci_dev *hdev) +{ + debugfs_create_file("features", 0444, hdev->debugfs, hdev, + &features_fops); + debugfs_create_u16("manufacturer", 0444, hdev->debugfs, + &hdev->manufacturer); + debugfs_create_u8("hci_version", 0444, hdev->debugfs, &hdev->hci_ver); + debugfs_create_u16("hci_revision", 0444, hdev->debugfs, &hdev->hci_rev); + debugfs_create_u8("hardware_error", 0444, hdev->debugfs, + &hdev->hw_error_code); + + debugfs_create_file("device_list", 0444, hdev->debugfs, hdev, + &device_list_fops); + debugfs_create_file("blacklist", 0444, hdev->debugfs, hdev, + &blacklist_fops); + debugfs_create_file("uuids", 0444, hdev->debugfs, hdev, &uuids_fops); + debugfs_create_file("remote_oob", 0400, hdev->debugfs, hdev, + &remote_oob_fops); + + debugfs_create_file("conn_info_min_age", 0644, hdev->debugfs, hdev, + &conn_info_min_age_fops); + debugfs_create_file("conn_info_max_age", 0644, hdev->debugfs, hdev, + &conn_info_max_age_fops); + + if (lmp_ssp_capable(hdev) || lmp_le_capable(hdev)) + debugfs_create_file("use_debug_keys", 0444, hdev->debugfs, + hdev, &use_debug_keys_fops); + + if (lmp_sc_capable(hdev) || lmp_le_capable(hdev)) + debugfs_create_file("sc_only_mode", 0444, hdev->debugfs, + hdev, &sc_only_mode_fops); +} + +static int inquiry_cache_show(struct seq_file *f, void *p) +{ + struct hci_dev *hdev = f->private; + struct discovery_state *cache = &hdev->discovery; + struct inquiry_entry *e; + + hci_dev_lock(hdev); + + list_for_each_entry(e, &cache->all, all) { + struct inquiry_data *data = &e->data; + seq_printf(f, "%pMR %d %d %d 0x%.2x%.2x%.2x 0x%.4x %d %d %u\n", + &data->bdaddr, + data->pscan_rep_mode, data->pscan_period_mode, + data->pscan_mode, data->dev_class[2], + data->dev_class[1], data->dev_class[0], + __le16_to_cpu(data->clock_offset), + data->rssi, data->ssp_mode, e->timestamp); + } + + hci_dev_unlock(hdev); + + return 0; +} + +static int inquiry_cache_open(struct inode *inode, struct file *file) +{ + return single_open(file, inquiry_cache_show, inode->i_private); +} + +static const struct file_operations inquiry_cache_fops = { + .open = inquiry_cache_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int link_keys_show(struct seq_file *f, void *ptr) +{ + struct hci_dev *hdev = f->private; + struct link_key *key; + + rcu_read_lock(); + list_for_each_entry_rcu(key, &hdev->link_keys, list) + seq_printf(f, "%pMR %u %*phN %u\n", &key->bdaddr, key->type, + HCI_LINK_KEY_SIZE, key->val, key->pin_len); + rcu_read_unlock(); + + return 0; +} + +static int link_keys_open(struct inode *inode, struct file *file) +{ + return single_open(file, link_keys_show, inode->i_private); +} + +static const struct file_operations link_keys_fops = { + .open = link_keys_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int dev_class_show(struct seq_file *f, void *ptr) +{ + struct hci_dev *hdev = f->private; + + hci_dev_lock(hdev); + seq_printf(f, "0x%.2x%.2x%.2x\n", hdev->dev_class[2], + hdev->dev_class[1], hdev->dev_class[0]); + hci_dev_unlock(hdev); + + return 0; +} + +static int dev_class_open(struct inode *inode, struct file *file) +{ + return single_open(file, dev_class_show, inode->i_private); +} + +static const struct 
file_operations dev_class_fops = { + .open = dev_class_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int voice_setting_get(void *data, u64 *val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + *val = hdev->voice_setting; + hci_dev_unlock(hdev); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(voice_setting_fops, voice_setting_get, + NULL, "0x%4.4llx\n"); + +static ssize_t ssp_debug_mode_read(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct hci_dev *hdev = file->private_data; + char buf[3]; + + buf[0] = hdev->ssp_debug_mode ? 'Y': 'N'; + buf[1] = '\n'; + buf[2] = '\0'; + return simple_read_from_buffer(user_buf, count, ppos, buf, 2); +} + +static const struct file_operations ssp_debug_mode_fops = { + .open = simple_open, + .read = ssp_debug_mode_read, + .llseek = default_llseek, +}; + +static int auto_accept_delay_set(void *data, u64 val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + hdev->auto_accept_delay = val; + hci_dev_unlock(hdev); + + return 0; +} + +static int auto_accept_delay_get(void *data, u64 *val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + *val = hdev->auto_accept_delay; + hci_dev_unlock(hdev); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(auto_accept_delay_fops, auto_accept_delay_get, + auto_accept_delay_set, "%llu\n"); + +static int idle_timeout_set(void *data, u64 val) +{ + struct hci_dev *hdev = data; + + if (val != 0 && (val < 500 || val > 3600000)) + return -EINVAL; + + hci_dev_lock(hdev); + hdev->idle_timeout = val; + hci_dev_unlock(hdev); + + return 0; +} + +static int idle_timeout_get(void *data, u64 *val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + *val = hdev->idle_timeout; + hci_dev_unlock(hdev); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(idle_timeout_fops, idle_timeout_get, + idle_timeout_set, "%llu\n"); + +static int sniff_min_interval_set(void *data, u64 val) +{ + struct hci_dev *hdev = data; + + if (val == 0 || val % 2 || val > hdev->sniff_max_interval) + return -EINVAL; + + hci_dev_lock(hdev); + hdev->sniff_min_interval = val; + hci_dev_unlock(hdev); + + return 0; +} + +static int sniff_min_interval_get(void *data, u64 *val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + *val = hdev->sniff_min_interval; + hci_dev_unlock(hdev); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(sniff_min_interval_fops, sniff_min_interval_get, + sniff_min_interval_set, "%llu\n"); + +static int sniff_max_interval_set(void *data, u64 val) +{ + struct hci_dev *hdev = data; + + if (val == 0 || val % 2 || val < hdev->sniff_min_interval) + return -EINVAL; + + hci_dev_lock(hdev); + hdev->sniff_max_interval = val; + hci_dev_unlock(hdev); + + return 0; +} + +static int sniff_max_interval_get(void *data, u64 *val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + *val = hdev->sniff_max_interval; + hci_dev_unlock(hdev); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(sniff_max_interval_fops, sniff_max_interval_get, + sniff_max_interval_set, "%llu\n"); + +void hci_debugfs_create_bredr(struct hci_dev *hdev) +{ + debugfs_create_file("inquiry_cache", 0444, hdev->debugfs, hdev, + &inquiry_cache_fops); + debugfs_create_file("link_keys", 0400, hdev->debugfs, hdev, + &link_keys_fops); + debugfs_create_file("dev_class", 0444, hdev->debugfs, hdev, + &dev_class_fops); + debugfs_create_file("voice_setting", 0444, hdev->debugfs, hdev, + &voice_setting_fops); + + if (lmp_ssp_capable(hdev)) { + debugfs_create_file("ssp_debug_mode", 0444, hdev->debugfs, + hdev, 
&ssp_debug_mode_fops); + debugfs_create_file("auto_accept_delay", 0644, hdev->debugfs, + hdev, &auto_accept_delay_fops); + } + + if (lmp_sniff_capable(hdev)) { + debugfs_create_file("idle_timeout", 0644, hdev->debugfs, + hdev, &idle_timeout_fops); + debugfs_create_file("sniff_min_interval", 0644, hdev->debugfs, + hdev, &sniff_min_interval_fops); + debugfs_create_file("sniff_max_interval", 0644, hdev->debugfs, + hdev, &sniff_max_interval_fops); + } +} + +static int identity_show(struct seq_file *f, void *p) +{ + struct hci_dev *hdev = f->private; + bdaddr_t addr; + u8 addr_type; + + hci_dev_lock(hdev); + + hci_copy_identity_address(hdev, &addr, &addr_type); + + seq_printf(f, "%pMR (type %u) %*phN %pMR\n", &addr, addr_type, + 16, hdev->irk, &hdev->rpa); + + hci_dev_unlock(hdev); + + return 0; +} + +static int identity_open(struct inode *inode, struct file *file) +{ + return single_open(file, identity_show, inode->i_private); +} + +static const struct file_operations identity_fops = { + .open = identity_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int rpa_timeout_set(void *data, u64 val) +{ + struct hci_dev *hdev = data; + + /* Require the RPA timeout to be at least 30 seconds and at most + * 24 hours. + */ + if (val < 30 || val > (60 * 60 * 24)) + return -EINVAL; + + hci_dev_lock(hdev); + hdev->rpa_timeout = val; + hci_dev_unlock(hdev); + + return 0; +} + +static int rpa_timeout_get(void *data, u64 *val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + *val = hdev->rpa_timeout; + hci_dev_unlock(hdev); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(rpa_timeout_fops, rpa_timeout_get, + rpa_timeout_set, "%llu\n"); + +static int random_address_show(struct seq_file *f, void *p) +{ + struct hci_dev *hdev = f->private; + + hci_dev_lock(hdev); + seq_printf(f, "%pMR\n", &hdev->random_addr); + hci_dev_unlock(hdev); + + return 0; +} + +static int random_address_open(struct inode *inode, struct file *file) +{ + return single_open(file, random_address_show, inode->i_private); +} + +static const struct file_operations random_address_fops = { + .open = random_address_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int static_address_show(struct seq_file *f, void *p) +{ + struct hci_dev *hdev = f->private; + + hci_dev_lock(hdev); + seq_printf(f, "%pMR\n", &hdev->static_addr); + hci_dev_unlock(hdev); + + return 0; +} + +static int static_address_open(struct inode *inode, struct file *file) +{ + return single_open(file, static_address_show, inode->i_private); +} + +static const struct file_operations static_address_fops = { + .open = static_address_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static ssize_t force_static_address_read(struct file *file, + char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct hci_dev *hdev = file->private_data; + char buf[3]; + + buf[0] = test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags) ? 
'Y': 'N'; + buf[1] = '\n'; + buf[2] = '\0'; + return simple_read_from_buffer(user_buf, count, ppos, buf, 2); +} + +static ssize_t force_static_address_write(struct file *file, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct hci_dev *hdev = file->private_data; + char buf[32]; + size_t buf_size = min(count, (sizeof(buf)-1)); + bool enable; + + if (test_bit(HCI_UP, &hdev->flags)) + return -EBUSY; + + if (copy_from_user(buf, user_buf, buf_size)) + return -EFAULT; + + buf[buf_size] = '\0'; + if (strtobool(buf, &enable)) + return -EINVAL; + + if (enable == test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags)) + return -EALREADY; + + change_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags); + + return count; +} + +static const struct file_operations force_static_address_fops = { + .open = simple_open, + .read = force_static_address_read, + .write = force_static_address_write, + .llseek = default_llseek, +}; + +static int white_list_show(struct seq_file *f, void *ptr) +{ + struct hci_dev *hdev = f->private; + struct bdaddr_list *b; + + hci_dev_lock(hdev); + list_for_each_entry(b, &hdev->le_white_list, list) + seq_printf(f, "%pMR (type %u)\n", &b->bdaddr, b->bdaddr_type); + hci_dev_unlock(hdev); + + return 0; +} + +static int white_list_open(struct inode *inode, struct file *file) +{ + return single_open(file, white_list_show, inode->i_private); +} + +static const struct file_operations white_list_fops = { + .open = white_list_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int identity_resolving_keys_show(struct seq_file *f, void *ptr) +{ + struct hci_dev *hdev = f->private; + struct smp_irk *irk; + + rcu_read_lock(); + list_for_each_entry_rcu(irk, &hdev->identity_resolving_keys, list) { + seq_printf(f, "%pMR (type %u) %*phN %pMR\n", + &irk->bdaddr, irk->addr_type, + 16, irk->val, &irk->rpa); + } + rcu_read_unlock(); + + return 0; +} + +static int identity_resolving_keys_open(struct inode *inode, struct file *file) +{ + return single_open(file, identity_resolving_keys_show, + inode->i_private); +} + +static const struct file_operations identity_resolving_keys_fops = { + .open = identity_resolving_keys_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int long_term_keys_show(struct seq_file *f, void *ptr) +{ + struct hci_dev *hdev = f->private; + struct smp_ltk *ltk; + + rcu_read_lock(); + list_for_each_entry_rcu(ltk, &hdev->long_term_keys, list) + seq_printf(f, "%pMR (type %u) %u 0x%02x %u %.4x %.16llx %*phN\n", + <k->bdaddr, ltk->bdaddr_type, ltk->authenticated, + ltk->type, ltk->enc_size, __le16_to_cpu(ltk->ediv), + __le64_to_cpu(ltk->rand), 16, ltk->val); + rcu_read_unlock(); + + return 0; +} + +static int long_term_keys_open(struct inode *inode, struct file *file) +{ + return single_open(file, long_term_keys_show, inode->i_private); +} + +static const struct file_operations long_term_keys_fops = { + .open = long_term_keys_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int conn_min_interval_set(void *data, u64 val) +{ + struct hci_dev *hdev = data; + + if (val < 0x0006 || val > 0x0c80 || val > hdev->le_conn_max_interval) + return -EINVAL; + + hci_dev_lock(hdev); + hdev->le_conn_min_interval = val; + hci_dev_unlock(hdev); + + return 0; +} + +static int conn_min_interval_get(void *data, u64 *val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + *val = hdev->le_conn_min_interval; + hci_dev_unlock(hdev); + + return 0; +} + 
+DEFINE_SIMPLE_ATTRIBUTE(conn_min_interval_fops, conn_min_interval_get, + conn_min_interval_set, "%llu\n"); + +static int conn_max_interval_set(void *data, u64 val) +{ + struct hci_dev *hdev = data; + + if (val < 0x0006 || val > 0x0c80 || val < hdev->le_conn_min_interval) + return -EINVAL; + + hci_dev_lock(hdev); + hdev->le_conn_max_interval = val; + hci_dev_unlock(hdev); + + return 0; +} + +static int conn_max_interval_get(void *data, u64 *val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + *val = hdev->le_conn_max_interval; + hci_dev_unlock(hdev); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(conn_max_interval_fops, conn_max_interval_get, + conn_max_interval_set, "%llu\n"); + +static int conn_latency_set(void *data, u64 val) +{ + struct hci_dev *hdev = data; + + if (val > 0x01f3) + return -EINVAL; + + hci_dev_lock(hdev); + hdev->le_conn_latency = val; + hci_dev_unlock(hdev); + + return 0; +} + +static int conn_latency_get(void *data, u64 *val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + *val = hdev->le_conn_latency; + hci_dev_unlock(hdev); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(conn_latency_fops, conn_latency_get, + conn_latency_set, "%llu\n"); + +static int supervision_timeout_set(void *data, u64 val) +{ + struct hci_dev *hdev = data; + + if (val < 0x000a || val > 0x0c80) + return -EINVAL; + + hci_dev_lock(hdev); + hdev->le_supv_timeout = val; + hci_dev_unlock(hdev); + + return 0; +} + +static int supervision_timeout_get(void *data, u64 *val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + *val = hdev->le_supv_timeout; + hci_dev_unlock(hdev); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(supervision_timeout_fops, supervision_timeout_get, + supervision_timeout_set, "%llu\n"); + +static int adv_channel_map_set(void *data, u64 val) +{ + struct hci_dev *hdev = data; + + if (val < 0x01 || val > 0x07) + return -EINVAL; + + hci_dev_lock(hdev); + hdev->le_adv_channel_map = val; + hci_dev_unlock(hdev); + + return 0; +} + +static int adv_channel_map_get(void *data, u64 *val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + *val = hdev->le_adv_channel_map; + hci_dev_unlock(hdev); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(adv_channel_map_fops, adv_channel_map_get, + adv_channel_map_set, "%llu\n"); + +static int adv_min_interval_set(void *data, u64 val) +{ + struct hci_dev *hdev = data; + + if (val < 0x0020 || val > 0x4000 || val > hdev->le_adv_max_interval) + return -EINVAL; + + hci_dev_lock(hdev); + hdev->le_adv_min_interval = val; + hci_dev_unlock(hdev); + + return 0; +} + +static int adv_min_interval_get(void *data, u64 *val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + *val = hdev->le_adv_min_interval; + hci_dev_unlock(hdev); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(adv_min_interval_fops, adv_min_interval_get, + adv_min_interval_set, "%llu\n"); + +static int adv_max_interval_set(void *data, u64 val) +{ + struct hci_dev *hdev = data; + + if (val < 0x0020 || val > 0x4000 || val < hdev->le_adv_min_interval) + return -EINVAL; + + hci_dev_lock(hdev); + hdev->le_adv_max_interval = val; + hci_dev_unlock(hdev); + + return 0; +} + +static int adv_max_interval_get(void *data, u64 *val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + *val = hdev->le_adv_max_interval; + hci_dev_unlock(hdev); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(adv_max_interval_fops, adv_max_interval_get, + adv_max_interval_set, "%llu\n"); + +void hci_debugfs_create_le(struct hci_dev *hdev) +{ + debugfs_create_file("identity", 0400, hdev->debugfs, 
hdev, + &identity_fops); + debugfs_create_file("rpa_timeout", 0644, hdev->debugfs, hdev, + &rpa_timeout_fops); + debugfs_create_file("random_address", 0444, hdev->debugfs, hdev, + &random_address_fops); + debugfs_create_file("static_address", 0444, hdev->debugfs, hdev, + &static_address_fops); + + /* For controllers with a public address, provide a debug + * option to force the usage of the configured static + * address. By default the public address is used. + */ + if (bacmp(&hdev->bdaddr, BDADDR_ANY)) + debugfs_create_file("force_static_address", 0644, + hdev->debugfs, hdev, + &force_static_address_fops); + + debugfs_create_u8("white_list_size", 0444, hdev->debugfs, + &hdev->le_white_list_size); + debugfs_create_file("white_list", 0444, hdev->debugfs, hdev, + &white_list_fops); + debugfs_create_file("identity_resolving_keys", 0400, hdev->debugfs, + hdev, &identity_resolving_keys_fops); + debugfs_create_file("long_term_keys", 0400, hdev->debugfs, hdev, + &long_term_keys_fops); + debugfs_create_file("conn_min_interval", 0644, hdev->debugfs, hdev, + &conn_min_interval_fops); + debugfs_create_file("conn_max_interval", 0644, hdev->debugfs, hdev, + &conn_max_interval_fops); + debugfs_create_file("conn_latency", 0644, hdev->debugfs, hdev, + &conn_latency_fops); + debugfs_create_file("supervision_timeout", 0644, hdev->debugfs, hdev, + &supervision_timeout_fops); + debugfs_create_file("adv_channel_map", 0644, hdev->debugfs, hdev, + &adv_channel_map_fops); + debugfs_create_file("adv_min_interval", 0644, hdev->debugfs, hdev, + &adv_min_interval_fops); + debugfs_create_file("adv_max_interval", 0644, hdev->debugfs, hdev, + &adv_max_interval_fops); + debugfs_create_u16("discov_interleaved_timeout", 0644, hdev->debugfs, + &hdev->discov_interleaved_timeout); +} + +void hci_debugfs_create_conn(struct hci_conn *conn) +{ + struct hci_dev *hdev = conn->hdev; + char name[6]; + + if (IS_ERR_OR_NULL(hdev->debugfs)) + return; + + snprintf(name, sizeof(name), "%u", conn->handle); + conn->debugfs = debugfs_create_dir(name, hdev->debugfs); +} diff --git a/net/bluetooth/hci_debugfs.h b/net/bluetooth/hci_debugfs.h new file mode 100644 index 000000000000..fb68efe083c5 --- /dev/null +++ b/net/bluetooth/hci_debugfs.h @@ -0,0 +1,26 @@ +/* + BlueZ - Bluetooth protocol stack for Linux + Copyright (C) 2014 Intel Corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License version 2 as + published by the Free Software Foundation; + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. + IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY + CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, + COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS + SOFTWARE IS DISCLAIMED. 
+*/ + +void hci_debugfs_create_common(struct hci_dev *hdev); +void hci_debugfs_create_bredr(struct hci_dev *hdev); +void hci_debugfs_create_le(struct hci_dev *hdev); +void hci_debugfs_create_conn(struct hci_conn *conn); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 39a5c8a01726..a3fb094822b6 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -30,10 +30,15 @@ #include <net/bluetooth/hci_core.h> #include <net/bluetooth/mgmt.h> +#include "hci_request.h" +#include "hci_debugfs.h" #include "a2mp.h" #include "amp.h" #include "smp.h" +#define ZERO_KEY "\x00\x00\x00\x00\x00\x00\x00\x00" \ + "\x00\x00\x00\x00\x00\x00\x00\x00" + /* Handle HCI Event packets */ static void hci_cc_inquiry_cancel(struct hci_dev *hdev, struct sk_buff *skb) @@ -195,7 +200,8 @@ static void hci_cc_reset(struct hci_dev *hdev, struct sk_buff *skb) /* Reset all non-persistent flags */ hdev->dev_flags &= ~HCI_PERSISTENT_MASK; - hdev->discovery.state = DISCOVERY_STOPPED; + hci_discovery_set_state(hdev, DISCOVERY_STOPPED); + hdev->inq_tx_power = HCI_TX_POWER_INVALID; hdev->adv_tx_power = HCI_TX_POWER_INVALID; @@ -212,6 +218,40 @@ static void hci_cc_reset(struct hci_dev *hdev, struct sk_buff *skb) hci_bdaddr_list_clear(&hdev->le_white_list); } +static void hci_cc_read_stored_link_key(struct hci_dev *hdev, + struct sk_buff *skb) +{ + struct hci_rp_read_stored_link_key *rp = (void *)skb->data; + struct hci_cp_read_stored_link_key *sent; + + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); + + sent = hci_sent_cmd_data(hdev, HCI_OP_READ_STORED_LINK_KEY); + if (!sent) + return; + + if (!rp->status && sent->read_all == 0x01) { + hdev->stored_max_keys = rp->max_keys; + hdev->stored_num_keys = rp->num_keys; + } +} + +static void hci_cc_delete_stored_link_key(struct hci_dev *hdev, + struct sk_buff *skb) +{ + struct hci_rp_delete_stored_link_key *rp = (void *)skb->data; + + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); + + if (rp->status) + return; + + if (rp->num_keys <= hdev->stored_num_keys) + hdev->stored_num_keys -= rp->num_keys; + else + hdev->stored_num_keys = 0; +} + static void hci_cc_write_local_name(struct hci_dev *hdev, struct sk_buff *skb) { __u8 status = *((__u8 *) skb->data); @@ -242,7 +282,8 @@ static void hci_cc_read_local_name(struct hci_dev *hdev, struct sk_buff *skb) if (rp->status) return; - if (test_bit(HCI_SETUP, &hdev->dev_flags)) + if (test_bit(HCI_SETUP, &hdev->dev_flags) || + test_bit(HCI_CONFIG, &hdev->dev_flags)) memcpy(hdev->dev_name, rp->name, HCI_MAX_NAME_LENGTH); } @@ -488,9 +529,7 @@ static void hci_cc_write_sc_support(struct hci_dev *hdev, struct sk_buff *skb) hdev->features[1][0] &= ~LMP_HOST_SC; } - if (test_bit(HCI_MGMT, &hdev->dev_flags)) - mgmt_sc_enable_complete(hdev, sent->support, status); - else if (!status) { + if (!test_bit(HCI_MGMT, &hdev->dev_flags) && !status) { if (sent->support) set_bit(HCI_SC_ENABLED, &hdev->dev_flags); else @@ -509,7 +548,8 @@ static void hci_cc_read_local_version(struct hci_dev *hdev, struct sk_buff *skb) if (rp->status) return; - if (test_bit(HCI_SETUP, &hdev->dev_flags)) { + if (test_bit(HCI_SETUP, &hdev->dev_flags) || + test_bit(HCI_CONFIG, &hdev->dev_flags)) { hdev->hci_ver = rp->hci_ver; hdev->hci_rev = __le16_to_cpu(rp->hci_rev); hdev->lmp_ver = rp->lmp_ver; @@ -528,7 +568,8 @@ static void hci_cc_read_local_commands(struct hci_dev *hdev, if (rp->status) return; - if (test_bit(HCI_SETUP, &hdev->dev_flags)) + if (test_bit(HCI_SETUP, &hdev->dev_flags) || + test_bit(HCI_CONFIG, &hdev->dev_flags)) 
memcpy(hdev->commands, rp->commands, sizeof(hdev->commands)); } @@ -1279,6 +1320,55 @@ static void hci_cc_le_read_supported_states(struct hci_dev *hdev, memcpy(hdev->le_states, rp->le_states, 8); } +static void hci_cc_le_read_def_data_len(struct hci_dev *hdev, + struct sk_buff *skb) +{ + struct hci_rp_le_read_def_data_len *rp = (void *) skb->data; + + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); + + if (rp->status) + return; + + hdev->le_def_tx_len = le16_to_cpu(rp->tx_len); + hdev->le_def_tx_time = le16_to_cpu(rp->tx_time); +} + +static void hci_cc_le_write_def_data_len(struct hci_dev *hdev, + struct sk_buff *skb) +{ + struct hci_cp_le_write_def_data_len *sent; + __u8 status = *((__u8 *) skb->data); + + BT_DBG("%s status 0x%2.2x", hdev->name, status); + + if (status) + return; + + sent = hci_sent_cmd_data(hdev, HCI_OP_LE_WRITE_DEF_DATA_LEN); + if (!sent) + return; + + hdev->le_def_tx_len = le16_to_cpu(sent->tx_len); + hdev->le_def_tx_time = le16_to_cpu(sent->tx_time); +} + +static void hci_cc_le_read_max_data_len(struct hci_dev *hdev, + struct sk_buff *skb) +{ + struct hci_rp_le_read_max_data_len *rp = (void *) skb->data; + + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); + + if (rp->status) + return; + + hdev->le_max_tx_len = le16_to_cpu(rp->tx_len); + hdev->le_max_tx_time = le16_to_cpu(rp->tx_time); + hdev->le_max_rx_len = le16_to_cpu(rp->rx_len); + hdev->le_max_rx_time = le16_to_cpu(rp->rx_time); +} + static void hci_cc_write_le_host_supported(struct hci_dev *hdev, struct sk_buff *skb) { @@ -1399,6 +1489,21 @@ unlock: hci_dev_unlock(hdev); } +static void hci_cc_write_ssp_debug_mode(struct hci_dev *hdev, struct sk_buff *skb) +{ + u8 status = *((u8 *) skb->data); + u8 *mode; + + BT_DBG("%s status 0x%2.2x", hdev->name, status); + + if (status) + return; + + mode = hci_sent_cmd_data(hdev, HCI_OP_WRITE_SSP_DEBUG_MODE); + if (mode) + hdev->ssp_debug_mode = *mode; +} + static void hci_cs_inquiry(struct hci_dev *hdev, __u8 status) { BT_DBG("%s status 0x%2.2x", hdev->name, status); @@ -2112,6 +2217,7 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) } else conn->state = BT_CONNECTED; + hci_debugfs_create_conn(conn); hci_conn_add_sysfs(conn); if (test_bit(HCI_AUTH, &hdev->flags)) @@ -2127,7 +2233,7 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_send_cmd(hdev, HCI_OP_READ_REMOTE_FEATURES, sizeof(cp), &cp); - hci_update_page_scan(hdev, NULL); + hci_update_page_scan(hdev); } /* Set packet type for incoming connection */ @@ -2194,7 +2300,12 @@ static void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *skb) return; } - if (!test_bit(HCI_CONNECTABLE, &hdev->dev_flags) && + /* Require HCI_CONNECTABLE or a whitelist entry to accept the + * connection. These features are only touched through mgmt so + * only do the checks if HCI_MGMT is set. 
+ */ + if (test_bit(HCI_MGMT, &hdev->dev_flags) && + !test_bit(HCI_CONNECTABLE, &hdev->dev_flags) && !hci_bdaddr_list_lookup(&hdev->whitelist, &ev->bdaddr, BDADDR_BREDR)) { hci_reject_conn(hdev, &ev->bdaddr); @@ -2308,7 +2419,7 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) if (test_bit(HCI_CONN_FLUSH_KEY, &conn->flags)) hci_remove_link_key(hdev, &conn->dst); - hci_update_page_scan(hdev, NULL); + hci_update_page_scan(hdev); } params = hci_conn_params_lookup(hdev, &conn->dst, conn->dst_type); @@ -2575,7 +2686,8 @@ static void hci_remote_features_evt(struct hci_dev *hdev, if (conn->state != BT_CONFIG) goto unlock; - if (!ev->status && lmp_ssp_capable(hdev) && lmp_ssp_capable(conn)) { + if (!ev->status && lmp_ext_feat_capable(hdev) && + lmp_ext_feat_capable(conn)) { struct hci_cp_read_remote_ext_features cp; cp.handle = ev->handle; cp.page = 0x01; @@ -2654,6 +2766,14 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_cc_reset(hdev, skb); break; + case HCI_OP_READ_STORED_LINK_KEY: + hci_cc_read_stored_link_key(hdev, skb); + break; + + case HCI_OP_DELETE_STORED_LINK_KEY: + hci_cc_delete_stored_link_key(hdev, skb); + break; + case HCI_OP_WRITE_LOCAL_NAME: hci_cc_write_local_name(hdev, skb); break; @@ -2846,6 +2966,18 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_cc_le_read_supported_states(hdev, skb); break; + case HCI_OP_LE_READ_DEF_DATA_LEN: + hci_cc_le_read_def_data_len(hdev, skb); + break; + + case HCI_OP_LE_WRITE_DEF_DATA_LEN: + hci_cc_le_write_def_data_len(hdev, skb); + break; + + case HCI_OP_LE_READ_MAX_DATA_LEN: + hci_cc_le_read_max_data_len(hdev, skb); + break; + case HCI_OP_WRITE_LE_HOST_SUPPORTED: hci_cc_write_le_host_supported(hdev, skb); break; @@ -2866,6 +2998,10 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_cc_read_tx_power(hdev, skb); break; + case HCI_OP_WRITE_SSP_DEBUG_MODE: + hci_cc_write_ssp_debug_mode(hdev, skb); + break; + default: BT_DBG("%s opcode 0x%4.4x", hdev->name, opcode); break; @@ -2984,7 +3120,9 @@ static void hci_hardware_error_evt(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_ev_hardware_error *ev = (void *) skb->data; - BT_ERR("%s hardware error 0x%2.2x", hdev->name, ev->code); + hdev->hw_error_code = ev->code; + + queue_work(hdev->req_workqueue, &hdev->error_reset); } static void hci_role_change_evt(struct hci_dev *hdev, struct sk_buff *skb) @@ -3576,6 +3714,7 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev, conn->handle = __le16_to_cpu(ev->handle); conn->state = BT_CONNECTED; + hci_debugfs_create_conn(conn); hci_conn_add_sysfs(conn); break; @@ -3742,6 +3881,52 @@ static u8 hci_get_auth_req(struct hci_conn *conn) return (conn->remote_auth & ~0x01) | (conn->auth_type & 0x01); } +static u8 bredr_oob_data_present(struct hci_conn *conn) +{ + struct hci_dev *hdev = conn->hdev; + struct oob_data *data; + + data = hci_find_remote_oob_data(hdev, &conn->dst, BDADDR_BREDR); + if (!data) + return 0x00; + + if (conn->out || test_bit(HCI_CONN_REMOTE_OOB, &conn->flags)) { + if (bredr_sc_enabled(hdev)) { + /* When Secure Connections is enabled, then just + * return the present value stored with the OOB + * data. The stored value contains the right present + * information. However it can only be trusted when + * not in Secure Connection Only mode. 
+ */ + if (!test_bit(HCI_SC_ONLY, &hdev->dev_flags)) + return data->present; + + /* When Secure Connections Only mode is enabled, then + * the P-256 values are required. If they are not + * available, then do not declare that OOB data is + * present. + */ + if (!memcmp(data->rand256, ZERO_KEY, 16) || + !memcmp(data->hash256, ZERO_KEY, 16)) + return 0x00; + + return 0x02; + } + + /* When Secure Connections is not enabled or actually + * not supported by the hardware, then check that if + * P-192 data values are present. + */ + if (!memcmp(data->rand192, ZERO_KEY, 16) || + !memcmp(data->hash192, ZERO_KEY, 16)) + return 0x00; + + return 0x01; + } + + return 0x00; +} + static void hci_io_capa_request_evt(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_ev_io_capa_request *ev = (void *) skb->data; @@ -3793,12 +3978,7 @@ static void hci_io_capa_request_evt(struct hci_dev *hdev, struct sk_buff *skb) conn->auth_type &= HCI_AT_NO_BONDING_MITM; cp.authentication = conn->auth_type; - - if (hci_find_remote_oob_data(hdev, &conn->dst, BDADDR_BREDR) && - (conn->out || test_bit(HCI_CONN_REMOTE_OOB, &conn->flags))) - cp.oob_data = 0x01; - else - cp.oob_data = 0x00; + cp.oob_data = bredr_oob_data_present(conn); hci_send_cmd(hdev, HCI_OP_IO_CAPABILITY_REPLY, sizeof(cp), &cp); @@ -4050,33 +4230,39 @@ static void hci_remote_oob_data_request_evt(struct hci_dev *hdev, goto unlock; data = hci_find_remote_oob_data(hdev, &ev->bdaddr, BDADDR_BREDR); - if (data) { - if (bredr_sc_enabled(hdev)) { - struct hci_cp_remote_oob_ext_data_reply cp; - - bacpy(&cp.bdaddr, &ev->bdaddr); - memcpy(cp.hash192, data->hash192, sizeof(cp.hash192)); - memcpy(cp.rand192, data->rand192, sizeof(cp.rand192)); - memcpy(cp.hash256, data->hash256, sizeof(cp.hash256)); - memcpy(cp.rand256, data->rand256, sizeof(cp.rand256)); + if (!data) { + struct hci_cp_remote_oob_data_neg_reply cp; - hci_send_cmd(hdev, HCI_OP_REMOTE_OOB_EXT_DATA_REPLY, - sizeof(cp), &cp); - } else { - struct hci_cp_remote_oob_data_reply cp; + bacpy(&cp.bdaddr, &ev->bdaddr); + hci_send_cmd(hdev, HCI_OP_REMOTE_OOB_DATA_NEG_REPLY, + sizeof(cp), &cp); + goto unlock; + } - bacpy(&cp.bdaddr, &ev->bdaddr); - memcpy(cp.hash, data->hash192, sizeof(cp.hash)); - memcpy(cp.rand, data->rand192, sizeof(cp.rand)); + if (bredr_sc_enabled(hdev)) { + struct hci_cp_remote_oob_ext_data_reply cp; - hci_send_cmd(hdev, HCI_OP_REMOTE_OOB_DATA_REPLY, - sizeof(cp), &cp); + bacpy(&cp.bdaddr, &ev->bdaddr); + if (test_bit(HCI_SC_ONLY, &hdev->dev_flags)) { + memset(cp.hash192, 0, sizeof(cp.hash192)); + memset(cp.rand192, 0, sizeof(cp.rand192)); + } else { + memcpy(cp.hash192, data->hash192, sizeof(cp.hash192)); + memcpy(cp.rand192, data->rand192, sizeof(cp.rand192)); } + memcpy(cp.hash256, data->hash256, sizeof(cp.hash256)); + memcpy(cp.rand256, data->rand256, sizeof(cp.rand256)); + + hci_send_cmd(hdev, HCI_OP_REMOTE_OOB_EXT_DATA_REPLY, + sizeof(cp), &cp); } else { - struct hci_cp_remote_oob_data_neg_reply cp; + struct hci_cp_remote_oob_data_reply cp; bacpy(&cp.bdaddr, &ev->bdaddr); - hci_send_cmd(hdev, HCI_OP_REMOTE_OOB_DATA_NEG_REPLY, + memcpy(cp.hash, data->hash192, sizeof(cp.hash)); + memcpy(cp.rand, data->rand192, sizeof(cp.rand)); + + hci_send_cmd(hdev, HCI_OP_REMOTE_OOB_DATA_REPLY, sizeof(cp), &cp); } @@ -4116,6 +4302,7 @@ static void hci_phy_link_complete_evt(struct hci_dev *hdev, hcon->disc_timeout = HCI_DISCONN_TIMEOUT; hci_conn_drop(hcon); + hci_debugfs_create_conn(hcon); hci_conn_add_sysfs(hcon); amp_physical_cfm(bredr_hcon, hcon); @@ -4322,6 +4509,7 @@ static void 
hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) conn->le_conn_latency = le16_to_cpu(ev->latency); conn->le_supv_timeout = le16_to_cpu(ev->supervision_timeout); + hci_debugfs_create_conn(conn); hci_conn_add_sysfs(conn); hci_proto_connect_cfm(conn, ev->status); diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c new file mode 100644 index 000000000000..b59f92c6df0c --- /dev/null +++ b/net/bluetooth/hci_request.c @@ -0,0 +1,556 @@ +/* + BlueZ - Bluetooth protocol stack for Linux + + Copyright (C) 2014 Intel Corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License version 2 as + published by the Free Software Foundation; + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. + IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY + CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, + COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS + SOFTWARE IS DISCLAIMED. +*/ + +#include <net/bluetooth/bluetooth.h> +#include <net/bluetooth/hci_core.h> + +#include "smp.h" +#include "hci_request.h" + +void hci_req_init(struct hci_request *req, struct hci_dev *hdev) +{ + skb_queue_head_init(&req->cmd_q); + req->hdev = hdev; + req->err = 0; +} + +int hci_req_run(struct hci_request *req, hci_req_complete_t complete) +{ + struct hci_dev *hdev = req->hdev; + struct sk_buff *skb; + unsigned long flags; + + BT_DBG("length %u", skb_queue_len(&req->cmd_q)); + + /* If an error occurred during request building, remove all HCI + * commands queued on the HCI request queue. + */ + if (req->err) { + skb_queue_purge(&req->cmd_q); + return req->err; + } + + /* Do not allow empty requests */ + if (skb_queue_empty(&req->cmd_q)) + return -ENODATA; + + skb = skb_peek_tail(&req->cmd_q); + bt_cb(skb)->req.complete = complete; + + spin_lock_irqsave(&hdev->cmd_q.lock, flags); + skb_queue_splice_tail(&req->cmd_q, &hdev->cmd_q); + spin_unlock_irqrestore(&hdev->cmd_q.lock, flags); + + queue_work(hdev->workqueue, &hdev->cmd_work); + + return 0; +} + +struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, u32 plen, + const void *param) +{ + int len = HCI_COMMAND_HDR_SIZE + plen; + struct hci_command_hdr *hdr; + struct sk_buff *skb; + + skb = bt_skb_alloc(len, GFP_ATOMIC); + if (!skb) + return NULL; + + hdr = (struct hci_command_hdr *) skb_put(skb, HCI_COMMAND_HDR_SIZE); + hdr->opcode = cpu_to_le16(opcode); + hdr->plen = plen; + + if (plen) + memcpy(skb_put(skb, plen), param, plen); + + BT_DBG("skb len %d", skb->len); + + bt_cb(skb)->pkt_type = HCI_COMMAND_PKT; + bt_cb(skb)->opcode = opcode; + + return skb; +} + +/* Queue a command to an asynchronous HCI request */ +void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen, + const void *param, u8 event) +{ + struct hci_dev *hdev = req->hdev; + struct sk_buff *skb; + + BT_DBG("%s opcode 0x%4.4x plen %d", hdev->name, opcode, plen); + + /* If an error occurred during request building, there is no point in + * queueing the HCI command. 
We can simply return. + */ + if (req->err) + return; + + skb = hci_prepare_cmd(hdev, opcode, plen, param); + if (!skb) { + BT_ERR("%s no memory for command (opcode 0x%4.4x)", + hdev->name, opcode); + req->err = -ENOMEM; + return; + } + + if (skb_queue_empty(&req->cmd_q)) + bt_cb(skb)->req.start = true; + + bt_cb(skb)->req.event = event; + + skb_queue_tail(&req->cmd_q, skb); +} + +void hci_req_add(struct hci_request *req, u16 opcode, u32 plen, + const void *param) +{ + hci_req_add_ev(req, opcode, plen, param, 0); +} + +void hci_req_add_le_scan_disable(struct hci_request *req) +{ + struct hci_cp_le_set_scan_enable cp; + + memset(&cp, 0, sizeof(cp)); + cp.enable = LE_SCAN_DISABLE; + hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp); +} + +static void add_to_white_list(struct hci_request *req, + struct hci_conn_params *params) +{ + struct hci_cp_le_add_to_white_list cp; + + cp.bdaddr_type = params->addr_type; + bacpy(&cp.bdaddr, &params->addr); + + hci_req_add(req, HCI_OP_LE_ADD_TO_WHITE_LIST, sizeof(cp), &cp); +} + +static u8 update_white_list(struct hci_request *req) +{ + struct hci_dev *hdev = req->hdev; + struct hci_conn_params *params; + struct bdaddr_list *b; + uint8_t white_list_entries = 0; + + /* Go through the current white list programmed into the + * controller one by one and check if that address is still + * in the list of pending connections or list of devices to + * report. If not present in either list, then queue the + * command to remove it from the controller. + */ + list_for_each_entry(b, &hdev->le_white_list, list) { + struct hci_cp_le_del_from_white_list cp; + + if (hci_pend_le_action_lookup(&hdev->pend_le_conns, + &b->bdaddr, b->bdaddr_type) || + hci_pend_le_action_lookup(&hdev->pend_le_reports, + &b->bdaddr, b->bdaddr_type)) { + white_list_entries++; + continue; + } + + cp.bdaddr_type = b->bdaddr_type; + bacpy(&cp.bdaddr, &b->bdaddr); + + hci_req_add(req, HCI_OP_LE_DEL_FROM_WHITE_LIST, + sizeof(cp), &cp); + } + + /* Since all no longer valid white list entries have been + * removed, walk through the list of pending connections + * and ensure that any new device gets programmed into + * the controller. + * + * If the list of the devices is larger than the list of + * available white list entries in the controller, then + * just abort and return filer policy value to not use the + * white list. + */ + list_for_each_entry(params, &hdev->pend_le_conns, action) { + if (hci_bdaddr_list_lookup(&hdev->le_white_list, + &params->addr, params->addr_type)) + continue; + + if (white_list_entries >= hdev->le_white_list_size) { + /* Select filter policy to accept all advertising */ + return 0x00; + } + + if (hci_find_irk_by_addr(hdev, &params->addr, + params->addr_type)) { + /* White list can not be used with RPAs */ + return 0x00; + } + + white_list_entries++; + add_to_white_list(req, params); + } + + /* After adding all new pending connections, walk through + * the list of pending reports and also add these to the + * white list if there is still space. 
+ */ + list_for_each_entry(params, &hdev->pend_le_reports, action) { + if (hci_bdaddr_list_lookup(&hdev->le_white_list, + &params->addr, params->addr_type)) + continue; + + if (white_list_entries >= hdev->le_white_list_size) { + /* Select filter policy to accept all advertising */ + return 0x00; + } + + if (hci_find_irk_by_addr(hdev, &params->addr, + params->addr_type)) { + /* White list can not be used with RPAs */ + return 0x00; + } + + white_list_entries++; + add_to_white_list(req, params); + } + + /* Select filter policy to use white list */ + return 0x01; +} + +void hci_req_add_le_passive_scan(struct hci_request *req) +{ + struct hci_cp_le_set_scan_param param_cp; + struct hci_cp_le_set_scan_enable enable_cp; + struct hci_dev *hdev = req->hdev; + u8 own_addr_type; + u8 filter_policy; + + /* Set require_privacy to false since no SCAN_REQ are send + * during passive scanning. Not using an non-resolvable address + * here is important so that peer devices using direct + * advertising with our address will be correctly reported + * by the controller. + */ + if (hci_update_random_address(req, false, &own_addr_type)) + return; + + /* Adding or removing entries from the white list must + * happen before enabling scanning. The controller does + * not allow white list modification while scanning. + */ + filter_policy = update_white_list(req); + + /* When the controller is using random resolvable addresses and + * with that having LE privacy enabled, then controllers with + * Extended Scanner Filter Policies support can now enable support + * for handling directed advertising. + * + * So instead of using filter polices 0x00 (no whitelist) + * and 0x01 (whitelist enabled) use the new filter policies + * 0x02 (no whitelist) and 0x03 (whitelist enabled). + */ + if (test_bit(HCI_PRIVACY, &hdev->dev_flags) && + (hdev->le_features[0] & HCI_LE_EXT_SCAN_POLICY)) + filter_policy |= 0x02; + + memset(&param_cp, 0, sizeof(param_cp)); + param_cp.type = LE_SCAN_PASSIVE; + param_cp.interval = cpu_to_le16(hdev->le_scan_interval); + param_cp.window = cpu_to_le16(hdev->le_scan_window); + param_cp.own_address_type = own_addr_type; + param_cp.filter_policy = filter_policy; + hci_req_add(req, HCI_OP_LE_SET_SCAN_PARAM, sizeof(param_cp), + &param_cp); + + memset(&enable_cp, 0, sizeof(enable_cp)); + enable_cp.enable = LE_SCAN_ENABLE; + enable_cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE; + hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(enable_cp), + &enable_cp); +} + +static void set_random_addr(struct hci_request *req, bdaddr_t *rpa) +{ + struct hci_dev *hdev = req->hdev; + + /* If we're advertising or initiating an LE connection we can't + * go ahead and change the random address at this time. This is + * because the eventual initiator address used for the + * subsequently created connection will be undefined (some + * controllers use the new address and others the one we had + * when the operation started). + * + * In this kind of scenario skip the update and let the random + * address be updated at the next cycle. + */ + if (test_bit(HCI_LE_ADV, &hdev->dev_flags) || + hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT)) { + BT_DBG("Deferring random address update"); + set_bit(HCI_RPA_EXPIRED, &hdev->dev_flags); + return; + } + + hci_req_add(req, HCI_OP_LE_SET_RANDOM_ADDR, 6, rpa); +} + +int hci_update_random_address(struct hci_request *req, bool require_privacy, + u8 *own_addr_type) +{ + struct hci_dev *hdev = req->hdev; + int err; + + /* If privacy is enabled use a resolvable private address. 
If + * current RPA has expired or there is something else than + * the current RPA in use, then generate a new one. + */ + if (test_bit(HCI_PRIVACY, &hdev->dev_flags)) { + int to; + + *own_addr_type = ADDR_LE_DEV_RANDOM; + + if (!test_and_clear_bit(HCI_RPA_EXPIRED, &hdev->dev_flags) && + !bacmp(&hdev->random_addr, &hdev->rpa)) + return 0; + + err = smp_generate_rpa(hdev, hdev->irk, &hdev->rpa); + if (err < 0) { + BT_ERR("%s failed to generate new RPA", hdev->name); + return err; + } + + set_random_addr(req, &hdev->rpa); + + to = msecs_to_jiffies(hdev->rpa_timeout * 1000); + queue_delayed_work(hdev->workqueue, &hdev->rpa_expired, to); + + return 0; + } + + /* In case of required privacy without resolvable private address, + * use an non-resolvable private address. This is useful for active + * scanning and non-connectable advertising. + */ + if (require_privacy) { + bdaddr_t nrpa; + + while (true) { + /* The non-resolvable private address is generated + * from random six bytes with the two most significant + * bits cleared. + */ + get_random_bytes(&nrpa, 6); + nrpa.b[5] &= 0x3f; + + /* The non-resolvable private address shall not be + * equal to the public address. + */ + if (bacmp(&hdev->bdaddr, &nrpa)) + break; + } + + *own_addr_type = ADDR_LE_DEV_RANDOM; + set_random_addr(req, &nrpa); + return 0; + } + + /* If forcing static address is in use or there is no public + * address use the static address as random address (but skip + * the HCI command if the current random address is already the + * static one. + * + * In case BR/EDR has been disabled on a dual-mode controller + * and a static address has been configured, then use that + * address instead of the public BR/EDR address. + */ + if (test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags) || + !bacmp(&hdev->bdaddr, BDADDR_ANY) || + (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags) && + bacmp(&hdev->static_addr, BDADDR_ANY))) { + *own_addr_type = ADDR_LE_DEV_RANDOM; + if (bacmp(&hdev->static_addr, &hdev->random_addr)) + hci_req_add(req, HCI_OP_LE_SET_RANDOM_ADDR, 6, + &hdev->static_addr); + return 0; + } + + /* Neither privacy nor static address is being used so use a + * public address. + */ + *own_addr_type = ADDR_LE_DEV_PUBLIC; + + return 0; +} + +static bool disconnected_whitelist_entries(struct hci_dev *hdev) +{ + struct bdaddr_list *b; + + list_for_each_entry(b, &hdev->whitelist, list) { + struct hci_conn *conn; + + conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &b->bdaddr); + if (!conn) + return true; + + if (conn->state != BT_CONNECTED && conn->state != BT_CONFIG) + return true; + } + + return false; +} + +void __hci_update_page_scan(struct hci_request *req) +{ + struct hci_dev *hdev = req->hdev; + u8 scan; + + if (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) + return; + + if (!hdev_is_powered(hdev)) + return; + + if (mgmt_powering_down(hdev)) + return; + + if (test_bit(HCI_CONNECTABLE, &hdev->dev_flags) || + disconnected_whitelist_entries(hdev)) + scan = SCAN_PAGE; + else + scan = SCAN_DISABLED; + + if (test_bit(HCI_PSCAN, &hdev->flags) == !!(scan & SCAN_PAGE)) + return; + + if (test_bit(HCI_DISCOVERABLE, &hdev->dev_flags)) + scan |= SCAN_INQUIRY; + + hci_req_add(req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); +} + +void hci_update_page_scan(struct hci_dev *hdev) +{ + struct hci_request req; + + hci_req_init(&req, hdev); + __hci_update_page_scan(&req); + hci_req_run(&req, NULL); +} + +/* This function controls the background scanning based on hdev->pend_le_conns + * list. 
If there are pending LE connection we start the background scanning, + * otherwise we stop it. + * + * This function requires the caller holds hdev->lock. + */ +void __hci_update_background_scan(struct hci_request *req) +{ + struct hci_dev *hdev = req->hdev; + struct hci_conn *conn; + + if (!test_bit(HCI_UP, &hdev->flags) || + test_bit(HCI_INIT, &hdev->flags) || + test_bit(HCI_SETUP, &hdev->dev_flags) || + test_bit(HCI_CONFIG, &hdev->dev_flags) || + test_bit(HCI_AUTO_OFF, &hdev->dev_flags) || + test_bit(HCI_UNREGISTER, &hdev->dev_flags)) + return; + + /* No point in doing scanning if LE support hasn't been enabled */ + if (!test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) + return; + + /* If discovery is active don't interfere with it */ + if (hdev->discovery.state != DISCOVERY_STOPPED) + return; + + /* Reset RSSI and UUID filters when starting background scanning + * since these filters are meant for service discovery only. + * + * The Start Discovery and Start Service Discovery operations + * ensure to set proper values for RSSI threshold and UUID + * filter list. So it is safe to just reset them here. + */ + hci_discovery_filter_clear(hdev); + + if (list_empty(&hdev->pend_le_conns) && + list_empty(&hdev->pend_le_reports)) { + /* If there is no pending LE connections or devices + * to be scanned for, we should stop the background + * scanning. + */ + + /* If controller is not scanning we are done. */ + if (!test_bit(HCI_LE_SCAN, &hdev->dev_flags)) + return; + + hci_req_add_le_scan_disable(req); + + BT_DBG("%s stopping background scanning", hdev->name); + } else { + /* If there is at least one pending LE connection, we should + * keep the background scan running. + */ + + /* If controller is connecting, we should not start scanning + * since some controllers are not able to scan and connect at + * the same time. + */ + conn = hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT); + if (conn) + return; + + /* If controller is currently scanning, we stop it to ensure we + * don't miss any advertising (due to duplicates filter). + */ + if (test_bit(HCI_LE_SCAN, &hdev->dev_flags)) + hci_req_add_le_scan_disable(req); + + hci_req_add_le_passive_scan(req); + + BT_DBG("%s starting background scanning", hdev->name); + } +} + +static void update_background_scan_complete(struct hci_dev *hdev, u8 status, + u16 opcode) +{ + if (status) + BT_DBG("HCI request failed to update background scanning: " + "status 0x%2.2x", status); +} + +void hci_update_background_scan(struct hci_dev *hdev) +{ + int err; + struct hci_request req; + + hci_req_init(&req, hdev); + + __hci_update_background_scan(&req); + + err = hci_req_run(&req, update_background_scan_complete); + if (err && err != -ENODATA) + BT_ERR("Failed to run HCI request: err %d", err); +} diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h new file mode 100644 index 000000000000..adf074d33544 --- /dev/null +++ b/net/bluetooth/hci_request.h @@ -0,0 +1,54 @@ +/* + BlueZ - Bluetooth protocol stack for Linux + Copyright (C) 2014 Intel Corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License version 2 as + published by the Free Software Foundation; + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. 
+ IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY + CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, + COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS + SOFTWARE IS DISCLAIMED. +*/ + +struct hci_request { + struct hci_dev *hdev; + struct sk_buff_head cmd_q; + + /* If something goes wrong when building the HCI request, the error + * value is stored in this field. + */ + int err; +}; + +void hci_req_init(struct hci_request *req, struct hci_dev *hdev); +int hci_req_run(struct hci_request *req, hci_req_complete_t complete); +void hci_req_add(struct hci_request *req, u16 opcode, u32 plen, + const void *param); +void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen, + const void *param, u8 event); +void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status); + +struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, u32 plen, + const void *param); + +void hci_req_add_le_scan_disable(struct hci_request *req); +void hci_req_add_le_passive_scan(struct hci_request *req); + +void hci_update_page_scan(struct hci_dev *hdev); +void __hci_update_page_scan(struct hci_request *req); + +int hci_update_random_address(struct hci_request *req, bool require_privacy, + u8 *own_addr_type); + +void hci_update_background_scan(struct hci_dev *hdev); +void __hci_update_background_scan(struct hci_request *req); diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 2c245fdf319a..1d65c5be7c82 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -216,11 +216,39 @@ void hci_send_to_control(struct sk_buff *skb, struct sock *skip_sk) read_unlock(&hci_sk_list.lock); } +static void queue_monitor_skb(struct sk_buff *skb) +{ + struct sock *sk; + + BT_DBG("len %d", skb->len); + + read_lock(&hci_sk_list.lock); + + sk_for_each(sk, &hci_sk_list.head) { + struct sk_buff *nskb; + + if (sk->sk_state != BT_BOUND) + continue; + + if (hci_pi(sk)->channel != HCI_CHANNEL_MONITOR) + continue; + + nskb = skb_clone(skb, GFP_ATOMIC); + if (!nskb) + continue; + + if (sock_queue_rcv_skb(sk, nskb)) + kfree_skb(nskb); + } + + read_unlock(&hci_sk_list.lock); +} + /* Send frame to monitor socket */ void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb) { - struct sock *sk; struct sk_buff *skb_copy = NULL; + struct hci_mon_hdr *hdr; __le16 opcode; if (!atomic_read(&monitor_promisc)) @@ -251,74 +279,21 @@ void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb) return; } - read_lock(&hci_sk_list.lock); - - sk_for_each(sk, &hci_sk_list.head) { - struct sk_buff *nskb; - - if (sk->sk_state != BT_BOUND) - continue; - - if (hci_pi(sk)->channel != HCI_CHANNEL_MONITOR) - continue; - - if (!skb_copy) { - struct hci_mon_hdr *hdr; - - /* Create a private copy with headroom */ - skb_copy = __pskb_copy_fclone(skb, HCI_MON_HDR_SIZE, - GFP_ATOMIC, true); - if (!skb_copy) - continue; - - /* Put header before the data */ - hdr = (void *) skb_push(skb_copy, HCI_MON_HDR_SIZE); - hdr->opcode = opcode; - hdr->index = cpu_to_le16(hdev->id); - hdr->len = cpu_to_le16(skb->len); - } - - nskb = skb_clone(skb_copy, GFP_ATOMIC); - if (!nskb) - continue; - - if (sock_queue_rcv_skb(sk, nskb)) - kfree_skb(nskb); - } + /* Create a private copy with 
headroom */ + skb_copy = __pskb_copy_fclone(skb, HCI_MON_HDR_SIZE, GFP_ATOMIC, true); + if (!skb_copy) + return; - read_unlock(&hci_sk_list.lock); + /* Put header before the data */ + hdr = (void *) skb_push(skb_copy, HCI_MON_HDR_SIZE); + hdr->opcode = opcode; + hdr->index = cpu_to_le16(hdev->id); + hdr->len = cpu_to_le16(skb->len); + queue_monitor_skb(skb_copy); kfree_skb(skb_copy); } -static void send_monitor_event(struct sk_buff *skb) -{ - struct sock *sk; - - BT_DBG("len %d", skb->len); - - read_lock(&hci_sk_list.lock); - - sk_for_each(sk, &hci_sk_list.head) { - struct sk_buff *nskb; - - if (sk->sk_state != BT_BOUND) - continue; - - if (hci_pi(sk)->channel != HCI_CHANNEL_MONITOR) - continue; - - nskb = skb_clone(skb, GFP_ATOMIC); - if (!nskb) - continue; - - if (sock_queue_rcv_skb(sk, nskb)) - kfree_skb(nskb); - } - - read_unlock(&hci_sk_list.lock); -} - static struct sk_buff *create_monitor_event(struct hci_dev *hdev, int event) { struct hci_mon_hdr *hdr; @@ -422,7 +397,7 @@ void hci_sock_dev_event(struct hci_dev *hdev, int event) skb = create_monitor_event(hdev, event); if (skb) { - send_monitor_event(skb); + queue_monitor_skb(skb); kfree_skb(skb); } } @@ -1230,6 +1205,8 @@ int __init hci_sock_init(void) { int err; + BUILD_BUG_ON(sizeof(struct sockaddr_hci) > sizeof(struct sockaddr)); + err = proto_register(&hci_sk_proto, 0); if (err < 0) return err; diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index cc25d0b74b36..07348e142f16 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -1314,13 +1314,14 @@ int hidp_connection_add(struct hidp_connadd_req *req, { struct hidp_session *session; struct l2cap_conn *conn; - struct l2cap_chan *chan = l2cap_pi(ctrl_sock->sk)->chan; + struct l2cap_chan *chan; int ret; ret = hidp_verify_sockets(ctrl_sock, intr_sock); if (ret) return ret; + chan = l2cap_pi(ctrl_sock->sk)->chan; conn = NULL; l2cap_chan_lock(chan); if (chan->conn) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index d04dc0095736..6ba33f9631e8 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -63,10 +63,10 @@ static void l2cap_send_disconn_req(struct l2cap_chan *chan, int err); static void l2cap_tx(struct l2cap_chan *chan, struct l2cap_ctrl *control, struct sk_buff_head *skbs, u8 event); -static inline __u8 bdaddr_type(struct hci_conn *hcon, __u8 type) +static inline u8 bdaddr_type(u8 link_type, u8 bdaddr_type) { - if (hcon->type == LE_LINK) { - if (type == ADDR_LE_DEV_PUBLIC) + if (link_type == LE_LINK) { + if (bdaddr_type == ADDR_LE_DEV_PUBLIC) return BDADDR_LE_PUBLIC; else return BDADDR_LE_RANDOM; @@ -75,6 +75,16 @@ static inline __u8 bdaddr_type(struct hci_conn *hcon, __u8 type) return BDADDR_BREDR; } +static inline u8 bdaddr_src_type(struct hci_conn *hcon) +{ + return bdaddr_type(hcon->type, hcon->src_type); +} + +static inline u8 bdaddr_dst_type(struct hci_conn *hcon) +{ + return bdaddr_type(hcon->type, hcon->dst_type); +} + /* ---- L2CAP channels ---- */ static struct l2cap_chan *__l2cap_get_chan_by_dcid(struct l2cap_conn *conn, @@ -646,7 +656,7 @@ static void l2cap_conn_update_id_addr(struct work_struct *work) list_for_each_entry(chan, &conn->chan_l, list) { l2cap_chan_lock(chan); bacpy(&chan->dst, &hcon->dst); - chan->dst_type = bdaddr_type(hcon, hcon->dst_type); + chan->dst_type = bdaddr_dst_type(hcon); l2cap_chan_unlock(chan); } @@ -3790,8 +3800,8 @@ static struct l2cap_chan *l2cap_connect(struct l2cap_conn *conn, bacpy(&chan->src, &conn->hcon->src); bacpy(&chan->dst, &conn->hcon->dst); - 
chan->src_type = bdaddr_type(conn->hcon, conn->hcon->src_type); - chan->dst_type = bdaddr_type(conn->hcon, conn->hcon->dst_type); + chan->src_type = bdaddr_src_type(conn->hcon); + chan->dst_type = bdaddr_dst_type(conn->hcon); chan->psm = psm; chan->dcid = scid; chan->local_amp_id = amp_id; @@ -5441,8 +5451,8 @@ static int l2cap_le_connect_req(struct l2cap_conn *conn, bacpy(&chan->src, &conn->hcon->src); bacpy(&chan->dst, &conn->hcon->dst); - chan->src_type = bdaddr_type(conn->hcon, conn->hcon->src_type); - chan->dst_type = bdaddr_type(conn->hcon, conn->hcon->dst_type); + chan->src_type = bdaddr_src_type(conn->hcon); + chan->dst_type = bdaddr_dst_type(conn->hcon); chan->psm = psm; chan->dcid = scid; chan->omtu = mtu; @@ -6881,7 +6891,7 @@ static void l2cap_recv_frame(struct l2cap_conn *conn, struct sk_buff *skb) */ if (hcon->type == LE_LINK && hci_bdaddr_list_lookup(&hcon->hdev->blacklist, &hcon->dst, - bdaddr_type(hcon, hcon->dst_type))) { + bdaddr_dst_type(hcon))) { kfree_skb(skb); return; } @@ -6968,7 +6978,7 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon) if (test_bit(HCI_LE_ENABLED, &hcon->hdev->dev_flags) && (bredr_sc_enabled(hcon->hdev) || - test_bit(HCI_FORCE_LESC, &hcon->hdev->dbg_flags))) + test_bit(HCI_FORCE_BREDR_SMP, &hcon->hdev->dbg_flags))) conn->local_fixed_chan |= L2CAP_FC_SMP_BREDR; mutex_init(&conn->ident_lock); @@ -7123,7 +7133,7 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, /* Update source addr of the socket */ bacpy(&chan->src, &hcon->src); - chan->src_type = bdaddr_type(hcon, hcon->src_type); + chan->src_type = bdaddr_src_type(hcon); __l2cap_chan_add(conn, chan); @@ -7197,8 +7207,10 @@ int l2cap_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr) * global list (by passing NULL as first parameter). */ static struct l2cap_chan *l2cap_global_fixed_chan(struct l2cap_chan *c, - bdaddr_t *src, u8 link_type) + struct hci_conn *hcon) { + u8 src_type = bdaddr_src_type(hcon); + read_lock(&chan_list_lock); if (c) @@ -7211,11 +7223,9 @@ static struct l2cap_chan *l2cap_global_fixed_chan(struct l2cap_chan *c, continue; if (c->state != BT_LISTEN) continue; - if (bacmp(&c->src, src) && bacmp(&c->src, BDADDR_ANY)) + if (bacmp(&c->src, &hcon->src) && bacmp(&c->src, BDADDR_ANY)) continue; - if (link_type == ACL_LINK && c->src_type != BDADDR_BREDR) - continue; - if (link_type == LE_LINK && c->src_type == BDADDR_BREDR) + if (src_type != c->src_type) continue; l2cap_chan_hold(c); @@ -7246,7 +7256,7 @@ void l2cap_connect_cfm(struct hci_conn *hcon, u8 status) if (!conn) return; - dst_type = bdaddr_type(hcon, hcon->dst_type); + dst_type = bdaddr_dst_type(hcon); /* If device is blocked, do not create channels for it */ if (hci_bdaddr_list_lookup(&hdev->blacklist, &hcon->dst, dst_type)) @@ -7257,7 +7267,7 @@ void l2cap_connect_cfm(struct hci_conn *hcon, u8 status) * we left off, because the list lock would prevent calling the * potentially sleeping l2cap_chan_lock() function. 
*/ - pchan = l2cap_global_fixed_chan(NULL, &hdev->bdaddr, hcon->type); + pchan = l2cap_global_fixed_chan(NULL, hcon); while (pchan) { struct l2cap_chan *chan, *next; @@ -7270,7 +7280,7 @@ void l2cap_connect_cfm(struct hci_conn *hcon, u8 status) if (chan) { bacpy(&chan->src, &hcon->src); bacpy(&chan->dst, &hcon->dst); - chan->src_type = bdaddr_type(hcon, hcon->src_type); + chan->src_type = bdaddr_src_type(hcon); chan->dst_type = dst_type; __l2cap_chan_add(conn, chan); @@ -7278,8 +7288,7 @@ void l2cap_connect_cfm(struct hci_conn *hcon, u8 status) l2cap_chan_unlock(pchan); next: - next = l2cap_global_fixed_chan(pchan, &hdev->bdaddr, - hcon->type); + next = l2cap_global_fixed_chan(pchan, hcon); l2cap_chan_put(pchan); pchan = next; } @@ -7527,8 +7536,8 @@ static int l2cap_debugfs_show(struct seq_file *f, void *p) read_lock(&chan_list_lock); list_for_each_entry(c, &chan_list, global_l) { - seq_printf(f, "%pMR %pMR %d %d 0x%4.4x 0x%4.4x %d %d %d %d\n", - &c->src, &c->dst, + seq_printf(f, "%pMR (%u) %pMR (%u) %d %d 0x%4.4x 0x%4.4x %d %d %d %d\n", + &c->src, c->src_type, &c->dst, c->dst_type, c->state, __le16_to_cpu(c->psm), c->scid, c->dcid, c->imtu, c->omtu, c->sec_level, c->mode); diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index f65caf41953f..60694f0f4c73 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -302,7 +302,7 @@ done: static int l2cap_sock_accept(struct socket *sock, struct socket *newsock, int flags) { - DECLARE_WAITQUEUE(wait, current); + DEFINE_WAIT_FUNC(wait, woken_wake_function); struct sock *sk = sock->sk, *nsk; long timeo; int err = 0; @@ -316,8 +316,6 @@ static int l2cap_sock_accept(struct socket *sock, struct socket *newsock, /* Wait for an incoming connection. (wake-one). */ add_wait_queue_exclusive(sk_sleep(sk), &wait); while (1) { - set_current_state(TASK_INTERRUPTIBLE); - if (sk->sk_state != BT_LISTEN) { err = -EBADFD; break; @@ -338,10 +336,11 @@ static int l2cap_sock_accept(struct socket *sock, struct socket *newsock, } release_sock(sk); - timeo = schedule_timeout(timeo); + + timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, timeo); + lock_sock_nested(sk, L2CAP_NESTING_PARENT); } - __set_current_state(TASK_RUNNING); remove_wait_queue(sk_sleep(sk), &wait); if (err) @@ -1614,6 +1613,8 @@ int __init l2cap_init_sockets(void) { int err; + BUILD_BUG_ON(sizeof(struct sockaddr_l2) > sizeof(struct sockaddr)); + err = proto_register(&l2cap_proto, 0); if (err < 0) return err; diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 693ce8bcd06e..9ec5390c85eb 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -32,6 +32,7 @@ #include <net/bluetooth/l2cap.h> #include <net/bluetooth/mgmt.h> +#include "hci_request.h" #include "smp.h" #define MGMT_VERSION 1 @@ -130,6 +131,9 @@ static const u16 mgmt_events[] = { #define CACHE_TIMEOUT msecs_to_jiffies(2 * 1000) +#define ZERO_KEY "\x00\x00\x00\x00\x00\x00\x00\x00" \ + "\x00\x00\x00\x00\x00\x00\x00\x00" + struct pending_cmd { struct list_head list; u16 opcode; @@ -138,7 +142,7 @@ struct pending_cmd { size_t param_len; struct sock *sk; void *user_data; - void (*cmd_complete)(struct pending_cmd *cmd, u8 status); + int (*cmd_complete)(struct pending_cmd *cmd, u8 status); }; /* HCI to MGMT error code conversion table */ @@ -569,8 +573,7 @@ static u32 get_supported_settings(struct hci_dev *hdev) settings |= MGMT_SETTING_HS; } - if (lmp_sc_capable(hdev) || - test_bit(HCI_FORCE_SC, &hdev->dbg_flags)) + if (lmp_sc_capable(hdev)) settings |= MGMT_SETTING_SECURE_CONN; } @@ -1251,7 
+1254,7 @@ static int send_settings_rsp(struct sock *sk, u16 opcode, struct hci_dev *hdev) sizeof(settings)); } -static void clean_up_hci_complete(struct hci_dev *hdev, u8 status) +static void clean_up_hci_complete(struct hci_dev *hdev, u8 status, u16 opcode) { BT_DBG("%s status 0x%02x", hdev->name, status); @@ -1486,16 +1489,16 @@ static void cmd_complete_rsp(struct pending_cmd *cmd, void *data) cmd_status_rsp(cmd, data); } -static void generic_cmd_complete(struct pending_cmd *cmd, u8 status) +static int generic_cmd_complete(struct pending_cmd *cmd, u8 status) { - cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, cmd->param, - cmd->param_len); + return cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, + cmd->param, cmd->param_len); } -static void addr_cmd_complete(struct pending_cmd *cmd, u8 status) +static int addr_cmd_complete(struct pending_cmd *cmd, u8 status) { - cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, cmd->param, - sizeof(struct mgmt_addr_info)); + return cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, cmd->param, + sizeof(struct mgmt_addr_info)); } static u8 mgmt_bredr_support(struct hci_dev *hdev) @@ -1518,7 +1521,8 @@ static u8 mgmt_le_support(struct hci_dev *hdev) return MGMT_STATUS_SUCCESS; } -static void set_discoverable_complete(struct hci_dev *hdev, u8 status) +static void set_discoverable_complete(struct hci_dev *hdev, u8 status, + u16 opcode) { struct pending_cmd *cmd; struct mgmt_mode *cp; @@ -1566,7 +1570,7 @@ static void set_discoverable_complete(struct hci_dev *hdev, u8 status) * entries. */ hci_req_init(&req, hdev); - hci_update_page_scan(hdev, &req); + __hci_update_page_scan(&req); update_class(&req); hci_req_run(&req, NULL); @@ -1777,7 +1781,8 @@ static void write_fast_connectable(struct hci_request *req, bool enable) hci_req_add(req, HCI_OP_WRITE_PAGE_SCAN_TYPE, 1, &type); } -static void set_connectable_complete(struct hci_dev *hdev, u8 status) +static void set_connectable_complete(struct hci_dev *hdev, u8 status, + u16 opcode) { struct pending_cmd *cmd; struct mgmt_mode *cp; @@ -1813,7 +1818,7 @@ static void set_connectable_complete(struct hci_dev *hdev, u8 status) if (conn_changed || discov_changed) { new_settings(hdev, cmd->sk); - hci_update_page_scan(hdev, NULL); + hci_update_page_scan(hdev); if (discov_changed) mgmt_update_adv_data(hdev); hci_update_background_scan(hdev); @@ -1847,7 +1852,7 @@ static int set_connectable_update_settings(struct hci_dev *hdev, return err; if (changed) { - hci_update_page_scan(hdev, NULL); + hci_update_page_scan(hdev); hci_update_background_scan(hdev); return new_settings(hdev, sk); } @@ -2195,7 +2200,7 @@ unlock: return err; } -static void le_enable_complete(struct hci_dev *hdev, u8 status) +static void le_enable_complete(struct hci_dev *hdev, u8 status, u16 opcode) { struct cmd_lookup match = { NULL, hdev }; @@ -2227,9 +2232,8 @@ static void le_enable_complete(struct hci_dev *hdev, u8 status) hci_req_init(&req, hdev); update_adv_data(&req); update_scan_rsp_data(&req); + __hci_update_background_scan(&req); hci_req_run(&req, NULL); - - hci_update_background_scan(hdev); } unlock: @@ -2386,7 +2390,7 @@ unlock: hci_dev_unlock(hdev); } -static void add_uuid_complete(struct hci_dev *hdev, u8 status) +static void add_uuid_complete(struct hci_dev *hdev, u8 status, u16 opcode) { BT_DBG("status 0x%02x", status); @@ -2465,7 +2469,7 @@ static bool enable_service_cache(struct hci_dev *hdev) return false; } -static void remove_uuid_complete(struct hci_dev *hdev, u8 status) +static void 
remove_uuid_complete(struct hci_dev *hdev, u8 status, u16 opcode) { BT_DBG("status 0x%02x", status); @@ -2550,7 +2554,7 @@ unlock: return err; } -static void set_class_complete(struct hci_dev *hdev, u8 status) +static void set_class_complete(struct hci_dev *hdev, u8 status, u16 opcode) { BT_DBG("status 0x%02x", status); @@ -3098,16 +3102,17 @@ static struct pending_cmd *find_pairing(struct hci_conn *conn) return NULL; } -static void pairing_complete(struct pending_cmd *cmd, u8 status) +static int pairing_complete(struct pending_cmd *cmd, u8 status) { struct mgmt_rp_pair_device rp; struct hci_conn *conn = cmd->user_data; + int err; bacpy(&rp.addr.bdaddr, &conn->dst); rp.addr.type = link_to_bdaddr(conn->type, conn->dst_type); - cmd_complete(cmd->sk, cmd->index, MGMT_OP_PAIR_DEVICE, status, - &rp, sizeof(rp)); + err = cmd_complete(cmd->sk, cmd->index, MGMT_OP_PAIR_DEVICE, status, + &rp, sizeof(rp)); /* So we don't get further callbacks for this connection */ conn->connect_cfm_cb = NULL; @@ -3122,6 +3127,8 @@ static void pairing_complete(struct pending_cmd *cmd, u8 status) clear_bit(HCI_CONN_PARAM_REMOVAL_PEND, &conn->flags); hci_conn_put(conn); + + return err; } void mgmt_smp_complete(struct hci_conn *conn, bool complete) @@ -3481,7 +3488,7 @@ static void update_name(struct hci_request *req) hci_req_add(req, HCI_OP_WRITE_LOCAL_NAME, sizeof(cp), &cp); } -static void set_name_complete(struct hci_dev *hdev, u8 status) +static void set_name_complete(struct hci_dev *hdev, u8 status, u16 opcode) { struct mgmt_cp_set_local_name *cp; struct pending_cmd *cmd; @@ -3629,10 +3636,16 @@ unlock: static int add_remote_oob_data(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { + struct mgmt_addr_info *addr = data; int err; BT_DBG("%s ", hdev->name); + if (!bdaddr_type_is_valid(addr->type)) + return cmd_complete(sk, hdev->id, MGMT_OP_ADD_REMOTE_OOB_DATA, + MGMT_STATUS_INVALID_PARAMS, addr, + sizeof(*addr)); + hci_dev_lock(hdev); if (len == MGMT_ADD_REMOTE_OOB_DATA_SIZE) { @@ -3659,28 +3672,53 @@ static int add_remote_oob_data(struct sock *sk, struct hci_dev *hdev, status, &cp->addr, sizeof(cp->addr)); } else if (len == MGMT_ADD_REMOTE_OOB_EXT_DATA_SIZE) { struct mgmt_cp_add_remote_oob_ext_data *cp = data; - u8 *rand192, *hash192; + u8 *rand192, *hash192, *rand256, *hash256; u8 status; - if (cp->addr.type != BDADDR_BREDR) { - err = cmd_complete(sk, hdev->id, - MGMT_OP_ADD_REMOTE_OOB_DATA, - MGMT_STATUS_INVALID_PARAMS, - &cp->addr, sizeof(cp->addr)); - goto unlock; - } - if (bdaddr_type_is_le(cp->addr.type)) { + /* Enforce zero-valued 192-bit parameters as + * long as legacy SMP OOB isn't implemented. + */ + if (memcmp(cp->rand192, ZERO_KEY, 16) || + memcmp(cp->hash192, ZERO_KEY, 16)) { + err = cmd_complete(sk, hdev->id, + MGMT_OP_ADD_REMOTE_OOB_DATA, + MGMT_STATUS_INVALID_PARAMS, + addr, sizeof(*addr)); + goto unlock; + } + rand192 = NULL; hash192 = NULL; } else { - rand192 = cp->rand192; - hash192 = cp->hash192; + /* In case one of the P-192 values is set to zero, + * then just disable OOB data for P-192. + */ + if (!memcmp(cp->rand192, ZERO_KEY, 16) || + !memcmp(cp->hash192, ZERO_KEY, 16)) { + rand192 = NULL; + hash192 = NULL; + } else { + rand192 = cp->rand192; + hash192 = cp->hash192; + } + } + + /* In case one of the P-256 values is set to zero, then just + * disable OOB data for P-256. 
+ */ + if (!memcmp(cp->rand256, ZERO_KEY, 16) || + !memcmp(cp->hash256, ZERO_KEY, 16)) { + rand256 = NULL; + hash256 = NULL; + } else { + rand256 = cp->rand256; + hash256 = cp->hash256; } err = hci_add_remote_oob_data(hdev, &cp->addr.bdaddr, cp->addr.type, hash192, rand192, - cp->hash256, cp->rand256); + hash256, rand256); if (err < 0) status = MGMT_STATUS_FAILED; else @@ -3832,7 +3870,8 @@ static bool trigger_discovery(struct hci_request *req, u8 *status) return true; } -static void start_discovery_complete(struct hci_dev *hdev, u8 status) +static void start_discovery_complete(struct hci_dev *hdev, u8 status, + u16 opcode) { struct pending_cmd *cmd; unsigned long timeout; @@ -3857,6 +3896,9 @@ static void start_discovery_complete(struct hci_dev *hdev, u8 status) hci_discovery_set_state(hdev, DISCOVERY_FINDING); + /* If the scan involves LE scan, pick proper timeout to schedule + * hdev->le_scan_disable that will stop it. + */ switch (hdev->discovery.type) { case DISCOV_TYPE_LE: timeout = msecs_to_jiffies(DISCOV_LE_TIMEOUT); @@ -3873,9 +3915,23 @@ static void start_discovery_complete(struct hci_dev *hdev, u8 status) break; } - if (timeout) + if (timeout) { + /* When service discovery is used and the controller has + * a strict duplicate filter, it is important to remember + * the start and duration of the scan. This is required + * for restarting scanning during the discovery phase. + */ + if (test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, + &hdev->quirks) && + (hdev->discovery.uuid_count > 0 || + hdev->discovery.rssi != HCI_RSSI_INVALID)) { + hdev->discovery.scan_start = jiffies; + hdev->discovery.scan_duration = timeout; + } + queue_delayed_work(hdev->workqueue, &hdev->le_scan_disable, timeout); + } unlock: hci_dev_unlock(hdev); @@ -3947,9 +4003,10 @@ failed: return err; } -static void service_discovery_cmd_complete(struct pending_cmd *cmd, u8 status) +static int service_discovery_cmd_complete(struct pending_cmd *cmd, u8 status) { - cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, cmd->param, 1); + return cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, + cmd->param, 1); } static int start_service_discovery(struct sock *sk, struct hci_dev *hdev, @@ -4060,7 +4117,7 @@ failed: return err; } -static void stop_discovery_complete(struct hci_dev *hdev, u8 status) +static void stop_discovery_complete(struct hci_dev *hdev, u8 status, u16 opcode) { struct pending_cmd *cmd; @@ -4286,7 +4343,8 @@ static int set_device_id(struct sock *sk, struct hci_dev *hdev, void *data, return err; } -static void set_advertising_complete(struct hci_dev *hdev, u8 status) +static void set_advertising_complete(struct hci_dev *hdev, u8 status, + u16 opcode) { struct cmd_lookup match = { NULL, hdev }; @@ -4493,7 +4551,8 @@ static int set_scan_params(struct sock *sk, struct hci_dev *hdev, return err; } -static void fast_connectable_complete(struct hci_dev *hdev, u8 status) +static void fast_connectable_complete(struct hci_dev *hdev, u8 status, + u16 opcode) { struct pending_cmd *cmd; @@ -4591,7 +4650,7 @@ unlock: return err; } -static void set_bredr_complete(struct hci_dev *hdev, u8 status) +static void set_bredr_complete(struct hci_dev *hdev, u8 status, u16 opcode) { struct pending_cmd *cmd; @@ -4675,6 +4734,28 @@ static int set_bredr(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) err = cmd_status(sk, hdev->id, MGMT_OP_SET_BREDR, MGMT_STATUS_REJECTED); goto unlock; + } else { + /* When configuring a dual-mode controller to operate + * with LE only and using a static address, then switching + * 
BR/EDR back on is not allowed. + * + * Dual-mode controllers shall operate with the public + * address as its identity address for BR/EDR and LE. So + * reject the attempt to create an invalid configuration. + * + * The same restrictions applies when secure connections + * has been enabled. For BR/EDR this is a controller feature + * while for LE it is a host stack feature. This means that + * switching BR/EDR back on when secure connections has been + * enabled is not a supported transaction. + */ + if (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags) && + (bacmp(&hdev->static_addr, BDADDR_ANY) || + test_bit(HCI_SC_ENABLED, &hdev->dev_flags))) { + err = cmd_status(sk, hdev->id, MGMT_OP_SET_BREDR, + MGMT_STATUS_REJECTED); + goto unlock; + } } if (mgmt_pending_find(MGMT_OP_SET_BREDR, hdev)) { @@ -4697,7 +4778,7 @@ static int set_bredr(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) hci_req_init(&req, hdev); write_fast_connectable(&req, false); - hci_update_page_scan(hdev, &req); + __hci_update_page_scan(&req); /* Since only the advertising data flags will change, there * is no need to update the scan response data. @@ -4713,30 +4794,80 @@ unlock: return err; } +static void sc_enable_complete(struct hci_dev *hdev, u8 status, u16 opcode) +{ + struct pending_cmd *cmd; + struct mgmt_mode *cp; + + BT_DBG("%s status %u", hdev->name, status); + + hci_dev_lock(hdev); + + cmd = mgmt_pending_find(MGMT_OP_SET_SECURE_CONN, hdev); + if (!cmd) + goto unlock; + + if (status) { + cmd_status(cmd->sk, cmd->index, cmd->opcode, + mgmt_status(status)); + goto remove; + } + + cp = cmd->param; + + switch (cp->val) { + case 0x00: + clear_bit(HCI_SC_ENABLED, &hdev->dev_flags); + clear_bit(HCI_SC_ONLY, &hdev->dev_flags); + break; + case 0x01: + set_bit(HCI_SC_ENABLED, &hdev->dev_flags); + clear_bit(HCI_SC_ONLY, &hdev->dev_flags); + break; + case 0x02: + set_bit(HCI_SC_ENABLED, &hdev->dev_flags); + set_bit(HCI_SC_ONLY, &hdev->dev_flags); + break; + } + + send_settings_rsp(cmd->sk, MGMT_OP_SET_SECURE_CONN, hdev); + new_settings(hdev, cmd->sk); + +remove: + mgmt_pending_remove(cmd); +unlock: + hci_dev_unlock(hdev); +} + static int set_secure_conn(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_mode *cp = data; struct pending_cmd *cmd; + struct hci_request req; u8 val; int err; BT_DBG("request for %s", hdev->name); - if (!test_bit(HCI_LE_ENABLED, &hdev->dev_flags) && - !lmp_sc_capable(hdev) && !test_bit(HCI_FORCE_SC, &hdev->dbg_flags)) + if (!lmp_sc_capable(hdev) && + !test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) return cmd_status(sk, hdev->id, MGMT_OP_SET_SECURE_CONN, MGMT_STATUS_NOT_SUPPORTED); + if (test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags) && + lmp_sc_capable(hdev) && + !test_bit(HCI_SSP_ENABLED, &hdev->dev_flags)) + return cmd_status(sk, hdev->id, MGMT_OP_SET_SECURE_CONN, + MGMT_STATUS_REJECTED); + if (cp->val != 0x00 && cp->val != 0x01 && cp->val != 0x02) return cmd_status(sk, hdev->id, MGMT_OP_SET_SECURE_CONN, MGMT_STATUS_INVALID_PARAMS); hci_dev_lock(hdev); - if (!hdev_is_powered(hdev) || - (!lmp_sc_capable(hdev) && - !test_bit(HCI_FORCE_SC, &hdev->dbg_flags)) || + if (!hdev_is_powered(hdev) || !lmp_sc_capable(hdev) || !test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) { bool changed; @@ -4783,17 +4914,14 @@ static int set_secure_conn(struct sock *sk, struct hci_dev *hdev, goto failed; } - err = hci_send_cmd(hdev, HCI_OP_WRITE_SC_SUPPORT, 1, &val); + hci_req_init(&req, hdev); + hci_req_add(&req, HCI_OP_WRITE_SC_SUPPORT, 1, &val); + err = hci_req_run(&req, sc_enable_complete); 
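[Editor's aside, not part of the patch: the hunk above moves the Secure Connections flag updates out of set_secure_conn() and into the new sc_enable_complete() request callback, so the HCI_SC_ENABLED/HCI_SC_ONLY bits are only touched after the controller has acknowledged HCI_OP_WRITE_SC_SUPPORT. A minimal, hypothetical userspace sketch of the value-to-flag mapping that callback applies (names invented for illustration; the kernel uses dev_flags bits):]

#include <stdbool.h>
#include <stdio.h>

/* Illustration only: mirrors how sc_enable_complete() above translates the
 * mgmt SET_SECURE_CONN parameter (0x00 = off, 0x01 = enabled, 0x02 = SC-only)
 * into two flags. The real code sets/clears HCI_SC_ENABLED and HCI_SC_ONLY.
 */
struct sc_flags {
	bool sc_enabled;	/* Secure Connections may be used */
	bool sc_only;		/* Secure Connections is required */
};

static void apply_sc_mode(unsigned char val, struct sc_flags *f)
{
	switch (val) {
	case 0x00:		/* disabled */
		f->sc_enabled = false;
		f->sc_only = false;
		break;
	case 0x01:		/* enabled */
		f->sc_enabled = true;
		f->sc_only = false;
		break;
	case 0x02:		/* SC-only mode */
		f->sc_enabled = true;
		f->sc_only = true;
		break;
	}
}

int main(void)
{
	struct sc_flags f = { false, false };

	apply_sc_mode(0x02, &f);
	printf("enabled=%d only=%d\n", f.sc_enabled, f.sc_only);
	return 0;
}

[End of editorial aside; the patch text continues below.]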
if (err < 0) { mgmt_pending_remove(cmd); goto failed; } - if (cp->val == 0x02) - set_bit(HCI_SC_ONLY, &hdev->dev_flags); - else - clear_bit(HCI_SC_ONLY, &hdev->dev_flags); - failed: hci_dev_unlock(hdev); return err; @@ -5091,10 +5219,11 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev, return err; } -static void conn_info_cmd_complete(struct pending_cmd *cmd, u8 status) +static int conn_info_cmd_complete(struct pending_cmd *cmd, u8 status) { struct hci_conn *conn = cmd->user_data; struct mgmt_rp_get_conn_info rp; + int err; memcpy(&rp.addr, cmd->param, sizeof(rp.addr)); @@ -5108,14 +5237,17 @@ static void conn_info_cmd_complete(struct pending_cmd *cmd, u8 status) rp.max_tx_power = HCI_TX_POWER_INVALID; } - cmd_complete(cmd->sk, cmd->index, MGMT_OP_GET_CONN_INFO, status, - &rp, sizeof(rp)); + err = cmd_complete(cmd->sk, cmd->index, MGMT_OP_GET_CONN_INFO, status, + &rp, sizeof(rp)); hci_conn_drop(conn); hci_conn_put(conn); + + return err; } -static void conn_info_refresh_complete(struct hci_dev *hdev, u8 hci_status) +static void conn_info_refresh_complete(struct hci_dev *hdev, u8 hci_status, + u16 opcode) { struct hci_cp_read_rssi *cp; struct pending_cmd *cmd; @@ -5286,11 +5418,12 @@ unlock: return err; } -static void clock_info_cmd_complete(struct pending_cmd *cmd, u8 status) +static int clock_info_cmd_complete(struct pending_cmd *cmd, u8 status) { struct hci_conn *conn = cmd->user_data; struct mgmt_rp_get_clock_info rp; struct hci_dev *hdev; + int err; memset(&rp, 0, sizeof(rp)); memcpy(&rp.addr, &cmd->param, sizeof(rp.addr)); @@ -5310,15 +5443,18 @@ static void clock_info_cmd_complete(struct pending_cmd *cmd, u8 status) } complete: - cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, &rp, sizeof(rp)); + err = cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, &rp, + sizeof(rp)); if (conn) { hci_conn_drop(conn); hci_conn_put(conn); } + + return err; } -static void get_clock_info_complete(struct hci_dev *hdev, u8 status) +static void get_clock_info_complete(struct hci_dev *hdev, u8 status, u16 opcode) { struct hci_cp_read_clock *hci_cp; struct pending_cmd *cmd; @@ -5425,6 +5561,65 @@ unlock: return err; } +static bool is_connected(struct hci_dev *hdev, bdaddr_t *addr, u8 type) +{ + struct hci_conn *conn; + + conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, addr); + if (!conn) + return false; + + if (conn->dst_type != type) + return false; + + if (conn->state != BT_CONNECTED) + return false; + + return true; +} + +/* This function requires the caller holds hdev->lock */ +static int hci_conn_params_set(struct hci_request *req, bdaddr_t *addr, + u8 addr_type, u8 auto_connect) +{ + struct hci_dev *hdev = req->hdev; + struct hci_conn_params *params; + + params = hci_conn_params_add(hdev, addr, addr_type); + if (!params) + return -EIO; + + if (params->auto_connect == auto_connect) + return 0; + + list_del_init(¶ms->action); + + switch (auto_connect) { + case HCI_AUTO_CONN_DISABLED: + case HCI_AUTO_CONN_LINK_LOSS: + __hci_update_background_scan(req); + break; + case HCI_AUTO_CONN_REPORT: + list_add(¶ms->action, &hdev->pend_le_reports); + __hci_update_background_scan(req); + break; + case HCI_AUTO_CONN_DIRECT: + case HCI_AUTO_CONN_ALWAYS: + if (!is_connected(hdev, addr, addr_type)) { + list_add(¶ms->action, &hdev->pend_le_conns); + __hci_update_background_scan(req); + } + break; + } + + params->auto_connect = auto_connect; + + BT_DBG("addr %pMR (type %u) auto_connect %u", addr, addr_type, + auto_connect); + + return 0; +} + static void device_added(struct sock *sk, 
struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type, u8 action) { @@ -5437,10 +5632,31 @@ static void device_added(struct sock *sk, struct hci_dev *hdev, mgmt_event(MGMT_EV_DEVICE_ADDED, hdev, &ev, sizeof(ev), sk); } +static void add_device_complete(struct hci_dev *hdev, u8 status, u16 opcode) +{ + struct pending_cmd *cmd; + + BT_DBG("status 0x%02x", status); + + hci_dev_lock(hdev); + + cmd = mgmt_pending_find(MGMT_OP_ADD_DEVICE, hdev); + if (!cmd) + goto unlock; + + cmd->cmd_complete(cmd, mgmt_status(status)); + mgmt_pending_remove(cmd); + +unlock: + hci_dev_unlock(hdev); +} + static int add_device(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_add_device *cp = data; + struct pending_cmd *cmd; + struct hci_request req; u8 auto_conn, addr_type; int err; @@ -5457,14 +5673,24 @@ static int add_device(struct sock *sk, struct hci_dev *hdev, MGMT_STATUS_INVALID_PARAMS, &cp->addr, sizeof(cp->addr)); + hci_req_init(&req, hdev); + hci_dev_lock(hdev); + cmd = mgmt_pending_add(sk, MGMT_OP_ADD_DEVICE, hdev, data, len); + if (!cmd) { + err = -ENOMEM; + goto unlock; + } + + cmd->cmd_complete = addr_cmd_complete; + if (cp->addr.type == BDADDR_BREDR) { /* Only incoming connections action is supported for now */ if (cp->action != 0x01) { - err = cmd_complete(sk, hdev->id, MGMT_OP_ADD_DEVICE, - MGMT_STATUS_INVALID_PARAMS, - &cp->addr, sizeof(cp->addr)); + err = cmd->cmd_complete(cmd, + MGMT_STATUS_INVALID_PARAMS); + mgmt_pending_remove(cmd); goto unlock; } @@ -5473,7 +5699,7 @@ static int add_device(struct sock *sk, struct hci_dev *hdev, if (err) goto unlock; - hci_update_page_scan(hdev, NULL); + __hci_update_page_scan(&req); goto added; } @@ -5493,19 +5719,25 @@ static int add_device(struct sock *sk, struct hci_dev *hdev, /* If the connection parameters don't exist for this device, * they will be created and configured with defaults. */ - if (hci_conn_params_set(hdev, &cp->addr.bdaddr, addr_type, + if (hci_conn_params_set(&req, &cp->addr.bdaddr, addr_type, auto_conn) < 0) { - err = cmd_complete(sk, hdev->id, MGMT_OP_ADD_DEVICE, - MGMT_STATUS_FAILED, - &cp->addr, sizeof(cp->addr)); + err = cmd->cmd_complete(cmd, MGMT_STATUS_FAILED); + mgmt_pending_remove(cmd); goto unlock; } added: device_added(sk, hdev, &cp->addr.bdaddr, cp->addr.type, cp->action); - err = cmd_complete(sk, hdev->id, MGMT_OP_ADD_DEVICE, - MGMT_STATUS_SUCCESS, &cp->addr, sizeof(cp->addr)); + err = hci_req_run(&req, add_device_complete); + if (err < 0) { + /* ENODATA means no HCI commands were needed (e.g. if + * the adapter is powered off). 
+ */ + if (err == -ENODATA) + err = cmd->cmd_complete(cmd, MGMT_STATUS_SUCCESS); + mgmt_pending_remove(cmd); + } unlock: hci_dev_unlock(hdev); @@ -5523,24 +5755,55 @@ static void device_removed(struct sock *sk, struct hci_dev *hdev, mgmt_event(MGMT_EV_DEVICE_REMOVED, hdev, &ev, sizeof(ev), sk); } +static void remove_device_complete(struct hci_dev *hdev, u8 status, u16 opcode) +{ + struct pending_cmd *cmd; + + BT_DBG("status 0x%02x", status); + + hci_dev_lock(hdev); + + cmd = mgmt_pending_find(MGMT_OP_REMOVE_DEVICE, hdev); + if (!cmd) + goto unlock; + + cmd->cmd_complete(cmd, mgmt_status(status)); + mgmt_pending_remove(cmd); + +unlock: + hci_dev_unlock(hdev); +} + static int remove_device(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_remove_device *cp = data; + struct pending_cmd *cmd; + struct hci_request req; int err; BT_DBG("%s", hdev->name); + hci_req_init(&req, hdev); + hci_dev_lock(hdev); + cmd = mgmt_pending_add(sk, MGMT_OP_REMOVE_DEVICE, hdev, data, len); + if (!cmd) { + err = -ENOMEM; + goto unlock; + } + + cmd->cmd_complete = addr_cmd_complete; + if (bacmp(&cp->addr.bdaddr, BDADDR_ANY)) { struct hci_conn_params *params; u8 addr_type; if (!bdaddr_type_is_valid(cp->addr.type)) { - err = cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_DEVICE, - MGMT_STATUS_INVALID_PARAMS, - &cp->addr, sizeof(cp->addr)); + err = cmd->cmd_complete(cmd, + MGMT_STATUS_INVALID_PARAMS); + mgmt_pending_remove(cmd); goto unlock; } @@ -5549,14 +5812,13 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev, &cp->addr.bdaddr, cp->addr.type); if (err) { - err = cmd_complete(sk, hdev->id, - MGMT_OP_REMOVE_DEVICE, - MGMT_STATUS_INVALID_PARAMS, - &cp->addr, sizeof(cp->addr)); + err = cmd->cmd_complete(cmd, + MGMT_STATUS_INVALID_PARAMS); + mgmt_pending_remove(cmd); goto unlock; } - hci_update_page_scan(hdev, NULL); + __hci_update_page_scan(&req); device_removed(sk, hdev, &cp->addr.bdaddr, cp->addr.type); @@ -5571,23 +5833,23 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev, params = hci_conn_params_lookup(hdev, &cp->addr.bdaddr, addr_type); if (!params) { - err = cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_DEVICE, - MGMT_STATUS_INVALID_PARAMS, - &cp->addr, sizeof(cp->addr)); + err = cmd->cmd_complete(cmd, + MGMT_STATUS_INVALID_PARAMS); + mgmt_pending_remove(cmd); goto unlock; } if (params->auto_connect == HCI_AUTO_CONN_DISABLED) { - err = cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_DEVICE, - MGMT_STATUS_INVALID_PARAMS, - &cp->addr, sizeof(cp->addr)); + err = cmd->cmd_complete(cmd, + MGMT_STATUS_INVALID_PARAMS); + mgmt_pending_remove(cmd); goto unlock; } list_del(¶ms->action); list_del(¶ms->list); kfree(params); - hci_update_background_scan(hdev); + __hci_update_background_scan(&req); device_removed(sk, hdev, &cp->addr.bdaddr, cp->addr.type); } else { @@ -5595,9 +5857,9 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev, struct bdaddr_list *b, *btmp; if (cp->addr.type) { - err = cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_DEVICE, - MGMT_STATUS_INVALID_PARAMS, - &cp->addr, sizeof(cp->addr)); + err = cmd->cmd_complete(cmd, + MGMT_STATUS_INVALID_PARAMS); + mgmt_pending_remove(cmd); goto unlock; } @@ -5607,7 +5869,7 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev, kfree(b); } - hci_update_page_scan(hdev, NULL); + __hci_update_page_scan(&req); list_for_each_entry_safe(p, tmp, &hdev->le_conn_params, list) { if (p->auto_connect == HCI_AUTO_CONN_DISABLED) @@ -5620,12 +5882,19 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev, 
BT_DBG("All LE connection parameters were removed"); - hci_update_background_scan(hdev); + __hci_update_background_scan(&req); } complete: - err = cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_DEVICE, - MGMT_STATUS_SUCCESS, &cp->addr, sizeof(cp->addr)); + err = hci_req_run(&req, remove_device_complete); + if (err < 0) { + /* ENODATA means no HCI commands were needed (e.g. if + * the adapter is powered off). + */ + if (err == -ENODATA) + err = cmd->cmd_complete(cmd, MGMT_STATUS_SUCCESS); + mgmt_pending_remove(cmd); + } unlock: hci_dev_unlock(hdev); @@ -6037,8 +6306,9 @@ void mgmt_index_removed(struct hci_dev *hdev) } /* This function requires the caller holds hdev->lock */ -static void restart_le_actions(struct hci_dev *hdev) +static void restart_le_actions(struct hci_request *req) { + struct hci_dev *hdev = req->hdev; struct hci_conn_params *p; list_for_each_entry(p, &hdev->le_conn_params, list) { @@ -6060,18 +6330,25 @@ static void restart_le_actions(struct hci_dev *hdev) } } - hci_update_background_scan(hdev); + __hci_update_background_scan(req); } -static void powered_complete(struct hci_dev *hdev, u8 status) +static void powered_complete(struct hci_dev *hdev, u8 status, u16 opcode) { struct cmd_lookup match = { NULL, hdev }; BT_DBG("status 0x%02x", status); - hci_dev_lock(hdev); + if (!status) { + /* Register the available SMP channels (BR/EDR and LE) only + * when successfully powering on the controller. This late + * registration is required so that LE SMP can clearly + * decide if the public address or static address is used. + */ + smp_register(hdev); + } - restart_le_actions(hdev); + hci_dev_lock(hdev); mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, &match); @@ -6092,14 +6369,16 @@ static int powered_update_hci(struct hci_dev *hdev) if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags) && !lmp_host_ssp_capable(hdev)) { - u8 ssp = 1; + u8 mode = 0x01; - hci_req_add(&req, HCI_OP_WRITE_SSP_MODE, 1, &ssp); - } + hci_req_add(&req, HCI_OP_WRITE_SSP_MODE, sizeof(mode), &mode); + + if (bredr_sc_enabled(hdev) && !lmp_host_sc_capable(hdev)) { + u8 support = 0x01; - if (bredr_sc_enabled(hdev) && !lmp_host_sc_capable(hdev)) { - u8 sc = 0x01; - hci_req_add(&req, HCI_OP_WRITE_SC_SUPPORT, sizeof(sc), &sc); + hci_req_add(&req, HCI_OP_WRITE_SC_SUPPORT, + sizeof(support), &support); + } } if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags) && @@ -6130,6 +6409,8 @@ static int powered_update_hci(struct hci_dev *hdev) if (test_bit(HCI_ADVERTISING, &hdev->dev_flags)) enable_advertising(&req); + + restart_le_actions(&req); } link_sec = test_bit(HCI_LINK_SECURITY, &hdev->dev_flags); @@ -6139,7 +6420,7 @@ static int powered_update_hci(struct hci_dev *hdev) if (lmp_bredr_capable(hdev)) { write_fast_connectable(&req, false); - hci_update_page_scan(hdev, &req); + __hci_update_page_scan(&req); update_class(&req); update_name(&req); update_eir(&req); @@ -6817,43 +7098,6 @@ void mgmt_ssp_enable_complete(struct hci_dev *hdev, u8 enable, u8 status) hci_req_run(&req, NULL); } -void mgmt_sc_enable_complete(struct hci_dev *hdev, u8 enable, u8 status) -{ - struct cmd_lookup match = { NULL, hdev }; - bool changed = false; - - if (status) { - u8 mgmt_err = mgmt_status(status); - - if (enable) { - if (test_and_clear_bit(HCI_SC_ENABLED, - &hdev->dev_flags)) - new_settings(hdev, NULL); - clear_bit(HCI_SC_ONLY, &hdev->dev_flags); - } - - mgmt_pending_foreach(MGMT_OP_SET_SECURE_CONN, hdev, - cmd_status_rsp, &mgmt_err); - return; - } - - if (enable) { - changed = !test_and_set_bit(HCI_SC_ENABLED, &hdev->dev_flags); - } else 
{ - changed = test_and_clear_bit(HCI_SC_ENABLED, &hdev->dev_flags); - clear_bit(HCI_SC_ONLY, &hdev->dev_flags); - } - - mgmt_pending_foreach(MGMT_OP_SET_SECURE_CONN, hdev, - settings_rsp, &match); - - if (changed) - new_settings(hdev, match.sk); - - if (match.sk) - sock_put(match.sk); -} - static void sk_lookup(struct pending_cmd *cmd, void *data) { struct cmd_lookup *match = data; @@ -6924,28 +7168,21 @@ void mgmt_read_local_oob_data_complete(struct hci_dev *hdev, u8 *hash192, cmd_status(cmd->sk, hdev->id, MGMT_OP_READ_LOCAL_OOB_DATA, mgmt_status(status)); } else { - if (bredr_sc_enabled(hdev) && hash256 && rand256) { - struct mgmt_rp_read_local_oob_ext_data rp; + struct mgmt_rp_read_local_oob_data rp; + size_t rp_size = sizeof(rp); - memcpy(rp.hash192, hash192, sizeof(rp.hash192)); - memcpy(rp.rand192, rand192, sizeof(rp.rand192)); + memcpy(rp.hash192, hash192, sizeof(rp.hash192)); + memcpy(rp.rand192, rand192, sizeof(rp.rand192)); + if (bredr_sc_enabled(hdev) && hash256 && rand256) { memcpy(rp.hash256, hash256, sizeof(rp.hash256)); memcpy(rp.rand256, rand256, sizeof(rp.rand256)); - - cmd_complete(cmd->sk, hdev->id, - MGMT_OP_READ_LOCAL_OOB_DATA, 0, - &rp, sizeof(rp)); } else { - struct mgmt_rp_read_local_oob_data rp; - - memcpy(rp.hash, hash192, sizeof(rp.hash)); - memcpy(rp.rand, rand192, sizeof(rp.rand)); - - cmd_complete(cmd->sk, hdev->id, - MGMT_OP_READ_LOCAL_OOB_DATA, 0, - &rp, sizeof(rp)); + rp_size -= sizeof(rp.hash256) + sizeof(rp.rand256); } + + cmd_complete(cmd->sk, hdev->id, MGMT_OP_READ_LOCAL_OOB_DATA, 0, + &rp, rp_size); } mgmt_pending_remove(cmd); @@ -7018,6 +7255,21 @@ static bool eir_has_uuids(u8 *eir, u16 eir_len, u16 uuid_count, u8 (*uuids)[16]) return false; } +static void restart_le_scan(struct hci_dev *hdev) +{ + /* If controller is not scanning we are done. */ + if (!test_bit(HCI_LE_SCAN, &hdev->dev_flags)) + return; + + if (time_after(jiffies + DISCOV_LE_RESTART_DELAY, + hdev->discovery.scan_start + + hdev->discovery.scan_duration)) + return; + + queue_delayed_work(hdev->workqueue, &hdev->le_scan_restart, + DISCOV_LE_RESTART_DELAY); +} + void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, u8 *dev_class, s8 rssi, u32 flags, u8 *eir, u16 eir_len, u8 *scan_rsp, u8 scan_rsp_len) @@ -7040,14 +7292,18 @@ void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, /* When using service discovery with a RSSI threshold, then check * if such a RSSI threshold is specified. If a RSSI threshold has - * been specified, then all results with a RSSI smaller than the - * RSSI threshold will be dropped. + * been specified, and HCI_QUIRK_STRICT_DUPLICATE_FILTER is not set, + * then all results with a RSSI smaller than the RSSI threshold will be + * dropped. If the quirk is set, let it through for further processing, + * as we might need to restart the scan. * * For BR/EDR devices (pre 1.2) providing no RSSI during inquiry, * the results are also dropped. */ if (hdev->discovery.rssi != HCI_RSSI_INVALID && - (rssi < hdev->discovery.rssi || rssi == HCI_RSSI_INVALID)) + (rssi == HCI_RSSI_INVALID || + (rssi < hdev->discovery.rssi && + !test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks)))) return; /* Make sure that the buffer is big enough. The 5 extra bytes @@ -7066,7 +7322,8 @@ void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, * However when using service discovery, the value 127 will be * returned when the RSSI is not available. 
*/ - if (rssi == HCI_RSSI_INVALID && !hdev->discovery.report_invalid_rssi) + if (rssi == HCI_RSSI_INVALID && !hdev->discovery.report_invalid_rssi && + link_type == ACL_LINK) rssi = 0; bacpy(&ev->addr.bdaddr, bdaddr); @@ -7081,12 +7338,20 @@ void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, * kept and checking possible scan response data * will be skipped. */ - if (hdev->discovery.uuid_count > 0) + if (hdev->discovery.uuid_count > 0) { match = eir_has_uuids(eir, eir_len, hdev->discovery.uuid_count, hdev->discovery.uuids); - else + /* If duplicate filtering does not report RSSI changes, + * then restart scanning to ensure updated result with + * updated RSSI values. + */ + if (match && test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, + &hdev->quirks)) + restart_le_scan(hdev); + } else { match = true; + } if (!match && !scan_rsp_len) return; @@ -7119,6 +7384,14 @@ void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, hdev->discovery.uuid_count, hdev->discovery.uuids)) return; + + /* If duplicate filtering does not report RSSI changes, + * then restart scanning to ensure updated result with + * updated RSSI values. + */ + if (test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, + &hdev->quirks)) + restart_le_scan(hdev); } /* Append scan response data to event */ @@ -7132,6 +7405,14 @@ void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, return; } + /* Validate the reported RSSI value against the RSSI threshold once more + * incase HCI_QUIRK_STRICT_DUPLICATE_FILTER forced a restart of LE + * scanning. + */ + if (hdev->discovery.rssi != HCI_RSSI_INVALID && + rssi < hdev->discovery.rssi) + return; + ev->eir_len = cpu_to_le16(eir_len + scan_rsp_len); ev_size = sizeof(*ev) + eir_len + scan_rsp_len; @@ -7174,7 +7455,7 @@ void mgmt_discovering(struct hci_dev *hdev, u8 discovering) mgmt_event(MGMT_EV_DISCOVERING, hdev, &ev, sizeof(ev), NULL); } -static void adv_enable_complete(struct hci_dev *hdev, u8 status) +static void adv_enable_complete(struct hci_dev *hdev, u8 status, u16 opcode) { BT_DBG("%s status %u", hdev->name, status); } diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 73f8c75abe6e..4fea24275b17 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -771,7 +771,7 @@ static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, bacpy(&addr.l2_bdaddr, dst); addr.l2_family = AF_BLUETOOTH; - addr.l2_psm = cpu_to_le16(RFCOMM_PSM); + addr.l2_psm = cpu_to_le16(L2CAP_PSM_RFCOMM); addr.l2_cid = 0; addr.l2_bdaddr_type = BDADDR_BREDR; *err = kernel_connect(sock, (struct sockaddr *) &addr, sizeof(addr), O_NONBLOCK); @@ -2038,7 +2038,7 @@ static int rfcomm_add_listener(bdaddr_t *ba) /* Bind socket */ bacpy(&addr.l2_bdaddr, ba); addr.l2_family = AF_BLUETOOTH; - addr.l2_psm = cpu_to_le16(RFCOMM_PSM); + addr.l2_psm = cpu_to_le16(L2CAP_PSM_RFCOMM); addr.l2_cid = 0; addr.l2_bdaddr_type = BDADDR_BREDR; err = kernel_bind(sock, (struct sockaddr *) &addr, sizeof(addr)); diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 2348176401a0..3c6d2c8ac1a4 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -468,7 +468,7 @@ done: static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int flags) { - DECLARE_WAITQUEUE(wait, current); + DEFINE_WAIT_FUNC(wait, woken_wake_function); struct sock *sk = sock->sk, *nsk; long timeo; int err = 0; @@ -487,8 +487,6 @@ static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int f 
/* Wait for an incoming connection. (wake-one). */ add_wait_queue_exclusive(sk_sleep(sk), &wait); while (1) { - set_current_state(TASK_INTERRUPTIBLE); - if (sk->sk_state != BT_LISTEN) { err = -EBADFD; break; @@ -509,10 +507,11 @@ static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int f } release_sock(sk); - timeo = schedule_timeout(timeo); + + timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, timeo); + lock_sock_nested(sk, SINGLE_DEPTH_NESTING); } - __set_current_state(TASK_RUNNING); remove_wait_queue(sk_sleep(sk), &wait); if (err) @@ -1058,6 +1057,8 @@ int __init rfcomm_init_sockets(void) { int err; + BUILD_BUG_ON(sizeof(struct sockaddr_rc) > sizeof(struct sockaddr)); + err = proto_register(&rfcomm_proto, 0); if (err < 0) return err; diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 30e5ea3f1ad3..76321b546e84 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -618,7 +618,7 @@ done: static int sco_sock_accept(struct socket *sock, struct socket *newsock, int flags) { - DECLARE_WAITQUEUE(wait, current); + DEFINE_WAIT_FUNC(wait, woken_wake_function); struct sock *sk = sock->sk, *ch; long timeo; int err = 0; @@ -632,8 +632,6 @@ static int sco_sock_accept(struct socket *sock, struct socket *newsock, int flag /* Wait for an incoming connection. (wake-one). */ add_wait_queue_exclusive(sk_sleep(sk), &wait); while (1) { - set_current_state(TASK_INTERRUPTIBLE); - if (sk->sk_state != BT_LISTEN) { err = -EBADFD; break; @@ -654,10 +652,10 @@ static int sco_sock_accept(struct socket *sock, struct socket *newsock, int flag } release_sock(sk); - timeo = schedule_timeout(timeo); + + timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, timeo); lock_sock(sk); } - __set_current_state(TASK_RUNNING); remove_wait_queue(sk_sleep(sk), &wait); if (err) @@ -1184,6 +1182,8 @@ int __init sco_init(void) { int err; + BUILD_BUG_ON(sizeof(struct sockaddr_sco) > sizeof(struct sockaddr)); + err = proto_register(&sco_proto, 0); if (err < 0) return err; diff --git a/net/bluetooth/selftest.c b/net/bluetooth/selftest.c new file mode 100644 index 000000000000..378f4064952c --- /dev/null +++ b/net/bluetooth/selftest.c @@ -0,0 +1,244 @@ +/* + BlueZ - Bluetooth protocol stack for Linux + + Copyright (C) 2014 Intel Corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License version 2 as + published by the Free Software Foundation; + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. + IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY + CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, + COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS + SOFTWARE IS DISCLAIMED. 
+*/ + +#include <net/bluetooth/bluetooth.h> +#include <net/bluetooth/hci_core.h> + +#include "ecc.h" +#include "smp.h" +#include "selftest.h" + +#if IS_ENABLED(CONFIG_BT_SELFTEST_ECDH) + +static const u8 priv_a_1[32] __initconst = { + 0xbd, 0x1a, 0x3c, 0xcd, 0xa6, 0xb8, 0x99, 0x58, + 0x99, 0xb7, 0x40, 0xeb, 0x7b, 0x60, 0xff, 0x4a, + 0x50, 0x3f, 0x10, 0xd2, 0xe3, 0xb3, 0xc9, 0x74, + 0x38, 0x5f, 0xc5, 0xa3, 0xd4, 0xf6, 0x49, 0x3f, +}; +static const u8 priv_b_1[32] __initconst = { + 0xfd, 0xc5, 0x7f, 0xf4, 0x49, 0xdd, 0x4f, 0x6b, + 0xfb, 0x7c, 0x9d, 0xf1, 0xc2, 0x9a, 0xcb, 0x59, + 0x2a, 0xe7, 0xd4, 0xee, 0xfb, 0xfc, 0x0a, 0x90, + 0x9a, 0xbb, 0xf6, 0x32, 0x3d, 0x8b, 0x18, 0x55, +}; +static const u8 pub_a_1[64] __initconst = { + 0xe6, 0x9d, 0x35, 0x0e, 0x48, 0x01, 0x03, 0xcc, + 0xdb, 0xfd, 0xf4, 0xac, 0x11, 0x91, 0xf4, 0xef, + 0xb9, 0xa5, 0xf9, 0xe9, 0xa7, 0x83, 0x2c, 0x5e, + 0x2c, 0xbe, 0x97, 0xf2, 0xd2, 0x03, 0xb0, 0x20, + + 0x8b, 0xd2, 0x89, 0x15, 0xd0, 0x8e, 0x1c, 0x74, + 0x24, 0x30, 0xed, 0x8f, 0xc2, 0x45, 0x63, 0x76, + 0x5c, 0x15, 0x52, 0x5a, 0xbf, 0x9a, 0x32, 0x63, + 0x6d, 0xeb, 0x2a, 0x65, 0x49, 0x9c, 0x80, 0xdc, +}; +static const u8 pub_b_1[64] __initconst = { + 0x90, 0xa1, 0xaa, 0x2f, 0xb2, 0x77, 0x90, 0x55, + 0x9f, 0xa6, 0x15, 0x86, 0xfd, 0x8a, 0xb5, 0x47, + 0x00, 0x4c, 0x9e, 0xf1, 0x84, 0x22, 0x59, 0x09, + 0x96, 0x1d, 0xaf, 0x1f, 0xf0, 0xf0, 0xa1, 0x1e, + + 0x4a, 0x21, 0xb1, 0x15, 0xf9, 0xaf, 0x89, 0x5f, + 0x76, 0x36, 0x8e, 0xe2, 0x30, 0x11, 0x2d, 0x47, + 0x60, 0x51, 0xb8, 0x9a, 0x3a, 0x70, 0x56, 0x73, + 0x37, 0xad, 0x9d, 0x42, 0x3e, 0xf3, 0x55, 0x4c, +}; +static const u8 dhkey_1[32] __initconst = { + 0x98, 0xa6, 0xbf, 0x73, 0xf3, 0x34, 0x8d, 0x86, + 0xf1, 0x66, 0xf8, 0xb4, 0x13, 0x6b, 0x79, 0x99, + 0x9b, 0x7d, 0x39, 0x0a, 0xa6, 0x10, 0x10, 0x34, + 0x05, 0xad, 0xc8, 0x57, 0xa3, 0x34, 0x02, 0xec, +}; + +static const u8 priv_a_2[32] __initconst = { + 0x63, 0x76, 0x45, 0xd0, 0xf7, 0x73, 0xac, 0xb7, + 0xff, 0xdd, 0x03, 0x72, 0xb9, 0x72, 0x85, 0xb4, + 0x41, 0xb6, 0x5d, 0x0c, 0x5d, 0x54, 0x84, 0x60, + 0x1a, 0xa3, 0x9a, 0x3c, 0x69, 0x16, 0xa5, 0x06, +}; +static const u8 priv_b_2[32] __initconst = { + 0xba, 0x30, 0x55, 0x50, 0x19, 0xa2, 0xca, 0xa3, + 0xa5, 0x29, 0x08, 0xc6, 0xb5, 0x03, 0x88, 0x7e, + 0x03, 0x2b, 0x50, 0x73, 0xd4, 0x2e, 0x50, 0x97, + 0x64, 0xcd, 0x72, 0x0d, 0x67, 0xa0, 0x9a, 0x52, +}; +static const u8 pub_a_2[64] __initconst = { + 0xdd, 0x78, 0x5c, 0x74, 0x03, 0x9b, 0x7e, 0x98, + 0xcb, 0x94, 0x87, 0x4a, 0xad, 0xfa, 0xf8, 0xd5, + 0x43, 0x3e, 0x5c, 0xaf, 0xea, 0xb5, 0x4c, 0xf4, + 0x9e, 0x80, 0x79, 0x57, 0x7b, 0xa4, 0x31, 0x2c, + + 0x4f, 0x5d, 0x71, 0x43, 0x77, 0x43, 0xf8, 0xea, + 0xd4, 0x3e, 0xbd, 0x17, 0x91, 0x10, 0x21, 0xd0, + 0x1f, 0x87, 0x43, 0x8e, 0x40, 0xe2, 0x52, 0xcd, + 0xbe, 0xdf, 0x98, 0x38, 0x18, 0x12, 0x95, 0x91, +}; +static const u8 pub_b_2[64] __initconst = { + 0xcc, 0x00, 0x65, 0xe1, 0xf5, 0x6c, 0x0d, 0xcf, + 0xec, 0x96, 0x47, 0x20, 0x66, 0xc9, 0xdb, 0x84, + 0x81, 0x75, 0xa8, 0x4d, 0xc0, 0xdf, 0xc7, 0x9d, + 0x1b, 0x3f, 0x3d, 0xf2, 0x3f, 0xe4, 0x65, 0xf4, + + 0x79, 0xb2, 0xec, 0xd8, 0xca, 0x55, 0xa1, 0xa8, + 0x43, 0x4d, 0x6b, 0xca, 0x10, 0xb0, 0xc2, 0x01, + 0xc2, 0x33, 0x4e, 0x16, 0x24, 0xc4, 0xef, 0xee, + 0x99, 0xd8, 0xbb, 0xbc, 0x48, 0xd0, 0x01, 0x02, +}; +static const u8 dhkey_2[32] __initconst = { + 0x69, 0xeb, 0x21, 0x32, 0xf2, 0xc6, 0x05, 0x41, + 0x60, 0x19, 0xcd, 0x5e, 0x94, 0xe1, 0xe6, 0x5f, + 0x33, 0x07, 0xe3, 0x38, 0x4b, 0x68, 0xe5, 0x62, + 0x3f, 0x88, 0x6d, 0x2f, 0x3a, 0x84, 0x85, 0xab, +}; + +static const u8 priv_a_3[32] __initconst = { + 0xbd, 0x1a, 
0x3c, 0xcd, 0xa6, 0xb8, 0x99, 0x58, + 0x99, 0xb7, 0x40, 0xeb, 0x7b, 0x60, 0xff, 0x4a, + 0x50, 0x3f, 0x10, 0xd2, 0xe3, 0xb3, 0xc9, 0x74, + 0x38, 0x5f, 0xc5, 0xa3, 0xd4, 0xf6, 0x49, 0x3f, +}; +static const u8 pub_a_3[64] __initconst = { + 0xe6, 0x9d, 0x35, 0x0e, 0x48, 0x01, 0x03, 0xcc, + 0xdb, 0xfd, 0xf4, 0xac, 0x11, 0x91, 0xf4, 0xef, + 0xb9, 0xa5, 0xf9, 0xe9, 0xa7, 0x83, 0x2c, 0x5e, + 0x2c, 0xbe, 0x97, 0xf2, 0xd2, 0x03, 0xb0, 0x20, + + 0x8b, 0xd2, 0x89, 0x15, 0xd0, 0x8e, 0x1c, 0x74, + 0x24, 0x30, 0xed, 0x8f, 0xc2, 0x45, 0x63, 0x76, + 0x5c, 0x15, 0x52, 0x5a, 0xbf, 0x9a, 0x32, 0x63, + 0x6d, 0xeb, 0x2a, 0x65, 0x49, 0x9c, 0x80, 0xdc, +}; +static const u8 dhkey_3[32] __initconst = { + 0x2d, 0xab, 0x00, 0x48, 0xcb, 0xb3, 0x7b, 0xda, + 0x55, 0x7b, 0x8b, 0x72, 0xa8, 0x57, 0x87, 0xc3, + 0x87, 0x27, 0x99, 0x32, 0xfc, 0x79, 0x5f, 0xae, + 0x7c, 0x1c, 0xf9, 0x49, 0xe6, 0xd7, 0xaa, 0x70, +}; + +static int __init test_ecdh_sample(const u8 priv_a[32], const u8 priv_b[32], + const u8 pub_a[64], const u8 pub_b[64], + const u8 dhkey[32]) +{ + u8 dhkey_a[32], dhkey_b[32]; + + ecdh_shared_secret(pub_b, priv_a, dhkey_a); + ecdh_shared_secret(pub_a, priv_b, dhkey_b); + + if (memcmp(dhkey_a, dhkey, 32)) + return -EINVAL; + + if (memcmp(dhkey_b, dhkey, 32)) + return -EINVAL; + + return 0; +} + +static int __init test_ecdh(void) +{ + ktime_t calltime, delta, rettime; + unsigned long long duration; + int err; + + calltime = ktime_get(); + + err = test_ecdh_sample(priv_a_1, priv_b_1, pub_a_1, pub_b_1, dhkey_1); + if (err) { + BT_ERR("ECDH sample 1 failed"); + return err; + } + + err = test_ecdh_sample(priv_a_2, priv_b_2, pub_a_2, pub_b_2, dhkey_2); + if (err) { + BT_ERR("ECDH sample 2 failed"); + return err; + } + + err = test_ecdh_sample(priv_a_3, priv_a_3, pub_a_3, pub_a_3, dhkey_3); + if (err) { + BT_ERR("ECDH sample 3 failed"); + return err; + } + + rettime = ktime_get(); + delta = ktime_sub(rettime, calltime); + duration = (unsigned long long) ktime_to_ns(delta) >> 10; + + BT_INFO("ECDH test passed in %llu usecs", duration); + + return 0; +} + +#else + +static inline int test_ecdh(void) +{ + return 0; +} + +#endif + +static int __init run_selftest(void) +{ + int err; + + BT_INFO("Starting self testing"); + + err = test_ecdh(); + if (err) + goto done; + + err = bt_selftest_smp(); + +done: + BT_INFO("Finished self testing"); + + return err; +} + +#if IS_MODULE(CONFIG_BT) + +/* This is run when CONFIG_BT_SELFTEST=y and CONFIG_BT=m and is just a + * wrapper to allow running this at module init. + * + * If CONFIG_BT_SELFTEST=n, then this code is not compiled at all. + */ +int __init bt_selftest(void) +{ + return run_selftest(); +} + +#else + +/* This is run when CONFIG_BT_SELFTEST=y and CONFIG_BT=y and is run + * via late_initcall() as last item in the initialization sequence. + * + * If CONFIG_BT_SELFTEST=n, then this code is not compiled at all. 
+ */ +static int __init bt_selftest_init(void) +{ + return run_selftest(); +} +late_initcall(bt_selftest_init); + +#endif diff --git a/net/bluetooth/selftest.h b/net/bluetooth/selftest.h new file mode 100644 index 000000000000..2aa0a346a913 --- /dev/null +++ b/net/bluetooth/selftest.h @@ -0,0 +1,45 @@ +/* + BlueZ - Bluetooth protocol stack for Linux + Copyright (C) 2014 Intel Corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License version 2 as + published by the Free Software Foundation; + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. + IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY + CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, + COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS + SOFTWARE IS DISCLAIMED. +*/ + +#if IS_ENABLED(CONFIG_BT_SELFTEST) && IS_MODULE(CONFIG_BT) + +/* When CONFIG_BT_SELFTEST=y and the CONFIG_BT=m, then the self testing + * is run at module loading time. + */ +int bt_selftest(void); + +#else + +/* When CONFIG_BT_SELFTEST=y and CONFIG_BT=y, then the self testing + * is run via late_initcall() to make sure that subsys_initcall() of + * the Bluetooth subsystem and device_initcall() of the Crypto subsystem + * do not clash. + * + * When CONFIG_BT_SELFTEST=n, then this turns into an empty call that + * has no impact. + */ +static inline int bt_selftest(void) +{ + return 0; +} + +#endif diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index b67749bb55bf..c09a821f381d 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -20,6 +20,7 @@ SOFTWARE IS DISCLAIMED. */ +#include <linux/debugfs.h> #include <linux/crypto.h> #include <linux/scatterlist.h> #include <crypto/b128ops.h> @@ -223,8 +224,9 @@ static int smp_f4(struct crypto_hash *tfm_cmac, const u8 u[32], const u8 v[32], return err; } -static int smp_f5(struct crypto_hash *tfm_cmac, u8 w[32], u8 n1[16], u8 n2[16], - u8 a1[7], u8 a2[7], u8 mackey[16], u8 ltk[16]) +static int smp_f5(struct crypto_hash *tfm_cmac, const u8 w[32], + const u8 n1[16], const u8 n2[16], const u8 a1[7], + const u8 a2[7], u8 mackey[16], u8 ltk[16]) { /* The btle, salt and length "magic" values are as defined in * the SMP section of the Bluetooth core specification. 
In ASCII @@ -276,7 +278,7 @@ static int smp_f5(struct crypto_hash *tfm_cmac, u8 w[32], u8 n1[16], u8 n2[16], } static int smp_f6(struct crypto_hash *tfm_cmac, const u8 w[16], - const u8 n1[16], u8 n2[16], const u8 r[16], + const u8 n1[16], const u8 n2[16], const u8 r[16], const u8 io_cap[3], const u8 a1[7], const u8 a2[7], u8 res[16]) { @@ -298,7 +300,7 @@ static int smp_f6(struct crypto_hash *tfm_cmac, const u8 w[16], if (err) return err; - BT_DBG("res %16phN", res); + SMP_DBG("res %16phN", res); return err; } @@ -618,7 +620,7 @@ static void build_pairing_cmd(struct l2cap_conn *conn, oob_data = hci_find_remote_oob_data(hdev, &hcon->dst, bdaddr_type); - if (oob_data) { + if (oob_data && oob_data->present) { set_bit(SMP_FLAG_OOB, &smp->flags); oob_flag = SMP_OOB_PRESENT; memcpy(smp->rr, oob_data->rand256, 16); @@ -1674,7 +1676,7 @@ static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb) if (conn->hcon->type == ACL_LINK) { /* We must have a BR/EDR SC link */ if (!test_bit(HCI_CONN_AES_CCM, &conn->hcon->flags) && - !test_bit(HCI_FORCE_LESC, &hdev->dbg_flags)) + !test_bit(HCI_FORCE_BREDR_SMP, &hdev->dbg_flags)) return SMP_CROSS_TRANSP_NOT_ALLOWED; set_bit(SMP_FLAG_SC, &smp->flags); @@ -2303,8 +2305,12 @@ static int smp_cmd_ident_addr_info(struct l2cap_conn *conn, * implementations are not known of and in order to not over * complicate our implementation, simply pretend that we never * received an IRK for such a device. + * + * The Identity Address must also be a Static Random or Public + * Address, which hci_is_identity_address() checks for. */ - if (!bacmp(&info->bdaddr, BDADDR_ANY)) { + if (!bacmp(&info->bdaddr, BDADDR_ANY) || + !hci_is_identity_address(&info->bdaddr, info->addr_type)) { BT_ERR("Ignoring IRK with no identity address"); goto distribute; } @@ -2737,7 +2743,7 @@ static void bredr_pairing(struct l2cap_chan *chan) /* BR/EDR must use Secure Connections for SMP */ if (!test_bit(HCI_CONN_AES_CCM, &hcon->flags) && - !test_bit(HCI_FORCE_LESC, &hdev->dbg_flags)) + !test_bit(HCI_FORCE_BREDR_SMP, &hdev->dbg_flags)) return; /* If our LE support is not enabled don't do anything */ @@ -2944,11 +2950,30 @@ create_chan: l2cap_chan_set_defaults(chan); - bacpy(&chan->src, &hdev->bdaddr); - if (cid == L2CAP_CID_SMP) - chan->src_type = BDADDR_LE_PUBLIC; - else + if (cid == L2CAP_CID_SMP) { + /* If usage of static address is forced or if the devices + * does not have a public address, then listen on the static + * address. + * + * In case BR/EDR has been disabled on a dual-mode controller + * and a static address has been configued, then listen on + * the static address instead. + */ + if (test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags) || + !bacmp(&hdev->bdaddr, BDADDR_ANY) || + (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags) && + bacmp(&hdev->static_addr, BDADDR_ANY))) { + bacpy(&chan->src, &hdev->static_addr); + chan->src_type = BDADDR_LE_RANDOM; + } else { + bacpy(&chan->src, &hdev->bdaddr); + chan->src_type = BDADDR_LE_PUBLIC; + } + } else { + bacpy(&chan->src, &hdev->bdaddr); chan->src_type = BDADDR_BREDR; + } + chan->state = BT_LISTEN; chan->mode = L2CAP_MODE_BASIC; chan->imtu = L2CAP_DEFAULT_MTU; @@ -2975,21 +3000,108 @@ static void smp_del_chan(struct l2cap_chan *chan) l2cap_chan_put(chan); } +static ssize_t force_bredr_smp_read(struct file *file, + char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct hci_dev *hdev = file->private_data; + char buf[3]; + + buf[0] = test_bit(HCI_FORCE_BREDR_SMP, &hdev->dbg_flags) ? 
'Y': 'N'; + buf[1] = '\n'; + buf[2] = '\0'; + return simple_read_from_buffer(user_buf, count, ppos, buf, 2); +} + +static ssize_t force_bredr_smp_write(struct file *file, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct hci_dev *hdev = file->private_data; + char buf[32]; + size_t buf_size = min(count, (sizeof(buf)-1)); + bool enable; + + if (copy_from_user(buf, user_buf, buf_size)) + return -EFAULT; + + buf[buf_size] = '\0'; + if (strtobool(buf, &enable)) + return -EINVAL; + + if (enable == test_bit(HCI_FORCE_BREDR_SMP, &hdev->dbg_flags)) + return -EALREADY; + + if (enable) { + struct l2cap_chan *chan; + + chan = smp_add_cid(hdev, L2CAP_CID_SMP_BREDR); + if (IS_ERR(chan)) + return PTR_ERR(chan); + + hdev->smp_bredr_data = chan; + } else { + struct l2cap_chan *chan; + + chan = hdev->smp_bredr_data; + hdev->smp_bredr_data = NULL; + smp_del_chan(chan); + } + + change_bit(HCI_FORCE_BREDR_SMP, &hdev->dbg_flags); + + return count; +} + +static const struct file_operations force_bredr_smp_fops = { + .open = simple_open, + .read = force_bredr_smp_read, + .write = force_bredr_smp_write, + .llseek = default_llseek, +}; + int smp_register(struct hci_dev *hdev) { struct l2cap_chan *chan; BT_DBG("%s", hdev->name); + /* If the controller does not support Low Energy operation, then + * there is also no need to register any SMP channel. + */ + if (!lmp_le_capable(hdev)) + return 0; + + if (WARN_ON(hdev->smp_data)) { + chan = hdev->smp_data; + hdev->smp_data = NULL; + smp_del_chan(chan); + } + chan = smp_add_cid(hdev, L2CAP_CID_SMP); if (IS_ERR(chan)) return PTR_ERR(chan); hdev->smp_data = chan; - if (!lmp_sc_capable(hdev) && - !test_bit(HCI_FORCE_LESC, &hdev->dbg_flags)) + /* If the controller does not support BR/EDR Secure Connections + * feature, then the BR/EDR SMP channel shall not be present. + * + * To test this with Bluetooth 4.0 controllers, create a debugfs + * switch that allows forcing BR/EDR SMP support and accepting + * cross-transport pairing on non-AES encrypted connections. 
+ */ + if (!lmp_sc_capable(hdev)) { + debugfs_create_file("force_bredr_smp", 0644, hdev->debugfs, + hdev, &force_bredr_smp_fops); return 0; + } + + if (WARN_ON(hdev->smp_bredr_data)) { + chan = hdev->smp_bredr_data; + hdev->smp_bredr_data = NULL; + smp_del_chan(chan); + } chan = smp_add_cid(hdev, L2CAP_CID_SMP_BREDR); if (IS_ERR(chan)) { @@ -3021,3 +3133,331 @@ void smp_unregister(struct hci_dev *hdev) smp_del_chan(chan); } } + +#if IS_ENABLED(CONFIG_BT_SELFTEST_SMP) + +static int __init test_ah(struct crypto_blkcipher *tfm_aes) +{ + const u8 irk[16] = { + 0x9b, 0x7d, 0x39, 0x0a, 0xa6, 0x10, 0x10, 0x34, + 0x05, 0xad, 0xc8, 0x57, 0xa3, 0x34, 0x02, 0xec }; + const u8 r[3] = { 0x94, 0x81, 0x70 }; + const u8 exp[3] = { 0xaa, 0xfb, 0x0d }; + u8 res[3]; + int err; + + err = smp_ah(tfm_aes, irk, r, res); + if (err) + return err; + + if (memcmp(res, exp, 3)) + return -EINVAL; + + return 0; +} + +static int __init test_c1(struct crypto_blkcipher *tfm_aes) +{ + const u8 k[16] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; + const u8 r[16] = { + 0xe0, 0x2e, 0x70, 0xc6, 0x4e, 0x27, 0x88, 0x63, + 0x0e, 0x6f, 0xad, 0x56, 0x21, 0xd5, 0x83, 0x57 }; + const u8 preq[7] = { 0x01, 0x01, 0x00, 0x00, 0x10, 0x07, 0x07 }; + const u8 pres[7] = { 0x02, 0x03, 0x00, 0x00, 0x08, 0x00, 0x05 }; + const u8 _iat = 0x01; + const u8 _rat = 0x00; + const bdaddr_t ra = { { 0xb6, 0xb5, 0xb4, 0xb3, 0xb2, 0xb1 } }; + const bdaddr_t ia = { { 0xa6, 0xa5, 0xa4, 0xa3, 0xa2, 0xa1 } }; + const u8 exp[16] = { + 0x86, 0x3b, 0xf1, 0xbe, 0xc5, 0x4d, 0xa7, 0xd2, + 0xea, 0x88, 0x89, 0x87, 0xef, 0x3f, 0x1e, 0x1e }; + u8 res[16]; + int err; + + err = smp_c1(tfm_aes, k, r, preq, pres, _iat, &ia, _rat, &ra, res); + if (err) + return err; + + if (memcmp(res, exp, 16)) + return -EINVAL; + + return 0; +} + +static int __init test_s1(struct crypto_blkcipher *tfm_aes) +{ + const u8 k[16] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; + const u8 r1[16] = { + 0x88, 0x77, 0x66, 0x55, 0x44, 0x33, 0x22, 0x11 }; + const u8 r2[16] = { + 0x00, 0xff, 0xee, 0xdd, 0xcc, 0xbb, 0xaa, 0x99 }; + const u8 exp[16] = { + 0x62, 0xa0, 0x6d, 0x79, 0xae, 0x16, 0x42, 0x5b, + 0x9b, 0xf4, 0xb0, 0xe8, 0xf0, 0xe1, 0x1f, 0x9a }; + u8 res[16]; + int err; + + err = smp_s1(tfm_aes, k, r1, r2, res); + if (err) + return err; + + if (memcmp(res, exp, 16)) + return -EINVAL; + + return 0; +} + +static int __init test_f4(struct crypto_hash *tfm_cmac) +{ + const u8 u[32] = { + 0xe6, 0x9d, 0x35, 0x0e, 0x48, 0x01, 0x03, 0xcc, + 0xdb, 0xfd, 0xf4, 0xac, 0x11, 0x91, 0xf4, 0xef, + 0xb9, 0xa5, 0xf9, 0xe9, 0xa7, 0x83, 0x2c, 0x5e, + 0x2c, 0xbe, 0x97, 0xf2, 0xd2, 0x03, 0xb0, 0x20 }; + const u8 v[32] = { + 0xfd, 0xc5, 0x7f, 0xf4, 0x49, 0xdd, 0x4f, 0x6b, + 0xfb, 0x7c, 0x9d, 0xf1, 0xc2, 0x9a, 0xcb, 0x59, + 0x2a, 0xe7, 0xd4, 0xee, 0xfb, 0xfc, 0x0a, 0x90, + 0x9a, 0xbb, 0xf6, 0x32, 0x3d, 0x8b, 0x18, 0x55 }; + const u8 x[16] = { + 0xab, 0xae, 0x2b, 0x71, 0xec, 0xb2, 0xff, 0xff, + 0x3e, 0x73, 0x77, 0xd1, 0x54, 0x84, 0xcb, 0xd5 }; + const u8 z = 0x00; + const u8 exp[16] = { + 0x2d, 0x87, 0x74, 0xa9, 0xbe, 0xa1, 0xed, 0xf1, + 0x1c, 0xbd, 0xa9, 0x07, 0xf1, 0x16, 0xc9, 0xf2 }; + u8 res[16]; + int err; + + err = smp_f4(tfm_cmac, u, v, x, z, res); + if (err) + return err; + + if (memcmp(res, exp, 16)) + return -EINVAL; + + return 0; +} + +static int __init test_f5(struct crypto_hash *tfm_cmac) +{ + const u8 w[32] = { + 0x98, 0xa6, 0xbf, 0x73, 0xf3, 0x34, 0x8d, 0x86, + 0xf1, 0x66, 0xf8, 0xb4, 
0x13, 0x6b, 0x79, 0x99, + 0x9b, 0x7d, 0x39, 0x0a, 0xa6, 0x10, 0x10, 0x34, + 0x05, 0xad, 0xc8, 0x57, 0xa3, 0x34, 0x02, 0xec }; + const u8 n1[16] = { + 0xab, 0xae, 0x2b, 0x71, 0xec, 0xb2, 0xff, 0xff, + 0x3e, 0x73, 0x77, 0xd1, 0x54, 0x84, 0xcb, 0xd5 }; + const u8 n2[16] = { + 0xcf, 0xc4, 0x3d, 0xff, 0xf7, 0x83, 0x65, 0x21, + 0x6e, 0x5f, 0xa7, 0x25, 0xcc, 0xe7, 0xe8, 0xa6 }; + const u8 a1[7] = { 0xce, 0xbf, 0x37, 0x37, 0x12, 0x56, 0x00 }; + const u8 a2[7] = { 0xc1, 0xcf, 0x2d, 0x70, 0x13, 0xa7, 0x00 }; + const u8 exp_ltk[16] = { + 0x38, 0x0a, 0x75, 0x94, 0xb5, 0x22, 0x05, 0x98, + 0x23, 0xcd, 0xd7, 0x69, 0x11, 0x79, 0x86, 0x69 }; + const u8 exp_mackey[16] = { + 0x20, 0x6e, 0x63, 0xce, 0x20, 0x6a, 0x3f, 0xfd, + 0x02, 0x4a, 0x08, 0xa1, 0x76, 0xf1, 0x65, 0x29 }; + u8 mackey[16], ltk[16]; + int err; + + err = smp_f5(tfm_cmac, w, n1, n2, a1, a2, mackey, ltk); + if (err) + return err; + + if (memcmp(mackey, exp_mackey, 16)) + return -EINVAL; + + if (memcmp(ltk, exp_ltk, 16)) + return -EINVAL; + + return 0; +} + +static int __init test_f6(struct crypto_hash *tfm_cmac) +{ + const u8 w[16] = { + 0x20, 0x6e, 0x63, 0xce, 0x20, 0x6a, 0x3f, 0xfd, + 0x02, 0x4a, 0x08, 0xa1, 0x76, 0xf1, 0x65, 0x29 }; + const u8 n1[16] = { + 0xab, 0xae, 0x2b, 0x71, 0xec, 0xb2, 0xff, 0xff, + 0x3e, 0x73, 0x77, 0xd1, 0x54, 0x84, 0xcb, 0xd5 }; + const u8 n2[16] = { + 0xcf, 0xc4, 0x3d, 0xff, 0xf7, 0x83, 0x65, 0x21, + 0x6e, 0x5f, 0xa7, 0x25, 0xcc, 0xe7, 0xe8, 0xa6 }; + const u8 r[16] = { + 0xc8, 0x0f, 0x2d, 0x0c, 0xd2, 0x42, 0xda, 0x08, + 0x54, 0xbb, 0x53, 0xb4, 0x3b, 0x34, 0xa3, 0x12 }; + const u8 io_cap[3] = { 0x02, 0x01, 0x01 }; + const u8 a1[7] = { 0xce, 0xbf, 0x37, 0x37, 0x12, 0x56, 0x00 }; + const u8 a2[7] = { 0xc1, 0xcf, 0x2d, 0x70, 0x13, 0xa7, 0x00 }; + const u8 exp[16] = { + 0x61, 0x8f, 0x95, 0xda, 0x09, 0x0b, 0x6c, 0xd2, + 0xc5, 0xe8, 0xd0, 0x9c, 0x98, 0x73, 0xc4, 0xe3 }; + u8 res[16]; + int err; + + err = smp_f6(tfm_cmac, w, n1, n2, r, io_cap, a1, a2, res); + if (err) + return err; + + if (memcmp(res, exp, 16)) + return -EINVAL; + + return 0; +} + +static int __init test_g2(struct crypto_hash *tfm_cmac) +{ + const u8 u[32] = { + 0xe6, 0x9d, 0x35, 0x0e, 0x48, 0x01, 0x03, 0xcc, + 0xdb, 0xfd, 0xf4, 0xac, 0x11, 0x91, 0xf4, 0xef, + 0xb9, 0xa5, 0xf9, 0xe9, 0xa7, 0x83, 0x2c, 0x5e, + 0x2c, 0xbe, 0x97, 0xf2, 0xd2, 0x03, 0xb0, 0x20 }; + const u8 v[32] = { + 0xfd, 0xc5, 0x7f, 0xf4, 0x49, 0xdd, 0x4f, 0x6b, + 0xfb, 0x7c, 0x9d, 0xf1, 0xc2, 0x9a, 0xcb, 0x59, + 0x2a, 0xe7, 0xd4, 0xee, 0xfb, 0xfc, 0x0a, 0x90, + 0x9a, 0xbb, 0xf6, 0x32, 0x3d, 0x8b, 0x18, 0x55 }; + const u8 x[16] = { + 0xab, 0xae, 0x2b, 0x71, 0xec, 0xb2, 0xff, 0xff, + 0x3e, 0x73, 0x77, 0xd1, 0x54, 0x84, 0xcb, 0xd5 }; + const u8 y[16] = { + 0xcf, 0xc4, 0x3d, 0xff, 0xf7, 0x83, 0x65, 0x21, + 0x6e, 0x5f, 0xa7, 0x25, 0xcc, 0xe7, 0xe8, 0xa6 }; + const u32 exp_val = 0x2f9ed5ba % 1000000; + u32 val; + int err; + + err = smp_g2(tfm_cmac, u, v, x, y, &val); + if (err) + return err; + + if (val != exp_val) + return -EINVAL; + + return 0; +} + +static int __init test_h6(struct crypto_hash *tfm_cmac) +{ + const u8 w[16] = { + 0x9b, 0x7d, 0x39, 0x0a, 0xa6, 0x10, 0x10, 0x34, + 0x05, 0xad, 0xc8, 0x57, 0xa3, 0x34, 0x02, 0xec }; + const u8 key_id[4] = { 0x72, 0x62, 0x65, 0x6c }; + const u8 exp[16] = { + 0x99, 0x63, 0xb1, 0x80, 0xe2, 0xa9, 0xd3, 0xe8, + 0x1c, 0xc9, 0x6d, 0xe7, 0x02, 0xe1, 0x9a, 0x2d }; + u8 res[16]; + int err; + + err = smp_h6(tfm_cmac, w, key_id, res); + if (err) + return err; + + if (memcmp(res, exp, 16)) + return -EINVAL; + + return 0; +} + +static int __init 
run_selftests(struct crypto_blkcipher *tfm_aes, + struct crypto_hash *tfm_cmac) +{ + ktime_t calltime, delta, rettime; + unsigned long long duration; + int err; + + calltime = ktime_get(); + + err = test_ah(tfm_aes); + if (err) { + BT_ERR("smp_ah test failed"); + return err; + } + + err = test_c1(tfm_aes); + if (err) { + BT_ERR("smp_c1 test failed"); + return err; + } + + err = test_s1(tfm_aes); + if (err) { + BT_ERR("smp_s1 test failed"); + return err; + } + + err = test_f4(tfm_cmac); + if (err) { + BT_ERR("smp_f4 test failed"); + return err; + } + + err = test_f5(tfm_cmac); + if (err) { + BT_ERR("smp_f5 test failed"); + return err; + } + + err = test_f6(tfm_cmac); + if (err) { + BT_ERR("smp_f6 test failed"); + return err; + } + + err = test_g2(tfm_cmac); + if (err) { + BT_ERR("smp_g2 test failed"); + return err; + } + + err = test_h6(tfm_cmac); + if (err) { + BT_ERR("smp_h6 test failed"); + return err; + } + + rettime = ktime_get(); + delta = ktime_sub(rettime, calltime); + duration = (unsigned long long) ktime_to_ns(delta) >> 10; + + BT_INFO("SMP test passed in %llu usecs", duration); + + return 0; +} + +int __init bt_selftest_smp(void) +{ + struct crypto_blkcipher *tfm_aes; + struct crypto_hash *tfm_cmac; + int err; + + tfm_aes = crypto_alloc_blkcipher("ecb(aes)", 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(tfm_aes)) { + BT_ERR("Unable to create ECB crypto context"); + return PTR_ERR(tfm_aes); + } + + tfm_cmac = crypto_alloc_hash("cmac(aes)", 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(tfm_cmac)) { + BT_ERR("Unable to create CMAC crypto context"); + crypto_free_blkcipher(tfm_aes); + return PTR_ERR(tfm_cmac); + } + + err = run_selftests(tfm_aes, tfm_cmac); + + crypto_free_hash(tfm_cmac); + crypto_free_blkcipher(tfm_aes); + + return err; +} + +#endif diff --git a/net/bluetooth/smp.h b/net/bluetooth/smp.h index 3296bf42ae80..60c5b73fcb4b 100644 --- a/net/bluetooth/smp.h +++ b/net/bluetooth/smp.h @@ -192,4 +192,17 @@ int smp_generate_rpa(struct hci_dev *hdev, const u8 irk[16], bdaddr_t *rpa); int smp_register(struct hci_dev *hdev); void smp_unregister(struct hci_dev *hdev); +#if IS_ENABLED(CONFIG_BT_SELFTEST_SMP) + +int bt_selftest_smp(void); + +#else + +static inline int bt_selftest_smp(void) +{ + return 0; +} + +#endif + #endif /* __SMP_H */ diff --git a/net/bridge/br.c b/net/bridge/br.c index 44425aff7cba..fb57ab6b24f9 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -19,6 +19,7 @@ #include <linux/llc.h> #include <net/llc.h> #include <net/stp.h> +#include <net/switchdev.h> #include "br_private.h" @@ -120,6 +121,48 @@ static struct notifier_block br_device_notifier = { .notifier_call = br_device_event }; +static int br_netdev_switch_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_switch_notifier_info_to_dev(ptr); + struct net_bridge_port *p; + struct net_bridge *br; + struct netdev_switch_notifier_fdb_info *fdb_info; + int err = NOTIFY_DONE; + + rtnl_lock(); + p = br_port_get_rtnl(dev); + if (!p) + goto out; + + br = p->br; + + switch (event) { + case NETDEV_SWITCH_FDB_ADD: + fdb_info = ptr; + err = br_fdb_external_learn_add(br, p, fdb_info->addr, + fdb_info->vid); + if (err) + err = notifier_from_errno(err); + break; + case NETDEV_SWITCH_FDB_DEL: + fdb_info = ptr; + err = br_fdb_external_learn_del(br, p, fdb_info->addr, + fdb_info->vid); + if (err) + err = notifier_from_errno(err); + break; + } + +out: + rtnl_unlock(); + return err; +} + +static struct notifier_block br_netdev_switch_notifier = { + .notifier_call = br_netdev_switch_event, 
+}; + static void __net_exit br_net_exit(struct net *net) { struct net_device *dev; @@ -169,10 +212,14 @@ static int __init br_init(void) if (err) goto err_out3; - err = br_netlink_init(); + err = register_netdev_switch_notifier(&br_netdev_switch_notifier); if (err) goto err_out4; + err = br_netlink_init(); + if (err) + goto err_out5; + brioctl_set(br_ioctl_deviceless_stub); #if IS_ENABLED(CONFIG_ATM_LANE) @@ -185,6 +232,8 @@ static int __init br_init(void) return 0; +err_out5: + unregister_netdev_switch_notifier(&br_netdev_switch_notifier); err_out4: unregister_netdevice_notifier(&br_device_notifier); err_out3: @@ -202,6 +251,7 @@ static void __exit br_deinit(void) { stp_proto_unregister(&br_stp_proto); br_netlink_fini(); + unregister_netdev_switch_notifier(&br_netdev_switch_notifier); unregister_netdevice_notifier(&br_device_notifier); brioctl_set(NULL); unregister_pernet_subsys(&br_net_ops); diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index cc36e59db7d7..e0670d7054f9 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -633,7 +633,8 @@ static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br, if (fdb->vlan_id && nla_put(skb, NDA_VLAN, sizeof(u16), &fdb->vlan_id)) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); @@ -686,6 +687,9 @@ int br_fdb_dump(struct sk_buff *skb, if (!(dev->priv_flags & IFF_EBRIDGE)) goto out; + if (!filter_dev) + idx = ndo_dflt_fdb_dump(skb, cb, dev, NULL, idx); + for (i = 0; i < BR_HASH_SIZE; i++) { struct net_bridge_fdb_entry *f; @@ -697,7 +701,7 @@ int br_fdb_dump(struct sk_buff *skb, (!f->dst || f->dst->dev != filter_dev)) { if (filter_dev != dev) goto skip; - /* !f->dst is a speacial case for bridge + /* !f->dst is a special case for bridge * It means the MAC belongs to the bridge * Therefore need a little more filtering * we only want to dump the !f->dst case @@ -705,6 +709,8 @@ int br_fdb_dump(struct sk_buff *skb, if (f->dst) goto skip; } + if (!filter_dev && f->dst) + goto skip; if (fdb_fill_info(skb, br, f, NETLINK_CB(cb->skb).portid, @@ -840,10 +846,9 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], /* VID was specified, so use it. */ err = __br_fdb_add(ndm, p, addr, nlh_flags, vid); } else { - if (!pv || bitmap_empty(pv->vlan_bitmap, VLAN_N_VID)) { - err = __br_fdb_add(ndm, p, addr, nlh_flags, 0); + err = __br_fdb_add(ndm, p, addr, nlh_flags, 0); + if (err || !pv) goto out; - } /* We have vlans configured on this port and user didn't * specify a VLAN. To be nice, add/update entry for every @@ -911,16 +916,15 @@ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], err = __br_fdb_delete(p, addr, vid); } else { - if (!pv || bitmap_empty(pv->vlan_bitmap, VLAN_N_VID)) { - err = __br_fdb_delete(p, addr, 0); + err = -ENOENT; + err &= __br_fdb_delete(p, addr, 0); + if (!pv) goto out; - } /* We have vlans configured on this port and user didn't * specify a VLAN. To be nice, add/update entry for every * vlan on this port. 
*/ - err = -ENOENT; for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) { err &= __br_fdb_delete(p, addr, vid); } @@ -985,26 +989,14 @@ void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p) } } -int br_fdb_external_learn_add(struct net_device *dev, +int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, const unsigned char *addr, u16 vid) { - struct net_bridge_port *p; - struct net_bridge *br; struct hlist_head *head; struct net_bridge_fdb_entry *fdb; int err = 0; - rtnl_lock(); - - p = br_port_get_rtnl(dev); - if (!p) { - pr_info("bridge: %s not a bridge port\n", dev->name); - err = -EINVAL; - goto err_rtnl_unlock; - } - - br = p->br; - + ASSERT_RTNL(); spin_lock_bh(&br->hash_lock); head = &br->hash[br_mac_hash(addr, vid)]; @@ -1029,33 +1021,18 @@ int br_fdb_external_learn_add(struct net_device *dev, err_unlock: spin_unlock_bh(&br->hash_lock); -err_rtnl_unlock: - rtnl_unlock(); return err; } -EXPORT_SYMBOL(br_fdb_external_learn_add); -int br_fdb_external_learn_del(struct net_device *dev, +int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p, const unsigned char *addr, u16 vid) { - struct net_bridge_port *p; - struct net_bridge *br; struct hlist_head *head; struct net_bridge_fdb_entry *fdb; int err = 0; - rtnl_lock(); - - p = br_port_get_rtnl(dev); - if (!p) { - pr_info("bridge: %s not a bridge port\n", dev->name); - err = -EINVAL; - goto err_rtnl_unlock; - } - - br = p->br; - + ASSERT_RTNL(); spin_lock_bh(&br->hash_lock); head = &br->hash[br_mac_hash(addr, vid)]; @@ -1066,9 +1043,6 @@ int br_fdb_external_learn_del(struct net_device *dev, err = -ENOENT; spin_unlock_bh(&br->hash_lock); -err_rtnl_unlock: - rtnl_unlock(); return err; } -EXPORT_SYMBOL(br_fdb_external_learn_del); diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index ed307db7a12b..b087d278c679 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -424,6 +424,7 @@ netdev_features_t br_features_recompute(struct net_bridge *br, features = netdev_increment_features(features, p->dev->features, mask); } + features = netdev_add_tso_features(features, mask); return features; } @@ -435,10 +436,16 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) int err = 0; bool changed_addr; - /* Don't allow bridging non-ethernet like devices */ + /* Don't allow bridging non-ethernet like devices, or DSA-enabled + * master network devices since the bridge layer rx_handler prevents + * the DSA fake ethertype handler to be invoked, so we do not strip off + * the DSA switch tag protocol header and the bridge layer just return + * RX_HANDLER_CONSUMED, stopping RX processing for these frames. 
+ */ if ((dev->flags & IFF_LOOPBACK) || dev->type != ARPHRD_ETHER || dev->addr_len != ETH_ALEN || - !is_valid_ether_addr(dev->dev_addr)) + !is_valid_ether_addr(dev->dev_addr) || + netdev_uses_dsa(dev)) return -EINVAL; /* No bridging of bridges */ diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 1f1de715197c..e2aa7be3a847 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -154,7 +154,8 @@ int br_handle_frame_finish(struct sk_buff *skb) dst = NULL; if (is_broadcast_ether_addr(dest)) { - if (p->flags & BR_PROXYARP && + if (IS_ENABLED(CONFIG_INET) && + p->flags & BR_PROXYARP && skb->protocol == htons(ETH_P_ARP)) br_do_proxy_arp(skb, br, vid); diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 5df05269d17a..409608960899 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -190,7 +190,8 @@ static int nlmsg_populate_mdb_fill(struct sk_buff *skb, nla_nest_end(skb, nest2); nla_nest_end(skb, nest); - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; end: nla_nest_end(skb, nest); @@ -276,7 +277,7 @@ static int br_mdb_parse(struct sk_buff *skb, struct nlmsghdr *nlh, struct net_device *dev; int err; - err = nlmsg_parse(nlh, sizeof(*bpm), tb, MDBA_SET_ENTRY, NULL); + err = nlmsg_parse(nlh, sizeof(*bpm), tb, MDBA_SET_ENTRY_MAX, NULL); if (err < 0) return err; diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index c190d22b6b3d..0ee453fad3de 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -66,17 +66,17 @@ static int brnf_pass_vlan_indev __read_mostly = 0; #endif #define IS_IP(skb) \ - (!vlan_tx_tag_present(skb) && skb->protocol == htons(ETH_P_IP)) + (!skb_vlan_tag_present(skb) && skb->protocol == htons(ETH_P_IP)) #define IS_IPV6(skb) \ - (!vlan_tx_tag_present(skb) && skb->protocol == htons(ETH_P_IPV6)) + (!skb_vlan_tag_present(skb) && skb->protocol == htons(ETH_P_IPV6)) #define IS_ARP(skb) \ - (!vlan_tx_tag_present(skb) && skb->protocol == htons(ETH_P_ARP)) + (!skb_vlan_tag_present(skb) && skb->protocol == htons(ETH_P_ARP)) static inline __be16 vlan_proto(const struct sk_buff *skb) { - if (vlan_tx_tag_present(skb)) + if (skb_vlan_tag_present(skb)) return skb->protocol; else if (skb->protocol == htons(ETH_P_8021Q)) return vlan_eth_hdr(skb)->h_vlan_encapsulated_proto; @@ -436,11 +436,11 @@ static struct net_device *brnf_get_logical_dev(struct sk_buff *skb, const struct struct net_device *vlan, *br; br = bridge_parent(dev); - if (brnf_pass_vlan_indev == 0 || !vlan_tx_tag_present(skb)) + if (brnf_pass_vlan_indev == 0 || !skb_vlan_tag_present(skb)) return br; vlan = __vlan_find_dev_deep_rcu(br, skb->vlan_proto, - vlan_tx_tag_get(skb) & VLAN_VID_MASK); + skb_vlan_tag_get(skb) & VLAN_VID_MASK); return vlan ? 
vlan : br; } @@ -987,15 +987,12 @@ static int __init br_netfilter_init(void) if (brnf_sysctl_header == NULL) { printk(KERN_WARNING "br_netfilter: can't register to sysctl.\n"); - ret = -ENOMEM; - goto err1; + nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops)); + return -ENOMEM; } #endif printk(KERN_NOTICE "Bridge firewalling registered\n"); return 0; -err1: - nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops)); - return ret; } static void __exit br_netfilter_fini(void) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 9f5eb55a4d3a..4fbcea0e7ecb 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -16,6 +16,7 @@ #include <net/rtnetlink.h> #include <net/net_namespace.h> #include <net/sock.h> +#include <net/switchdev.h> #include <uapi/linux/if_bridge.h> #include "br_private.h" @@ -67,6 +68,120 @@ static int br_port_fill_attrs(struct sk_buff *skb, return 0; } +static int br_fill_ifvlaninfo_range(struct sk_buff *skb, u16 vid_start, + u16 vid_end, u16 flags) +{ + struct bridge_vlan_info vinfo; + + if ((vid_end - vid_start) > 0) { + /* add range to skb */ + vinfo.vid = vid_start; + vinfo.flags = flags | BRIDGE_VLAN_INFO_RANGE_BEGIN; + if (nla_put(skb, IFLA_BRIDGE_VLAN_INFO, + sizeof(vinfo), &vinfo)) + goto nla_put_failure; + + vinfo.flags &= ~BRIDGE_VLAN_INFO_RANGE_BEGIN; + + vinfo.vid = vid_end; + vinfo.flags = flags | BRIDGE_VLAN_INFO_RANGE_END; + if (nla_put(skb, IFLA_BRIDGE_VLAN_INFO, + sizeof(vinfo), &vinfo)) + goto nla_put_failure; + } else { + vinfo.vid = vid_start; + vinfo.flags = flags; + if (nla_put(skb, IFLA_BRIDGE_VLAN_INFO, + sizeof(vinfo), &vinfo)) + goto nla_put_failure; + } + + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +static int br_fill_ifvlaninfo_compressed(struct sk_buff *skb, + const struct net_port_vlans *pv) +{ + u16 vid_range_start = 0, vid_range_end = 0; + u16 vid_range_flags = 0; + u16 pvid, vid, flags; + int err = 0; + + /* Pack IFLA_BRIDGE_VLAN_INFO's for every vlan + * and mark vlan info with begin and end flags + * if vlaninfo represents a range + */ + pvid = br_get_pvid(pv); + for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) { + flags = 0; + if (vid == pvid) + flags |= BRIDGE_VLAN_INFO_PVID; + + if (test_bit(vid, pv->untagged_bitmap)) + flags |= BRIDGE_VLAN_INFO_UNTAGGED; + + if (vid_range_start == 0) { + goto initvars; + } else if ((vid - vid_range_end) == 1 && + flags == vid_range_flags) { + vid_range_end = vid; + continue; + } else { + err = br_fill_ifvlaninfo_range(skb, vid_range_start, + vid_range_end, + vid_range_flags); + if (err) + return err; + } + +initvars: + vid_range_start = vid; + vid_range_end = vid; + vid_range_flags = flags; + } + + if (vid_range_start != 0) { + /* Call it once more to send any left over vlans */ + err = br_fill_ifvlaninfo_range(skb, vid_range_start, + vid_range_end, + vid_range_flags); + if (err) + return err; + } + + return 0; +} + +static int br_fill_ifvlaninfo(struct sk_buff *skb, + const struct net_port_vlans *pv) +{ + struct bridge_vlan_info vinfo; + u16 pvid, vid; + + pvid = br_get_pvid(pv); + for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) { + vinfo.vid = vid; + vinfo.flags = 0; + if (vid == pvid) + vinfo.flags |= BRIDGE_VLAN_INFO_PVID; + + if (test_bit(vid, pv->untagged_bitmap)) + vinfo.flags |= BRIDGE_VLAN_INFO_UNTAGGED; + + if (nla_put(skb, IFLA_BRIDGE_VLAN_INFO, + sizeof(vinfo), &vinfo)) + goto nla_put_failure; + } + + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + /* * Create one netlink message for one interface * Contains port and master info as well 
as carrier and bridge state. @@ -121,12 +236,11 @@ static int br_fill_ifinfo(struct sk_buff *skb, } /* Check if the VID information is requested */ - if (filter_mask & RTEXT_FILTER_BRVLAN) { - struct nlattr *af; + if ((filter_mask & RTEXT_FILTER_BRVLAN) || + (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)) { const struct net_port_vlans *pv; - struct bridge_vlan_info vinfo; - u16 vid; - u16 pvid; + struct nlattr *af; + int err; if (port) pv = nbp_get_vlan_info(port); @@ -140,26 +254,18 @@ static int br_fill_ifinfo(struct sk_buff *skb, if (!af) goto nla_put_failure; - pvid = br_get_pvid(pv); - for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) { - vinfo.vid = vid; - vinfo.flags = 0; - if (vid == pvid) - vinfo.flags |= BRIDGE_VLAN_INFO_PVID; - - if (test_bit(vid, pv->untagged_bitmap)) - vinfo.flags |= BRIDGE_VLAN_INFO_UNTAGGED; - - if (nla_put(skb, IFLA_BRIDGE_VLAN_INFO, - sizeof(vinfo), &vinfo)) - goto nla_put_failure; - } - + if (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED) + err = br_fill_ifvlaninfo_compressed(skb, pv); + else + err = br_fill_ifvlaninfo(skb, pv); + if (err) + goto nla_put_failure; nla_nest_end(skb, af); } done: - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); @@ -206,69 +312,99 @@ errout: int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, u32 filter_mask) { - int err = 0; struct net_bridge_port *port = br_port_get_rtnl(dev); - if (!port && !(filter_mask & RTEXT_FILTER_BRVLAN)) - goto out; + if (!port && !(filter_mask & RTEXT_FILTER_BRVLAN) && + !(filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)) + return 0; - err = br_fill_ifinfo(skb, port, pid, seq, RTM_NEWLINK, NLM_F_MULTI, - filter_mask, dev); -out: - return err; + return br_fill_ifinfo(skb, port, pid, seq, RTM_NEWLINK, NLM_F_MULTI, + filter_mask, dev); } -static const struct nla_policy ifla_br_policy[IFLA_MAX+1] = { - [IFLA_BRIDGE_FLAGS] = { .type = NLA_U16 }, - [IFLA_BRIDGE_MODE] = { .type = NLA_U16 }, - [IFLA_BRIDGE_VLAN_INFO] = { .type = NLA_BINARY, - .len = sizeof(struct bridge_vlan_info), }, -}; +static int br_vlan_info(struct net_bridge *br, struct net_bridge_port *p, + int cmd, struct bridge_vlan_info *vinfo) +{ + int err = 0; + + switch (cmd) { + case RTM_SETLINK: + if (p) { + err = nbp_vlan_add(p, vinfo->vid, vinfo->flags); + if (err) + break; + + if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER) + err = br_vlan_add(p->br, vinfo->vid, + vinfo->flags); + } else { + err = br_vlan_add(br, vinfo->vid, vinfo->flags); + } + break; + + case RTM_DELLINK: + if (p) { + nbp_vlan_delete(p, vinfo->vid); + if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER) + br_vlan_delete(p->br, vinfo->vid); + } else { + br_vlan_delete(br, vinfo->vid); + } + break; + } + + return err; +} static int br_afspec(struct net_bridge *br, struct net_bridge_port *p, struct nlattr *af_spec, int cmd) { - struct nlattr *tb[IFLA_BRIDGE_MAX+1]; + struct bridge_vlan_info *vinfo_start = NULL; + struct bridge_vlan_info *vinfo = NULL; + struct nlattr *attr; int err = 0; + int rem; - err = nla_parse_nested(tb, IFLA_BRIDGE_MAX, af_spec, ifla_br_policy); - if (err) - return err; + nla_for_each_nested(attr, af_spec, rem) { + if (nla_type(attr) != IFLA_BRIDGE_VLAN_INFO) + continue; + if (nla_len(attr) != sizeof(struct bridge_vlan_info)) + return -EINVAL; + vinfo = nla_data(attr); + if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) { + if (vinfo_start) + return -EINVAL; + vinfo_start = vinfo; + continue; + } - if (tb[IFLA_BRIDGE_VLAN_INFO]) { - struct bridge_vlan_info *vinfo; + if (vinfo_start) { 
+ struct bridge_vlan_info tmp_vinfo; + int v; - vinfo = nla_data(tb[IFLA_BRIDGE_VLAN_INFO]); + if (!(vinfo->flags & BRIDGE_VLAN_INFO_RANGE_END)) + return -EINVAL; - if (!vinfo->vid || vinfo->vid >= VLAN_VID_MASK) - return -EINVAL; + if (vinfo->vid <= vinfo_start->vid) + return -EINVAL; - switch (cmd) { - case RTM_SETLINK: - if (p) { - err = nbp_vlan_add(p, vinfo->vid, vinfo->flags); + memcpy(&tmp_vinfo, vinfo_start, + sizeof(struct bridge_vlan_info)); + + for (v = vinfo_start->vid; v <= vinfo->vid; v++) { + tmp_vinfo.vid = v; + err = br_vlan_info(br, p, cmd, &tmp_vinfo); if (err) break; - - if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER) - err = br_vlan_add(p->br, vinfo->vid, - vinfo->flags); - } else - err = br_vlan_add(br, vinfo->vid, vinfo->flags); - - break; - - case RTM_DELLINK: - if (p) { - nbp_vlan_delete(p, vinfo->vid); - if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER) - br_vlan_delete(p->br, vinfo->vid); - } else - br_vlan_delete(br, vinfo->vid); - break; + } + vinfo_start = NULL; + } else { + err = br_vlan_info(br, p, cmd, vinfo); } + if (err) + break; } return err; @@ -359,13 +495,13 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[]) } /* Change state and parameters on port. */ -int br_setlink(struct net_device *dev, struct nlmsghdr *nlh) +int br_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags) { struct nlattr *protinfo; struct nlattr *afspec; struct net_bridge_port *p; struct nlattr *tb[IFLA_BRPORT_MAX + 1]; - int err = 0; + int err = 0, ret_offload = 0; protinfo = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_PROTINFO); afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); @@ -407,19 +543,28 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh) afspec, RTM_SETLINK); } + if (p && !(flags & BRIDGE_FLAGS_SELF)) { + /* set bridge attributes in hardware if supported + */ + ret_offload = netdev_switch_port_bridge_setlink(dev, nlh, + flags); + if (ret_offload && ret_offload != -EOPNOTSUPP) + br_warn(p->br, "error setting attrs on port %u(%s)\n", + (unsigned int)p->port_no, p->dev->name); + } + if (err == 0) br_ifinfo_notify(RTM_NEWLINK, p); - out: return err; } /* Delete port information */ -int br_dellink(struct net_device *dev, struct nlmsghdr *nlh) +int br_dellink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags) { struct nlattr *afspec; struct net_bridge_port *p; - int err; + int err = 0, ret_offload = 0; afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); if (!afspec) @@ -432,6 +577,21 @@ int br_dellink(struct net_device *dev, struct nlmsghdr *nlh) err = br_afspec((struct net_bridge *)netdev_priv(dev), p, afspec, RTM_DELLINK); + if (err == 0) + /* Send RTM_NEWLINK because userspace + * expects RTM_NEWLINK for vlan dels + */ + br_ifinfo_notify(RTM_NEWLINK, p); + + if (p && !(flags & BRIDGE_FLAGS_SELF)) { + /* del bridge attributes in hardware + */ + ret_offload = netdev_switch_port_bridge_dellink(dev, nlh, + flags); + if (ret_offload && ret_offload != -EOPNOTSUPP) + br_warn(p->br, "error deleting attrs on port %u (%s)\n", + (unsigned int)p->port_no, p->dev->name); + } return err; } @@ -561,7 +721,7 @@ static size_t br_get_link_af_size(const struct net_device *dev) return pv->num_vlans * nla_total_size(sizeof(struct bridge_vlan_info)); } -static struct rtnl_af_ops br_af_ops = { +static struct rtnl_af_ops br_af_ops __read_mostly = { .family = AF_BRIDGE, .get_link_af_size = br_get_link_af_size, }; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index aea3d1339b3f..de0919975a25 
100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -402,6 +402,10 @@ int br_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, struct net_device *fdev, int idx); int br_fdb_sync_static(struct net_bridge *br, struct net_bridge_port *p); void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p); +int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, + const unsigned char *addr, u16 vid); +int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p, + const unsigned char *addr, u16 vid); /* br_forward.c */ void br_deliver(const struct net_bridge_port *to, struct sk_buff *skb); @@ -628,8 +632,8 @@ static inline int br_vlan_get_tag(const struct sk_buff *skb, u16 *vid) { int err = 0; - if (vlan_tx_tag_present(skb)) - *vid = vlan_tx_tag_get(skb) & VLAN_VID_MASK; + if (skb_vlan_tag_present(skb)) + *vid = skb_vlan_tag_get(skb) & VLAN_VID_MASK; else { *vid = 0; err = -EINVAL; @@ -815,8 +819,8 @@ extern struct rtnl_link_ops br_link_ops; int br_netlink_init(void); void br_netlink_fini(void); void br_ifinfo_notify(int event, struct net_bridge_port *port); -int br_setlink(struct net_device *dev, struct nlmsghdr *nlmsg); -int br_dellink(struct net_device *dev, struct nlmsghdr *nlmsg); +int br_setlink(struct net_device *dev, struct nlmsghdr *nlmsg, u16 flags); +int br_dellink(struct net_device *dev, struct nlmsghdr *nlmsg, u16 flags); int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, u32 filter_mask); diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 97b8ddf57363..13013fe8db24 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -187,7 +187,7 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v, * sent from vlan device on the bridge device, it does not have * HW accelerated vlan tag. 
*/ - if (unlikely(!vlan_tx_tag_present(skb) && + if (unlikely(!skb_vlan_tag_present(skb) && skb->protocol == proto)) { skb = skb_vlan_untag(skb); if (unlikely(!skb)) @@ -200,7 +200,7 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v, /* Protocol-mismatch, empty out vlan_tci for new tag */ skb_push(skb, ETH_HLEN); skb = vlan_insert_tag_set_proto(skb, skb->vlan_proto, - vlan_tx_tag_get(skb)); + skb_vlan_tag_get(skb)); if (unlikely(!skb)) return false; diff --git a/net/bridge/netfilter/ebt_vlan.c b/net/bridge/netfilter/ebt_vlan.c index 8d3f8c7651f0..618568888128 100644 --- a/net/bridge/netfilter/ebt_vlan.c +++ b/net/bridge/netfilter/ebt_vlan.c @@ -45,8 +45,8 @@ ebt_vlan_mt(const struct sk_buff *skb, struct xt_action_param *par) /* VLAN encapsulated Type/Length field, given from orig frame */ __be16 encap; - if (vlan_tx_tag_present(skb)) { - TCI = vlan_tx_tag_get(skb); + if (skb_vlan_tag_present(skb)) { + TCI = skb_vlan_tag_get(skb); encap = skb->protocol; } else { const struct vlan_hdr *fp; diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index d9a8c05d995d..91180a7fc943 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -133,7 +133,7 @@ ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb, __be16 ethproto; int verdict, i; - if (vlan_tx_tag_present(skb)) + if (skb_vlan_tag_present(skb)) ethproto = htons(ETH_P_8021Q); else ethproto = h->h_proto; diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c index b0330aecbf97..3244aead0926 100644 --- a/net/bridge/netfilter/nft_reject_bridge.c +++ b/net/bridge/netfilter/nft_reject_bridge.c @@ -265,22 +265,12 @@ out: data[NFT_REG_VERDICT].verdict = NF_DROP; } -static int nft_reject_bridge_validate_hooks(const struct nft_chain *chain) +static int nft_reject_bridge_validate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data) { - struct nft_base_chain *basechain; - - if (chain->flags & NFT_BASE_CHAIN) { - basechain = nft_base_chain(chain); - - switch (basechain->ops[0].hooknum) { - case NF_BR_PRE_ROUTING: - case NF_BR_LOCAL_IN: - break; - default: - return -EOPNOTSUPP; - } - } - return 0; + return nft_chain_validate_hooks(ctx->chain, (1 << NF_BR_PRE_ROUTING) | + (1 << NF_BR_LOCAL_IN)); } static int nft_reject_bridge_init(const struct nft_ctx *ctx, @@ -290,7 +280,7 @@ static int nft_reject_bridge_init(const struct nft_ctx *ctx, struct nft_reject *priv = nft_expr_priv(expr); int icmp_code, err; - err = nft_reject_bridge_validate_hooks(ctx->chain); + err = nft_reject_bridge_validate(ctx, expr, NULL); if (err < 0) return err; @@ -341,13 +331,6 @@ nla_put_failure: return -1; } -static int nft_reject_bridge_validate(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nft_data **data) -{ - return nft_reject_bridge_validate_hooks(ctx->chain); -} - static struct nft_expr_type nft_reject_bridge_type; static const struct nft_expr_ops nft_reject_bridge_ops = { .type = &nft_reject_bridge_type, diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c index 4589ff67bfa9..67a4a36febd1 100644 --- a/net/caif/chnl_net.c +++ b/net/caif/chnl_net.c @@ -470,7 +470,6 @@ static int ipcaif_newlink(struct net *src_net, struct net_device *dev, ASSERT_RTNL(); caifdev = netdev_priv(dev); caif_netlink_parms(data, &caifdev->conn_req); - dev_net_set(caifdev->netdev, src_net); ret = register_netdevice(dev); if (ret) diff --git a/net/can/gw.c b/net/can/gw.c index 295f62e62eb3..a6f448e18ea8 
100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -575,7 +575,8 @@ static int cgw_put_job(struct sk_buff *skb, struct cgw_job *gwj, int type, goto cancel; } - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; cancel: nlmsg_cancel(skb, nlh); diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c index 15845814a0f2..ba6eb17226da 100644 --- a/net/ceph/auth_x.c +++ b/net/ceph/auth_x.c @@ -676,7 +676,7 @@ static int calcu_signature(struct ceph_x_authorizer *au, int ret; char tmp_enc[40]; __le32 tmp[5] = { - 16u, msg->hdr.crc, msg->footer.front_crc, + cpu_to_le32(16), msg->hdr.crc, msg->footer.front_crc, msg->footer.middle_crc, msg->footer.data_crc, }; ret = ceph_x_encrypt(&au->session_key, &tmp, sizeof(tmp), diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index a83062ceeec9..f2148e22b148 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -717,7 +717,7 @@ static int get_poolop_reply_buf(const char *src, size_t src_len, if (src_len != sizeof(u32) + dst_len) return -EINVAL; - buf_len = le32_to_cpu(*(u32 *)src); + buf_len = le32_to_cpu(*(__le32 *)src); if (buf_len != dst_len) return -EINVAL; diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c index 555013034f7a..096d91447e06 100644 --- a/net/ceph/pagevec.c +++ b/net/ceph/pagevec.c @@ -23,17 +23,15 @@ struct page **ceph_get_direct_page_vector(const void __user *data, if (!pages) return ERR_PTR(-ENOMEM); - down_read(&current->mm->mmap_sem); while (got < num_pages) { - rc = get_user_pages(current, current->mm, + rc = get_user_pages_unlocked(current, current->mm, (unsigned long)data + ((unsigned long)got * PAGE_SIZE), - num_pages - got, write_page, 0, pages + got, NULL); + num_pages - got, write_page, 0, pages + got); if (rc < 0) break; BUG_ON(rc == 0); got += rc; } - up_read(&current->mm->mmap_sem); if (rc < 0) goto fail; return pages; diff --git a/net/core/Makefile b/net/core/Makefile index 235e6c50708d..fec0856dd6c0 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -2,7 +2,7 @@ # Makefile for the Linux networking core. # -obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \ +obj-y := sock.o request_sock.o skbuff.o datagram.o stream.o scm.o \ gen_stats.o gen_estimator.o net_namespace.o secure_seq.o flow_dissector.o obj-$(CONFIG_SYSCTL) += sysctl_net_core.o diff --git a/net/core/dev.c b/net/core/dev.c index f411c28d0a66..8f9710c62e20 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -371,9 +371,10 @@ static inline void netdev_set_addr_lockdep_class(struct net_device *dev) static inline struct list_head *ptype_head(const struct packet_type *pt) { if (pt->type == htons(ETH_P_ALL)) - return &ptype_all; + return pt->dev ? &pt->dev->ptype_all : &ptype_all; else - return &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK]; + return pt->dev ?
&pt->dev->ptype_specific : + &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK]; } /** @@ -1694,6 +1695,7 @@ int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb) skb_scrub_packet(skb, true); skb->protocol = eth_type_trans(skb, dev); + skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); return 0; } @@ -1733,6 +1735,23 @@ static inline int deliver_skb(struct sk_buff *skb, return pt_prev->func(skb, skb->dev, pt_prev, orig_dev); } +static inline void deliver_ptype_list_skb(struct sk_buff *skb, + struct packet_type **pt, + struct net_device *dev, __be16 type, + struct list_head *ptype_list) +{ + struct packet_type *ptype, *pt_prev = *pt; + + list_for_each_entry_rcu(ptype, ptype_list, list) { + if (ptype->type != type) + continue; + if (pt_prev) + deliver_skb(skb, pt_prev, dev); + pt_prev = ptype; + } + *pt = pt_prev; +} + static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb) { if (!ptype->af_packet_priv || !skb->sk) @@ -1756,45 +1775,54 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) struct packet_type *ptype; struct sk_buff *skb2 = NULL; struct packet_type *pt_prev = NULL; + struct list_head *ptype_list = &ptype_all; rcu_read_lock(); - list_for_each_entry_rcu(ptype, &ptype_all, list) { +again: + list_for_each_entry_rcu(ptype, ptype_list, list) { /* Never send packets back to the socket * they originated from - MvS (miquels@drinkel.ow.org) */ - if ((ptype->dev == dev || !ptype->dev) && - (!skb_loop_sk(ptype, skb))) { - if (pt_prev) { - deliver_skb(skb2, pt_prev, skb->dev); - pt_prev = ptype; - continue; - } + if (skb_loop_sk(ptype, skb)) + continue; - skb2 = skb_clone(skb, GFP_ATOMIC); - if (!skb2) - break; + if (pt_prev) { + deliver_skb(skb2, pt_prev, skb->dev); + pt_prev = ptype; + continue; + } - net_timestamp_set(skb2); + /* need to clone skb, done only once */ + skb2 = skb_clone(skb, GFP_ATOMIC); + if (!skb2) + goto out_unlock; - /* skb->nh should be correctly - set by sender, so that the second statement is - just protection against buggy protocols. - */ - skb_reset_mac_header(skb2); - - if (skb_network_header(skb2) < skb2->data || - skb_network_header(skb2) > skb_tail_pointer(skb2)) { - net_crit_ratelimited("protocol %04x is buggy, dev %s\n", - ntohs(skb2->protocol), - dev->name); - skb_reset_network_header(skb2); - } + net_timestamp_set(skb2); - skb2->transport_header = skb2->network_header; - skb2->pkt_type = PACKET_OUTGOING; - pt_prev = ptype; + /* skb->nh should be correctly + * set by sender, so that the second statement is + * just protection against buggy protocols. + */ + skb_reset_mac_header(skb2); + + if (skb_network_header(skb2) < skb2->data || + skb_network_header(skb2) > skb_tail_pointer(skb2)) { + net_crit_ratelimited("protocol %04x is buggy, dev %s\n", + ntohs(skb2->protocol), + dev->name); + skb_reset_network_header(skb2); } + + skb2->transport_header = skb2->network_header; + skb2->pkt_type = PACKET_OUTGOING; + pt_prev = ptype; } + + if (ptype_list == &ptype_all) { + ptype_list = &dev->ptype_all; + goto again; + } +out_unlock: if (pt_prev) pt_prev->func(skb2, skb->dev, pt_prev, skb->dev); rcu_read_unlock(); @@ -2351,7 +2379,6 @@ EXPORT_SYMBOL(skb_checksum_help); __be16 skb_network_protocol(struct sk_buff *skb, int *depth) { - unsigned int vlan_depth = skb->mac_len; __be16 type = skb->protocol; /* Tunnel gso handlers can set protocol to ethernet. 
*/ @@ -2365,35 +2392,7 @@ __be16 skb_network_protocol(struct sk_buff *skb, int *depth) type = eth->h_proto; } - /* if skb->protocol is 802.1Q/AD then the header should already be - * present at mac_len - VLAN_HLEN (if mac_len > 0), or at - * ETH_HLEN otherwise - */ - if (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) { - if (vlan_depth) { - if (WARN_ON(vlan_depth < VLAN_HLEN)) - return 0; - vlan_depth -= VLAN_HLEN; - } else { - vlan_depth = ETH_HLEN; - } - do { - struct vlan_hdr *vh; - - if (unlikely(!pskb_may_pull(skb, - vlan_depth + VLAN_HLEN))) - return 0; - - vh = (struct vlan_hdr *)(skb->data + vlan_depth); - type = vh->h_vlan_encapsulated_proto; - vlan_depth += VLAN_HLEN; - } while (type == htons(ETH_P_8021Q) || - type == htons(ETH_P_8021AD)); - } - - *depth = vlan_depth; - - return type; + return __vlan_get_protocol(skb, type, depth); } /** @@ -2522,7 +2521,7 @@ static int illegal_highdma(struct net_device *dev, struct sk_buff *skb) /* If MPLS offload request, verify we are testing hardware MPLS features * instead of standard features for the netdev. */ -#ifdef CONFIG_NET_MPLS_GSO +#if IS_ENABLED(CONFIG_NET_MPLS_GSO) static netdev_features_t net_mpls_features(struct sk_buff *skb, netdev_features_t features, __be16 type) @@ -2562,7 +2561,7 @@ static netdev_features_t harmonize_features(struct sk_buff *skb, netdev_features_t netif_skb_features(struct sk_buff *skb) { - const struct net_device *dev = skb->dev; + struct net_device *dev = skb->dev; netdev_features_t features = dev->features; u16 gso_segs = skb_shinfo(skb)->gso_segs; __be16 protocol = skb->protocol; @@ -2570,11 +2569,21 @@ netdev_features_t netif_skb_features(struct sk_buff *skb) if (gso_segs > dev->gso_max_segs || gso_segs < dev->gso_min_segs) features &= ~NETIF_F_GSO_MASK; - if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) { - struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; - protocol = veh->h_vlan_encapsulated_proto; - } else if (!vlan_tx_tag_present(skb)) { - return harmonize_features(skb, features); + /* If encapsulation offload request, verify we are testing + * hardware encapsulation features instead of standard + * features for the netdev + */ + if (skb->encapsulation) + features &= dev->hw_enc_features; + + if (!skb_vlan_tag_present(skb)) { + if (unlikely(protocol == htons(ETH_P_8021Q) || + protocol == htons(ETH_P_8021AD))) { + struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; + protocol = veh->h_vlan_encapsulated_proto; + } else { + goto finalize; + } } features = netdev_intersect_features(features, @@ -2591,6 +2600,11 @@ netdev_features_t netif_skb_features(struct sk_buff *skb) NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX); +finalize: + if (dev->netdev_ops->ndo_features_check) + features &= dev->netdev_ops->ndo_features_check(skb, dev, + features); + return harmonize_features(skb, features); } EXPORT_SYMBOL(netif_skb_features); @@ -2601,7 +2615,7 @@ static int xmit_one(struct sk_buff *skb, struct net_device *dev, unsigned int len; int rc; - if (!list_empty(&ptype_all)) + if (!list_empty(&ptype_all) || !list_empty(&dev->ptype_all)) dev_queue_xmit_nit(skb, dev); len = skb->len; @@ -2643,7 +2657,7 @@ out: static struct sk_buff *validate_xmit_vlan(struct sk_buff *skb, netdev_features_t features) { - if (vlan_tx_tag_present(skb) && + if (skb_vlan_tag_present(skb) && !vlan_hw_offload_capable(features, skb->vlan_proto)) skb = __vlan_hwaccel_push_inside(skb); return skb; @@ -2661,19 +2675,12 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct 
net_device if (unlikely(!skb)) goto out_null; - /* If encapsulation offload request, verify we are testing - * hardware encapsulation features instead of standard - * features for the netdev - */ - if (skb->encapsulation) - features &= dev->hw_enc_features; - if (netif_needs_gso(dev, skb, features)) { struct sk_buff *segs; segs = skb_gso_segment(skb, features); if (IS_ERR(segs)) { - segs = NULL; + goto out_kfree_skb; } else if (segs) { consume_skb(skb); skb = segs; @@ -3023,6 +3030,8 @@ static inline void ____napi_schedule(struct softnet_data *sd, /* One global table that all flow-based protocols share. */ struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly; EXPORT_SYMBOL(rps_sock_flow_table); +u32 rps_cpu_mask __read_mostly; +EXPORT_SYMBOL(rps_cpu_mask); struct static_key rps_needed __read_mostly; @@ -3079,16 +3088,17 @@ set_rps_cpu(struct net_device *dev, struct sk_buff *skb, static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, struct rps_dev_flow **rflowp) { - struct netdev_rx_queue *rxqueue; - struct rps_map *map; + const struct rps_sock_flow_table *sock_flow_table; + struct netdev_rx_queue *rxqueue = dev->_rx; struct rps_dev_flow_table *flow_table; - struct rps_sock_flow_table *sock_flow_table; + struct rps_map *map; int cpu = -1; - u16 tcpu; + u32 tcpu; u32 hash; if (skb_rx_queue_recorded(skb)) { u16 index = skb_get_rx_queue(skb); + if (unlikely(index >= dev->real_num_rx_queues)) { WARN_ONCE(dev->real_num_rx_queues > 1, "%s received packet on queue %u, but number " @@ -3096,39 +3106,40 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, dev->name, index, dev->real_num_rx_queues); goto done; } - rxqueue = dev->_rx + index; - } else - rxqueue = dev->_rx; + rxqueue += index; + } + /* Avoid computing hash if RFS/RPS is not active for this rxqueue */ + + flow_table = rcu_dereference(rxqueue->rps_flow_table); map = rcu_dereference(rxqueue->rps_map); - if (map) { - if (map->len == 1 && - !rcu_access_pointer(rxqueue->rps_flow_table)) { - tcpu = map->cpus[0]; - if (cpu_online(tcpu)) - cpu = tcpu; - goto done; - } - } else if (!rcu_access_pointer(rxqueue->rps_flow_table)) { + if (!flow_table && !map) goto done; - } skb_reset_network_header(skb); hash = skb_get_hash(skb); if (!hash) goto done; - flow_table = rcu_dereference(rxqueue->rps_flow_table); sock_flow_table = rcu_dereference(rps_sock_flow_table); if (flow_table && sock_flow_table) { - u16 next_cpu; struct rps_dev_flow *rflow; + u32 next_cpu; + u32 ident; + /* First check into global flow table if there is a match */ + ident = sock_flow_table->ents[hash & sock_flow_table->mask]; + if ((ident ^ hash) & ~rps_cpu_mask) + goto try_rps; + + next_cpu = ident & rps_cpu_mask; + + /* OK, now we know there is a match, + * we can look at the local (per receive queue) flow table + */ rflow = &flow_table->flows[hash & flow_table->mask]; tcpu = rflow->cpu; - next_cpu = sock_flow_table->ents[hash & sock_flow_table->mask]; - /* * If the desired CPU (where last recvmsg was done) is * different from current CPU (one in the rx-queue flow @@ -3155,6 +3166,8 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, } } +try_rps: + if (map) { tcpu = map->cpus[reciprocal_scale(hash, map->len)]; if (cpu_online(tcpu)) { @@ -3606,7 +3619,6 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc) struct packet_type *ptype, *pt_prev; rx_handler_func_t *rx_handler; struct net_device *orig_dev; - struct net_device *null_or_dev; bool deliver_exact = false; int ret = NET_RX_DROP; __be16 
type; @@ -3649,11 +3661,15 @@ another_round: goto skip_taps; list_for_each_entry_rcu(ptype, &ptype_all, list) { - if (!ptype->dev || ptype->dev == skb->dev) { - if (pt_prev) - ret = deliver_skb(skb, pt_prev, orig_dev); - pt_prev = ptype; - } + if (pt_prev) + ret = deliver_skb(skb, pt_prev, orig_dev); + pt_prev = ptype; + } + + list_for_each_entry_rcu(ptype, &skb->dev->ptype_all, list) { + if (pt_prev) + ret = deliver_skb(skb, pt_prev, orig_dev); + pt_prev = ptype; } skip_taps: @@ -3667,7 +3683,7 @@ ncls: if (pfmemalloc && !skb_pfmemalloc_protocol(skb)) goto drop; - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { if (pt_prev) { ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = NULL; @@ -3699,8 +3715,8 @@ ncls: } } - if (unlikely(vlan_tx_tag_present(skb))) { - if (vlan_tx_tag_get_id(skb)) + if (unlikely(skb_vlan_tag_present(skb))) { + if (skb_vlan_tag_get_id(skb)) skb->pkt_type = PACKET_OTHERHOST; /* Note: we might in the future use prio bits * and set skb->priority like in vlan_do_receive() @@ -3709,19 +3725,21 @@ ncls: skb->vlan_tci = 0; } + type = skb->protocol; + /* deliver only exact match when indicated */ - null_or_dev = deliver_exact ? skb->dev : NULL; + if (likely(!deliver_exact)) { + deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type, + &ptype_base[ntohs(type) & + PTYPE_HASH_MASK]); + } - type = skb->protocol; - list_for_each_entry_rcu(ptype, - &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { - if (ptype->type == type && - (ptype->dev == null_or_dev || ptype->dev == skb->dev || - ptype->dev == orig_dev)) { - if (pt_prev) - ret = deliver_skb(skb, pt_prev, orig_dev); - pt_prev = ptype; - } + deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type, + &orig_dev->ptype_specific); + + if (unlikely(skb->dev != orig_dev)) { + deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type, + &skb->dev->ptype_specific); } if (pt_prev) { @@ -4006,6 +4024,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff NAPI_GRO_CB(skb)->flush = 0; NAPI_GRO_CB(skb)->free = 0; NAPI_GRO_CB(skb)->udp_mark = 0; + NAPI_GRO_CB(skb)->gro_remcsum_start = 0; /* Setup for GRO checksum validation */ switch (skb->ip_summed) { @@ -4557,6 +4576,68 @@ void netif_napi_del(struct napi_struct *napi) } EXPORT_SYMBOL(netif_napi_del); +static int napi_poll(struct napi_struct *n, struct list_head *repoll) +{ + void *have; + int work, weight; + + list_del_init(&n->poll_list); + + have = netpoll_poll_lock(n); + + weight = n->weight; + + /* This NAPI_STATE_SCHED test is for avoiding a race + * with netpoll's poll_napi(). Only the entity which + * obtains the lock and sees NAPI_STATE_SCHED set will + * actually make the ->poll() call. Therefore we avoid + * accidentally calling ->poll() when NAPI is not scheduled. + */ + work = 0; + if (test_bit(NAPI_STATE_SCHED, &n->state)) { + work = n->poll(n, weight); + trace_napi_poll(n); + } + + WARN_ON_ONCE(work > weight); + + if (likely(work < weight)) + goto out_unlock; + + /* Drivers must not modify the NAPI state if they + * consume the entire weight. In such cases this code + * still "owns" the NAPI instance and therefore can + * move the instance around on the list at-will. + */ + if (unlikely(napi_disable_pending(n))) { + napi_complete(n); + goto out_unlock; + } + + if (n->gro_list) { + /* flush too old packets + * If HZ < 1000, flush all packets. + */ + napi_gro_flush(n, HZ >= 1000); + } + + /* Some drivers may have called napi_schedule + * prior to exhausting their budget. 
+ */ + if (unlikely(!list_empty(&n->poll_list))) { + pr_warn_once("%s: Budget exhausted after napi rescheduled\n", + n->dev ? n->dev->name : "backlog"); + goto out_unlock; + } + + list_add_tail(&n->poll_list, repoll); + +out_unlock: + netpoll_poll_unlock(have); + + return work; +} + static void net_rx_action(struct softirq_action *h) { struct softnet_data *sd = this_cpu_ptr(&softnet_data); @@ -4564,74 +4645,34 @@ static void net_rx_action(struct softirq_action *h) int budget = netdev_budget; LIST_HEAD(list); LIST_HEAD(repoll); - void *have; local_irq_disable(); list_splice_init(&sd->poll_list, &list); local_irq_enable(); - while (!list_empty(&list)) { + for (;;) { struct napi_struct *n; - int work, weight; - - /* If softirq window is exhausted then punt. - * Allow this to run for 2 jiffies since which will allow - * an average latency of 1.5/HZ. - */ - if (unlikely(budget <= 0 || time_after_eq(jiffies, time_limit))) - goto softnet_break; - - n = list_first_entry(&list, struct napi_struct, poll_list); - list_del_init(&n->poll_list); - - have = netpoll_poll_lock(n); - - weight = n->weight; - - /* This NAPI_STATE_SCHED test is for avoiding a race - * with netpoll's poll_napi(). Only the entity which - * obtains the lock and sees NAPI_STATE_SCHED set will - * actually make the ->poll() call. Therefore we avoid - * accidentally calling ->poll() when NAPI is not scheduled. - */ - work = 0; - if (test_bit(NAPI_STATE_SCHED, &n->state)) { - work = n->poll(n, weight); - trace_napi_poll(n); + if (list_empty(&list)) { + if (!sd_has_rps_ipi_waiting(sd) && list_empty(&repoll)) + return; + break; } - WARN_ON_ONCE(work > weight); - - budget -= work; + n = list_first_entry(&list, struct napi_struct, poll_list); + budget -= napi_poll(n, &repoll); - /* Drivers must not modify the NAPI state if they - * consume the entire weight. In such cases this code - * still "owns" the NAPI instance and therefore can - * move the instance around on the list at-will. + /* If softirq window is exhausted then punt. + * Allow this to run for 2 jiffies since which will allow + * an average latency of 1.5/HZ. */ - if (unlikely(work == weight)) { - if (unlikely(napi_disable_pending(n))) { - napi_complete(n); - } else { - if (n->gro_list) { - /* flush too old packets - * If HZ < 1000, flush all packets. - */ - napi_gro_flush(n, HZ >= 1000); - } - list_add_tail(&n->poll_list, &repoll); - } + if (unlikely(budget <= 0 || + time_after_eq(jiffies, time_limit))) { + sd->time_squeeze++; + break; } - - netpoll_poll_unlock(have); } - if (!sd_has_rps_ipi_waiting(sd) && - list_empty(&list) && - list_empty(&repoll)) - return; -out: local_irq_disable(); list_splice_tail_init(&sd->poll_list, &list); @@ -4641,12 +4682,6 @@ out: __raise_softirq_irqoff(NET_RX_SOFTIRQ); net_rps_action_and_irq_enable(sd); - - return; - -softnet_break: - sd->time_squeeze++; - goto out; } struct netdev_adjacent { @@ -5298,7 +5333,27 @@ void netdev_upper_dev_unlink(struct net_device *dev, } EXPORT_SYMBOL(netdev_upper_dev_unlink); -void netdev_adjacent_add_links(struct net_device *dev) +/** + * netdev_bonding_info_change - Dispatch event about slave change + * @dev: device + * @bonding_info: info to dispatch + * + * Send NETDEV_BONDING_INFO to netdev notifiers with info. + * The caller must hold the RTNL lock. 
+ */ +void netdev_bonding_info_change(struct net_device *dev, + struct netdev_bonding_info *bonding_info) +{ + struct netdev_notifier_bonding_info info; + + memcpy(&info.bonding_info, bonding_info, + sizeof(struct netdev_bonding_info)); + call_netdevice_notifiers_info(NETDEV_BONDING_INFO, dev, + &info.info); +} +EXPORT_SYMBOL(netdev_bonding_info_change); + +static void netdev_adjacent_add_links(struct net_device *dev) { struct netdev_adjacent *iter; @@ -5323,7 +5378,7 @@ void netdev_adjacent_add_links(struct net_device *dev) } } -void netdev_adjacent_del_links(struct net_device *dev) +static void netdev_adjacent_del_links(struct net_device *dev) { struct netdev_adjacent *iter; @@ -6147,13 +6202,16 @@ static int netif_alloc_rx_queues(struct net_device *dev) { unsigned int i, count = dev->num_rx_queues; struct netdev_rx_queue *rx; + size_t sz = count * sizeof(*rx); BUG_ON(count < 1); - rx = kcalloc(count, sizeof(struct netdev_rx_queue), GFP_KERNEL); - if (!rx) - return -ENOMEM; - + rx = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); + if (!rx) { + rx = vzalloc(sz); + if (!rx) + return -ENOMEM; + } dev->_rx = rx; for (i = 0; i < count; i++) @@ -6551,6 +6609,8 @@ void netdev_run_todo(void) /* paranoia */ BUG_ON(netdev_refcnt_read(dev)); + BUG_ON(!list_empty(&dev->ptype_all)); + BUG_ON(!list_empty(&dev->ptype_specific)); WARN_ON(rcu_access_pointer(dev->ip_ptr)); WARN_ON(rcu_access_pointer(dev->ip6_ptr)); WARN_ON(dev->dn_ptr); @@ -6631,7 +6691,7 @@ struct netdev_queue *dev_ingress_queue_create(struct net_device *dev) if (!queue) return NULL; netdev_init_one_queue(dev, queue, NULL); - queue->qdisc = &noop_qdisc; + RCU_INIT_POINTER(queue->qdisc, &noop_qdisc); queue->qdisc_sleeping = &noop_qdisc; rcu_assign_pointer(dev->ingress_queue, queue); #endif @@ -6733,6 +6793,8 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, INIT_LIST_HEAD(&dev->adj_list.lower); INIT_LIST_HEAD(&dev->all_adj_list.upper); INIT_LIST_HEAD(&dev->all_adj_list.lower); + INIT_LIST_HEAD(&dev->ptype_all); + INIT_LIST_HEAD(&dev->ptype_specific); dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM; setup(dev); @@ -6783,7 +6845,7 @@ void free_netdev(struct net_device *dev) netif_free_tx_queues(dev); #ifdef CONFIG_SYSFS - kfree(dev->_rx); + kvfree(dev->_rx); #endif kfree(rcu_dereference_protected(dev->ingress_queue, 1)); @@ -7047,10 +7109,20 @@ static int dev_cpu_callback(struct notifier_block *nfb, oldsd->output_queue = NULL; oldsd->output_queue_tailp = &oldsd->output_queue; } - /* Append NAPI poll list from offline CPU. */ - if (!list_empty(&oldsd->poll_list)) { - list_splice_init(&oldsd->poll_list, &sd->poll_list); - raise_softirq_irqoff(NET_RX_SOFTIRQ); + /* Append NAPI poll list from offline CPU, with one exception : + * process_backlog() must be called by cpu owning percpu backlog. + * We properly handle process_queue & input_pkt_queue later. 
+ */ + while (!list_empty(&oldsd->poll_list)) { + struct napi_struct *napi = list_first_entry(&oldsd->poll_list, + struct napi_struct, + poll_list); + + list_del_init(&napi->poll_list); + if (napi->poll == process_backlog) + napi->state = 0; + else + ____napi_schedule(sd, napi); } raise_softirq_irqoff(NET_TX_SOFTIRQ); @@ -7058,11 +7130,11 @@ static int dev_cpu_callback(struct notifier_block *nfb, /* Process offline CPU's input_pkt_queue */ while ((skb = __skb_dequeue(&oldsd->process_queue))) { - netif_rx_internal(skb); + netif_rx_ni(skb); input_queue_head_incr(oldsd); } - while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) { - netif_rx_internal(skb); + while ((skb = skb_dequeue(&oldsd->input_pkt_queue))) { + netif_rx_ni(skb); input_queue_head_incr(oldsd); } diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 550892cd6b3f..91f74f3eb204 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1597,20 +1597,31 @@ static int ethtool_get_ts_info(struct net_device *dev, void __user *useraddr) return err; } +static int __ethtool_get_module_info(struct net_device *dev, + struct ethtool_modinfo *modinfo) +{ + const struct ethtool_ops *ops = dev->ethtool_ops; + struct phy_device *phydev = dev->phydev; + + if (phydev && phydev->drv && phydev->drv->module_info) + return phydev->drv->module_info(phydev, modinfo); + + if (ops->get_module_info) + return ops->get_module_info(dev, modinfo); + + return -EOPNOTSUPP; +} + static int ethtool_get_module_info(struct net_device *dev, void __user *useraddr) { int ret; struct ethtool_modinfo modinfo; - const struct ethtool_ops *ops = dev->ethtool_ops; - - if (!ops->get_module_info) - return -EOPNOTSUPP; if (copy_from_user(&modinfo, useraddr, sizeof(modinfo))) return -EFAULT; - ret = ops->get_module_info(dev, &modinfo); + ret = __ethtool_get_module_info(dev, &modinfo); if (ret) return ret; @@ -1620,21 +1631,33 @@ static int ethtool_get_module_info(struct net_device *dev, return 0; } +static int __ethtool_get_module_eeprom(struct net_device *dev, + struct ethtool_eeprom *ee, u8 *data) +{ + const struct ethtool_ops *ops = dev->ethtool_ops; + struct phy_device *phydev = dev->phydev; + + if (phydev && phydev->drv && phydev->drv->module_eeprom) + return phydev->drv->module_eeprom(phydev, ee, data); + + if (ops->get_module_eeprom) + return ops->get_module_eeprom(dev, ee, data); + + return -EOPNOTSUPP; +} + static int ethtool_get_module_eeprom(struct net_device *dev, void __user *useraddr) { int ret; struct ethtool_modinfo modinfo; - const struct ethtool_ops *ops = dev->ethtool_ops; - - if (!ops->get_module_info || !ops->get_module_eeprom) - return -EOPNOTSUPP; - ret = ops->get_module_info(dev, &modinfo); + ret = __ethtool_get_module_info(dev, &modinfo); if (ret) return ret; - return ethtool_get_any_eeprom(dev, useraddr, ops->get_module_eeprom, + return ethtool_get_any_eeprom(dev, useraddr, + __ethtool_get_module_eeprom, modinfo.eeprom_len); } diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 185c341fafbd..44706e81b2e0 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -609,7 +609,8 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, if (ops->fill(rule, skb, frh) < 0) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); diff --git a/net/core/filter.c b/net/core/filter.c index ec9baea10c16..f6bdc2b1ba01 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -531,7 +531,7 @@ do_pass: *insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX, 
fp->k); break; - /* Unkown instruction. */ + /* Unknown instruction. */ default: goto err; } diff --git a/net/core/flow.c b/net/core/flow.c index a0348fde1fdf..1033725be40b 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -379,7 +379,7 @@ done: static void flow_cache_flush_task(struct work_struct *work) { struct netns_xfrm *xfrm = container_of(work, struct netns_xfrm, - flow_cache_gc_work); + flow_cache_flush_work); struct net *net = container_of(xfrm, struct net, xfrm); flow_cache_flush(net); diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 45084938c403..2c35c02a931e 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -178,6 +178,20 @@ ipv6: return false; } } + case htons(ETH_P_TIPC): { + struct { + __be32 pre[3]; + __be32 srcnode; + } *hdr, _hdr; + hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr); + if (!hdr) + return false; + flow->src = hdr->srcnode; + flow->dst = 0; + flow->n_proto = proto; + flow->thoff = (u16)nhoff; + return true; + } case htons(ETH_P_FCOE): flow->thoff = (u16)(nhoff + FCOE_HEADER_LEN); /* fall through */ @@ -408,7 +422,7 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb) dev_maps = rcu_dereference(dev->xps_maps); if (dev_maps) { map = rcu_dereference( - dev_maps->cpu_map[raw_smp_processor_id()]); + dev_maps->cpu_map[skb->sender_cpu - 1]); if (map) { if (map->len == 1) queue_index = map->queues[0]; @@ -454,6 +468,11 @@ struct netdev_queue *netdev_pick_tx(struct net_device *dev, { int queue_index = 0; +#ifdef CONFIG_XPS + if (skb->sender_cpu == 0) + skb->sender_cpu = raw_smp_processor_id() + 1; +#endif + if (dev->real_num_tx_queues != 1) { const struct net_device_ops *ops = dev->netdev_ops; if (ops->ndo_select_queue) diff --git a/net/core/iovec.c b/net/core/iovec.c deleted file mode 100644 index dcbe98b3726a..000000000000 --- a/net/core/iovec.c +++ /dev/null @@ -1,137 +0,0 @@ -/* - * iovec manipulation routines. - * - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * Fixes: - * Andrew Lunn : Errors in iovec copying. - * Pedro Roque : Added memcpy_fromiovecend and - * csum_..._fromiovecend. - * Andi Kleen : fixed error handling for 2.1 - * Alexey Kuznetsov: 2.1 optimisations - * Andi Kleen : Fix csum*fromiovecend for IPv6. - */ - -#include <linux/errno.h> -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/net.h> -#include <linux/in6.h> -#include <asm/uaccess.h> -#include <asm/byteorder.h> -#include <net/checksum.h> -#include <net/sock.h> - -/* - * And now for the all-in-one: copy and checksum from a user iovec - * directly to a datagram - * Calls to csum_partial but the last must be in 32 bit chunks - * - * ip_build_xmit must ensure that when fragmenting only the last - * call to this function will be unaligned also. - */ -int csum_partial_copy_fromiovecend(unsigned char *kdata, struct iovec *iov, - int offset, unsigned int len, __wsum *csump) -{ - __wsum csum = *csump; - int partial_cnt = 0, err = 0; - - /* Skip over the finished iovecs */ - while (offset >= iov->iov_len) { - offset -= iov->iov_len; - iov++; - } - - while (len > 0) { - u8 __user *base = iov->iov_base + offset; - int copy = min_t(unsigned int, len, iov->iov_len - offset); - - offset = 0; - - /* There is a remnant from previous iov. 
*/ - if (partial_cnt) { - int par_len = 4 - partial_cnt; - - /* iov component is too short ... */ - if (par_len > copy) { - if (copy_from_user(kdata, base, copy)) - goto out_fault; - kdata += copy; - base += copy; - partial_cnt += copy; - len -= copy; - iov++; - if (len) - continue; - *csump = csum_partial(kdata - partial_cnt, - partial_cnt, csum); - goto out; - } - if (copy_from_user(kdata, base, par_len)) - goto out_fault; - csum = csum_partial(kdata - partial_cnt, 4, csum); - kdata += par_len; - base += par_len; - copy -= par_len; - len -= par_len; - partial_cnt = 0; - } - - if (len > copy) { - partial_cnt = copy % 4; - if (partial_cnt) { - copy -= partial_cnt; - if (copy_from_user(kdata + copy, base + copy, - partial_cnt)) - goto out_fault; - } - } - - if (copy) { - csum = csum_and_copy_from_user(base, kdata, copy, - csum, &err); - if (err) - goto out; - } - len -= copy + partial_cnt; - kdata += copy + partial_cnt; - iov++; - } - *csump = csum; -out: - return err; - -out_fault: - err = -EFAULT; - goto out; -} -EXPORT_SYMBOL(csum_partial_copy_fromiovecend); - -unsigned long iov_pages(const struct iovec *iov, int offset, - unsigned long nr_segs) -{ - unsigned long seg, base; - int pages = 0, len, size; - - while (nr_segs && (offset >= iov->iov_len)) { - offset -= iov->iov_len; - ++iov; - --nr_segs; - } - - for (seg = 0; seg < nr_segs; seg++) { - base = (unsigned long)iov[seg].iov_base + offset; - len = iov[seg].iov_len - offset; - size = ((base & ~PAGE_MASK) + len + ~PAGE_MASK) >> PAGE_SHIFT; - pages += size; - offset = 0; - } - - return pages; -} -EXPORT_SYMBOL(iov_pages); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 8e38f17288d3..70fe9e10ac86 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1884,7 +1884,8 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl, goto nla_put_failure; read_unlock_bh(&tbl->lock); - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: read_unlock_bh(&tbl->lock); @@ -1917,7 +1918,8 @@ static int neightbl_fill_param_info(struct sk_buff *skb, goto errout; read_unlock_bh(&tbl->lock); - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; errout: read_unlock_bh(&tbl->lock); nlmsg_cancel(skb, nlh); @@ -2043,6 +2045,12 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh) case NDTPA_BASE_REACHABLE_TIME: NEIGH_VAR_SET(p, BASE_REACHABLE_TIME, nla_get_msecs(tbp[i])); + /* update reachable_time as well, otherwise, the change will + * only be effective after the next time neigh_periodic_work + * decides to recompute it (can be multiple minutes) + */ + p->reachable_time = + neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME)); break; case NDTPA_GC_STALETIME: NEIGH_VAR_SET(p, GC_STALETIME, @@ -2120,7 +2128,7 @@ static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb) if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL, - NLM_F_MULTI) <= 0) + NLM_F_MULTI) < 0) break; nidx = 0; @@ -2136,7 +2144,7 @@ static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb) NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL, - NLM_F_MULTI) <= 0) + NLM_F_MULTI) < 0) goto out; next: nidx++; @@ -2196,7 +2204,8 @@ static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh, nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci)) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); @@ -2226,7 +2235,8 @@ 
static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn, if (nla_put(skb, NDA_DST, tbl->key_len, pn->key)) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); @@ -2264,7 +2274,7 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNEIGH, - NLM_F_MULTI) <= 0) { + NLM_F_MULTI) < 0) { rc = -1; goto out; } @@ -2301,7 +2311,7 @@ static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNEIGH, - NLM_F_MULTI, tbl) <= 0) { + NLM_F_MULTI, tbl) < 0) { read_unlock_bh(&tbl->lock); rc = -1; goto out; @@ -2921,6 +2931,31 @@ static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write, return ret; } +static int neigh_proc_base_reachable_time(struct ctl_table *ctl, int write, + void __user *buffer, + size_t *lenp, loff_t *ppos) +{ + struct neigh_parms *p = ctl->extra2; + int ret; + + if (strcmp(ctl->procname, "base_reachable_time") == 0) + ret = neigh_proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos); + else if (strcmp(ctl->procname, "base_reachable_time_ms") == 0) + ret = neigh_proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos); + else + ret = -1; + + if (write && ret == 0) { + /* update reachable_time as well, otherwise, the change will + * only be effective after the next time neigh_periodic_work + * decides to recompute it + */ + p->reachable_time = + neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME)); + } + return ret; +} + #define NEIGH_PARMS_DATA_OFFSET(index) \ (&((struct neigh_parms *) 0)->data[index]) @@ -3047,6 +3082,19 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler; /* ReachableTime (in milliseconds) */ t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler; + } else { + /* Those handlers will update p->reachable_time after + * base_reachable_time(_ms) is set to ensure the new timer starts being + * applied after the next neighbour update instead of waiting for + * neigh_periodic_work to update its value (can be multiple minutes) + * So any handler that replaces them should do this as well + */ + /* ReachableTime */ + t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = + neigh_proc_base_reachable_time; + /* ReachableTime (in milliseconds) */ + t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = + neigh_proc_base_reachable_time; } /* Don't export sysctls to unprivileged users */ diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 999341244434..f2aa73bfb0e4 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -614,8 +614,7 @@ static ssize_t show_rps_map(struct netdev_rx_queue *queue, { struct rps_map *map; cpumask_var_t mask; - size_t len = 0; - int i; + int i, len; if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) return -ENOMEM; @@ -626,17 +625,11 @@ static ssize_t show_rps_map(struct netdev_rx_queue *queue, for (i = 0; i < map->len; i++) cpumask_set_cpu(map->cpus[i], mask); - len += cpumask_scnprintf(buf + len, PAGE_SIZE, mask); - if (PAGE_SIZE - len < 3) { - rcu_read_unlock(); - free_cpumask_var(mask); - return -EINVAL; - } + len = snprintf(buf, PAGE_SIZE, "%*pb\n", cpumask_pr_args(mask)); rcu_read_unlock(); - free_cpumask_var(mask); - len += sprintf(buf + len, "\n"); - return len; + + return len < PAGE_SIZE ? 
len : -EINVAL; } static ssize_t store_rps_map(struct netdev_rx_queue *queue, @@ -1090,8 +1083,7 @@ static ssize_t show_xps_map(struct netdev_queue *queue, struct xps_dev_maps *dev_maps; cpumask_var_t mask; unsigned long index; - size_t len = 0; - int i; + int i, len; if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) return -ENOMEM; @@ -1117,15 +1109,9 @@ static ssize_t show_xps_map(struct netdev_queue *queue, } rcu_read_unlock(); - len += cpumask_scnprintf(buf + len, PAGE_SIZE, mask); - if (PAGE_SIZE - len < 3) { - free_cpumask_var(mask); - return -EINVAL; - } - + len = snprintf(buf, PAGE_SIZE, "%*pb\n", cpumask_pr_args(mask)); free_cpumask_var(mask); - len += sprintf(buf + len, "\n"); - return len; + return len < PAGE_SIZE ? len : -EINVAL; } static ssize_t store_xps_map(struct netdev_queue *queue, diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index ce780c722e48..cb5290b8c428 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -15,6 +15,10 @@ #include <linux/file.h> #include <linux/export.h> #include <linux/user_namespace.h> +#include <linux/net_namespace.h> +#include <linux/rtnetlink.h> +#include <net/sock.h> +#include <net/netlink.h> #include <net/net_namespace.h> #include <net/netns/generic.h> @@ -144,6 +148,78 @@ static void ops_free_list(const struct pernet_operations *ops, } } +static int alloc_netid(struct net *net, struct net *peer, int reqid) +{ + int min = 0, max = 0; + + ASSERT_RTNL(); + + if (reqid >= 0) { + min = reqid; + max = reqid + 1; + } + + return idr_alloc(&net->netns_ids, peer, min, max, GFP_KERNEL); +} + +/* This function is used by idr_for_each(). If net is equal to peer, the + * function returns the id so that idr_for_each() stops. Because we cannot + * returns the id 0 (idr_for_each() will not stop), we return the magic value + * NET_ID_ZERO (-1) for it. + */ +#define NET_ID_ZERO -1 +static int net_eq_idr(int id, void *net, void *peer) +{ + if (net_eq(net, peer)) + return id ? : NET_ID_ZERO; + return 0; +} + +static int __peernet2id(struct net *net, struct net *peer, bool alloc) +{ + int id = idr_for_each(&net->netns_ids, net_eq_idr, peer); + + ASSERT_RTNL(); + + /* Magic value for id 0. */ + if (id == NET_ID_ZERO) + return 0; + if (id > 0) + return id; + + if (alloc) + return alloc_netid(net, peer, -1); + + return -ENOENT; +} + +/* This function returns the id of a peer netns. If no id is assigned, one will + * be allocated and returned. + */ +int peernet2id(struct net *net, struct net *peer) +{ + int id = __peernet2id(net, peer, true); + + return id >= 0 ? id : NETNSA_NSID_NOT_ASSIGNED; +} +EXPORT_SYMBOL(peernet2id); + +struct net *get_net_ns_by_id(struct net *net, int id) +{ + struct net *peer; + + if (id < 0) + return NULL; + + rcu_read_lock(); + peer = idr_find(&net->netns_ids, id); + if (peer) + get_net(peer); + rcu_read_unlock(); + + return peer; +} + /* * setup_net runs the initializers for the network namespace object. 
*/ @@ -158,6 +234,7 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns) atomic_set(&net->passive, 1); net->dev_base_seq = 1; net->user_ns = user_ns; + idr_init(&net->netns_ids); #ifdef NETNS_REFCNT_DEBUG atomic_set(&net->use_count, 0); @@ -288,6 +365,14 @@ static void cleanup_net(struct work_struct *work) list_for_each_entry(net, &net_kill_list, cleanup_list) { list_del_rcu(&net->list); list_add_tail(&net->exit_list, &net_exit_list); + for_each_net(tmp) { + int id = __peernet2id(tmp, net, false); + + if (id >= 0) + idr_remove(&tmp->netns_ids, id); + } + idr_destroy(&net->netns_ids); + } rtnl_unlock(); @@ -361,6 +446,7 @@ struct net *get_net_ns_by_fd(int fd) return ERR_PTR(-EINVAL); } #endif +EXPORT_SYMBOL_GPL(get_net_ns_by_fd); struct net *get_net_ns_by_pid(pid_t pid) { @@ -402,6 +488,130 @@ static struct pernet_operations __net_initdata net_ns_ops = { .exit = net_ns_net_exit, }; +static struct nla_policy rtnl_net_policy[NETNSA_MAX + 1] = { + [NETNSA_NONE] = { .type = NLA_UNSPEC }, + [NETNSA_NSID] = { .type = NLA_S32 }, + [NETNSA_PID] = { .type = NLA_U32 }, + [NETNSA_FD] = { .type = NLA_U32 }, +}; + +static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh) +{ + struct net *net = sock_net(skb->sk); + struct nlattr *tb[NETNSA_MAX + 1]; + struct net *peer; + int nsid, err; + + err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX, + rtnl_net_policy); + if (err < 0) + return err; + if (!tb[NETNSA_NSID]) + return -EINVAL; + nsid = nla_get_s32(tb[NETNSA_NSID]); + + if (tb[NETNSA_PID]) + peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID])); + else if (tb[NETNSA_FD]) + peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD])); + else + return -EINVAL; + if (IS_ERR(peer)) + return PTR_ERR(peer); + + if (__peernet2id(net, peer, false) >= 0) { + err = -EEXIST; + goto out; + } + + err = alloc_netid(net, peer, nsid); + if (err > 0) + err = 0; +out: + put_net(peer); + return err; +} + +static int rtnl_net_get_size(void) +{ + return NLMSG_ALIGN(sizeof(struct rtgenmsg)) + + nla_total_size(sizeof(s32)) /* NETNSA_NSID */ + ; +} + +static int rtnl_net_fill(struct sk_buff *skb, u32 portid, u32 seq, int flags, + int cmd, struct net *net, struct net *peer) +{ + struct nlmsghdr *nlh; + struct rtgenmsg *rth; + int id; + + ASSERT_RTNL(); + + nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rth), flags); + if (!nlh) + return -EMSGSIZE; + + rth = nlmsg_data(nlh); + rth->rtgen_family = AF_UNSPEC; + + id = __peernet2id(net, peer, false); + if (id < 0) + id = NETNSA_NSID_NOT_ASSIGNED; + if (nla_put_s32(skb, NETNSA_NSID, id)) + goto nla_put_failure; + + nlmsg_end(skb, nlh); + return 0; + +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} + +static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh) +{ + struct net *net = sock_net(skb->sk); + struct nlattr *tb[NETNSA_MAX + 1]; + struct sk_buff *msg; + int err = -ENOBUFS; + struct net *peer; + + err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX, + rtnl_net_policy); + if (err < 0) + return err; + if (tb[NETNSA_PID]) + peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID])); + else if (tb[NETNSA_FD]) + peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD])); + else + return -EINVAL; + + if (IS_ERR(peer)) + return PTR_ERR(peer); + + msg = nlmsg_new(rtnl_net_get_size(), GFP_KERNEL); + if (!msg) { + err = -ENOMEM; + goto out; + } + + err = rtnl_net_fill(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, + RTM_GETNSID, net, peer); + if (err < 0) + goto err_out; + + err = rtnl_unicast(msg, net, 
NETLINK_CB(skb).portid); + goto out; + +err_out: + nlmsg_free(msg); +out: + put_net(peer); + return err; +} + static int __init net_ns_init(void) { struct net_generic *ng; @@ -435,6 +645,9 @@ static int __init net_ns_init(void) register_pernet_subsys(&net_ns_ops); + rtnl_register(PF_UNSPEC, RTM_NEWNSID, rtnl_net_newid, NULL, NULL); + rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, NULL, NULL); + return 0; } diff --git a/net/core/netpoll.c b/net/core/netpoll.c index e0ad5d16c9c5..c126a878c47c 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -77,7 +77,7 @@ static int netpoll_start_xmit(struct sk_buff *skb, struct net_device *dev, features = netif_skb_features(skb); - if (vlan_tx_tag_present(skb) && + if (skb_vlan_tag_present(skb) && !vlan_hw_offload_capable(features, skb->vlan_proto)) { skb = __vlan_hwaccel_push_inside(skb); if (unlikely(!skb)) { diff --git a/net/core/pktgen.c b/net/core/pktgen.c index da934fc3faa8..b4899f5b7388 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -97,7 +97,7 @@ * New xmit() return, do_div and misc clean up by Stephen Hemminger * <shemminger@osdl.org> 040923 * - * Randy Dunlap fixed u64 printk compiler waring + * Randy Dunlap fixed u64 printk compiler warning * * Remove FCS from BW calculation. Lennert Buytenhek <buytenh@wantstofly.org> * New time handling. Lennert Buytenhek <buytenh@wantstofly.org> 041213 @@ -2842,25 +2842,25 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, skb->dev = odev; skb->pkt_type = PACKET_HOST; + pktgen_finalize_skb(pkt_dev, skb, datalen); + if (!(pkt_dev->flags & F_UDPCSUM)) { skb->ip_summed = CHECKSUM_NONE; } else if (odev->features & NETIF_F_V4_CSUM) { skb->ip_summed = CHECKSUM_PARTIAL; skb->csum = 0; - udp4_hwcsum(skb, udph->source, udph->dest); + udp4_hwcsum(skb, iph->saddr, iph->daddr); } else { - __wsum csum = udp_csum(skb); + __wsum csum = skb_checksum(skb, skb_transport_offset(skb), datalen + 8, 0); /* add protocol-dependent pseudo-header */ - udph->check = csum_tcpudp_magic(udph->source, udph->dest, + udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr, datalen + 8, IPPROTO_UDP, csum); if (udph->check == 0) udph->check = CSUM_MANGLED_0; } - pktgen_finalize_skb(pkt_dev, skb, datalen); - #ifdef CONFIG_XFRM if (!process_ipsec(pkt_dev, skb, protocol)) return NULL; @@ -2976,6 +2976,8 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, skb->dev = odev; skb->pkt_type = PACKET_HOST; + pktgen_finalize_skb(pkt_dev, skb, datalen); + if (!(pkt_dev->flags & F_UDPCSUM)) { skb->ip_summed = CHECKSUM_NONE; } else if (odev->features & NETIF_F_V6_CSUM) { @@ -2984,7 +2986,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, skb->csum_offset = offsetof(struct udphdr, check); udph->check = ~csum_ipv6_magic(&iph->saddr, &iph->daddr, udplen, IPPROTO_UDP, 0); } else { - __wsum csum = udp_csum(skb); + __wsum csum = skb_checksum(skb, skb_transport_offset(skb), udplen, 0); /* add protocol-dependent pseudo-header */ udph->check = csum_ipv6_magic(&iph->saddr, &iph->daddr, udplen, IPPROTO_UDP, csum); @@ -2993,8 +2995,6 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, udph->check = CSUM_MANGLED_0; } - pktgen_finalize_skb(pkt_dev, skb, datalen); - return skb; } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 9cf6fe9ddc0c..ab293a3066b3 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -50,6 +50,7 @@ #include <net/arp.h> #include <net/route.h> #include <net/udp.h> +#include <net/tcp.h> #include <net/sock.h> #include <net/pkt_sched.h> 
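The pktgen hunks above now run pktgen_finalize_skb() before checksumming and, when the checksum is computed in software, seed csum_tcpudp_magic()/csum_ipv6_magic() with the IP source and destination addresses from the pseudo-header instead of the UDP port fields that were being passed by mistake. As a standalone illustration of that pseudo-header rule, here is a small userspace C sketch of the RFC 768 UDP-over-IPv4 checksum; the struct layout and function names are local to the sketch and are not kernel API.

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>

/* One's-complement accumulation over a byte buffer (big-endian 16-bit words). */
static uint32_t csum_add(uint32_t sum, const void *buf, size_t len)
{
	const uint8_t *p = buf;

	while (len > 1) {
		sum += (p[0] << 8) | p[1];
		p += 2;
		len -= 2;
	}
	if (len)
		sum += p[0] << 8;	/* pad a trailing odd byte with zero */
	return sum;
}

/* Fold carries and complement, yielding the 16-bit Internet checksum. */
static uint16_t csum_fold(uint32_t sum)
{
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;
}

/* RFC 768: the UDP checksum covers a pseudo-header made of the IP source and
 * destination addresses, a zero byte, the protocol number and the UDP length,
 * followed by the UDP header (checksum field zeroed) and the payload.
 */
static uint16_t udp4_csum(uint32_t saddr, uint32_t daddr,
			  const uint8_t *udp, size_t udplen)
{
	uint8_t pseudo[12];
	uint32_t sum = 0;
	uint16_t folded;

	memcpy(pseudo, &saddr, 4);		/* already network byte order */
	memcpy(pseudo + 4, &daddr, 4);
	pseudo[8] = 0;
	pseudo[9] = 17;				/* IPPROTO_UDP */
	pseudo[10] = udplen >> 8;
	pseudo[11] = udplen & 0xff;

	sum = csum_add(sum, pseudo, sizeof(pseudo));
	sum = csum_add(sum, udp, udplen);
	folded = csum_fold(sum);
	return folded ? folded : 0xffff;	/* 0 is transmitted as all ones */
}

int main(void)
{
	/* 8-byte UDP header (sport 53, dport 1024, length 9) plus 1 payload byte */
	uint8_t dgram[9] = { 0x00, 0x35, 0x04, 0x00, 0x00, 0x09, 0x00, 0x00, 'x' };
	uint16_t c = udp4_csum(inet_addr("192.0.2.1"), inet_addr("192.0.2.2"),
			       dgram, sizeof(dgram));

	printf("udp checksum: 0x%04x\n", c);
	return 0;
}

Changing either address changes the result, which is exactly why seeding the helpers with the UDP ports produced bad checksums before this fix.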
#include <net/fib_rules.h> @@ -669,9 +670,19 @@ int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics) for (i = 0; i < RTAX_MAX; i++) { if (metrics[i]) { + if (i == RTAX_CC_ALGO - 1) { + char tmp[TCP_CA_NAME_MAX], *name; + + name = tcp_ca_get_name_by_key(metrics[i], tmp); + if (!name) + continue; + if (nla_put_string(skb, i + 1, name)) + goto nla_put_failure; + } else { + if (nla_put_u32(skb, i + 1, metrics[i])) + goto nla_put_failure; + } valid++; - if (nla_put_u32(skb, i+1, metrics[i])) - goto nla_put_failure; } } @@ -864,6 +875,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(1) /* IFLA_OPERSTATE */ + nla_total_size(1) /* IFLA_LINKMODE */ + nla_total_size(4) /* IFLA_CARRIER_CHANGES */ + + nla_total_size(4) /* IFLA_LINK_NETNSID */ + nla_total_size(ext_filter_mask & RTEXT_FILTER_VF ? 4 : 0) /* IFLA_NUM_VF */ + rtnl_vfinfo_size(dev, ext_filter_mask) /* IFLA_VFINFO_LIST */ @@ -1158,6 +1170,18 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, goto nla_put_failure; } + if (dev->rtnl_link_ops && + dev->rtnl_link_ops->get_link_net) { + struct net *link_net = dev->rtnl_link_ops->get_link_net(dev); + + if (!net_eq(dev_net(dev), link_net)) { + int id = peernet2id(dev_net(dev), link_net); + + if (nla_put_s32(skb, IFLA_LINK_NETNSID, id)) + goto nla_put_failure; + } + } + if (!(af_spec = nla_nest_start(skb, IFLA_AF_SPEC))) goto nla_put_failure; @@ -1188,7 +1212,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, nla_nest_end(skb, af_spec); - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); @@ -1223,6 +1248,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_PHYS_PORT_ID] = { .type = NLA_BINARY, .len = MAX_PHYS_ITEM_ID_LEN }, [IFLA_CARRIER_CHANGES] = { .type = NLA_U32 }, /* ignored */ [IFLA_PHYS_SWITCH_ID] = { .type = NLA_BINARY, .len = MAX_PHYS_ITEM_ID_LEN }, + [IFLA_LINK_NETNSID] = { .type = NLA_S32 }, }; static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { @@ -1237,18 +1263,12 @@ static const struct nla_policy ifla_vfinfo_policy[IFLA_VF_INFO_MAX+1] = { }; static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = { - [IFLA_VF_MAC] = { .type = NLA_BINARY, - .len = sizeof(struct ifla_vf_mac) }, - [IFLA_VF_VLAN] = { .type = NLA_BINARY, - .len = sizeof(struct ifla_vf_vlan) }, - [IFLA_VF_TX_RATE] = { .type = NLA_BINARY, - .len = sizeof(struct ifla_vf_tx_rate) }, - [IFLA_VF_SPOOFCHK] = { .type = NLA_BINARY, - .len = sizeof(struct ifla_vf_spoofchk) }, - [IFLA_VF_RATE] = { .type = NLA_BINARY, - .len = sizeof(struct ifla_vf_rate) }, - [IFLA_VF_LINK_STATE] = { .type = NLA_BINARY, - .len = sizeof(struct ifla_vf_link_state) }, + [IFLA_VF_MAC] = { .len = sizeof(struct ifla_vf_mac) }, + [IFLA_VF_VLAN] = { .len = sizeof(struct ifla_vf_vlan) }, + [IFLA_VF_TX_RATE] = { .len = sizeof(struct ifla_vf_tx_rate) }, + [IFLA_VF_SPOOFCHK] = { .len = sizeof(struct ifla_vf_spoofchk) }, + [IFLA_VF_RATE] = { .len = sizeof(struct ifla_vf_rate) }, + [IFLA_VF_LINK_STATE] = { .len = sizeof(struct ifla_vf_link_state) }, }; static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = { @@ -1315,7 +1335,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) */ WARN_ON((err == -EMSGSIZE) && (skb->len == 0)); - if (err <= 0) + if (err < 0) goto out; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); @@ -1996,7 +2016,7 @@ replay: struct nlattr *slave_attr[m_ops ? 
m_ops->slave_maxtype + 1 : 0]; struct nlattr **data = NULL; struct nlattr **slave_data = NULL; - struct net *dest_net; + struct net *dest_net, *link_net = NULL; if (ops) { if (ops->maxtype && linkinfo[IFLA_INFO_DATA]) { @@ -2102,7 +2122,18 @@ replay: if (IS_ERR(dest_net)) return PTR_ERR(dest_net); - dev = rtnl_create_link(dest_net, ifname, name_assign_type, ops, tb); + if (tb[IFLA_LINK_NETNSID]) { + int id = nla_get_s32(tb[IFLA_LINK_NETNSID]); + + link_net = get_net_ns_by_id(dest_net, id); + if (!link_net) { + err = -EINVAL; + goto out; + } + } + + dev = rtnl_create_link(link_net ? : dest_net, ifname, + name_assign_type, ops, tb); if (IS_ERR(dev)) { err = PTR_ERR(dev); goto out; @@ -2111,7 +2142,7 @@ replay: dev->ifindex = ifm->ifi_index; if (ops->newlink) { - err = ops->newlink(net, dev, tb, data); + err = ops->newlink(link_net ? : net, dev, tb, data); /* Drivers should call free_netdev() in ->destructor * and unregister it on failure after registration * so that device could be finally freed in rtnl_unlock. @@ -2130,9 +2161,26 @@ replay: } } err = rtnl_configure_link(dev, ifm); - if (err < 0) - unregister_netdevice(dev); + if (err < 0) { + if (ops->newlink) { + LIST_HEAD(list_kill); + + ops->dellink(dev, &list_kill); + unregister_netdevice_many(&list_kill); + } else { + unregister_netdevice(dev); + } + goto out; + } + + if (link_net) { + err = dev_change_net_namespace(dev, dest_net, ifname); + if (err < 0) + unregister_netdevice(dev); + } out: + if (link_net) + put_net(link_net); put_net(dest_net); return err; } @@ -2315,7 +2363,8 @@ static int nlmsg_populate_fdb_fill(struct sk_buff *skb, if (nla_put(skb, NDA_LLADDR, ETH_ALEN, addr)) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); @@ -2698,10 +2747,11 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) idx); } - idx = ndo_dflt_fdb_dump(skb, cb, dev, NULL, idx); if (dev->netdev_ops->ndo_fdb_dump) - idx = dev->netdev_ops->ndo_fdb_dump(skb, cb, bdev, dev, + idx = dev->netdev_ops->ndo_fdb_dump(skb, cb, dev, NULL, idx); + else + idx = ndo_dflt_fdb_dump(skb, cb, dev, NULL, idx); cops = NULL; } @@ -2797,7 +2847,8 @@ int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, nla_nest_end(skb, protinfo); - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); return -EMSGSIZE; @@ -2868,39 +2919,35 @@ static inline size_t bridge_nlmsg_size(void) + nla_total_size(sizeof(u16)); /* IFLA_BRIDGE_MODE */ } -static int rtnl_bridge_notify(struct net_device *dev, u16 flags) +static int rtnl_bridge_notify(struct net_device *dev) { struct net *net = dev_net(dev); - struct net_device *br_dev = netdev_master_upper_dev_get(dev); struct sk_buff *skb; int err = -EOPNOTSUPP; + if (!dev->netdev_ops->ndo_bridge_getlink) + return 0; + skb = nlmsg_new(bridge_nlmsg_size(), GFP_ATOMIC); if (!skb) { err = -ENOMEM; goto errout; } - if ((!flags || (flags & BRIDGE_FLAGS_MASTER)) && - br_dev && br_dev->netdev_ops->ndo_bridge_getlink) { - err = br_dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev, 0); - if (err < 0) - goto errout; - } + err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev, 0); + if (err < 0) + goto errout; - if ((flags & BRIDGE_FLAGS_SELF) && - dev->netdev_ops->ndo_bridge_getlink) { - err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev, 0); - if (err < 0) - goto errout; - } + if (!skb->len) + goto errout; rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC); return 0; errout: 
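Taken together, the net_namespace.c and rtnetlink.c changes above give each network namespace an idr that maps peer namespaces to small integer ids (peernet2id(), get_net_ns_by_id()), add the RTM_NEWNSID/RTM_GETNSID messages to assign and query those ids, and report IFLA_LINK_NETNSID when a device's lower link lives in another namespace. What follows is a rough, untested userspace sketch of an RTM_GETNSID query over NETLINK_ROUTE; it assumes the NETNSA_* attributes from the new uapi header <linux/net_namespace.h> and identifies the peer namespace by an open file descriptor on /run/netns/<name>, the iproute2 naming convention.

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/net_namespace.h>

int main(int argc, char **argv)
{
	struct {
		struct nlmsghdr nlh;
		struct rtgenmsg rtgen;
		char attrs[64];
	} req;
	union {
		struct nlmsghdr nlh;
		char buf[4096];
	} resp;
	struct sockaddr_nl kernel = { .nl_family = AF_NETLINK };
	struct rtattr *rta;
	int fd, nsfd, attrlen;
	__u32 fd32;

	if (argc < 2) {
		fprintf(stderr, "usage: %s /run/netns/<name>\n", argv[0]);
		return 1;
	}

	nsfd = open(argv[1], O_RDONLY);	/* fd referring to the peer namespace */
	fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
	if (nsfd < 0 || fd < 0) {
		perror("open/socket");
		return 1;
	}
	fd32 = nsfd;

	memset(&req, 0, sizeof(req));
	req.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtgenmsg));
	req.nlh.nlmsg_type = RTM_GETNSID;
	req.nlh.nlmsg_flags = NLM_F_REQUEST;
	req.rtgen.rtgen_family = AF_UNSPEC;

	/* NETNSA_FD tells the kernel which peer namespace we are asking about */
	rta = (struct rtattr *)((char *)&req + NLMSG_ALIGN(req.nlh.nlmsg_len));
	rta->rta_type = NETNSA_FD;
	rta->rta_len = RTA_LENGTH(sizeof(fd32));
	memcpy(RTA_DATA(rta), &fd32, sizeof(fd32));
	req.nlh.nlmsg_len = NLMSG_ALIGN(req.nlh.nlmsg_len) + RTA_ALIGN(rta->rta_len);

	if (sendto(fd, &req, req.nlh.nlmsg_len, 0,
		   (struct sockaddr *)&kernel, sizeof(kernel)) < 0 ||
	    recv(fd, &resp, sizeof(resp), 0) < 0) {
		perror("sendto/recv");
		return 1;
	}

	if (resp.nlh.nlmsg_type == NLMSG_ERROR) {
		fprintf(stderr, "request rejected\n");
		return 1;
	}

	/* the answer carries a struct rtgenmsg followed by NETNSA_NSID */
	rta = (struct rtattr *)((char *)NLMSG_DATA(&resp.nlh) +
				NLMSG_ALIGN(sizeof(struct rtgenmsg)));
	attrlen = NLMSG_PAYLOAD(&resp.nlh, sizeof(struct rtgenmsg));
	for (; RTA_OK(rta, attrlen); rta = RTA_NEXT(rta, attrlen))
		if (rta->rta_type == NETNSA_NSID)
			printf("nsid: %d\n", *(int *)RTA_DATA(rta));

	close(nsfd);
	close(fd);
	return 0;
}

Per the rtnl_net_fill() code above, the NETNSA_NSID attribute comes back as NETNSA_NSID_NOT_ASSIGNED (-1) when no id has been allocated for that peer yet.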
WARN_ON(err == -EMSGSIZE); kfree_skb(skb); - rtnl_set_sk_err(net, RTNLGRP_LINK, err); + if (err) + rtnl_set_sk_err(net, RTNLGRP_LINK, err); return err; } @@ -2911,7 +2958,7 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh) struct net_device *dev; struct nlattr *br_spec, *attr = NULL; int rem, err = -EOPNOTSUPP; - u16 oflags, flags = 0; + u16 flags = 0; bool have_flags = false; if (nlmsg_len(nlh) < sizeof(*ifm)) @@ -2941,8 +2988,6 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh) } } - oflags = flags; - if (!flags || (flags & BRIDGE_FLAGS_MASTER)) { struct net_device *br_dev = netdev_master_upper_dev_get(dev); @@ -2951,7 +2996,7 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh) goto out; } - err = br_dev->netdev_ops->ndo_bridge_setlink(dev, nlh); + err = br_dev->netdev_ops->ndo_bridge_setlink(dev, nlh, flags); if (err) goto out; @@ -2962,17 +3007,20 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh) if (!dev->netdev_ops->ndo_bridge_setlink) err = -EOPNOTSUPP; else - err = dev->netdev_ops->ndo_bridge_setlink(dev, nlh); - - if (!err) + err = dev->netdev_ops->ndo_bridge_setlink(dev, nlh, + flags); + if (!err) { flags &= ~BRIDGE_FLAGS_SELF; + + /* Generate event to notify upper layer of bridge + * change + */ + err = rtnl_bridge_notify(dev); + } } if (have_flags) memcpy(nla_data(attr), &flags, sizeof(flags)); - /* Generate event to notify upper layer of bridge change */ - if (!err) - err = rtnl_bridge_notify(dev, oflags); out: return err; } @@ -2984,7 +3032,7 @@ static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh) struct net_device *dev; struct nlattr *br_spec, *attr = NULL; int rem, err = -EOPNOTSUPP; - u16 oflags, flags = 0; + u16 flags = 0; bool have_flags = false; if (nlmsg_len(nlh) < sizeof(*ifm)) @@ -3014,8 +3062,6 @@ static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh) } } - oflags = flags; - if (!flags || (flags & BRIDGE_FLAGS_MASTER)) { struct net_device *br_dev = netdev_master_upper_dev_get(dev); @@ -3024,7 +3070,7 @@ static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh) goto out; } - err = br_dev->netdev_ops->ndo_bridge_dellink(dev, nlh); + err = br_dev->netdev_ops->ndo_bridge_dellink(dev, nlh, flags); if (err) goto out; @@ -3035,17 +3081,21 @@ static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh) if (!dev->netdev_ops->ndo_bridge_dellink) err = -EOPNOTSUPP; else - err = dev->netdev_ops->ndo_bridge_dellink(dev, nlh); + err = dev->netdev_ops->ndo_bridge_dellink(dev, nlh, + flags); - if (!err) + if (!err) { flags &= ~BRIDGE_FLAGS_SELF; + + /* Generate event to notify upper layer of bridge + * change + */ + err = rtnl_bridge_notify(dev); + } } if (have_flags) memcpy(nla_data(attr), &flags, sizeof(flags)); - /* Generate event to notify upper layer of bridge change */ - if (!err) - err = rtnl_bridge_notify(dev, oflags); out: return err; } @@ -3135,6 +3185,7 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi case NETDEV_UNREGISTER_FINAL: case NETDEV_RELEASE: case NETDEV_JOIN: + case NETDEV_BONDING_INFO: break; default: rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index ae13ef6b3ea7..88c613eab142 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -74,6 +74,8 @@ #include <asm/uaccess.h> #include <trace/events/skb.h> #include <linux/highmem.h> +#include <linux/capability.h> +#include <linux/user_namespace.h> struct 
kmem_cache *skbuff_head_cache __read_mostly; static struct kmem_cache *skbuff_fclone_cache __read_mostly; @@ -677,13 +679,6 @@ static void skb_release_head_state(struct sk_buff *skb) #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) nf_bridge_put(skb->nf_bridge); #endif -/* XXX: IS this still necessary? - JHS */ -#ifdef CONFIG_NET_SCHED - skb->tc_index = 0; -#ifdef CONFIG_NET_CLS_ACT - skb->tc_verd = 0; -#endif -#endif } /* Free everything but the sk_buff shell. */ @@ -830,6 +825,9 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) #ifdef CONFIG_NET_RX_BUSY_POLL CHECK_SKB_FIELD(napi_id); #endif +#ifdef CONFIG_XPS + CHECK_SKB_FIELD(sender_cpu); +#endif #ifdef CONFIG_NET_SCHED CHECK_SKB_FIELD(tc_index); #ifdef CONFIG_NET_CLS_ACT @@ -3697,11 +3695,28 @@ static void __skb_complete_tx_timestamp(struct sk_buff *skb, kfree_skb(skb); } +static bool skb_may_tx_timestamp(struct sock *sk, bool tsonly) +{ + bool ret; + + if (likely(sysctl_tstamp_allow_data || tsonly)) + return true; + + read_lock_bh(&sk->sk_callback_lock); + ret = sk->sk_socket && sk->sk_socket->file && + file_ns_capable(sk->sk_socket->file, &init_user_ns, CAP_NET_RAW); + read_unlock_bh(&sk->sk_callback_lock); + return ret; +} + void skb_complete_tx_timestamp(struct sk_buff *skb, struct skb_shared_hwtstamps *hwtstamps) { struct sock *sk = skb->sk; + if (!skb_may_tx_timestamp(sk, false)) + return; + /* take a reference to prevent skb_orphan() from freeing the socket */ sock_hold(sk); @@ -3717,19 +3732,28 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb, struct sock *sk, int tstype) { struct sk_buff *skb; + bool tsonly = sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TSONLY; - if (!sk) + if (!sk || !skb_may_tx_timestamp(sk, tsonly)) return; - if (hwtstamps) - *skb_hwtstamps(orig_skb) = *hwtstamps; + if (tsonly) + skb = alloc_skb(0, GFP_ATOMIC); else - orig_skb->tstamp = ktime_get_real(); - - skb = skb_clone(orig_skb, GFP_ATOMIC); + skb = skb_clone(orig_skb, GFP_ATOMIC); if (!skb) return; + if (tsonly) { + skb_shinfo(skb)->tx_flags = skb_shinfo(orig_skb)->tx_flags; + skb_shinfo(skb)->tskey = skb_shinfo(orig_skb)->tskey; + } + + if (hwtstamps) + *skb_hwtstamps(skb) = *hwtstamps; + else + skb->tstamp = ktime_get_real(); + __skb_complete_tx_timestamp(skb, sk, tstype); } EXPORT_SYMBOL_GPL(__skb_tstamp_tx); @@ -4148,6 +4172,8 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet) skb->ignore_df = 0; skb_dst_drop(skb); skb->mark = 0; + skb->sender_cpu = 0; + skb_init_secmark(skb); secpath_reset(skb); nf_reset(skb); nf_reset_trace(skb); @@ -4203,7 +4229,7 @@ struct sk_buff *skb_vlan_untag(struct sk_buff *skb) struct vlan_hdr *vhdr; u16 vlan_tci; - if (unlikely(vlan_tx_tag_present(skb))) { + if (unlikely(skb_vlan_tag_present(skb))) { /* vlan_tci is already set-up so leave this for another time */ return skb; } @@ -4289,7 +4315,7 @@ int skb_vlan_pop(struct sk_buff *skb) __be16 vlan_proto; int err; - if (likely(vlan_tx_tag_present(skb))) { + if (likely(skb_vlan_tag_present(skb))) { skb->vlan_tci = 0; } else { if (unlikely((skb->protocol != htons(ETH_P_8021Q) && @@ -4319,7 +4345,7 @@ EXPORT_SYMBOL(skb_vlan_pop); int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci) { - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { unsigned int offset = skb->data - skb_mac_header(skb); int err; @@ -4329,7 +4355,7 @@ int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci) */ __skb_push(skb, offset); err = __vlan_insert_tag(skb, skb->vlan_proto, - vlan_tx_tag_get(skb)); + skb_vlan_tag_get(skb)); if 
(err) return err; skb->protocol = skb->vlan_proto; diff --git a/net/core/sock.c b/net/core/sock.c index 1c7a33db1314..93c8b20c91e4 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -325,6 +325,8 @@ __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX; int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512); EXPORT_SYMBOL(sysctl_optmem_max); +int sysctl_tstamp_allow_data __read_mostly = 1; + struct static_key memalloc_socks = STATIC_KEY_INIT_FALSE; EXPORT_SYMBOL_GPL(memalloc_socks); @@ -840,6 +842,7 @@ set_rcvbuf: ret = -EINVAL; break; } + if (val & SOF_TIMESTAMPING_OPT_ID && !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) { if (sk->sk_protocol == IPPROTO_TCP) { diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 31baba2a71ce..433424804284 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -52,7 +52,7 @@ static int rps_sock_flow_sysctl(struct ctl_table *table, int write, if (write) { if (size) { - if (size > 1<<30) { + if (size > 1<<29) { /* Enforce limit to prevent overflow */ mutex_unlock(&sock_flow_mutex); return -EINVAL; @@ -65,7 +65,7 @@ static int rps_sock_flow_sysctl(struct ctl_table *table, int write, mutex_unlock(&sock_flow_mutex); return -ENOMEM; } - + rps_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1; sock_table->mask = size - 1; } else sock_table = orig_sock_table; @@ -155,7 +155,7 @@ write_unlock: rcu_read_unlock(); len = min(sizeof(kbuf) - 1, *lenp); - len = cpumask_scnprintf(kbuf, len, mask); + len = scnprintf(kbuf, len, "%*pb", cpumask_pr_args(mask)); if (!len) { *lenp = 0; goto done; @@ -321,6 +321,15 @@ static struct ctl_table net_core_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "tstamp_allow_data", + .data = &sysctl_tstamp_allow_data, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one + }, #ifdef CONFIG_RPS { .procname = "rps_sock_flow_entries", diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 4400da7739da..b2c26b081134 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -702,7 +702,8 @@ static int dn_nl_fill_ifaddr(struct sk_buff *skb, struct dn_ifaddr *ifa, nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) || nla_put_u32(skb, IFA_FLAGS, ifa_flags)) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index d332aefb0846..df4803437888 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -298,7 +298,8 @@ struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct nlattr *att int type = nla_type(attr); if (type) { - if (type > RTAX_MAX || nla_len(attr) < 4) + if (type > RTAX_MAX || type == RTAX_CC_ALGO || + nla_len(attr) < 4) goto err_inval; fi->fib_metrics[type-1] = nla_get_u32(attr); diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index daccc4a36d80..1d7c1256e845 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1616,7 +1616,8 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 portid, u32 seq, nla_put_u32(skb, RTA_IIF, rt->fld.flowidn_iif) < 0) goto errout; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; errout: nlmsg_cancel(skb, nlh); @@ -1709,9 +1710,6 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) rt->rt_flags |= RTCF_NOTIFY; err = dn_rt_fill_info(skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, RTM_NEWROUTE, 0, 0); - - if (err == 0) - goto out_free; if (err 
< 0) { err = -EMSGSIZE; goto out_free; @@ -1762,7 +1760,7 @@ int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb) skb_dst_set(skb, dst_clone(&rt->dst)); if (dn_rt_fill_info(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWROUTE, - 1, NLM_F_MULTI) <= 0) { + 1, NLM_F_MULTI) < 0) { skb_dst_drop(skb); rcu_read_unlock_bh(); goto done; diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index 86e3807052e9..1540b506e3e0 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -29,6 +29,7 @@ #include <linux/route.h> /* RTF_xxx */ #include <net/neighbour.h> #include <net/netlink.h> +#include <net/tcp.h> #include <net/dst.h> #include <net/flow.h> #include <net/fib_rules.h> @@ -273,7 +274,8 @@ static inline size_t dn_fib_nlmsg_size(struct dn_fib_info *fi) size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg)) + nla_total_size(4) /* RTA_TABLE */ + nla_total_size(2) /* RTA_DST */ - + nla_total_size(4); /* RTA_PRIORITY */ + + nla_total_size(4) /* RTA_PRIORITY */ + + nla_total_size(TCP_CA_NAME_MAX); /* RTAX_CC_ALGO */ /* space for nested metrics */ payload += nla_total_size((RTAX_MAX * nla_total_size(4))); @@ -365,7 +367,8 @@ static int dn_fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, nla_nest_end(skb, mp_head); } - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; errout: nlmsg_cancel(skb, nlh); diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 37317149f918..2173402d87e0 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -603,7 +603,7 @@ static int dsa_of_probe(struct platform_device *pdev) pdev->dev.platform_data = pd; pd->netdev = ðernet_dev->dev; - pd->nr_chips = of_get_child_count(np); + pd->nr_chips = of_get_available_child_count(np); if (pd->nr_chips > DSA_MAX_SWITCHES) pd->nr_chips = DSA_MAX_SWITCHES; diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 515569ffde8a..f23deadf42a0 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -46,6 +46,7 @@ void dsa_slave_mii_bus_init(struct dsa_switch *ds) snprintf(ds->slave_mii_bus->id, MII_BUS_ID_SIZE, "dsa-%d:%.2x", ds->index, ds->pd->sw_addr); ds->slave_mii_bus->parent = ds->master_dev; + ds->slave_mii_bus->phy_mask = ~ds->phys_mii_mask; } @@ -520,10 +521,13 @@ static int dsa_slave_phy_setup(struct dsa_slave_priv *p, struct device_node *phy_dn, *port_dn; bool phy_is_fixed = false; u32 phy_flags = 0; - int ret; + int mode, ret; port_dn = cd->port_dn[p->port]; - p->phy_interface = of_get_phy_mode(port_dn); + mode = of_get_phy_mode(port_dn); + if (mode < 0) + mode = PHY_INTERFACE_MODE_NA; + p->phy_interface = mode; phy_dn = of_parse_phandle(port_dn, "phy-handle", 0); if (of_phy_is_fixed_link(port_dn)) { @@ -558,6 +562,8 @@ static int dsa_slave_phy_setup(struct dsa_slave_priv *p, if (!p->phy) return -ENODEV; + /* Use already configured phy mode */ + p->phy_interface = p->phy->interface; phy_connect_direct(slave_dev, p->phy, dsa_slave_adjust_link, p->phy_interface); } else { @@ -675,18 +681,5 @@ dsa_slave_create(struct dsa_switch *ds, struct device *parent, netif_carrier_off(slave_dev); - if (p->phy != NULL) { - if (ds->drv->get_phy_flags) - p->phy->dev_flags |= ds->drv->get_phy_flags(ds, port); - - phy_attach(slave_dev, dev_name(&p->phy->dev), - PHY_INTERFACE_MODE_GMII); - - p->phy->autoneg = AUTONEG_ENABLE; - p->phy->speed = 0; - p->phy->duplex = 0; - p->phy->advertising = p->phy->supported | ADVERTISED_Autoneg; - } - return slave_dev; } diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 33a140e15834..238f38d21641 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ 
-424,3 +424,95 @@ ssize_t sysfs_format_mac(char *buf, const unsigned char *addr, int len) return scnprintf(buf, PAGE_SIZE, "%*phC\n", len, addr); } EXPORT_SYMBOL(sysfs_format_mac); + +struct sk_buff **eth_gro_receive(struct sk_buff **head, + struct sk_buff *skb) +{ + struct sk_buff *p, **pp = NULL; + struct ethhdr *eh, *eh2; + unsigned int hlen, off_eth; + const struct packet_offload *ptype; + __be16 type; + int flush = 1; + + off_eth = skb_gro_offset(skb); + hlen = off_eth + sizeof(*eh); + eh = skb_gro_header_fast(skb, off_eth); + if (skb_gro_header_hard(skb, hlen)) { + eh = skb_gro_header_slow(skb, hlen, off_eth); + if (unlikely(!eh)) + goto out; + } + + flush = 0; + + for (p = *head; p; p = p->next) { + if (!NAPI_GRO_CB(p)->same_flow) + continue; + + eh2 = (struct ethhdr *)(p->data + off_eth); + if (compare_ether_header(eh, eh2)) { + NAPI_GRO_CB(p)->same_flow = 0; + continue; + } + } + + type = eh->h_proto; + + rcu_read_lock(); + ptype = gro_find_receive_by_type(type); + if (ptype == NULL) { + flush = 1; + goto out_unlock; + } + + skb_gro_pull(skb, sizeof(*eh)); + skb_gro_postpull_rcsum(skb, eh, sizeof(*eh)); + pp = ptype->callbacks.gro_receive(head, skb); + +out_unlock: + rcu_read_unlock(); +out: + NAPI_GRO_CB(skb)->flush |= flush; + + return pp; +} +EXPORT_SYMBOL(eth_gro_receive); + +int eth_gro_complete(struct sk_buff *skb, int nhoff) +{ + struct ethhdr *eh = (struct ethhdr *)(skb->data + nhoff); + __be16 type = eh->h_proto; + struct packet_offload *ptype; + int err = -ENOSYS; + + if (skb->encapsulation) + skb_set_inner_mac_header(skb, nhoff); + + rcu_read_lock(); + ptype = gro_find_complete_by_type(type); + if (ptype != NULL) + err = ptype->callbacks.gro_complete(skb, nhoff + + sizeof(struct ethhdr)); + + rcu_read_unlock(); + return err; +} +EXPORT_SYMBOL(eth_gro_complete); + +static struct packet_offload eth_packet_offload __read_mostly = { + .type = cpu_to_be16(ETH_P_TEB), + .callbacks = { + .gro_receive = eth_gro_receive, + .gro_complete = eth_gro_complete, + }, +}; + +static int __init eth_offload_init(void) +{ + dev_add_offload(ð_packet_offload); + + return 0; +} + +fs_initcall(eth_offload_init); diff --git a/net/ieee802154/6lowpan/6lowpan_i.h b/net/ieee802154/6lowpan/6lowpan_i.h new file mode 100644 index 000000000000..e50f69da78eb --- /dev/null +++ b/net/ieee802154/6lowpan/6lowpan_i.h @@ -0,0 +1,72 @@ +#ifndef __IEEE802154_6LOWPAN_I_H__ +#define __IEEE802154_6LOWPAN_I_H__ + +#include <linux/list.h> + +#include <net/ieee802154_netdev.h> +#include <net/inet_frag.h> + +struct lowpan_create_arg { + u16 tag; + u16 d_size; + const struct ieee802154_addr *src; + const struct ieee802154_addr *dst; +}; + +/* Equivalent of ipv4 struct ip + */ +struct lowpan_frag_queue { + struct inet_frag_queue q; + + u16 tag; + u16 d_size; + struct ieee802154_addr saddr; + struct ieee802154_addr daddr; +}; + +static inline u32 ieee802154_addr_hash(const struct ieee802154_addr *a) +{ + switch (a->mode) { + case IEEE802154_ADDR_LONG: + return (((__force u64)a->extended_addr) >> 32) ^ + (((__force u64)a->extended_addr) & 0xffffffff); + case IEEE802154_ADDR_SHORT: + return (__force u32)(a->short_addr); + default: + return 0; + } +} + +struct lowpan_dev_record { + struct net_device *ldev; + struct list_head list; +}; + +/* private device info */ +struct lowpan_dev_info { + struct net_device *real_dev; /* real WPAN device ptr */ + struct mutex dev_list_mtx; /* mutex for list ops */ + u16 fragment_tag; +}; + +static inline struct +lowpan_dev_info *lowpan_dev_info(const struct net_device *dev) +{ + return 
netdev_priv(dev); +} + +extern struct list_head lowpan_devices; + +int lowpan_frag_rcv(struct sk_buff *skb, const u8 frag_type); +void lowpan_net_frag_exit(void); +int lowpan_net_frag_init(void); + +void lowpan_rx_init(void); +void lowpan_rx_exit(void); + +int lowpan_header_create(struct sk_buff *skb, struct net_device *dev, + unsigned short type, const void *_daddr, + const void *_saddr, unsigned int len); +netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *dev); + +#endif /* __IEEE802154_6LOWPAN_I_H__ */ diff --git a/net/ieee802154/6lowpan/Kconfig b/net/ieee802154/6lowpan/Kconfig new file mode 100644 index 000000000000..d24f985b0bfd --- /dev/null +++ b/net/ieee802154/6lowpan/Kconfig @@ -0,0 +1,5 @@ +config IEEE802154_6LOWPAN + tristate "6lowpan support over IEEE 802.15.4" + depends on 6LOWPAN + ---help--- + IPv6 compression over IEEE 802.15.4. diff --git a/net/ieee802154/6lowpan/Makefile b/net/ieee802154/6lowpan/Makefile new file mode 100644 index 000000000000..6bfb270a81a6 --- /dev/null +++ b/net/ieee802154/6lowpan/Makefile @@ -0,0 +1,3 @@ +obj-$(CONFIG_IEEE802154_6LOWPAN) += ieee802154_6lowpan.o + +ieee802154_6lowpan-y := core.o rx.o reassembly.o tx.o diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c new file mode 100644 index 000000000000..055fbb71ba6f --- /dev/null +++ b/net/ieee802154/6lowpan/core.c @@ -0,0 +1,304 @@ +/* Copyright 2011, Siemens AG + * written by Alexander Smirnov <alex.bluesman.smirnov@gmail.com> + */ + +/* Based on patches from Jon Smirl <jonsmirl@gmail.com> + * Copyright (c) 2011 Jon Smirl <jonsmirl@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +/* Jon's code is based on 6lowpan implementation for Contiki which is: + * Copyright (c) 2008, Swedish Institute of Computer Science. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the Institute nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <linux/module.h> +#include <linux/netdevice.h> +#include <linux/ieee802154.h> + +#include <net/ipv6.h> + +#include "6lowpan_i.h" + +LIST_HEAD(lowpan_devices); +static int lowpan_open_count; + +static __le16 lowpan_get_pan_id(const struct net_device *dev) +{ + struct net_device *real_dev = lowpan_dev_info(dev)->real_dev; + + return ieee802154_mlme_ops(real_dev)->get_pan_id(real_dev); +} + +static __le16 lowpan_get_short_addr(const struct net_device *dev) +{ + struct net_device *real_dev = lowpan_dev_info(dev)->real_dev; + + return ieee802154_mlme_ops(real_dev)->get_short_addr(real_dev); +} + +static u8 lowpan_get_dsn(const struct net_device *dev) +{ + struct net_device *real_dev = lowpan_dev_info(dev)->real_dev; + + return ieee802154_mlme_ops(real_dev)->get_dsn(real_dev); +} + +static struct header_ops lowpan_header_ops = { + .create = lowpan_header_create, +}; + +static struct lock_class_key lowpan_tx_busylock; +static struct lock_class_key lowpan_netdev_xmit_lock_key; + +static void lowpan_set_lockdep_class_one(struct net_device *dev, + struct netdev_queue *txq, + void *_unused) +{ + lockdep_set_class(&txq->_xmit_lock, + &lowpan_netdev_xmit_lock_key); +} + +static int lowpan_dev_init(struct net_device *dev) +{ + netdev_for_each_tx_queue(dev, lowpan_set_lockdep_class_one, NULL); + dev->qdisc_tx_busylock = &lowpan_tx_busylock; + return 0; +} + +static const struct net_device_ops lowpan_netdev_ops = { + .ndo_init = lowpan_dev_init, + .ndo_start_xmit = lowpan_xmit, +}; + +static struct ieee802154_mlme_ops lowpan_mlme = { + .get_pan_id = lowpan_get_pan_id, + .get_short_addr = lowpan_get_short_addr, + .get_dsn = lowpan_get_dsn, +}; + +static void lowpan_setup(struct net_device *dev) +{ + dev->addr_len = IEEE802154_ADDR_LEN; + memset(dev->broadcast, 0xff, IEEE802154_ADDR_LEN); + dev->type = ARPHRD_IEEE802154; + /* Frame Control + Sequence Number + Address fields + Security Header */ + dev->hard_header_len = 2 + 1 + 20 + 14; + dev->needed_tailroom = 2; /* FCS */ + dev->mtu = IPV6_MIN_MTU; + dev->tx_queue_len = 0; + dev->flags = IFF_BROADCAST | IFF_MULTICAST; + dev->watchdog_timeo = 0; + + dev->netdev_ops = &lowpan_netdev_ops; + dev->header_ops = &lowpan_header_ops; + dev->ml_priv = &lowpan_mlme; + dev->destructor = free_netdev; +} + +static int lowpan_validate(struct nlattr *tb[], struct nlattr *data[]) +{ + if (tb[IFLA_ADDRESS]) { + if (nla_len(tb[IFLA_ADDRESS]) != IEEE802154_ADDR_LEN) + return -EINVAL; + } + return 0; +} + +static int lowpan_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) +{ + struct net_device *real_dev; + struct lowpan_dev_record *entry; + int ret; + + ASSERT_RTNL(); + + pr_debug("adding new link\n"); + + if (!tb[IFLA_LINK]) + return -EINVAL; + /* find and hold real wpan device */ + real_dev = dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK])); + if (!real_dev) + return -ENODEV; + if (real_dev->type != ARPHRD_IEEE802154) { + dev_put(real_dev); + return -EINVAL; + } + + 
lowpan_dev_info(dev)->real_dev = real_dev; + mutex_init(&lowpan_dev_info(dev)->dev_list_mtx); + + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) { + dev_put(real_dev); + lowpan_dev_info(dev)->real_dev = NULL; + return -ENOMEM; + } + + entry->ldev = dev; + + /* Set the lowpan hardware address to the wpan hardware address. */ + memcpy(dev->dev_addr, real_dev->dev_addr, IEEE802154_ADDR_LEN); + + mutex_lock(&lowpan_dev_info(dev)->dev_list_mtx); + INIT_LIST_HEAD(&entry->list); + list_add_tail(&entry->list, &lowpan_devices); + mutex_unlock(&lowpan_dev_info(dev)->dev_list_mtx); + + ret = register_netdevice(dev); + if (ret >= 0) { + if (!lowpan_open_count) + lowpan_rx_init(); + lowpan_open_count++; + } + + return ret; +} + +static void lowpan_dellink(struct net_device *dev, struct list_head *head) +{ + struct lowpan_dev_info *lowpan_dev = lowpan_dev_info(dev); + struct net_device *real_dev = lowpan_dev->real_dev; + struct lowpan_dev_record *entry, *tmp; + + ASSERT_RTNL(); + + lowpan_open_count--; + if (!lowpan_open_count) + lowpan_rx_exit(); + + mutex_lock(&lowpan_dev_info(dev)->dev_list_mtx); + list_for_each_entry_safe(entry, tmp, &lowpan_devices, list) { + if (entry->ldev == dev) { + list_del(&entry->list); + kfree(entry); + } + } + mutex_unlock(&lowpan_dev_info(dev)->dev_list_mtx); + + mutex_destroy(&lowpan_dev_info(dev)->dev_list_mtx); + + unregister_netdevice_queue(dev, head); + + dev_put(real_dev); +} + +static struct rtnl_link_ops lowpan_link_ops __read_mostly = { + .kind = "lowpan", + .priv_size = sizeof(struct lowpan_dev_info), + .setup = lowpan_setup, + .newlink = lowpan_newlink, + .dellink = lowpan_dellink, + .validate = lowpan_validate, +}; + +static inline int __init lowpan_netlink_init(void) +{ + return rtnl_link_register(&lowpan_link_ops); +} + +static inline void lowpan_netlink_fini(void) +{ + rtnl_link_unregister(&lowpan_link_ops); +} + +static int lowpan_device_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + LIST_HEAD(del_list); + struct lowpan_dev_record *entry, *tmp; + + if (dev->type != ARPHRD_IEEE802154) + goto out; + + if (event == NETDEV_UNREGISTER) { + list_for_each_entry_safe(entry, tmp, &lowpan_devices, list) { + if (lowpan_dev_info(entry->ldev)->real_dev == dev) + lowpan_dellink(entry->ldev, &del_list); + } + + unregister_netdevice_many(&del_list); + } + +out: + return NOTIFY_DONE; +} + +static struct notifier_block lowpan_dev_notifier = { + .notifier_call = lowpan_device_event, +}; + +static int __init lowpan_init_module(void) +{ + int err = 0; + + err = lowpan_net_frag_init(); + if (err < 0) + goto out; + + err = lowpan_netlink_init(); + if (err < 0) + goto out_frag; + + err = register_netdevice_notifier(&lowpan_dev_notifier); + if (err < 0) + goto out_pack; + + return 0; + +out_pack: + lowpan_netlink_fini(); +out_frag: + lowpan_net_frag_exit(); +out: + return err; +} + +static void __exit lowpan_cleanup_module(void) +{ + lowpan_netlink_fini(); + + lowpan_net_frag_exit(); + + unregister_netdevice_notifier(&lowpan_dev_notifier); +} + +module_init(lowpan_init_module); +module_exit(lowpan_cleanup_module); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_RTNL_LINK("lowpan"); diff --git a/net/ieee802154/reassembly.c b/net/ieee802154/6lowpan/reassembly.c index 9d980ed3ffe2..f46e4d1306f2 100644 --- a/net/ieee802154/reassembly.c +++ b/net/ieee802154/6lowpan/reassembly.c @@ -28,7 +28,7 @@ #include <net/ipv6.h> #include <net/inet_frag.h> -#include "reassembly.h" +#include "6lowpan_i.h" 
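From here on, the single 6lowpan_rtnl.c file becomes a small module under net/ieee802154/6lowpan/: core.c keeps the link management shown above, reassembly.c is moved in place, and the rx.c/tx.c files below carry the receive and transmit paths. The receive path classifies each 802.15.4 payload by its first byte: uncompressed IPv6, an IPHC-compressed header, or a FRAG1/FRAGN fragment that is reassembled first. Below is a standalone sketch of that classification; it uses the dispatch values from RFC 4944 and RFC 6282 directly, with slightly stricter masks than the kernel's switch on (data[0] & 0xe0), and its constant names are local to the sketch rather than the kernel's LOWPAN_DISPATCH_* macros.

#include <stdint.h>
#include <stdio.h>

/* Dispatch values per RFC 4944 (uncompressed IPv6, fragments) and
 * RFC 6282 (IPHC); names are local to this sketch.
 */
#define DISP_IPV6   0x41	/* 01000001: uncompressed IPv6 header follows */
#define DISP_IPHC   0x60	/* 011xxxxx: LOWPAN_IPHC compressed header */
#define DISP_FRAG1  0xc0	/* 11000xxx: first fragment */
#define DISP_FRAGN  0xe0	/* 11100xxx: subsequent fragment */

enum lowpan_kind { KIND_IPV6, KIND_IPHC, KIND_FRAG1, KIND_FRAGN, KIND_UNKNOWN };

static enum lowpan_kind classify(uint8_t dispatch)
{
	if (dispatch == DISP_IPV6)
		return KIND_IPV6;
	if ((dispatch & 0xe0) == DISP_IPHC)
		return KIND_IPHC;
	if ((dispatch & 0xf8) == DISP_FRAG1)
		return KIND_FRAG1;
	if ((dispatch & 0xf8) == DISP_FRAGN)
		return KIND_FRAGN;
	return KIND_UNKNOWN;
}

int main(void)
{
	/* 0xc5 is the first FRAG1 byte for a 1280-byte datagram:
	 * 0xc0 | (1280 >> 8) = 0xc5, matching the frag_hdr[0] built in tx.c.
	 */
	static const char *names[] = { "ipv6", "iphc", "frag1", "fragn", "?" };
	uint8_t samples[] = { 0x41, 0x78, 0xc5, 0xe5, 0x00 };
	unsigned int i;

	for (i = 0; i < sizeof(samples); i++)
		printf("0x%02x -> %s\n", samples[i], names[classify(samples[i])]);
	return 0;
}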
static const char lowpan_frags_cache_name[] = "lowpan-frags"; diff --git a/net/ieee802154/6lowpan/rx.c b/net/ieee802154/6lowpan/rx.c new file mode 100644 index 000000000000..4be1d289ab2d --- /dev/null +++ b/net/ieee802154/6lowpan/rx.c @@ -0,0 +1,171 @@ +/* This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/if_arp.h> + +#include <net/6lowpan.h> +#include <net/ieee802154_netdev.h> + +#include "6lowpan_i.h" + +static int lowpan_give_skb_to_devices(struct sk_buff *skb, + struct net_device *dev) +{ + struct lowpan_dev_record *entry; + struct sk_buff *skb_cp; + int stat = NET_RX_SUCCESS; + + skb->protocol = htons(ETH_P_IPV6); + skb->pkt_type = PACKET_HOST; + + rcu_read_lock(); + list_for_each_entry_rcu(entry, &lowpan_devices, list) + if (lowpan_dev_info(entry->ldev)->real_dev == skb->dev) { + skb_cp = skb_copy(skb, GFP_ATOMIC); + if (!skb_cp) { + kfree_skb(skb); + rcu_read_unlock(); + return NET_RX_DROP; + } + + skb_cp->dev = entry->ldev; + stat = netif_rx(skb_cp); + if (stat == NET_RX_DROP) + break; + } + rcu_read_unlock(); + + consume_skb(skb); + + return stat; +} + +static int +iphc_decompress(struct sk_buff *skb, const struct ieee802154_hdr *hdr) +{ + u8 iphc0, iphc1; + struct ieee802154_addr_sa sa, da; + void *sap, *dap; + + raw_dump_table(__func__, "raw skb data dump", skb->data, skb->len); + /* at least two bytes will be used for the encoding */ + if (skb->len < 2) + return -EINVAL; + + if (lowpan_fetch_skb_u8(skb, &iphc0)) + return -EINVAL; + + if (lowpan_fetch_skb_u8(skb, &iphc1)) + return -EINVAL; + + ieee802154_addr_to_sa(&sa, &hdr->source); + ieee802154_addr_to_sa(&da, &hdr->dest); + + if (sa.addr_type == IEEE802154_ADDR_SHORT) + sap = &sa.short_addr; + else + sap = &sa.hwaddr; + + if (da.addr_type == IEEE802154_ADDR_SHORT) + dap = &da.short_addr; + else + dap = &da.hwaddr; + + return lowpan_header_decompress(skb, skb->dev, sap, sa.addr_type, + IEEE802154_ADDR_LEN, dap, da.addr_type, + IEEE802154_ADDR_LEN, iphc0, iphc1); +} + +static int lowpan_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev) +{ + struct ieee802154_hdr hdr; + int ret; + + skb = skb_share_check(skb, GFP_ATOMIC); + if (!skb) + goto drop; + + if (!netif_running(dev)) + goto drop_skb; + + if (skb->pkt_type == PACKET_OTHERHOST) + goto drop_skb; + + if (dev->type != ARPHRD_IEEE802154) + goto drop_skb; + + if (ieee802154_hdr_peek_addrs(skb, &hdr) < 0) + goto drop_skb; + + /* check that it's our buffer */ + if (skb->data[0] == LOWPAN_DISPATCH_IPV6) { + /* Pull off the 1-byte of 6lowpan header. 
*/ + skb_pull(skb, 1); + return lowpan_give_skb_to_devices(skb, NULL); + } else { + switch (skb->data[0] & 0xe0) { + case LOWPAN_DISPATCH_IPHC: /* ipv6 datagram */ + ret = iphc_decompress(skb, &hdr); + if (ret < 0) + goto drop_skb; + + return lowpan_give_skb_to_devices(skb, NULL); + case LOWPAN_DISPATCH_FRAG1: /* first fragment header */ + ret = lowpan_frag_rcv(skb, LOWPAN_DISPATCH_FRAG1); + if (ret == 1) { + ret = iphc_decompress(skb, &hdr); + if (ret < 0) + goto drop_skb; + + return lowpan_give_skb_to_devices(skb, NULL); + } else if (ret == -1) { + return NET_RX_DROP; + } else { + return NET_RX_SUCCESS; + } + case LOWPAN_DISPATCH_FRAGN: /* next fragments headers */ + ret = lowpan_frag_rcv(skb, LOWPAN_DISPATCH_FRAGN); + if (ret == 1) { + ret = iphc_decompress(skb, &hdr); + if (ret < 0) + goto drop_skb; + + return lowpan_give_skb_to_devices(skb, NULL); + } else if (ret == -1) { + return NET_RX_DROP; + } else { + return NET_RX_SUCCESS; + } + default: + break; + } + } + +drop_skb: + kfree_skb(skb); +drop: + return NET_RX_DROP; +} + +static struct packet_type lowpan_packet_type = { + .type = htons(ETH_P_IEEE802154), + .func = lowpan_rcv, +}; + +void lowpan_rx_init(void) +{ + dev_add_pack(&lowpan_packet_type); +} + +void lowpan_rx_exit(void) +{ + dev_remove_pack(&lowpan_packet_type); +} diff --git a/net/ieee802154/6lowpan/tx.c b/net/ieee802154/6lowpan/tx.c new file mode 100644 index 000000000000..2349070bd534 --- /dev/null +++ b/net/ieee802154/6lowpan/tx.c @@ -0,0 +1,271 @@ +/* This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <net/6lowpan.h> +#include <net/ieee802154_netdev.h> + +#include "6lowpan_i.h" + +/* don't save pan id, it's intra pan */ +struct lowpan_addr { + u8 mode; + union { + /* IPv6 needs big endian here */ + __be64 extended_addr; + __be16 short_addr; + } u; +}; + +struct lowpan_addr_info { + struct lowpan_addr daddr; + struct lowpan_addr saddr; +}; + +static inline struct +lowpan_addr_info *lowpan_skb_priv(const struct sk_buff *skb) +{ + WARN_ON_ONCE(skb_headroom(skb) < sizeof(struct lowpan_addr_info)); + return (struct lowpan_addr_info *)(skb->data - + sizeof(struct lowpan_addr_info)); +} + +int lowpan_header_create(struct sk_buff *skb, struct net_device *dev, + unsigned short type, const void *_daddr, + const void *_saddr, unsigned int len) +{ + const u8 *saddr = _saddr; + const u8 *daddr = _daddr; + struct lowpan_addr_info *info; + + /* TODO: + * if this package isn't ipv6 one, where should it be routed? 
+ */ + if (type != ETH_P_IPV6) + return 0; + + if (!saddr) + saddr = dev->dev_addr; + + raw_dump_inline(__func__, "saddr", (unsigned char *)saddr, 8); + raw_dump_inline(__func__, "daddr", (unsigned char *)daddr, 8); + + info = lowpan_skb_priv(skb); + + /* TODO: Currently we only support extended_addr */ + info->daddr.mode = IEEE802154_ADDR_LONG; + memcpy(&info->daddr.u.extended_addr, daddr, + sizeof(info->daddr.u.extended_addr)); + info->saddr.mode = IEEE802154_ADDR_LONG; + memcpy(&info->saddr.u.extended_addr, saddr, + sizeof(info->daddr.u.extended_addr)); + + return 0; +} + +static struct sk_buff* +lowpan_alloc_frag(struct sk_buff *skb, int size, + const struct ieee802154_hdr *master_hdr) +{ + struct net_device *real_dev = lowpan_dev_info(skb->dev)->real_dev; + struct sk_buff *frag; + int rc; + + frag = alloc_skb(real_dev->hard_header_len + + real_dev->needed_tailroom + size, + GFP_ATOMIC); + + if (likely(frag)) { + frag->dev = real_dev; + frag->priority = skb->priority; + skb_reserve(frag, real_dev->hard_header_len); + skb_reset_network_header(frag); + *mac_cb(frag) = *mac_cb(skb); + + rc = dev_hard_header(frag, real_dev, 0, &master_hdr->dest, + &master_hdr->source, size); + if (rc < 0) { + kfree_skb(frag); + return ERR_PTR(rc); + } + } else { + frag = ERR_PTR(-ENOMEM); + } + + return frag; +} + +static int +lowpan_xmit_fragment(struct sk_buff *skb, const struct ieee802154_hdr *wpan_hdr, + u8 *frag_hdr, int frag_hdrlen, + int offset, int len) +{ + struct sk_buff *frag; + + raw_dump_inline(__func__, " fragment header", frag_hdr, frag_hdrlen); + + frag = lowpan_alloc_frag(skb, frag_hdrlen + len, wpan_hdr); + if (IS_ERR(frag)) + return -PTR_ERR(frag); + + memcpy(skb_put(frag, frag_hdrlen), frag_hdr, frag_hdrlen); + memcpy(skb_put(frag, len), skb_network_header(skb) + offset, len); + + raw_dump_table(__func__, " fragment dump", frag->data, frag->len); + + return dev_queue_xmit(frag); +} + +static int +lowpan_xmit_fragmented(struct sk_buff *skb, struct net_device *dev, + const struct ieee802154_hdr *wpan_hdr) +{ + u16 dgram_size, dgram_offset; + __be16 frag_tag; + u8 frag_hdr[5]; + int frag_cap, frag_len, payload_cap, rc; + int skb_unprocessed, skb_offset; + + dgram_size = lowpan_uncompress_size(skb, &dgram_offset) - + skb->mac_len; + frag_tag = htons(lowpan_dev_info(dev)->fragment_tag); + lowpan_dev_info(dev)->fragment_tag++; + + frag_hdr[0] = LOWPAN_DISPATCH_FRAG1 | ((dgram_size >> 8) & 0x07); + frag_hdr[1] = dgram_size & 0xff; + memcpy(frag_hdr + 2, &frag_tag, sizeof(frag_tag)); + + payload_cap = ieee802154_max_payload(wpan_hdr); + + frag_len = round_down(payload_cap - LOWPAN_FRAG1_HEAD_SIZE - + skb_network_header_len(skb), 8); + + skb_offset = skb_network_header_len(skb); + skb_unprocessed = skb->len - skb->mac_len - skb_offset; + + rc = lowpan_xmit_fragment(skb, wpan_hdr, frag_hdr, + LOWPAN_FRAG1_HEAD_SIZE, 0, + frag_len + skb_network_header_len(skb)); + if (rc) { + pr_debug("%s unable to send FRAG1 packet (tag: %d)", + __func__, ntohs(frag_tag)); + goto err; + } + + frag_hdr[0] &= ~LOWPAN_DISPATCH_FRAG1; + frag_hdr[0] |= LOWPAN_DISPATCH_FRAGN; + frag_cap = round_down(payload_cap - LOWPAN_FRAGN_HEAD_SIZE, 8); + + do { + dgram_offset += frag_len; + skb_offset += frag_len; + skb_unprocessed -= frag_len; + frag_len = min(frag_cap, skb_unprocessed); + + frag_hdr[4] = dgram_offset >> 3; + + rc = lowpan_xmit_fragment(skb, wpan_hdr, frag_hdr, + LOWPAN_FRAGN_HEAD_SIZE, skb_offset, + frag_len); + if (rc) { + pr_debug("%s unable to send a FRAGN packet. 
(tag: %d, offset: %d)\n", + __func__, ntohs(frag_tag), skb_offset); + goto err; + } + } while (skb_unprocessed > frag_cap); + + consume_skb(skb); + return NET_XMIT_SUCCESS; + +err: + kfree_skb(skb); + return rc; +} + +static int lowpan_header(struct sk_buff *skb, struct net_device *dev) +{ + struct ieee802154_addr sa, da; + struct ieee802154_mac_cb *cb = mac_cb_init(skb); + struct lowpan_addr_info info; + void *daddr, *saddr; + + memcpy(&info, lowpan_skb_priv(skb), sizeof(info)); + + /* TODO: Currently we only support extended_addr */ + daddr = &info.daddr.u.extended_addr; + saddr = &info.saddr.u.extended_addr; + + lowpan_header_compress(skb, dev, ETH_P_IPV6, daddr, saddr, skb->len); + + cb->type = IEEE802154_FC_TYPE_DATA; + + /* prepare wpan address data */ + sa.mode = IEEE802154_ADDR_LONG; + sa.pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev); + sa.extended_addr = ieee802154_devaddr_from_raw(saddr); + + /* intra-PAN communications */ + da.pan_id = sa.pan_id; + + /* if the destination address is the broadcast address, use the + * corresponding short address + */ + if (lowpan_is_addr_broadcast((const u8 *)daddr)) { + da.mode = IEEE802154_ADDR_SHORT; + da.short_addr = cpu_to_le16(IEEE802154_ADDR_BROADCAST); + cb->ackreq = false; + } else { + da.mode = IEEE802154_ADDR_LONG; + da.extended_addr = ieee802154_devaddr_from_raw(daddr); + cb->ackreq = true; + } + + return dev_hard_header(skb, lowpan_dev_info(dev)->real_dev, + ETH_P_IPV6, (void *)&da, (void *)&sa, 0); +} + +netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct ieee802154_hdr wpan_hdr; + int max_single, ret; + + pr_debug("package xmit\n"); + + /* We must take a copy of the skb before we modify/replace the ipv6 + * header as the header could be used elsewhere + */ + skb = skb_unshare(skb, GFP_ATOMIC); + if (!skb) + return NET_XMIT_DROP; + + ret = lowpan_header(skb, dev); + if (ret < 0) { + kfree_skb(skb); + return NET_XMIT_DROP; + } + + if (ieee802154_hdr_peek(skb, &wpan_hdr) < 0) { + kfree_skb(skb); + return NET_XMIT_DROP; + } + + max_single = ieee802154_max_payload(&wpan_hdr); + + if (skb_tail_pointer(skb) - skb_network_header(skb) <= max_single) { + skb->dev = lowpan_dev_info(dev)->real_dev; + return dev_queue_xmit(skb); + } else { + netdev_tx_t rc; + + pr_debug("frame is too big, fragmentation is needed\n"); + rc = lowpan_xmit_fragmented(skb, dev, &wpan_hdr); + + return rc < 0 ? NET_XMIT_DROP : rc; + } +} diff --git a/net/ieee802154/6lowpan_rtnl.c b/net/ieee802154/6lowpan_rtnl.c deleted file mode 100644 index 27eaa65e88e1..000000000000 --- a/net/ieee802154/6lowpan_rtnl.c +++ /dev/null @@ -1,729 +0,0 @@ -/* Copyright 2011, Siemens AG - * written by Alexander Smirnov <alex.bluesman.smirnov@gmail.com> - */ - -/* Based on patches from Jon Smirl <jonsmirl@gmail.com> - * Copyright (c) 2011 Jon Smirl <jonsmirl@gmail.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -/* Jon's code is based on 6lowpan implementation for Contiki which is: - * Copyright (c) 2008, Swedish Institute of Computer Science. - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the Institute nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include <linux/bitops.h> -#include <linux/if_arp.h> -#include <linux/module.h> -#include <linux/moduleparam.h> -#include <linux/netdevice.h> -#include <linux/ieee802154.h> -#include <net/af_ieee802154.h> -#include <net/ieee802154_netdev.h> -#include <net/6lowpan.h> -#include <net/ipv6.h> - -#include "reassembly.h" - -static LIST_HEAD(lowpan_devices); -static int lowpan_open_count; - -/* private device info */ -struct lowpan_dev_info { - struct net_device *real_dev; /* real WPAN device ptr */ - struct mutex dev_list_mtx; /* mutex for list ops */ - u16 fragment_tag; -}; - -struct lowpan_dev_record { - struct net_device *ldev; - struct list_head list; -}; - -/* don't save pan id, it's intra pan */ -struct lowpan_addr { - u8 mode; - union { - /* IPv6 needs big endian here */ - __be64 extended_addr; - __be16 short_addr; - } u; -}; - -struct lowpan_addr_info { - struct lowpan_addr daddr; - struct lowpan_addr saddr; -}; - -static inline struct -lowpan_dev_info *lowpan_dev_info(const struct net_device *dev) -{ - return netdev_priv(dev); -} - -static inline struct -lowpan_addr_info *lowpan_skb_priv(const struct sk_buff *skb) -{ - WARN_ON_ONCE(skb_headroom(skb) < sizeof(struct lowpan_addr_info)); - return (struct lowpan_addr_info *)(skb->data - - sizeof(struct lowpan_addr_info)); -} - -static int lowpan_header_create(struct sk_buff *skb, struct net_device *dev, - unsigned short type, const void *_daddr, - const void *_saddr, unsigned int len) -{ - const u8 *saddr = _saddr; - const u8 *daddr = _daddr; - struct lowpan_addr_info *info; - - /* TODO: - * if this package isn't ipv6 one, where should it be routed? 
- */ - if (type != ETH_P_IPV6) - return 0; - - if (!saddr) - saddr = dev->dev_addr; - - raw_dump_inline(__func__, "saddr", (unsigned char *)saddr, 8); - raw_dump_inline(__func__, "daddr", (unsigned char *)daddr, 8); - - info = lowpan_skb_priv(skb); - - /* TODO: Currently we only support extended_addr */ - info->daddr.mode = IEEE802154_ADDR_LONG; - memcpy(&info->daddr.u.extended_addr, daddr, - sizeof(info->daddr.u.extended_addr)); - info->saddr.mode = IEEE802154_ADDR_LONG; - memcpy(&info->saddr.u.extended_addr, saddr, - sizeof(info->daddr.u.extended_addr)); - - return 0; -} - -static int lowpan_give_skb_to_devices(struct sk_buff *skb, - struct net_device *dev) -{ - struct lowpan_dev_record *entry; - struct sk_buff *skb_cp; - int stat = NET_RX_SUCCESS; - - skb->protocol = htons(ETH_P_IPV6); - skb->pkt_type = PACKET_HOST; - - rcu_read_lock(); - list_for_each_entry_rcu(entry, &lowpan_devices, list) - if (lowpan_dev_info(entry->ldev)->real_dev == skb->dev) { - skb_cp = skb_copy(skb, GFP_ATOMIC); - if (!skb_cp) { - kfree_skb(skb); - rcu_read_unlock(); - return NET_RX_DROP; - } - - skb_cp->dev = entry->ldev; - stat = netif_rx(skb_cp); - if (stat == NET_RX_DROP) - break; - } - rcu_read_unlock(); - - consume_skb(skb); - - return stat; -} - -static int -iphc_decompress(struct sk_buff *skb, const struct ieee802154_hdr *hdr) -{ - u8 iphc0, iphc1; - struct ieee802154_addr_sa sa, da; - void *sap, *dap; - - raw_dump_table(__func__, "raw skb data dump", skb->data, skb->len); - /* at least two bytes will be used for the encoding */ - if (skb->len < 2) - return -EINVAL; - - if (lowpan_fetch_skb_u8(skb, &iphc0)) - return -EINVAL; - - if (lowpan_fetch_skb_u8(skb, &iphc1)) - return -EINVAL; - - ieee802154_addr_to_sa(&sa, &hdr->source); - ieee802154_addr_to_sa(&da, &hdr->dest); - - if (sa.addr_type == IEEE802154_ADDR_SHORT) - sap = &sa.short_addr; - else - sap = &sa.hwaddr; - - if (da.addr_type == IEEE802154_ADDR_SHORT) - dap = &da.short_addr; - else - dap = &da.hwaddr; - - return lowpan_header_decompress(skb, skb->dev, sap, sa.addr_type, - IEEE802154_ADDR_LEN, dap, da.addr_type, - IEEE802154_ADDR_LEN, iphc0, iphc1); -} - -static struct sk_buff* -lowpan_alloc_frag(struct sk_buff *skb, int size, - const struct ieee802154_hdr *master_hdr) -{ - struct net_device *real_dev = lowpan_dev_info(skb->dev)->real_dev; - struct sk_buff *frag; - int rc; - - frag = alloc_skb(real_dev->hard_header_len + - real_dev->needed_tailroom + size, - GFP_ATOMIC); - - if (likely(frag)) { - frag->dev = real_dev; - frag->priority = skb->priority; - skb_reserve(frag, real_dev->hard_header_len); - skb_reset_network_header(frag); - *mac_cb(frag) = *mac_cb(skb); - - rc = dev_hard_header(frag, real_dev, 0, &master_hdr->dest, - &master_hdr->source, size); - if (rc < 0) { - kfree_skb(frag); - return ERR_PTR(rc); - } - } else { - frag = ERR_PTR(-ENOMEM); - } - - return frag; -} - -static int -lowpan_xmit_fragment(struct sk_buff *skb, const struct ieee802154_hdr *wpan_hdr, - u8 *frag_hdr, int frag_hdrlen, - int offset, int len) -{ - struct sk_buff *frag; - - raw_dump_inline(__func__, " fragment header", frag_hdr, frag_hdrlen); - - frag = lowpan_alloc_frag(skb, frag_hdrlen + len, wpan_hdr); - if (IS_ERR(frag)) - return -PTR_ERR(frag); - - memcpy(skb_put(frag, frag_hdrlen), frag_hdr, frag_hdrlen); - memcpy(skb_put(frag, len), skb_network_header(skb) + offset, len); - - raw_dump_table(__func__, " fragment dump", frag->data, frag->len); - - return dev_queue_xmit(frag); -} - -static int -lowpan_xmit_fragmented(struct sk_buff *skb, struct net_device 
*dev, - const struct ieee802154_hdr *wpan_hdr) -{ - u16 dgram_size, dgram_offset; - __be16 frag_tag; - u8 frag_hdr[5]; - int frag_cap, frag_len, payload_cap, rc; - int skb_unprocessed, skb_offset; - - dgram_size = lowpan_uncompress_size(skb, &dgram_offset) - - skb->mac_len; - frag_tag = htons(lowpan_dev_info(dev)->fragment_tag); - lowpan_dev_info(dev)->fragment_tag++; - - frag_hdr[0] = LOWPAN_DISPATCH_FRAG1 | ((dgram_size >> 8) & 0x07); - frag_hdr[1] = dgram_size & 0xff; - memcpy(frag_hdr + 2, &frag_tag, sizeof(frag_tag)); - - payload_cap = ieee802154_max_payload(wpan_hdr); - - frag_len = round_down(payload_cap - LOWPAN_FRAG1_HEAD_SIZE - - skb_network_header_len(skb), 8); - - skb_offset = skb_network_header_len(skb); - skb_unprocessed = skb->len - skb->mac_len - skb_offset; - - rc = lowpan_xmit_fragment(skb, wpan_hdr, frag_hdr, - LOWPAN_FRAG1_HEAD_SIZE, 0, - frag_len + skb_network_header_len(skb)); - if (rc) { - pr_debug("%s unable to send FRAG1 packet (tag: %d)", - __func__, ntohs(frag_tag)); - goto err; - } - - frag_hdr[0] &= ~LOWPAN_DISPATCH_FRAG1; - frag_hdr[0] |= LOWPAN_DISPATCH_FRAGN; - frag_cap = round_down(payload_cap - LOWPAN_FRAGN_HEAD_SIZE, 8); - - do { - dgram_offset += frag_len; - skb_offset += frag_len; - skb_unprocessed -= frag_len; - frag_len = min(frag_cap, skb_unprocessed); - - frag_hdr[4] = dgram_offset >> 3; - - rc = lowpan_xmit_fragment(skb, wpan_hdr, frag_hdr, - LOWPAN_FRAGN_HEAD_SIZE, skb_offset, - frag_len); - if (rc) { - pr_debug("%s unable to send a FRAGN packet. (tag: %d, offset: %d)\n", - __func__, ntohs(frag_tag), skb_offset); - goto err; - } - } while (skb_unprocessed > frag_cap); - - consume_skb(skb); - return NET_XMIT_SUCCESS; - -err: - kfree_skb(skb); - return rc; -} - -static int lowpan_header(struct sk_buff *skb, struct net_device *dev) -{ - struct ieee802154_addr sa, da; - struct ieee802154_mac_cb *cb = mac_cb_init(skb); - struct lowpan_addr_info info; - void *daddr, *saddr; - - memcpy(&info, lowpan_skb_priv(skb), sizeof(info)); - - /* TODO: Currently we only support extended_addr */ - daddr = &info.daddr.u.extended_addr; - saddr = &info.saddr.u.extended_addr; - - lowpan_header_compress(skb, dev, ETH_P_IPV6, daddr, saddr, skb->len); - - cb->type = IEEE802154_FC_TYPE_DATA; - - /* prepare wpan address data */ - sa.mode = IEEE802154_ADDR_LONG; - sa.pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev); - sa.extended_addr = ieee802154_devaddr_from_raw(saddr); - - /* intra-PAN communications */ - da.pan_id = sa.pan_id; - - /* if the destination address is the broadcast address, use the - * corresponding short address - */ - if (lowpan_is_addr_broadcast((const u8 *)daddr)) { - da.mode = IEEE802154_ADDR_SHORT; - da.short_addr = cpu_to_le16(IEEE802154_ADDR_BROADCAST); - cb->ackreq = false; - } else { - da.mode = IEEE802154_ADDR_LONG; - da.extended_addr = ieee802154_devaddr_from_raw(daddr); - cb->ackreq = true; - } - - return dev_hard_header(skb, lowpan_dev_info(dev)->real_dev, - ETH_P_IPV6, (void *)&da, (void *)&sa, 0); -} - -static netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *dev) -{ - struct ieee802154_hdr wpan_hdr; - int max_single, ret; - - pr_debug("package xmit\n"); - - /* We must take a copy of the skb before we modify/replace the ipv6 - * header as the header could be used elsewhere - */ - skb = skb_unshare(skb, GFP_ATOMIC); - if (!skb) - return NET_XMIT_DROP; - - ret = lowpan_header(skb, dev); - if (ret < 0) { - kfree_skb(skb); - return NET_XMIT_DROP; - } - - if (ieee802154_hdr_peek(skb, &wpan_hdr) < 0) { - kfree_skb(skb); - return 
NET_XMIT_DROP; - } - - max_single = ieee802154_max_payload(&wpan_hdr); - - if (skb_tail_pointer(skb) - skb_network_header(skb) <= max_single) { - skb->dev = lowpan_dev_info(dev)->real_dev; - return dev_queue_xmit(skb); - } else { - netdev_tx_t rc; - - pr_debug("frame is too big, fragmentation is needed\n"); - rc = lowpan_xmit_fragmented(skb, dev, &wpan_hdr); - - return rc < 0 ? NET_XMIT_DROP : rc; - } -} - -static __le16 lowpan_get_pan_id(const struct net_device *dev) -{ - struct net_device *real_dev = lowpan_dev_info(dev)->real_dev; - - return ieee802154_mlme_ops(real_dev)->get_pan_id(real_dev); -} - -static __le16 lowpan_get_short_addr(const struct net_device *dev) -{ - struct net_device *real_dev = lowpan_dev_info(dev)->real_dev; - - return ieee802154_mlme_ops(real_dev)->get_short_addr(real_dev); -} - -static u8 lowpan_get_dsn(const struct net_device *dev) -{ - struct net_device *real_dev = lowpan_dev_info(dev)->real_dev; - - return ieee802154_mlme_ops(real_dev)->get_dsn(real_dev); -} - -static struct header_ops lowpan_header_ops = { - .create = lowpan_header_create, -}; - -static struct lock_class_key lowpan_tx_busylock; -static struct lock_class_key lowpan_netdev_xmit_lock_key; - -static void lowpan_set_lockdep_class_one(struct net_device *dev, - struct netdev_queue *txq, - void *_unused) -{ - lockdep_set_class(&txq->_xmit_lock, - &lowpan_netdev_xmit_lock_key); -} - -static int lowpan_dev_init(struct net_device *dev) -{ - netdev_for_each_tx_queue(dev, lowpan_set_lockdep_class_one, NULL); - dev->qdisc_tx_busylock = &lowpan_tx_busylock; - return 0; -} - -static const struct net_device_ops lowpan_netdev_ops = { - .ndo_init = lowpan_dev_init, - .ndo_start_xmit = lowpan_xmit, -}; - -static struct ieee802154_mlme_ops lowpan_mlme = { - .get_pan_id = lowpan_get_pan_id, - .get_short_addr = lowpan_get_short_addr, - .get_dsn = lowpan_get_dsn, -}; - -static void lowpan_setup(struct net_device *dev) -{ - dev->addr_len = IEEE802154_ADDR_LEN; - memset(dev->broadcast, 0xff, IEEE802154_ADDR_LEN); - dev->type = ARPHRD_IEEE802154; - /* Frame Control + Sequence Number + Address fields + Security Header */ - dev->hard_header_len = 2 + 1 + 20 + 14; - dev->needed_tailroom = 2; /* FCS */ - dev->mtu = IPV6_MIN_MTU; - dev->tx_queue_len = 0; - dev->flags = IFF_BROADCAST | IFF_MULTICAST; - dev->watchdog_timeo = 0; - - dev->netdev_ops = &lowpan_netdev_ops; - dev->header_ops = &lowpan_header_ops; - dev->ml_priv = &lowpan_mlme; - dev->destructor = free_netdev; -} - -static int lowpan_validate(struct nlattr *tb[], struct nlattr *data[]) -{ - if (tb[IFLA_ADDRESS]) { - if (nla_len(tb[IFLA_ADDRESS]) != IEEE802154_ADDR_LEN) - return -EINVAL; - } - return 0; -} - -static int lowpan_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, struct net_device *orig_dev) -{ - struct ieee802154_hdr hdr; - int ret; - - skb = skb_share_check(skb, GFP_ATOMIC); - if (!skb) - goto drop; - - if (!netif_running(dev)) - goto drop_skb; - - if (skb->pkt_type == PACKET_OTHERHOST) - goto drop_skb; - - if (dev->type != ARPHRD_IEEE802154) - goto drop_skb; - - if (ieee802154_hdr_peek_addrs(skb, &hdr) < 0) - goto drop_skb; - - /* check that it's our buffer */ - if (skb->data[0] == LOWPAN_DISPATCH_IPV6) { - /* Pull off the 1-byte of 6lowpan header. 
*/ - skb_pull(skb, 1); - return lowpan_give_skb_to_devices(skb, NULL); - } else { - switch (skb->data[0] & 0xe0) { - case LOWPAN_DISPATCH_IPHC: /* ipv6 datagram */ - ret = iphc_decompress(skb, &hdr); - if (ret < 0) - goto drop_skb; - - return lowpan_give_skb_to_devices(skb, NULL); - case LOWPAN_DISPATCH_FRAG1: /* first fragment header */ - ret = lowpan_frag_rcv(skb, LOWPAN_DISPATCH_FRAG1); - if (ret == 1) { - ret = iphc_decompress(skb, &hdr); - if (ret < 0) - goto drop_skb; - - return lowpan_give_skb_to_devices(skb, NULL); - } else if (ret == -1) { - return NET_RX_DROP; - } else { - return NET_RX_SUCCESS; - } - case LOWPAN_DISPATCH_FRAGN: /* next fragments headers */ - ret = lowpan_frag_rcv(skb, LOWPAN_DISPATCH_FRAGN); - if (ret == 1) { - ret = iphc_decompress(skb, &hdr); - if (ret < 0) - goto drop_skb; - - return lowpan_give_skb_to_devices(skb, NULL); - } else if (ret == -1) { - return NET_RX_DROP; - } else { - return NET_RX_SUCCESS; - } - default: - break; - } - } - -drop_skb: - kfree_skb(skb); -drop: - return NET_RX_DROP; -} - -static struct packet_type lowpan_packet_type = { - .type = htons(ETH_P_IEEE802154), - .func = lowpan_rcv, -}; - -static int lowpan_newlink(struct net *src_net, struct net_device *dev, - struct nlattr *tb[], struct nlattr *data[]) -{ - struct net_device *real_dev; - struct lowpan_dev_record *entry; - int ret; - - ASSERT_RTNL(); - - pr_debug("adding new link\n"); - - if (!tb[IFLA_LINK]) - return -EINVAL; - /* find and hold real wpan device */ - real_dev = dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK])); - if (!real_dev) - return -ENODEV; - if (real_dev->type != ARPHRD_IEEE802154) { - dev_put(real_dev); - return -EINVAL; - } - - lowpan_dev_info(dev)->real_dev = real_dev; - mutex_init(&lowpan_dev_info(dev)->dev_list_mtx); - - entry = kzalloc(sizeof(*entry), GFP_KERNEL); - if (!entry) { - dev_put(real_dev); - lowpan_dev_info(dev)->real_dev = NULL; - return -ENOMEM; - } - - entry->ldev = dev; - - /* Set the lowpan hardware address to the wpan hardware address. 
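The receive path in lowpan_rcv() just shown keys entirely off the first byte of the 802.15.4 payload. As a standalone restatement of that classification (an illustrative helper, not code from the patch; the constants are the LOWPAN_DISPATCH_* values: 0x41 uncompressed IPv6, 0x60 IPHC under a 0xe0 mask, 0xc0 FRAG1, 0xe0 FRAGN):

enum lowpan_pkt_kind { PKT_IPV6, PKT_IPHC, PKT_FRAG1, PKT_FRAGN, PKT_OTHER };

/* Classify a 6LoWPAN frame by its dispatch byte, mirroring the checks in
 * lowpan_rcv() above.  'b' is skb->data[0] of the received frame. */
static enum lowpan_pkt_kind lowpan_classify(unsigned char b)
{
	if (b == 0x41)			/* LOWPAN_DISPATCH_IPV6 */
		return PKT_IPV6;

	switch (b & 0xe0) {
	case 0x60:			/* LOWPAN_DISPATCH_IPHC */
		return PKT_IPHC;
	case 0xc0:			/* LOWPAN_DISPATCH_FRAG1 */
		return PKT_FRAG1;
	case 0xe0:			/* LOWPAN_DISPATCH_FRAGN */
		return PKT_FRAGN;
	default:
		return PKT_OTHER;
	}
}

Fragments are first handed to lowpan_frag_rcv() for reassembly and only decompressed with iphc_decompress() once the full datagram has been rebuilt, as the switch above shows.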
*/ - memcpy(dev->dev_addr, real_dev->dev_addr, IEEE802154_ADDR_LEN); - - mutex_lock(&lowpan_dev_info(dev)->dev_list_mtx); - INIT_LIST_HEAD(&entry->list); - list_add_tail(&entry->list, &lowpan_devices); - mutex_unlock(&lowpan_dev_info(dev)->dev_list_mtx); - - ret = register_netdevice(dev); - if (ret >= 0) { - if (!lowpan_open_count) - dev_add_pack(&lowpan_packet_type); - lowpan_open_count++; - } - - return ret; -} - -static void lowpan_dellink(struct net_device *dev, struct list_head *head) -{ - struct lowpan_dev_info *lowpan_dev = lowpan_dev_info(dev); - struct net_device *real_dev = lowpan_dev->real_dev; - struct lowpan_dev_record *entry, *tmp; - - ASSERT_RTNL(); - - lowpan_open_count--; - if (!lowpan_open_count) - dev_remove_pack(&lowpan_packet_type); - - mutex_lock(&lowpan_dev_info(dev)->dev_list_mtx); - list_for_each_entry_safe(entry, tmp, &lowpan_devices, list) { - if (entry->ldev == dev) { - list_del(&entry->list); - kfree(entry); - } - } - mutex_unlock(&lowpan_dev_info(dev)->dev_list_mtx); - - mutex_destroy(&lowpan_dev_info(dev)->dev_list_mtx); - - unregister_netdevice_queue(dev, head); - - dev_put(real_dev); -} - -static struct rtnl_link_ops lowpan_link_ops __read_mostly = { - .kind = "lowpan", - .priv_size = sizeof(struct lowpan_dev_info), - .setup = lowpan_setup, - .newlink = lowpan_newlink, - .dellink = lowpan_dellink, - .validate = lowpan_validate, -}; - -static inline int __init lowpan_netlink_init(void) -{ - return rtnl_link_register(&lowpan_link_ops); -} - -static inline void lowpan_netlink_fini(void) -{ - rtnl_link_unregister(&lowpan_link_ops); -} - -static int lowpan_device_event(struct notifier_block *unused, - unsigned long event, void *ptr) -{ - struct net_device *dev = netdev_notifier_info_to_dev(ptr); - LIST_HEAD(del_list); - struct lowpan_dev_record *entry, *tmp; - - if (dev->type != ARPHRD_IEEE802154) - goto out; - - if (event == NETDEV_UNREGISTER) { - list_for_each_entry_safe(entry, tmp, &lowpan_devices, list) { - if (lowpan_dev_info(entry->ldev)->real_dev == dev) - lowpan_dellink(entry->ldev, &del_list); - } - - unregister_netdevice_many(&del_list); - } - -out: - return NOTIFY_DONE; -} - -static struct notifier_block lowpan_dev_notifier = { - .notifier_call = lowpan_device_event, -}; - -static int __init lowpan_init_module(void) -{ - int err = 0; - - err = lowpan_net_frag_init(); - if (err < 0) - goto out; - - err = lowpan_netlink_init(); - if (err < 0) - goto out_frag; - - err = register_netdevice_notifier(&lowpan_dev_notifier); - if (err < 0) - goto out_pack; - - return 0; - -out_pack: - lowpan_netlink_fini(); -out_frag: - lowpan_net_frag_exit(); -out: - return err; -} - -static void __exit lowpan_cleanup_module(void) -{ - lowpan_netlink_fini(); - - lowpan_net_frag_exit(); - - unregister_netdevice_notifier(&lowpan_dev_notifier); -} - -module_init(lowpan_init_module); -module_exit(lowpan_cleanup_module); -MODULE_LICENSE("GPL"); -MODULE_ALIAS_RTNL_LINK("lowpan"); diff --git a/net/ieee802154/Kconfig b/net/ieee802154/Kconfig index c0d4154d144f..1370d5b0041b 100644 --- a/net/ieee802154/Kconfig +++ b/net/ieee802154/Kconfig @@ -1,4 +1,4 @@ -config IEEE802154 +menuconfig IEEE802154 tristate "IEEE Std 802.15.4 Low-Rate Wireless Personal Area Networks support" ---help--- IEEE Std 802.15.4 defines a low data rate, low power and low @@ -10,8 +10,16 @@ config IEEE802154 Say Y here to compile LR-WPAN support into the kernel or say M to compile it as modules. 
-config IEEE802154_6LOWPAN - tristate "6lowpan support over IEEE 802.15.4" - depends on IEEE802154 && 6LOWPAN +if IEEE802154 + +config IEEE802154_SOCKET + tristate "IEEE 802.15.4 socket interface" + default y ---help--- - IPv6 compression over IEEE 802.15.4. + Socket interface for IEEE 802.15.4. Contains DGRAM sockets interface + for 802.15.4 dataframes. Also RAW socket interface to build MAC + header from userspace. + +source "net/ieee802154/6lowpan/Kconfig" + +endif diff --git a/net/ieee802154/Makefile b/net/ieee802154/Makefile index 9f6970f2a28b..05dab2957cd4 100644 --- a/net/ieee802154/Makefile +++ b/net/ieee802154/Makefile @@ -1,9 +1,9 @@ -obj-$(CONFIG_IEEE802154) += ieee802154.o af_802154.o -obj-$(CONFIG_IEEE802154_6LOWPAN) += ieee802154_6lowpan.o +obj-$(CONFIG_IEEE802154) += ieee802154.o +obj-$(CONFIG_IEEE802154_SOCKET) += ieee802154_socket.o +obj-y += 6lowpan/ -ieee802154_6lowpan-y := 6lowpan_rtnl.o reassembly.o ieee802154-y := netlink.o nl-mac.o nl-phy.o nl_policy.o core.o \ header_ops.o sysfs.o nl802154.o -af_802154-y := af_ieee802154.o raw.o dgram.o +ieee802154_socket-y := socket.o ccflags-y += -D__CHECK_ENDIAN__ diff --git a/net/ieee802154/af802154.h b/net/ieee802154/af802154.h deleted file mode 100644 index 343b63e6f953..000000000000 --- a/net/ieee802154/af802154.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Internal interfaces for ieee 802.15.4 address family. - * - * Copyright 2007, 2008, 2009 Siemens AG - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Written by: - * Sergey Lapin <slapin@ossfans.org> - * Dmitry Eremin-Solenikov <dbaryshkov@gmail.com> - */ - -#ifndef AF802154_H -#define AF802154_H - -struct sk_buff; -struct net_device; -struct ieee802154_addr; -extern struct proto ieee802154_raw_prot; -extern struct proto ieee802154_dgram_prot; -void ieee802154_raw_deliver(struct net_device *dev, struct sk_buff *skb); -int ieee802154_dgram_deliver(struct net_device *dev, struct sk_buff *skb); -struct net_device *ieee802154_get_dev(struct net *net, - const struct ieee802154_addr *addr); - -#endif diff --git a/net/ieee802154/af_ieee802154.c b/net/ieee802154/af_ieee802154.c deleted file mode 100644 index d0a1282cdf43..000000000000 --- a/net/ieee802154/af_ieee802154.c +++ /dev/null @@ -1,369 +0,0 @@ -/* - * IEEE802154.4 socket interface - * - * Copyright 2007, 2008 Siemens AG - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * Written by: - * Sergey Lapin <slapin@ossfans.org> - * Maxim Gorbachyov <maxim.gorbachev@siemens.com> - */ - -#include <linux/net.h> -#include <linux/capability.h> -#include <linux/module.h> -#include <linux/if_arp.h> -#include <linux/if.h> -#include <linux/termios.h> /* For TIOCOUTQ/INQ */ -#include <linux/list.h> -#include <linux/slab.h> -#include <net/datalink.h> -#include <net/psnap.h> -#include <net/sock.h> -#include <net/tcp_states.h> -#include <net/route.h> - -#include <net/af_ieee802154.h> -#include <net/ieee802154_netdev.h> - -#include "af802154.h" - -/* Utility function for families */ -struct net_device* -ieee802154_get_dev(struct net *net, const struct ieee802154_addr *addr) -{ - struct net_device *dev = NULL; - struct net_device *tmp; - __le16 pan_id, short_addr; - u8 hwaddr[IEEE802154_ADDR_LEN]; - - switch (addr->mode) { - case IEEE802154_ADDR_LONG: - ieee802154_devaddr_to_raw(hwaddr, addr->extended_addr); - rcu_read_lock(); - dev = dev_getbyhwaddr_rcu(net, ARPHRD_IEEE802154, hwaddr); - if (dev) - dev_hold(dev); - rcu_read_unlock(); - break; - case IEEE802154_ADDR_SHORT: - if (addr->pan_id == cpu_to_le16(IEEE802154_PANID_BROADCAST) || - addr->short_addr == cpu_to_le16(IEEE802154_ADDR_UNDEF) || - addr->short_addr == cpu_to_le16(IEEE802154_ADDR_BROADCAST)) - break; - - rtnl_lock(); - - for_each_netdev(net, tmp) { - if (tmp->type != ARPHRD_IEEE802154) - continue; - - pan_id = ieee802154_mlme_ops(tmp)->get_pan_id(tmp); - short_addr = - ieee802154_mlme_ops(tmp)->get_short_addr(tmp); - - if (pan_id == addr->pan_id && - short_addr == addr->short_addr) { - dev = tmp; - dev_hold(dev); - break; - } - } - - rtnl_unlock(); - break; - default: - pr_warn("Unsupported ieee802154 address type: %d\n", - addr->mode); - break; - } - - return dev; -} - -static int ieee802154_sock_release(struct socket *sock) -{ - struct sock *sk = sock->sk; - - if (sk) { - sock->sk = NULL; - sk->sk_prot->close(sk, 0); - } - return 0; -} - -static int ieee802154_sock_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) -{ - struct sock *sk = sock->sk; - - return sk->sk_prot->sendmsg(iocb, sk, msg, len); -} - -static int ieee802154_sock_bind(struct socket *sock, struct sockaddr *uaddr, - int addr_len) -{ - struct sock *sk = sock->sk; - - if (sk->sk_prot->bind) - return sk->sk_prot->bind(sk, uaddr, addr_len); - - return sock_no_bind(sock, uaddr, addr_len); -} - -static int ieee802154_sock_connect(struct socket *sock, struct sockaddr *uaddr, - int addr_len, int flags) -{ - struct sock *sk = sock->sk; - - if (addr_len < sizeof(uaddr->sa_family)) - return -EINVAL; - - if (uaddr->sa_family == AF_UNSPEC) - return sk->sk_prot->disconnect(sk, flags); - - return sk->sk_prot->connect(sk, uaddr, addr_len); -} - -static int ieee802154_dev_ioctl(struct sock *sk, struct ifreq __user *arg, - unsigned int cmd) -{ - struct ifreq ifr; - int ret = -ENOIOCTLCMD; - struct net_device *dev; - - if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) - return -EFAULT; - - ifr.ifr_name[IFNAMSIZ-1] = 0; - - dev_load(sock_net(sk), ifr.ifr_name); - dev = dev_get_by_name(sock_net(sk), ifr.ifr_name); - - if (!dev) - return -ENODEV; - - if (dev->type == ARPHRD_IEEE802154 && dev->netdev_ops->ndo_do_ioctl) - ret = dev->netdev_ops->ndo_do_ioctl(dev, &ifr, cmd); - - if (!ret && copy_to_user(arg, &ifr, sizeof(struct ifreq))) - ret = -EFAULT; - dev_put(dev); - - return ret; -} - -static int ieee802154_sock_ioctl(struct socket *sock, unsigned int cmd, - unsigned long arg) -{ - struct sock *sk = sock->sk; - - switch 
(cmd) { - case SIOCGSTAMP: - return sock_get_timestamp(sk, (struct timeval __user *)arg); - case SIOCGSTAMPNS: - return sock_get_timestampns(sk, (struct timespec __user *)arg); - case SIOCGIFADDR: - case SIOCSIFADDR: - return ieee802154_dev_ioctl(sk, (struct ifreq __user *)arg, - cmd); - default: - if (!sk->sk_prot->ioctl) - return -ENOIOCTLCMD; - return sk->sk_prot->ioctl(sk, cmd, arg); - } -} - -static const struct proto_ops ieee802154_raw_ops = { - .family = PF_IEEE802154, - .owner = THIS_MODULE, - .release = ieee802154_sock_release, - .bind = ieee802154_sock_bind, - .connect = ieee802154_sock_connect, - .socketpair = sock_no_socketpair, - .accept = sock_no_accept, - .getname = sock_no_getname, - .poll = datagram_poll, - .ioctl = ieee802154_sock_ioctl, - .listen = sock_no_listen, - .shutdown = sock_no_shutdown, - .setsockopt = sock_common_setsockopt, - .getsockopt = sock_common_getsockopt, - .sendmsg = ieee802154_sock_sendmsg, - .recvmsg = sock_common_recvmsg, - .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, -#ifdef CONFIG_COMPAT - .compat_setsockopt = compat_sock_common_setsockopt, - .compat_getsockopt = compat_sock_common_getsockopt, -#endif -}; - -static const struct proto_ops ieee802154_dgram_ops = { - .family = PF_IEEE802154, - .owner = THIS_MODULE, - .release = ieee802154_sock_release, - .bind = ieee802154_sock_bind, - .connect = ieee802154_sock_connect, - .socketpair = sock_no_socketpair, - .accept = sock_no_accept, - .getname = sock_no_getname, - .poll = datagram_poll, - .ioctl = ieee802154_sock_ioctl, - .listen = sock_no_listen, - .shutdown = sock_no_shutdown, - .setsockopt = sock_common_setsockopt, - .getsockopt = sock_common_getsockopt, - .sendmsg = ieee802154_sock_sendmsg, - .recvmsg = sock_common_recvmsg, - .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, -#ifdef CONFIG_COMPAT - .compat_setsockopt = compat_sock_common_setsockopt, - .compat_getsockopt = compat_sock_common_getsockopt, -#endif -}; - -/* Create a socket. Initialise the socket, blank the addresses - * set the state. 
- */ -static int ieee802154_create(struct net *net, struct socket *sock, - int protocol, int kern) -{ - struct sock *sk; - int rc; - struct proto *proto; - const struct proto_ops *ops; - - if (!net_eq(net, &init_net)) - return -EAFNOSUPPORT; - - switch (sock->type) { - case SOCK_RAW: - proto = &ieee802154_raw_prot; - ops = &ieee802154_raw_ops; - break; - case SOCK_DGRAM: - proto = &ieee802154_dgram_prot; - ops = &ieee802154_dgram_ops; - break; - default: - rc = -ESOCKTNOSUPPORT; - goto out; - } - - rc = -ENOMEM; - sk = sk_alloc(net, PF_IEEE802154, GFP_KERNEL, proto); - if (!sk) - goto out; - rc = 0; - - sock->ops = ops; - - sock_init_data(sock, sk); - /* FIXME: sk->sk_destruct */ - sk->sk_family = PF_IEEE802154; - - /* Checksums on by default */ - sock_set_flag(sk, SOCK_ZAPPED); - - if (sk->sk_prot->hash) - sk->sk_prot->hash(sk); - - if (sk->sk_prot->init) { - rc = sk->sk_prot->init(sk); - if (rc) - sk_common_release(sk); - } -out: - return rc; -} - -static const struct net_proto_family ieee802154_family_ops = { - .family = PF_IEEE802154, - .create = ieee802154_create, - .owner = THIS_MODULE, -}; - -static int ieee802154_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, struct net_device *orig_dev) -{ - if (!netif_running(dev)) - goto drop; - pr_debug("got frame, type %d, dev %p\n", dev->type, dev); -#ifdef DEBUG - print_hex_dump_bytes("ieee802154_rcv ", - DUMP_PREFIX_NONE, skb->data, skb->len); -#endif - - if (!net_eq(dev_net(dev), &init_net)) - goto drop; - - ieee802154_raw_deliver(dev, skb); - - if (dev->type != ARPHRD_IEEE802154) - goto drop; - - if (skb->pkt_type != PACKET_OTHERHOST) - return ieee802154_dgram_deliver(dev, skb); - -drop: - kfree_skb(skb); - return NET_RX_DROP; -} - -static struct packet_type ieee802154_packet_type = { - .type = htons(ETH_P_IEEE802154), - .func = ieee802154_rcv, -}; - -static int __init af_ieee802154_init(void) -{ - int rc = -EINVAL; - - rc = proto_register(&ieee802154_raw_prot, 1); - if (rc) - goto out; - - rc = proto_register(&ieee802154_dgram_prot, 1); - if (rc) - goto err_dgram; - - /* Tell SOCKET that we are alive */ - rc = sock_register(&ieee802154_family_ops); - if (rc) - goto err_sock; - dev_add_pack(&ieee802154_packet_type); - - rc = 0; - goto out; - -err_sock: - proto_unregister(&ieee802154_dgram_prot); -err_dgram: - proto_unregister(&ieee802154_raw_prot); -out: - return rc; -} - -static void __exit af_ieee802154_remove(void) -{ - dev_remove_pack(&ieee802154_packet_type); - sock_unregister(PF_IEEE802154); - proto_unregister(&ieee802154_dgram_prot); - proto_unregister(&ieee802154_raw_prot); -} - -module_init(af_ieee802154_init); -module_exit(af_ieee802154_remove); - -MODULE_LICENSE("GPL"); -MODULE_ALIAS_NETPROTO(PF_IEEE802154); diff --git a/net/ieee802154/dgram.c b/net/ieee802154/dgram.c deleted file mode 100644 index d1930b70c4aa..000000000000 --- a/net/ieee802154/dgram.c +++ /dev/null @@ -1,549 +0,0 @@ -/* - * IEEE 802.15.4 dgram socket interface - * - * Copyright 2007, 2008 Siemens AG - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
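From userspace, the address family registered here (removed from af_ieee802154.c and re-added below in the new socket.c) is reached through plain socket() calls. A hedged example, assuming a libc that exposes AF_IEEE802154 (value 36 in linux/socket.h; the fallback define is only for older toolchains):

#include <stdio.h>
#include <sys/socket.h>

#ifndef AF_IEEE802154
#define AF_IEEE802154 36	/* from linux/socket.h */
#endif

int main(void)
{
	/* SOCK_DGRAM maps to ieee802154_dgram_prot/ops (one 802.15.4 data
	 * frame per message); SOCK_RAW maps to ieee802154_raw_prot/ops and
	 * expects the caller to build the MAC header itself. */
	int dgram = socket(AF_IEEE802154, SOCK_DGRAM, 0);
	int raw = socket(AF_IEEE802154, SOCK_RAW, 0);

	if (dgram < 0 || raw < 0)
		perror("socket(AF_IEEE802154)");

	return 0;
}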
- * - * Written by: - * Sergey Lapin <slapin@ossfans.org> - * Dmitry Eremin-Solenikov <dbaryshkov@gmail.com> - */ - -#include <linux/capability.h> -#include <linux/net.h> -#include <linux/module.h> -#include <linux/if_arp.h> -#include <linux/list.h> -#include <linux/slab.h> -#include <linux/ieee802154.h> -#include <net/sock.h> -#include <net/af_ieee802154.h> -#include <net/ieee802154_netdev.h> - -#include <asm/ioctls.h> - -#include "af802154.h" - -static HLIST_HEAD(dgram_head); -static DEFINE_RWLOCK(dgram_lock); - -struct dgram_sock { - struct sock sk; - - struct ieee802154_addr src_addr; - struct ieee802154_addr dst_addr; - - unsigned int bound:1; - unsigned int connected:1; - unsigned int want_ack:1; - unsigned int secen:1; - unsigned int secen_override:1; - unsigned int seclevel:3; - unsigned int seclevel_override:1; -}; - -static inline struct dgram_sock *dgram_sk(const struct sock *sk) -{ - return container_of(sk, struct dgram_sock, sk); -} - -static void dgram_hash(struct sock *sk) -{ - write_lock_bh(&dgram_lock); - sk_add_node(sk, &dgram_head); - sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); - write_unlock_bh(&dgram_lock); -} - -static void dgram_unhash(struct sock *sk) -{ - write_lock_bh(&dgram_lock); - if (sk_del_node_init(sk)) - sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); - write_unlock_bh(&dgram_lock); -} - -static int dgram_init(struct sock *sk) -{ - struct dgram_sock *ro = dgram_sk(sk); - - ro->want_ack = 1; - return 0; -} - -static void dgram_close(struct sock *sk, long timeout) -{ - sk_common_release(sk); -} - -static int dgram_bind(struct sock *sk, struct sockaddr *uaddr, int len) -{ - struct sockaddr_ieee802154 *addr = (struct sockaddr_ieee802154 *)uaddr; - struct ieee802154_addr haddr; - struct dgram_sock *ro = dgram_sk(sk); - int err = -EINVAL; - struct net_device *dev; - - lock_sock(sk); - - ro->bound = 0; - - if (len < sizeof(*addr)) - goto out; - - if (addr->family != AF_IEEE802154) - goto out; - - ieee802154_addr_from_sa(&haddr, &addr->addr); - dev = ieee802154_get_dev(sock_net(sk), &haddr); - if (!dev) { - err = -ENODEV; - goto out; - } - - if (dev->type != ARPHRD_IEEE802154) { - err = -ENODEV; - goto out_put; - } - - ro->src_addr = haddr; - - ro->bound = 1; - err = 0; -out_put: - dev_put(dev); -out: - release_sock(sk); - - return err; -} - -static int dgram_ioctl(struct sock *sk, int cmd, unsigned long arg) -{ - switch (cmd) { - case SIOCOUTQ: - { - int amount = sk_wmem_alloc_get(sk); - - return put_user(amount, (int __user *)arg); - } - - case SIOCINQ: - { - struct sk_buff *skb; - unsigned long amount; - - amount = 0; - spin_lock_bh(&sk->sk_receive_queue.lock); - skb = skb_peek(&sk->sk_receive_queue); - if (skb != NULL) { - /* We will only return the amount - * of this packet since that is all - * that will be read. 
- */ - amount = skb->len - ieee802154_hdr_length(skb); - } - spin_unlock_bh(&sk->sk_receive_queue.lock); - return put_user(amount, (int __user *)arg); - } - } - - return -ENOIOCTLCMD; -} - -/* FIXME: autobind */ -static int dgram_connect(struct sock *sk, struct sockaddr *uaddr, - int len) -{ - struct sockaddr_ieee802154 *addr = (struct sockaddr_ieee802154 *)uaddr; - struct dgram_sock *ro = dgram_sk(sk); - int err = 0; - - if (len < sizeof(*addr)) - return -EINVAL; - - if (addr->family != AF_IEEE802154) - return -EINVAL; - - lock_sock(sk); - - if (!ro->bound) { - err = -ENETUNREACH; - goto out; - } - - ieee802154_addr_from_sa(&ro->dst_addr, &addr->addr); - ro->connected = 1; - -out: - release_sock(sk); - return err; -} - -static int dgram_disconnect(struct sock *sk, int flags) -{ - struct dgram_sock *ro = dgram_sk(sk); - - lock_sock(sk); - ro->connected = 0; - release_sock(sk); - - return 0; -} - -static int dgram_sendmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t size) -{ - struct net_device *dev; - unsigned int mtu; - struct sk_buff *skb; - struct ieee802154_mac_cb *cb; - struct dgram_sock *ro = dgram_sk(sk); - struct ieee802154_addr dst_addr; - int hlen, tlen; - int err; - - if (msg->msg_flags & MSG_OOB) { - pr_debug("msg->msg_flags = 0x%x\n", msg->msg_flags); - return -EOPNOTSUPP; - } - - if (!ro->connected && !msg->msg_name) - return -EDESTADDRREQ; - else if (ro->connected && msg->msg_name) - return -EISCONN; - - if (!ro->bound) - dev = dev_getfirstbyhwtype(sock_net(sk), ARPHRD_IEEE802154); - else - dev = ieee802154_get_dev(sock_net(sk), &ro->src_addr); - - if (!dev) { - pr_debug("no dev\n"); - err = -ENXIO; - goto out; - } - mtu = dev->mtu; - pr_debug("name = %s, mtu = %u\n", dev->name, mtu); - - if (size > mtu) { - pr_debug("size = %Zu, mtu = %u\n", size, mtu); - err = -EMSGSIZE; - goto out_dev; - } - - hlen = LL_RESERVED_SPACE(dev); - tlen = dev->needed_tailroom; - skb = sock_alloc_send_skb(sk, hlen + tlen + size, - msg->msg_flags & MSG_DONTWAIT, - &err); - if (!skb) - goto out_dev; - - skb_reserve(skb, hlen); - - skb_reset_network_header(skb); - - cb = mac_cb_init(skb); - cb->type = IEEE802154_FC_TYPE_DATA; - cb->ackreq = ro->want_ack; - - if (msg->msg_name) { - DECLARE_SOCKADDR(struct sockaddr_ieee802154*, - daddr, msg->msg_name); - - ieee802154_addr_from_sa(&dst_addr, &daddr->addr); - } else { - dst_addr = ro->dst_addr; - } - - cb->secen = ro->secen; - cb->secen_override = ro->secen_override; - cb->seclevel = ro->seclevel; - cb->seclevel_override = ro->seclevel_override; - - err = dev_hard_header(skb, dev, ETH_P_IEEE802154, &dst_addr, - ro->bound ? &ro->src_addr : NULL, size); - if (err < 0) - goto out_skb; - - err = memcpy_from_msg(skb_put(skb, size), msg, size); - if (err < 0) - goto out_skb; - - skb->dev = dev; - skb->sk = sk; - skb->protocol = htons(ETH_P_IEEE802154); - - dev_put(dev); - - err = dev_queue_xmit(skb); - if (err > 0) - err = net_xmit_errno(err); - - return err ?: size; - -out_skb: - kfree_skb(skb); -out_dev: - dev_put(dev); -out: - return err; -} - -static int dgram_recvmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len, int noblock, - int flags, int *addr_len) -{ - size_t copied = 0; - int err = -EOPNOTSUPP; - struct sk_buff *skb; - DECLARE_SOCKADDR(struct sockaddr_ieee802154 *, saddr, msg->msg_name); - - skb = skb_recv_datagram(sk, flags, noblock, &err); - if (!skb) - goto out; - - copied = skb->len; - if (len < copied) { - msg->msg_flags |= MSG_TRUNC; - copied = len; - } - - /* FIXME: skip headers if necessary ?! 
*/ - err = skb_copy_datagram_msg(skb, 0, msg, copied); - if (err) - goto done; - - sock_recv_ts_and_drops(msg, sk, skb); - - if (saddr) { - saddr->family = AF_IEEE802154; - ieee802154_addr_to_sa(&saddr->addr, &mac_cb(skb)->source); - *addr_len = sizeof(*saddr); - } - - if (flags & MSG_TRUNC) - copied = skb->len; -done: - skb_free_datagram(sk, skb); -out: - if (err) - return err; - return copied; -} - -static int dgram_rcv_skb(struct sock *sk, struct sk_buff *skb) -{ - skb = skb_share_check(skb, GFP_ATOMIC); - if (!skb) - return NET_RX_DROP; - - if (sock_queue_rcv_skb(sk, skb) < 0) { - kfree_skb(skb); - return NET_RX_DROP; - } - - return NET_RX_SUCCESS; -} - -static inline bool -ieee802154_match_sock(__le64 hw_addr, __le16 pan_id, __le16 short_addr, - struct dgram_sock *ro) -{ - if (!ro->bound) - return true; - - if (ro->src_addr.mode == IEEE802154_ADDR_LONG && - hw_addr == ro->src_addr.extended_addr) - return true; - - if (ro->src_addr.mode == IEEE802154_ADDR_SHORT && - pan_id == ro->src_addr.pan_id && - short_addr == ro->src_addr.short_addr) - return true; - - return false; -} - -int ieee802154_dgram_deliver(struct net_device *dev, struct sk_buff *skb) -{ - struct sock *sk, *prev = NULL; - int ret = NET_RX_SUCCESS; - __le16 pan_id, short_addr; - __le64 hw_addr; - - /* Data frame processing */ - BUG_ON(dev->type != ARPHRD_IEEE802154); - - pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev); - short_addr = ieee802154_mlme_ops(dev)->get_short_addr(dev); - hw_addr = ieee802154_devaddr_from_raw(dev->dev_addr); - - read_lock(&dgram_lock); - sk_for_each(sk, &dgram_head) { - if (ieee802154_match_sock(hw_addr, pan_id, short_addr, - dgram_sk(sk))) { - if (prev) { - struct sk_buff *clone; - - clone = skb_clone(skb, GFP_ATOMIC); - if (clone) - dgram_rcv_skb(prev, clone); - } - - prev = sk; - } - } - - if (prev) { - dgram_rcv_skb(prev, skb); - } else { - kfree_skb(skb); - ret = NET_RX_DROP; - } - read_unlock(&dgram_lock); - - return ret; -} - -static int dgram_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen) -{ - struct dgram_sock *ro = dgram_sk(sk); - - int val, len; - - if (level != SOL_IEEE802154) - return -EOPNOTSUPP; - - if (get_user(len, optlen)) - return -EFAULT; - - len = min_t(unsigned int, len, sizeof(int)); - - switch (optname) { - case WPAN_WANTACK: - val = ro->want_ack; - break; - case WPAN_SECURITY: - if (!ro->secen_override) - val = WPAN_SECURITY_DEFAULT; - else if (ro->secen) - val = WPAN_SECURITY_ON; - else - val = WPAN_SECURITY_OFF; - break; - case WPAN_SECURITY_LEVEL: - if (!ro->seclevel_override) - val = WPAN_SECURITY_LEVEL_DEFAULT; - else - val = ro->seclevel; - break; - default: - return -ENOPROTOOPT; - } - - if (put_user(len, optlen)) - return -EFAULT; - if (copy_to_user(optval, &val, len)) - return -EFAULT; - return 0; -} - -static int dgram_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen) -{ - struct dgram_sock *ro = dgram_sk(sk); - struct net *net = sock_net(sk); - int val; - int err = 0; - - if (optlen < sizeof(int)) - return -EINVAL; - - if (get_user(val, (int __user *)optval)) - return -EFAULT; - - lock_sock(sk); - - switch (optname) { - case WPAN_WANTACK: - ro->want_ack = !!val; - break; - case WPAN_SECURITY: - if (!ns_capable(net->user_ns, CAP_NET_ADMIN) && - !ns_capable(net->user_ns, CAP_NET_RAW)) { - err = -EPERM; - break; - } - - switch (val) { - case WPAN_SECURITY_DEFAULT: - ro->secen_override = 0; - break; - case WPAN_SECURITY_ON: - ro->secen_override = 1; - ro->secen = 1; - 
break; - case WPAN_SECURITY_OFF: - ro->secen_override = 1; - ro->secen = 0; - break; - default: - err = -EINVAL; - break; - } - break; - case WPAN_SECURITY_LEVEL: - if (!ns_capable(net->user_ns, CAP_NET_ADMIN) && - !ns_capable(net->user_ns, CAP_NET_RAW)) { - err = -EPERM; - break; - } - - if (val < WPAN_SECURITY_LEVEL_DEFAULT || - val > IEEE802154_SCF_SECLEVEL_ENC_MIC128) { - err = -EINVAL; - } else if (val == WPAN_SECURITY_LEVEL_DEFAULT) { - ro->seclevel_override = 0; - } else { - ro->seclevel_override = 1; - ro->seclevel = val; - } - break; - default: - err = -ENOPROTOOPT; - break; - } - - release_sock(sk); - return err; -} - -struct proto ieee802154_dgram_prot = { - .name = "IEEE-802.15.4-MAC", - .owner = THIS_MODULE, - .obj_size = sizeof(struct dgram_sock), - .init = dgram_init, - .close = dgram_close, - .bind = dgram_bind, - .sendmsg = dgram_sendmsg, - .recvmsg = dgram_recvmsg, - .hash = dgram_hash, - .unhash = dgram_unhash, - .connect = dgram_connect, - .disconnect = dgram_disconnect, - .ioctl = dgram_ioctl, - .getsockopt = dgram_getsockopt, - .setsockopt = dgram_setsockopt, -}; - diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c index fa1464762d0d..c8133c07ceee 100644 --- a/net/ieee802154/netlink.c +++ b/net/ieee802154/netlink.c @@ -63,13 +63,9 @@ int ieee802154_nl_mcast(struct sk_buff *msg, unsigned int group) struct nlmsghdr *nlh = nlmsg_hdr(msg); void *hdr = genlmsg_data(nlmsg_data(nlh)); - if (genlmsg_end(msg, hdr) < 0) - goto out; + genlmsg_end(msg, hdr); return genlmsg_multicast(&nl802154_family, msg, 0, group, GFP_ATOMIC); -out: - nlmsg_free(msg); - return -ENOBUFS; } struct sk_buff *ieee802154_nl_new_reply(struct genl_info *info, @@ -96,13 +92,9 @@ int ieee802154_nl_reply(struct sk_buff *msg, struct genl_info *info) struct nlmsghdr *nlh = nlmsg_hdr(msg); void *hdr = genlmsg_data(nlmsg_data(nlh)); - if (genlmsg_end(msg, hdr) < 0) - goto out; + genlmsg_end(msg, hdr); return genlmsg_reply(msg, info); -out: - nlmsg_free(msg); - return -ENOBUFS; } static const struct genl_ops ieee8021154_ops[] = { diff --git a/net/ieee802154/nl-mac.c b/net/ieee802154/nl-mac.c index cd919493c976..9105265920fe 100644 --- a/net/ieee802154/nl-mac.c +++ b/net/ieee802154/nl-mac.c @@ -121,7 +121,7 @@ static int ieee802154_nl_fill_iface(struct sk_buff *msg, u32 portid, params.transmit_power) || nla_put_u8(msg, IEEE802154_ATTR_LBT_ENABLED, params.lbt) || nla_put_u8(msg, IEEE802154_ATTR_CCA_MODE, - params.cca_mode) || + params.cca.mode) || nla_put_s32(msg, IEEE802154_ATTR_CCA_ED_LEVEL, params.cca_ed_level) || nla_put_u8(msg, IEEE802154_ATTR_CSMA_RETRIES, @@ -136,7 +136,8 @@ static int ieee802154_nl_fill_iface(struct sk_buff *msg, u32 portid, } wpan_phy_put(phy); - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: wpan_phy_put(phy); @@ -516,7 +517,7 @@ int ieee802154_set_macparams(struct sk_buff *skb, struct genl_info *info) params.lbt = nla_get_u8(info->attrs[IEEE802154_ATTR_LBT_ENABLED]); if (info->attrs[IEEE802154_ATTR_CCA_MODE]) - params.cca_mode = nla_get_u8(info->attrs[IEEE802154_ATTR_CCA_MODE]); + params.cca.mode = nla_get_u8(info->attrs[IEEE802154_ATTR_CCA_MODE]); if (info->attrs[IEEE802154_ATTR_CCA_ED_LEVEL]) params.cca_ed_level = nla_get_s32(info->attrs[IEEE802154_ATTR_CCA_ED_LEVEL]); diff --git a/net/ieee802154/nl-phy.c b/net/ieee802154/nl-phy.c index 7baf98b14611..1b9d25f6e898 100644 --- a/net/ieee802154/nl-phy.c +++ b/net/ieee802154/nl-phy.c @@ -65,7 +65,8 @@ static int ieee802154_nl_fill_phy(struct sk_buff *msg, u32 portid, goto nla_put_failure; 
mutex_unlock(&phy->pib_lock); kfree(buf); - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: mutex_unlock(&phy->pib_lock); diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c index 889647744697..a4daf91b8d0a 100644 --- a/net/ieee802154/nl802154.c +++ b/net/ieee802154/nl802154.c @@ -209,7 +209,8 @@ static const struct nla_policy nl802154_policy[NL802154_ATTR_MAX+1] = { [NL802154_ATTR_TX_POWER] = { .type = NLA_S8, }, - [NL802154_ATTR_CCA_MODE] = { .type = NLA_U8, }, + [NL802154_ATTR_CCA_MODE] = { .type = NLA_U32, }, + [NL802154_ATTR_CCA_OPT] = { .type = NLA_U32, }, [NL802154_ATTR_SUPPORTED_CHANNEL] = { .type = NLA_U32, }, @@ -290,16 +291,23 @@ static int nl802154_send_wpan_phy(struct cfg802154_registered_device *rdev, goto nla_put_failure; /* cca mode */ - if (nla_put_u8(msg, NL802154_ATTR_CCA_MODE, - rdev->wpan_phy.cca_mode)) + if (nla_put_u32(msg, NL802154_ATTR_CCA_MODE, + rdev->wpan_phy.cca.mode)) goto nla_put_failure; + if (rdev->wpan_phy.cca.mode == NL802154_CCA_ENERGY_CARRIER) { + if (nla_put_u32(msg, NL802154_ATTR_CCA_OPT, + rdev->wpan_phy.cca.opt)) + goto nla_put_failure; + } + if (nla_put_s8(msg, NL802154_ATTR_TX_POWER, rdev->wpan_phy.transmit_power)) goto nla_put_failure; finish: - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: genlmsg_cancel(msg, hdr); @@ -482,7 +490,8 @@ nl802154_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flags, if (nla_put_u8(msg, NL802154_ATTR_LBT_MODE, wpan_dev->lbt)) goto nla_put_failure; - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: genlmsg_cancel(msg, hdr); @@ -622,6 +631,31 @@ static int nl802154_set_channel(struct sk_buff *skb, struct genl_info *info) return rdev_set_channel(rdev, page, channel); } +static int nl802154_set_cca_mode(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg802154_registered_device *rdev = info->user_ptr[0]; + struct wpan_phy_cca cca; + + if (!info->attrs[NL802154_ATTR_CCA_MODE]) + return -EINVAL; + + cca.mode = nla_get_u32(info->attrs[NL802154_ATTR_CCA_MODE]); + /* checking 802.15.4 constraints */ + if (cca.mode < NL802154_CCA_ENERGY || cca.mode > NL802154_CCA_ATTR_MAX) + return -EINVAL; + + if (cca.mode == NL802154_CCA_ENERGY_CARRIER) { + if (!info->attrs[NL802154_ATTR_CCA_OPT]) + return -EINVAL; + + cca.opt = nla_get_u32(info->attrs[NL802154_ATTR_CCA_OPT]); + if (cca.opt > NL802154_CCA_OPT_ATTR_MAX) + return -EINVAL; + } + + return rdev_set_cca_mode(rdev, &cca); +} + static int nl802154_set_pan_id(struct sk_buff *skb, struct genl_info *info) { struct cfg802154_registered_device *rdev = info->user_ptr[0]; @@ -895,6 +929,14 @@ static const struct genl_ops nl802154_ops[] = { NL802154_FLAG_NEED_RTNL, }, { + .cmd = NL802154_CMD_SET_CCA_MODE, + .doit = nl802154_set_cca_mode, + .policy = nl802154_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL802154_FLAG_NEED_WPAN_PHY | + NL802154_FLAG_NEED_RTNL, + }, + { .cmd = NL802154_CMD_SET_PAN_ID, .doit = nl802154_set_pan_id, .policy = nl802154_policy, diff --git a/net/ieee802154/raw.c b/net/ieee802154/raw.c deleted file mode 100644 index 1674b115c891..000000000000 --- a/net/ieee802154/raw.c +++ /dev/null @@ -1,270 +0,0 @@ -/* - * Raw IEEE 802.15.4 sockets - * - * Copyright 2007, 2008 Siemens AG - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. 
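The nl802154 changes above widen NL802154_ATTR_CCA_MODE to a u32, add NL802154_ATTR_CCA_OPT, and hand the driver layer a struct wpan_phy_cca through rdev_set_cca_mode() instead of a bare mode byte. As a purely hypothetical sketch of what a cfg802154 backend might do with it (the foo_* name and the register-value mapping are invented; only wpan_phy_cca and the NL802154_CCA_* enums are taken from the kernel headers):

#include <linux/printk.h>
#include <net/cfg802154.h>

/* Hypothetical set_cca_mode backend: translate the generic CCA description
 * into a device-specific value. */
static int foo_set_cca_mode(struct wpan_phy *wpan_phy,
			    const struct wpan_phy_cca *cca)
{
	u8 regval;

	switch (cca->mode) {
	case NL802154_CCA_ENERGY:
		regval = 1;
		break;
	case NL802154_CCA_CARRIER:
		regval = 2;
		break;
	case NL802154_CCA_ENERGY_CARRIER:
		/* cca->opt says whether both checks must agree (AND) or
		 * either one suffices (OR). */
		if (cca->opt == NL802154_CCA_OPT_ENERGY_CARRIER_AND)
			regval = 3;
		else
			regval = 0;
		break;
	default:
		return -EOPNOTSUPP;
	}

	pr_debug("CCA register value: %u\n", regval);
	/* program the hardware with regval here */
	return 0;
}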
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Written by: - * Sergey Lapin <slapin@ossfans.org> - * Dmitry Eremin-Solenikov <dbaryshkov@gmail.com> - */ - -#include <linux/net.h> -#include <linux/module.h> -#include <linux/if_arp.h> -#include <linux/list.h> -#include <linux/slab.h> -#include <net/sock.h> -#include <net/af_ieee802154.h> -#include <net/ieee802154_netdev.h> - -#include "af802154.h" - -static HLIST_HEAD(raw_head); -static DEFINE_RWLOCK(raw_lock); - -static void raw_hash(struct sock *sk) -{ - write_lock_bh(&raw_lock); - sk_add_node(sk, &raw_head); - sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); - write_unlock_bh(&raw_lock); -} - -static void raw_unhash(struct sock *sk) -{ - write_lock_bh(&raw_lock); - if (sk_del_node_init(sk)) - sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); - write_unlock_bh(&raw_lock); -} - -static void raw_close(struct sock *sk, long timeout) -{ - sk_common_release(sk); -} - -static int raw_bind(struct sock *sk, struct sockaddr *_uaddr, int len) -{ - struct ieee802154_addr addr; - struct sockaddr_ieee802154 *uaddr = (struct sockaddr_ieee802154 *)_uaddr; - int err = 0; - struct net_device *dev = NULL; - - if (len < sizeof(*uaddr)) - return -EINVAL; - - uaddr = (struct sockaddr_ieee802154 *)_uaddr; - if (uaddr->family != AF_IEEE802154) - return -EINVAL; - - lock_sock(sk); - - ieee802154_addr_from_sa(&addr, &uaddr->addr); - dev = ieee802154_get_dev(sock_net(sk), &addr); - if (!dev) { - err = -ENODEV; - goto out; - } - - if (dev->type != ARPHRD_IEEE802154) { - err = -ENODEV; - goto out_put; - } - - sk->sk_bound_dev_if = dev->ifindex; - sk_dst_reset(sk); - -out_put: - dev_put(dev); -out: - release_sock(sk); - - return err; -} - -static int raw_connect(struct sock *sk, struct sockaddr *uaddr, - int addr_len) -{ - return -ENOTSUPP; -} - -static int raw_disconnect(struct sock *sk, int flags) -{ - return 0; -} - -static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t size) -{ - struct net_device *dev; - unsigned int mtu; - struct sk_buff *skb; - int hlen, tlen; - int err; - - if (msg->msg_flags & MSG_OOB) { - pr_debug("msg->msg_flags = 0x%x\n", msg->msg_flags); - return -EOPNOTSUPP; - } - - lock_sock(sk); - if (!sk->sk_bound_dev_if) - dev = dev_getfirstbyhwtype(sock_net(sk), ARPHRD_IEEE802154); - else - dev = dev_get_by_index(sock_net(sk), sk->sk_bound_dev_if); - release_sock(sk); - - if (!dev) { - pr_debug("no dev\n"); - err = -ENXIO; - goto out; - } - - mtu = dev->mtu; - pr_debug("name = %s, mtu = %u\n", dev->name, mtu); - - if (size > mtu) { - pr_debug("size = %Zu, mtu = %u\n", size, mtu); - err = -EINVAL; - goto out_dev; - } - - hlen = LL_RESERVED_SPACE(dev); - tlen = dev->needed_tailroom; - skb = sock_alloc_send_skb(sk, hlen + tlen + size, - msg->msg_flags & MSG_DONTWAIT, &err); - if (!skb) - goto out_dev; - - skb_reserve(skb, hlen); - - skb_reset_mac_header(skb); - skb_reset_network_header(skb); - - err = memcpy_from_msg(skb_put(skb, size), msg, size); - if (err < 0) - goto out_skb; - - skb->dev = dev; - skb->sk = sk; - skb->protocol = htons(ETH_P_IEEE802154); - - dev_put(dev); - - err = dev_queue_xmit(skb); - if (err > 0) - err = net_xmit_errno(err); - - return err ?: size; - -out_skb: - kfree_skb(skb); -out_dev: - dev_put(dev); -out: - return err; -} - -static int raw_recvmsg(struct kiocb *iocb, struct 
sock *sk, struct msghdr *msg, - size_t len, int noblock, int flags, int *addr_len) -{ - size_t copied = 0; - int err = -EOPNOTSUPP; - struct sk_buff *skb; - - skb = skb_recv_datagram(sk, flags, noblock, &err); - if (!skb) - goto out; - - copied = skb->len; - if (len < copied) { - msg->msg_flags |= MSG_TRUNC; - copied = len; - } - - err = skb_copy_datagram_msg(skb, 0, msg, copied); - if (err) - goto done; - - sock_recv_ts_and_drops(msg, sk, skb); - - if (flags & MSG_TRUNC) - copied = skb->len; -done: - skb_free_datagram(sk, skb); -out: - if (err) - return err; - return copied; -} - -static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb) -{ - skb = skb_share_check(skb, GFP_ATOMIC); - if (!skb) - return NET_RX_DROP; - - if (sock_queue_rcv_skb(sk, skb) < 0) { - kfree_skb(skb); - return NET_RX_DROP; - } - - return NET_RX_SUCCESS; -} - -void ieee802154_raw_deliver(struct net_device *dev, struct sk_buff *skb) -{ - struct sock *sk; - - read_lock(&raw_lock); - sk_for_each(sk, &raw_head) { - bh_lock_sock(sk); - if (!sk->sk_bound_dev_if || - sk->sk_bound_dev_if == dev->ifindex) { - struct sk_buff *clone; - - clone = skb_clone(skb, GFP_ATOMIC); - if (clone) - raw_rcv_skb(sk, clone); - } - bh_unlock_sock(sk); - } - read_unlock(&raw_lock); -} - -static int raw_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen) -{ - return -EOPNOTSUPP; -} - -static int raw_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen) -{ - return -EOPNOTSUPP; -} - -struct proto ieee802154_raw_prot = { - .name = "IEEE-802.15.4-RAW", - .owner = THIS_MODULE, - .obj_size = sizeof(struct sock), - .close = raw_close, - .bind = raw_bind, - .sendmsg = raw_sendmsg, - .recvmsg = raw_recvmsg, - .hash = raw_hash, - .unhash = raw_unhash, - .connect = raw_connect, - .disconnect = raw_disconnect, - .getsockopt = raw_getsockopt, - .setsockopt = raw_setsockopt, -}; diff --git a/net/ieee802154/rdev-ops.h b/net/ieee802154/rdev-ops.h index aff54fbd9264..7c46732fad2b 100644 --- a/net/ieee802154/rdev-ops.h +++ b/net/ieee802154/rdev-ops.h @@ -42,6 +42,13 @@ rdev_set_channel(struct cfg802154_registered_device *rdev, u8 page, u8 channel) } static inline int +rdev_set_cca_mode(struct cfg802154_registered_device *rdev, + const struct wpan_phy_cca *cca) +{ + return rdev->ops->set_cca_mode(&rdev->wpan_phy, cca); +} + +static inline int rdev_set_pan_id(struct cfg802154_registered_device *rdev, struct wpan_dev *wpan_dev, __le16 pan_id) { diff --git a/net/ieee802154/reassembly.h b/net/ieee802154/reassembly.h deleted file mode 100644 index 836b16fa001f..000000000000 --- a/net/ieee802154/reassembly.h +++ /dev/null @@ -1,41 +0,0 @@ -#ifndef __IEEE802154_6LOWPAN_REASSEMBLY_H__ -#define __IEEE802154_6LOWPAN_REASSEMBLY_H__ - -#include <net/inet_frag.h> - -struct lowpan_create_arg { - u16 tag; - u16 d_size; - const struct ieee802154_addr *src; - const struct ieee802154_addr *dst; -}; - -/* Equivalent of ipv4 struct ip - */ -struct lowpan_frag_queue { - struct inet_frag_queue q; - - u16 tag; - u16 d_size; - struct ieee802154_addr saddr; - struct ieee802154_addr daddr; -}; - -static inline u32 ieee802154_addr_hash(const struct ieee802154_addr *a) -{ - switch (a->mode) { - case IEEE802154_ADDR_LONG: - return (((__force u64)a->extended_addr) >> 32) ^ - (((__force u64)a->extended_addr) & 0xffffffff); - case IEEE802154_ADDR_SHORT: - return (__force u32)(a->short_addr); - default: - return 0; - } -} - -int lowpan_frag_rcv(struct sk_buff *skb, const u8 frag_type); -void 
lowpan_net_frag_exit(void); -int lowpan_net_frag_init(void); - -#endif /* __IEEE802154_6LOWPAN_REASSEMBLY_H__ */ diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c new file mode 100644 index 000000000000..2878d8ca6d3b --- /dev/null +++ b/net/ieee802154/socket.c @@ -0,0 +1,1125 @@ +/* + * IEEE802154.4 socket interface + * + * Copyright 2007, 2008 Siemens AG + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Written by: + * Sergey Lapin <slapin@ossfans.org> + * Maxim Gorbachyov <maxim.gorbachev@siemens.com> + */ + +#include <linux/net.h> +#include <linux/capability.h> +#include <linux/module.h> +#include <linux/if_arp.h> +#include <linux/if.h> +#include <linux/termios.h> /* For TIOCOUTQ/INQ */ +#include <linux/list.h> +#include <linux/slab.h> +#include <net/datalink.h> +#include <net/psnap.h> +#include <net/sock.h> +#include <net/tcp_states.h> +#include <net/route.h> + +#include <net/af_ieee802154.h> +#include <net/ieee802154_netdev.h> + +/* Utility function for families */ +static struct net_device* +ieee802154_get_dev(struct net *net, const struct ieee802154_addr *addr) +{ + struct net_device *dev = NULL; + struct net_device *tmp; + __le16 pan_id, short_addr; + u8 hwaddr[IEEE802154_ADDR_LEN]; + + switch (addr->mode) { + case IEEE802154_ADDR_LONG: + ieee802154_devaddr_to_raw(hwaddr, addr->extended_addr); + rcu_read_lock(); + dev = dev_getbyhwaddr_rcu(net, ARPHRD_IEEE802154, hwaddr); + if (dev) + dev_hold(dev); + rcu_read_unlock(); + break; + case IEEE802154_ADDR_SHORT: + if (addr->pan_id == cpu_to_le16(IEEE802154_PANID_BROADCAST) || + addr->short_addr == cpu_to_le16(IEEE802154_ADDR_UNDEF) || + addr->short_addr == cpu_to_le16(IEEE802154_ADDR_BROADCAST)) + break; + + rtnl_lock(); + + for_each_netdev(net, tmp) { + if (tmp->type != ARPHRD_IEEE802154) + continue; + + pan_id = ieee802154_mlme_ops(tmp)->get_pan_id(tmp); + short_addr = + ieee802154_mlme_ops(tmp)->get_short_addr(tmp); + + if (pan_id == addr->pan_id && + short_addr == addr->short_addr) { + dev = tmp; + dev_hold(dev); + break; + } + } + + rtnl_unlock(); + break; + default: + pr_warn("Unsupported ieee802154 address type: %d\n", + addr->mode); + break; + } + + return dev; +} + +static int ieee802154_sock_release(struct socket *sock) +{ + struct sock *sk = sock->sk; + + if (sk) { + sock->sk = NULL; + sk->sk_prot->close(sk, 0); + } + return 0; +} + +static int ieee802154_sock_sendmsg(struct kiocb *iocb, struct socket *sock, + struct msghdr *msg, size_t len) +{ + struct sock *sk = sock->sk; + + return sk->sk_prot->sendmsg(iocb, sk, msg, len); +} + +static int ieee802154_sock_bind(struct socket *sock, struct sockaddr *uaddr, + int addr_len) +{ + struct sock *sk = sock->sk; + + if (sk->sk_prot->bind) + return sk->sk_prot->bind(sk, uaddr, addr_len); + + return sock_no_bind(sock, uaddr, addr_len); +} + +static int ieee802154_sock_connect(struct socket *sock, struct sockaddr *uaddr, + int addr_len, int flags) +{ + struct sock *sk = sock->sk; + + if (addr_len < sizeof(uaddr->sa_family)) + return -EINVAL; + + if (uaddr->sa_family == AF_UNSPEC) + return sk->sk_prot->disconnect(sk, flags); + + return sk->sk_prot->connect(sk, uaddr, 
addr_len); +} + +static int ieee802154_dev_ioctl(struct sock *sk, struct ifreq __user *arg, + unsigned int cmd) +{ + struct ifreq ifr; + int ret = -ENOIOCTLCMD; + struct net_device *dev; + + if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) + return -EFAULT; + + ifr.ifr_name[IFNAMSIZ-1] = 0; + + dev_load(sock_net(sk), ifr.ifr_name); + dev = dev_get_by_name(sock_net(sk), ifr.ifr_name); + + if (!dev) + return -ENODEV; + + if (dev->type == ARPHRD_IEEE802154 && dev->netdev_ops->ndo_do_ioctl) + ret = dev->netdev_ops->ndo_do_ioctl(dev, &ifr, cmd); + + if (!ret && copy_to_user(arg, &ifr, sizeof(struct ifreq))) + ret = -EFAULT; + dev_put(dev); + + return ret; +} + +static int ieee802154_sock_ioctl(struct socket *sock, unsigned int cmd, + unsigned long arg) +{ + struct sock *sk = sock->sk; + + switch (cmd) { + case SIOCGSTAMP: + return sock_get_timestamp(sk, (struct timeval __user *)arg); + case SIOCGSTAMPNS: + return sock_get_timestampns(sk, (struct timespec __user *)arg); + case SIOCGIFADDR: + case SIOCSIFADDR: + return ieee802154_dev_ioctl(sk, (struct ifreq __user *)arg, + cmd); + default: + if (!sk->sk_prot->ioctl) + return -ENOIOCTLCMD; + return sk->sk_prot->ioctl(sk, cmd, arg); + } +} + +/* RAW Sockets (802.15.4 created in userspace) */ +static HLIST_HEAD(raw_head); +static DEFINE_RWLOCK(raw_lock); + +static void raw_hash(struct sock *sk) +{ + write_lock_bh(&raw_lock); + sk_add_node(sk, &raw_head); + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); + write_unlock_bh(&raw_lock); +} + +static void raw_unhash(struct sock *sk) +{ + write_lock_bh(&raw_lock); + if (sk_del_node_init(sk)) + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); + write_unlock_bh(&raw_lock); +} + +static void raw_close(struct sock *sk, long timeout) +{ + sk_common_release(sk); +} + +static int raw_bind(struct sock *sk, struct sockaddr *_uaddr, int len) +{ + struct ieee802154_addr addr; + struct sockaddr_ieee802154 *uaddr = (struct sockaddr_ieee802154 *)_uaddr; + int err = 0; + struct net_device *dev = NULL; + + if (len < sizeof(*uaddr)) + return -EINVAL; + + uaddr = (struct sockaddr_ieee802154 *)_uaddr; + if (uaddr->family != AF_IEEE802154) + return -EINVAL; + + lock_sock(sk); + + ieee802154_addr_from_sa(&addr, &uaddr->addr); + dev = ieee802154_get_dev(sock_net(sk), &addr); + if (!dev) { + err = -ENODEV; + goto out; + } + + if (dev->type != ARPHRD_IEEE802154) { + err = -ENODEV; + goto out_put; + } + + sk->sk_bound_dev_if = dev->ifindex; + sk_dst_reset(sk); + +out_put: + dev_put(dev); +out: + release_sock(sk); + + return err; +} + +static int raw_connect(struct sock *sk, struct sockaddr *uaddr, + int addr_len) +{ + return -ENOTSUPP; +} + +static int raw_disconnect(struct sock *sk, int flags) +{ + return 0; +} + +static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, + struct msghdr *msg, size_t size) +{ + struct net_device *dev; + unsigned int mtu; + struct sk_buff *skb; + int hlen, tlen; + int err; + + if (msg->msg_flags & MSG_OOB) { + pr_debug("msg->msg_flags = 0x%x\n", msg->msg_flags); + return -EOPNOTSUPP; + } + + lock_sock(sk); + if (!sk->sk_bound_dev_if) + dev = dev_getfirstbyhwtype(sock_net(sk), ARPHRD_IEEE802154); + else + dev = dev_get_by_index(sock_net(sk), sk->sk_bound_dev_if); + release_sock(sk); + + if (!dev) { + pr_debug("no dev\n"); + err = -ENXIO; + goto out; + } + + mtu = dev->mtu; + pr_debug("name = %s, mtu = %u\n", dev->name, mtu); + + if (size > mtu) { + pr_debug("size = %Zu, mtu = %u\n", size, mtu); + err = -EINVAL; + goto out_dev; + } + + hlen = LL_RESERVED_SPACE(dev); + tlen = 
dev->needed_tailroom; + skb = sock_alloc_send_skb(sk, hlen + tlen + size, + msg->msg_flags & MSG_DONTWAIT, &err); + if (!skb) + goto out_dev; + + skb_reserve(skb, hlen); + + skb_reset_mac_header(skb); + skb_reset_network_header(skb); + + err = memcpy_from_msg(skb_put(skb, size), msg, size); + if (err < 0) + goto out_skb; + + skb->dev = dev; + skb->sk = sk; + skb->protocol = htons(ETH_P_IEEE802154); + + dev_put(dev); + + err = dev_queue_xmit(skb); + if (err > 0) + err = net_xmit_errno(err); + + return err ?: size; + +out_skb: + kfree_skb(skb); +out_dev: + dev_put(dev); +out: + return err; +} + +static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, + size_t len, int noblock, int flags, int *addr_len) +{ + size_t copied = 0; + int err = -EOPNOTSUPP; + struct sk_buff *skb; + + skb = skb_recv_datagram(sk, flags, noblock, &err); + if (!skb) + goto out; + + copied = skb->len; + if (len < copied) { + msg->msg_flags |= MSG_TRUNC; + copied = len; + } + + err = skb_copy_datagram_msg(skb, 0, msg, copied); + if (err) + goto done; + + sock_recv_ts_and_drops(msg, sk, skb); + + if (flags & MSG_TRUNC) + copied = skb->len; +done: + skb_free_datagram(sk, skb); +out: + if (err) + return err; + return copied; +} + +static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb) +{ + skb = skb_share_check(skb, GFP_ATOMIC); + if (!skb) + return NET_RX_DROP; + + if (sock_queue_rcv_skb(sk, skb) < 0) { + kfree_skb(skb); + return NET_RX_DROP; + } + + return NET_RX_SUCCESS; +} + +static void ieee802154_raw_deliver(struct net_device *dev, struct sk_buff *skb) +{ + struct sock *sk; + + read_lock(&raw_lock); + sk_for_each(sk, &raw_head) { + bh_lock_sock(sk); + if (!sk->sk_bound_dev_if || + sk->sk_bound_dev_if == dev->ifindex) { + struct sk_buff *clone; + + clone = skb_clone(skb, GFP_ATOMIC); + if (clone) + raw_rcv_skb(sk, clone); + } + bh_unlock_sock(sk); + } + read_unlock(&raw_lock); +} + +static int raw_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen) +{ + return -EOPNOTSUPP; +} + +static int raw_setsockopt(struct sock *sk, int level, int optname, + char __user *optval, unsigned int optlen) +{ + return -EOPNOTSUPP; +} + +static struct proto ieee802154_raw_prot = { + .name = "IEEE-802.15.4-RAW", + .owner = THIS_MODULE, + .obj_size = sizeof(struct sock), + .close = raw_close, + .bind = raw_bind, + .sendmsg = raw_sendmsg, + .recvmsg = raw_recvmsg, + .hash = raw_hash, + .unhash = raw_unhash, + .connect = raw_connect, + .disconnect = raw_disconnect, + .getsockopt = raw_getsockopt, + .setsockopt = raw_setsockopt, +}; + +static const struct proto_ops ieee802154_raw_ops = { + .family = PF_IEEE802154, + .owner = THIS_MODULE, + .release = ieee802154_sock_release, + .bind = ieee802154_sock_bind, + .connect = ieee802154_sock_connect, + .socketpair = sock_no_socketpair, + .accept = sock_no_accept, + .getname = sock_no_getname, + .poll = datagram_poll, + .ioctl = ieee802154_sock_ioctl, + .listen = sock_no_listen, + .shutdown = sock_no_shutdown, + .setsockopt = sock_common_setsockopt, + .getsockopt = sock_common_getsockopt, + .sendmsg = ieee802154_sock_sendmsg, + .recvmsg = sock_common_recvmsg, + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage, +#ifdef CONFIG_COMPAT + .compat_setsockopt = compat_sock_common_setsockopt, + .compat_getsockopt = compat_sock_common_getsockopt, +#endif +}; + +/* DGRAM Sockets (802.15.4 dataframes) */ +static HLIST_HEAD(dgram_head); +static DEFINE_RWLOCK(dgram_lock); + +struct dgram_sock { + struct sock sk; + + struct ieee802154_addr 
src_addr; + struct ieee802154_addr dst_addr; + + unsigned int bound:1; + unsigned int connected:1; + unsigned int want_ack:1; + unsigned int secen:1; + unsigned int secen_override:1; + unsigned int seclevel:3; + unsigned int seclevel_override:1; +}; + +static inline struct dgram_sock *dgram_sk(const struct sock *sk) +{ + return container_of(sk, struct dgram_sock, sk); +} + +static void dgram_hash(struct sock *sk) +{ + write_lock_bh(&dgram_lock); + sk_add_node(sk, &dgram_head); + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); + write_unlock_bh(&dgram_lock); +} + +static void dgram_unhash(struct sock *sk) +{ + write_lock_bh(&dgram_lock); + if (sk_del_node_init(sk)) + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); + write_unlock_bh(&dgram_lock); +} + +static int dgram_init(struct sock *sk) +{ + struct dgram_sock *ro = dgram_sk(sk); + + ro->want_ack = 1; + return 0; +} + +static void dgram_close(struct sock *sk, long timeout) +{ + sk_common_release(sk); +} + +static int dgram_bind(struct sock *sk, struct sockaddr *uaddr, int len) +{ + struct sockaddr_ieee802154 *addr = (struct sockaddr_ieee802154 *)uaddr; + struct ieee802154_addr haddr; + struct dgram_sock *ro = dgram_sk(sk); + int err = -EINVAL; + struct net_device *dev; + + lock_sock(sk); + + ro->bound = 0; + + if (len < sizeof(*addr)) + goto out; + + if (addr->family != AF_IEEE802154) + goto out; + + ieee802154_addr_from_sa(&haddr, &addr->addr); + dev = ieee802154_get_dev(sock_net(sk), &haddr); + if (!dev) { + err = -ENODEV; + goto out; + } + + if (dev->type != ARPHRD_IEEE802154) { + err = -ENODEV; + goto out_put; + } + + ro->src_addr = haddr; + + ro->bound = 1; + err = 0; +out_put: + dev_put(dev); +out: + release_sock(sk); + + return err; +} + +static int dgram_ioctl(struct sock *sk, int cmd, unsigned long arg) +{ + switch (cmd) { + case SIOCOUTQ: + { + int amount = sk_wmem_alloc_get(sk); + + return put_user(amount, (int __user *)arg); + } + + case SIOCINQ: + { + struct sk_buff *skb; + unsigned long amount; + + amount = 0; + spin_lock_bh(&sk->sk_receive_queue.lock); + skb = skb_peek(&sk->sk_receive_queue); + if (skb) { + /* We will only return the amount + * of this packet since that is all + * that will be read. 
+ */ + amount = skb->len - ieee802154_hdr_length(skb); + } + spin_unlock_bh(&sk->sk_receive_queue.lock); + return put_user(amount, (int __user *)arg); + } + } + + return -ENOIOCTLCMD; +} + +/* FIXME: autobind */ +static int dgram_connect(struct sock *sk, struct sockaddr *uaddr, + int len) +{ + struct sockaddr_ieee802154 *addr = (struct sockaddr_ieee802154 *)uaddr; + struct dgram_sock *ro = dgram_sk(sk); + int err = 0; + + if (len < sizeof(*addr)) + return -EINVAL; + + if (addr->family != AF_IEEE802154) + return -EINVAL; + + lock_sock(sk); + + if (!ro->bound) { + err = -ENETUNREACH; + goto out; + } + + ieee802154_addr_from_sa(&ro->dst_addr, &addr->addr); + ro->connected = 1; + +out: + release_sock(sk); + return err; +} + +static int dgram_disconnect(struct sock *sk, int flags) +{ + struct dgram_sock *ro = dgram_sk(sk); + + lock_sock(sk); + ro->connected = 0; + release_sock(sk); + + return 0; +} + +static int dgram_sendmsg(struct kiocb *iocb, struct sock *sk, + struct msghdr *msg, size_t size) +{ + struct net_device *dev; + unsigned int mtu; + struct sk_buff *skb; + struct ieee802154_mac_cb *cb; + struct dgram_sock *ro = dgram_sk(sk); + struct ieee802154_addr dst_addr; + int hlen, tlen; + int err; + + if (msg->msg_flags & MSG_OOB) { + pr_debug("msg->msg_flags = 0x%x\n", msg->msg_flags); + return -EOPNOTSUPP; + } + + if (!ro->connected && !msg->msg_name) + return -EDESTADDRREQ; + else if (ro->connected && msg->msg_name) + return -EISCONN; + + if (!ro->bound) + dev = dev_getfirstbyhwtype(sock_net(sk), ARPHRD_IEEE802154); + else + dev = ieee802154_get_dev(sock_net(sk), &ro->src_addr); + + if (!dev) { + pr_debug("no dev\n"); + err = -ENXIO; + goto out; + } + mtu = dev->mtu; + pr_debug("name = %s, mtu = %u\n", dev->name, mtu); + + if (size > mtu) { + pr_debug("size = %Zu, mtu = %u\n", size, mtu); + err = -EMSGSIZE; + goto out_dev; + } + + hlen = LL_RESERVED_SPACE(dev); + tlen = dev->needed_tailroom; + skb = sock_alloc_send_skb(sk, hlen + tlen + size, + msg->msg_flags & MSG_DONTWAIT, + &err); + if (!skb) + goto out_dev; + + skb_reserve(skb, hlen); + + skb_reset_network_header(skb); + + cb = mac_cb_init(skb); + cb->type = IEEE802154_FC_TYPE_DATA; + cb->ackreq = ro->want_ack; + + if (msg->msg_name) { + DECLARE_SOCKADDR(struct sockaddr_ieee802154*, + daddr, msg->msg_name); + + ieee802154_addr_from_sa(&dst_addr, &daddr->addr); + } else { + dst_addr = ro->dst_addr; + } + + cb->secen = ro->secen; + cb->secen_override = ro->secen_override; + cb->seclevel = ro->seclevel; + cb->seclevel_override = ro->seclevel_override; + + err = dev_hard_header(skb, dev, ETH_P_IEEE802154, &dst_addr, + ro->bound ? &ro->src_addr : NULL, size); + if (err < 0) + goto out_skb; + + err = memcpy_from_msg(skb_put(skb, size), msg, size); + if (err < 0) + goto out_skb; + + skb->dev = dev; + skb->sk = sk; + skb->protocol = htons(ETH_P_IEEE802154); + + dev_put(dev); + + err = dev_queue_xmit(skb); + if (err > 0) + err = net_xmit_errno(err); + + return err ?: size; + +out_skb: + kfree_skb(skb); +out_dev: + dev_put(dev); +out: + return err; +} + +static int dgram_recvmsg(struct kiocb *iocb, struct sock *sk, + struct msghdr *msg, size_t len, int noblock, + int flags, int *addr_len) +{ + size_t copied = 0; + int err = -EOPNOTSUPP; + struct sk_buff *skb; + DECLARE_SOCKADDR(struct sockaddr_ieee802154 *, saddr, msg->msg_name); + + skb = skb_recv_datagram(sk, flags, noblock, &err); + if (!skb) + goto out; + + copied = skb->len; + if (len < copied) { + msg->msg_flags |= MSG_TRUNC; + copied = len; + } + + /* FIXME: skip headers if necessary ?! 
*/ + err = skb_copy_datagram_msg(skb, 0, msg, copied); + if (err) + goto done; + + sock_recv_ts_and_drops(msg, sk, skb); + + if (saddr) { + saddr->family = AF_IEEE802154; + ieee802154_addr_to_sa(&saddr->addr, &mac_cb(skb)->source); + *addr_len = sizeof(*saddr); + } + + if (flags & MSG_TRUNC) + copied = skb->len; +done: + skb_free_datagram(sk, skb); +out: + if (err) + return err; + return copied; +} + +static int dgram_rcv_skb(struct sock *sk, struct sk_buff *skb) +{ + skb = skb_share_check(skb, GFP_ATOMIC); + if (!skb) + return NET_RX_DROP; + + if (sock_queue_rcv_skb(sk, skb) < 0) { + kfree_skb(skb); + return NET_RX_DROP; + } + + return NET_RX_SUCCESS; +} + +static inline bool +ieee802154_match_sock(__le64 hw_addr, __le16 pan_id, __le16 short_addr, + struct dgram_sock *ro) +{ + if (!ro->bound) + return true; + + if (ro->src_addr.mode == IEEE802154_ADDR_LONG && + hw_addr == ro->src_addr.extended_addr) + return true; + + if (ro->src_addr.mode == IEEE802154_ADDR_SHORT && + pan_id == ro->src_addr.pan_id && + short_addr == ro->src_addr.short_addr) + return true; + + return false; +} + +static int ieee802154_dgram_deliver(struct net_device *dev, struct sk_buff *skb) +{ + struct sock *sk, *prev = NULL; + int ret = NET_RX_SUCCESS; + __le16 pan_id, short_addr; + __le64 hw_addr; + + /* Data frame processing */ + BUG_ON(dev->type != ARPHRD_IEEE802154); + + pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev); + short_addr = ieee802154_mlme_ops(dev)->get_short_addr(dev); + hw_addr = ieee802154_devaddr_from_raw(dev->dev_addr); + + read_lock(&dgram_lock); + sk_for_each(sk, &dgram_head) { + if (ieee802154_match_sock(hw_addr, pan_id, short_addr, + dgram_sk(sk))) { + if (prev) { + struct sk_buff *clone; + + clone = skb_clone(skb, GFP_ATOMIC); + if (clone) + dgram_rcv_skb(prev, clone); + } + + prev = sk; + } + } + + if (prev) { + dgram_rcv_skb(prev, skb); + } else { + kfree_skb(skb); + ret = NET_RX_DROP; + } + read_unlock(&dgram_lock); + + return ret; +} + +static int dgram_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen) +{ + struct dgram_sock *ro = dgram_sk(sk); + + int val, len; + + if (level != SOL_IEEE802154) + return -EOPNOTSUPP; + + if (get_user(len, optlen)) + return -EFAULT; + + len = min_t(unsigned int, len, sizeof(int)); + + switch (optname) { + case WPAN_WANTACK: + val = ro->want_ack; + break; + case WPAN_SECURITY: + if (!ro->secen_override) + val = WPAN_SECURITY_DEFAULT; + else if (ro->secen) + val = WPAN_SECURITY_ON; + else + val = WPAN_SECURITY_OFF; + break; + case WPAN_SECURITY_LEVEL: + if (!ro->seclevel_override) + val = WPAN_SECURITY_LEVEL_DEFAULT; + else + val = ro->seclevel; + break; + default: + return -ENOPROTOOPT; + } + + if (put_user(len, optlen)) + return -EFAULT; + if (copy_to_user(optval, &val, len)) + return -EFAULT; + return 0; +} + +static int dgram_setsockopt(struct sock *sk, int level, int optname, + char __user *optval, unsigned int optlen) +{ + struct dgram_sock *ro = dgram_sk(sk); + struct net *net = sock_net(sk); + int val; + int err = 0; + + if (optlen < sizeof(int)) + return -EINVAL; + + if (get_user(val, (int __user *)optval)) + return -EFAULT; + + lock_sock(sk); + + switch (optname) { + case WPAN_WANTACK: + ro->want_ack = !!val; + break; + case WPAN_SECURITY: + if (!ns_capable(net->user_ns, CAP_NET_ADMIN) && + !ns_capable(net->user_ns, CAP_NET_RAW)) { + err = -EPERM; + break; + } + + switch (val) { + case WPAN_SECURITY_DEFAULT: + ro->secen_override = 0; + break; + case WPAN_SECURITY_ON: + ro->secen_override = 1; + ro->secen 
= 1; + break; + case WPAN_SECURITY_OFF: + ro->secen_override = 1; + ro->secen = 0; + break; + default: + err = -EINVAL; + break; + } + break; + case WPAN_SECURITY_LEVEL: + if (!ns_capable(net->user_ns, CAP_NET_ADMIN) && + !ns_capable(net->user_ns, CAP_NET_RAW)) { + err = -EPERM; + break; + } + + if (val < WPAN_SECURITY_LEVEL_DEFAULT || + val > IEEE802154_SCF_SECLEVEL_ENC_MIC128) { + err = -EINVAL; + } else if (val == WPAN_SECURITY_LEVEL_DEFAULT) { + ro->seclevel_override = 0; + } else { + ro->seclevel_override = 1; + ro->seclevel = val; + } + break; + default: + err = -ENOPROTOOPT; + break; + } + + release_sock(sk); + return err; +} + +static struct proto ieee802154_dgram_prot = { + .name = "IEEE-802.15.4-MAC", + .owner = THIS_MODULE, + .obj_size = sizeof(struct dgram_sock), + .init = dgram_init, + .close = dgram_close, + .bind = dgram_bind, + .sendmsg = dgram_sendmsg, + .recvmsg = dgram_recvmsg, + .hash = dgram_hash, + .unhash = dgram_unhash, + .connect = dgram_connect, + .disconnect = dgram_disconnect, + .ioctl = dgram_ioctl, + .getsockopt = dgram_getsockopt, + .setsockopt = dgram_setsockopt, +}; + +static const struct proto_ops ieee802154_dgram_ops = { + .family = PF_IEEE802154, + .owner = THIS_MODULE, + .release = ieee802154_sock_release, + .bind = ieee802154_sock_bind, + .connect = ieee802154_sock_connect, + .socketpair = sock_no_socketpair, + .accept = sock_no_accept, + .getname = sock_no_getname, + .poll = datagram_poll, + .ioctl = ieee802154_sock_ioctl, + .listen = sock_no_listen, + .shutdown = sock_no_shutdown, + .setsockopt = sock_common_setsockopt, + .getsockopt = sock_common_getsockopt, + .sendmsg = ieee802154_sock_sendmsg, + .recvmsg = sock_common_recvmsg, + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage, +#ifdef CONFIG_COMPAT + .compat_setsockopt = compat_sock_common_setsockopt, + .compat_getsockopt = compat_sock_common_getsockopt, +#endif +}; + +/* Create a socket. Initialise the socket, blank the addresses + * set the state. 
+ */ +static int ieee802154_create(struct net *net, struct socket *sock, + int protocol, int kern) +{ + struct sock *sk; + int rc; + struct proto *proto; + const struct proto_ops *ops; + + if (!net_eq(net, &init_net)) + return -EAFNOSUPPORT; + + switch (sock->type) { + case SOCK_RAW: + proto = &ieee802154_raw_prot; + ops = &ieee802154_raw_ops; + break; + case SOCK_DGRAM: + proto = &ieee802154_dgram_prot; + ops = &ieee802154_dgram_ops; + break; + default: + rc = -ESOCKTNOSUPPORT; + goto out; + } + + rc = -ENOMEM; + sk = sk_alloc(net, PF_IEEE802154, GFP_KERNEL, proto); + if (!sk) + goto out; + rc = 0; + + sock->ops = ops; + + sock_init_data(sock, sk); + /* FIXME: sk->sk_destruct */ + sk->sk_family = PF_IEEE802154; + + /* Checksums on by default */ + sock_set_flag(sk, SOCK_ZAPPED); + + if (sk->sk_prot->hash) + sk->sk_prot->hash(sk); + + if (sk->sk_prot->init) { + rc = sk->sk_prot->init(sk); + if (rc) + sk_common_release(sk); + } +out: + return rc; +} + +static const struct net_proto_family ieee802154_family_ops = { + .family = PF_IEEE802154, + .create = ieee802154_create, + .owner = THIS_MODULE, +}; + +static int ieee802154_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev) +{ + if (!netif_running(dev)) + goto drop; + pr_debug("got frame, type %d, dev %p\n", dev->type, dev); +#ifdef DEBUG + print_hex_dump_bytes("ieee802154_rcv ", + DUMP_PREFIX_NONE, skb->data, skb->len); +#endif + + if (!net_eq(dev_net(dev), &init_net)) + goto drop; + + ieee802154_raw_deliver(dev, skb); + + if (dev->type != ARPHRD_IEEE802154) + goto drop; + + if (skb->pkt_type != PACKET_OTHERHOST) + return ieee802154_dgram_deliver(dev, skb); + +drop: + kfree_skb(skb); + return NET_RX_DROP; +} + +static struct packet_type ieee802154_packet_type = { + .type = htons(ETH_P_IEEE802154), + .func = ieee802154_rcv, +}; + +static int __init af_ieee802154_init(void) +{ + int rc = -EINVAL; + + rc = proto_register(&ieee802154_raw_prot, 1); + if (rc) + goto out; + + rc = proto_register(&ieee802154_dgram_prot, 1); + if (rc) + goto err_dgram; + + /* Tell SOCKET that we are alive */ + rc = sock_register(&ieee802154_family_ops); + if (rc) + goto err_sock; + dev_add_pack(&ieee802154_packet_type); + + rc = 0; + goto out; + +err_sock: + proto_unregister(&ieee802154_dgram_prot); +err_dgram: + proto_unregister(&ieee802154_raw_prot); +out: + return rc; +} + +static void __exit af_ieee802154_remove(void) +{ + dev_remove_pack(&ieee802154_packet_type); + sock_unregister(PF_IEEE802154); + proto_unregister(&ieee802154_dgram_prot); + proto_unregister(&ieee802154_raw_prot); +} + +module_init(af_ieee802154_init); +module_exit(af_ieee802154_remove); + +MODULE_LICENSE("GPL"); +MODULE_ALIAS_NETPROTO(PF_IEEE802154); diff --git a/net/ieee802154/sysfs.c b/net/ieee802154/sysfs.c index 1613b9c65dfa..dff55c2d87f3 100644 --- a/net/ieee802154/sysfs.c +++ b/net/ieee802154/sysfs.c @@ -68,7 +68,7 @@ static DEVICE_ATTR_RO(name) MASTER_SHOW(current_channel, "%d"); MASTER_SHOW(current_page, "%d"); MASTER_SHOW(transmit_power, "%d +- 1 dB"); -MASTER_SHOW(cca_mode, "%d"); +MASTER_SHOW_COMPLEX(cca_mode, "%d", phy->cca.mode); static ssize_t channels_supported_show(struct device *dev, struct device_attribute *attr, diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index a44773c8346c..d2e49baaff63 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -395,8 +395,6 @@ int inet_release(struct socket *sock) if (sk) { long timeout; - sock_rps_reset_flow(sk); - /* Applications forget to leave groups before exiting */ 
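As a rough userspace sketch (an editor's illustration, not part of the patch above) of driving the SOCK_DGRAM path that the new net/ieee802154/socket.c implements: open a PF_IEEE802154 datagram socket, request link-layer acknowledgements through the WPAN_WANTACK option handled by dgram_setsockopt(), and send one frame to a short address. The structure layouts and constant values below are assumptions mirroring net/af_ieee802154.h; that header is not exported to userspace, so applications normally copy these definitions, and real code should verify them against its kernel.

#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

#ifndef AF_IEEE802154
#define AF_IEEE802154 36		/* assumed; see linux/socket.h */
#endif
#define SOL_IEEE802154 0		/* assumed; net/af_ieee802154.h */
#define WPAN_WANTACK 0			/* assumed */
#define IEEE802154_ADDR_SHORT 2		/* assumed */
#define IEEE802154_ADDR_LEN 8

struct ieee802154_addr_sa {		/* assumed to mirror the kernel layout */
	int addr_type;
	unsigned short pan_id;
	union {
		unsigned char hwaddr[IEEE802154_ADDR_LEN];
		unsigned short short_addr;
	};
};

struct sockaddr_ieee802154 {
	sa_family_t family;		/* AF_IEEE802154 */
	struct ieee802154_addr_sa addr;
};

int main(void)
{
	struct sockaddr_ieee802154 dst = {
		.family = AF_IEEE802154,
		.addr = {
			.addr_type  = IEEE802154_ADDR_SHORT,
			.pan_id     = 0x0001,	/* hypothetical PAN */
			.short_addr = 0x0002,	/* hypothetical peer */
		},
	};
	static const char payload[] = "hello";
	int one = 1, fd;

	fd = socket(AF_IEEE802154, SOCK_DGRAM, 0);	/* ieee802154_create() */
	if (fd < 0) {
		perror("socket");
		return 1;
	}

	/* handled by dgram_setsockopt(): ask the MAC to use acknowledged frames */
	setsockopt(fd, SOL_IEEE802154, WPAN_WANTACK, &one, sizeof(one));

	/* destination given explicitly, so no bind()/connect() is needed;
	 * dgram_sendmsg() then picks the first 802.15.4 device */
	if (sendto(fd, payload, sizeof(payload) - 1, 0,
		   (struct sockaddr *)&dst, sizeof(dst)) < 0)
		perror("sendto");

	close(fd);
	return 0;
}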
ip_mc_drop_socket(sk); diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 5160c710f2eb..e361ea6f3fc8 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -378,20 +378,18 @@ static int cipso_v4_cache_check(const unsigned char *key, * negative values on failure. * */ -int cipso_v4_cache_add(const struct sk_buff *skb, +int cipso_v4_cache_add(const unsigned char *cipso_ptr, const struct netlbl_lsm_secattr *secattr) { int ret_val = -EPERM; u32 bkt; struct cipso_v4_map_cache_entry *entry = NULL; struct cipso_v4_map_cache_entry *old_entry = NULL; - unsigned char *cipso_ptr; u32 cipso_ptr_len; if (!cipso_v4_cache_enabled || cipso_v4_cache_bucketsize <= 0) return 0; - cipso_ptr = CIPSO_V4_OPTPTR(skb); cipso_ptr_len = cipso_ptr[1]; entry = kzalloc(sizeof(*entry), GFP_ATOMIC); @@ -1579,6 +1577,33 @@ static int cipso_v4_parsetag_loc(const struct cipso_v4_doi *doi_def, } /** + * cipso_v4_optptr - Find the CIPSO option in the packet + * @skb: the packet + * + * Description: + * Parse the packet's IP header looking for a CIPSO option. Returns a pointer + * to the start of the CIPSO option on success, NULL if one if not found. + * + */ +unsigned char *cipso_v4_optptr(const struct sk_buff *skb) +{ + const struct iphdr *iph = ip_hdr(skb); + unsigned char *optptr = (unsigned char *)&(ip_hdr(skb)[1]); + int optlen; + int taglen; + + for (optlen = iph->ihl*4 - sizeof(struct iphdr); optlen > 0; ) { + if (optptr[0] == IPOPT_CIPSO) + return optptr; + taglen = optptr[1]; + optlen -= taglen; + optptr += taglen; + } + + return NULL; +} + +/** * cipso_v4_validate - Validate a CIPSO option * @option: the start of the option, on error it is set to point to the error * @@ -2119,8 +2144,8 @@ void cipso_v4_req_delattr(struct request_sock *req) * on success and negative values on failure. * */ -static int cipso_v4_getattr(const unsigned char *cipso, - struct netlbl_lsm_secattr *secattr) +int cipso_v4_getattr(const unsigned char *cipso, + struct netlbl_lsm_secattr *secattr) { int ret_val = -ENOMSG; u32 doi; @@ -2305,22 +2330,6 @@ int cipso_v4_skbuff_delattr(struct sk_buff *skb) return 0; } -/** - * cipso_v4_skbuff_getattr - Get the security attributes from the CIPSO option - * @skb: the packet - * @secattr: the security attributes - * - * Description: - * Parse the given packet's CIPSO option and return the security attributes. - * Returns zero on success and negative values on failure. - * - */ -int cipso_v4_skbuff_getattr(const struct sk_buff *skb, - struct netlbl_lsm_secattr *secattr) -{ - return cipso_v4_getattr(CIPSO_V4_OPTPTR(skb), secattr); -} - /* * Setup Functions */ diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 214882e7d6de..3a8985c94581 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1186,7 +1186,7 @@ __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope) no_in_dev: /* Not loopback addresses on loopback should be preferred - in this case. It is importnat that lo is the first interface + in this case. It is important that lo is the first interface in dev_base list. 
*/ for_each_netdev_rcu(net, dev) { @@ -1522,7 +1522,8 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, preferred, valid)) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); @@ -1566,7 +1567,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, - RTM_NEWADDR, NLM_F_MULTI) <= 0) { + RTM_NEWADDR, NLM_F_MULTI) < 0) { rcu_read_unlock(); goto done; } @@ -1749,7 +1750,8 @@ static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex, IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); @@ -1881,7 +1883,7 @@ static int inet_netconf_dump_devconf(struct sk_buff *skb, cb->nlh->nlmsg_seq, RTM_NEWNETCONF, NLM_F_MULTI, - -1) <= 0) { + -1) < 0) { rcu_read_unlock(); goto done; } @@ -1897,7 +1899,7 @@ cont: NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNETCONF, NLM_F_MULTI, - -1) <= 0) + -1) < 0) goto done; else h++; @@ -1908,7 +1910,7 @@ cont: NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNETCONF, NLM_F_MULTI, - -1) <= 0) + -1) < 0) goto done; else h++; @@ -2320,7 +2322,7 @@ static __net_initdata struct pernet_operations devinet_ops = { .exit = devinet_exit_net, }; -static struct rtnl_af_ops inet_af_ops = { +static struct rtnl_af_ops inet_af_ops __read_mostly = { .family = AF_INET, .fill_link_af = inet_fill_link_af, .get_link_af_size = inet_get_link_af_size, diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 23104a3f2924..57be71dd6a9e 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -67,7 +67,7 @@ static int __net_init fib4_rules_init(struct net *net) return 0; fail: - kfree(local_table); + fib_free_table(local_table); return -ENOMEM; } #else @@ -109,6 +109,7 @@ struct fib_table *fib_new_table(struct net *net, u32 id) return tb; } +/* caller must hold either rtnl or rcu read lock */ struct fib_table *fib_get_table(struct net *net, u32 id) { struct fib_table *tb; @@ -119,15 +120,11 @@ struct fib_table *fib_get_table(struct net *net, u32 id) id = RT_TABLE_MAIN; h = id & (FIB_TABLE_HASHSZ - 1); - rcu_read_lock(); head = &net->ipv4.fib_table_hash[h]; hlist_for_each_entry_rcu(tb, head, tb_hlist) { - if (tb->tb_id == id) { - rcu_read_unlock(); + if (tb->tb_id == id) return tb; - } } - rcu_read_unlock(); return NULL; } #endif /* CONFIG_IP_MULTIPLE_TABLES */ @@ -167,16 +164,18 @@ static inline unsigned int __inet_dev_addr_type(struct net *net, if (ipv4_is_multicast(addr)) return RTN_MULTICAST; + rcu_read_lock(); + local_table = fib_get_table(net, RT_TABLE_LOCAL); if (local_table) { ret = RTN_UNICAST; - rcu_read_lock(); if (!fib_table_lookup(local_table, &fl4, &res, FIB_LOOKUP_NOREF)) { if (!dev || dev == res.fi->fib_dev) ret = res.type; } - rcu_read_unlock(); } + + rcu_read_unlock(); return ret; } @@ -919,7 +918,7 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim) #undef BRD1_OK } -static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb) +static void nl_fib_lookup(struct net *net, struct fib_result_nl *frn) { struct fib_result res; @@ -929,6 +928,11 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb) .flowi4_tos = frn->fl_tos, .flowi4_scope = frn->fl_scope, }; + struct fib_table *tb; + + rcu_read_lock(); + + tb = fib_get_table(net, frn->tb_id_in); frn->err = 
-ENOENT; if (tb) { @@ -945,6 +949,8 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb) } local_bh_enable(); } + + rcu_read_unlock(); } static void nl_fib_input(struct sk_buff *skb) @@ -952,7 +958,6 @@ static void nl_fib_input(struct sk_buff *skb) struct net *net; struct fib_result_nl *frn; struct nlmsghdr *nlh; - struct fib_table *tb; u32 portid; net = sock_net(skb->sk); @@ -967,9 +972,7 @@ static void nl_fib_input(struct sk_buff *skb) nlh = nlmsg_hdr(skb); frn = (struct fib_result_nl *) nlmsg_data(nlh); - tb = fib_get_table(net, frn->tb_id_in); - - nl_fib_lookup(frn, tb); + nl_fib_lookup(net, frn); portid = NETLINK_CB(skb).portid; /* netlink portid */ NETLINK_CB(skb).portid = 0; /* from kernel */ diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h index 1e4f6600b31d..825981b1049a 100644 --- a/net/ipv4/fib_lookup.h +++ b/net/ipv4/fib_lookup.h @@ -32,7 +32,6 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, u32 tb_id, unsigned int); void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, int dst_len, u32 tb_id, const struct nl_info *info, unsigned int nlm_flags); -struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio); static inline void fib_result_assign(struct fib_result *res, struct fib_info *fi) diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 8f7bd56955b0..d3db718be51d 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -81,27 +81,25 @@ static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp, break; case FR_ACT_UNREACHABLE: - err = -ENETUNREACH; - goto errout; + return -ENETUNREACH; case FR_ACT_PROHIBIT: - err = -EACCES; - goto errout; + return -EACCES; case FR_ACT_BLACKHOLE: default: - err = -EINVAL; - goto errout; + return -EINVAL; } + rcu_read_lock(); + tbl = fib_get_table(rule->fr_net, rule->table); - if (!tbl) - goto errout; + if (tbl) + err = fib_table_lookup(tbl, &flp->u.ip4, + (struct fib_result *)arg->result, + arg->flags); - err = fib_table_lookup(tbl, &flp->u.ip4, (struct fib_result *) arg->result, arg->flags); - if (err > 0) - err = -EAGAIN; -errout: + rcu_read_unlock(); return err; } diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index f99f41bd15b8..1e2090ea663e 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -360,7 +360,8 @@ static inline size_t fib_nlmsg_size(struct fib_info *fi) + nla_total_size(4) /* RTA_TABLE */ + nla_total_size(4) /* RTA_DST */ + nla_total_size(4) /* RTA_PRIORITY */ - + nla_total_size(4); /* RTA_PREFSRC */ + + nla_total_size(4) /* RTA_PREFSRC */ + + nla_total_size(TCP_CA_NAME_MAX); /* RTAX_CC_ALGO */ /* space for nested metrics */ payload += nla_total_size((RTAX_MAX * nla_total_size(4))); @@ -410,24 +411,6 @@ errout: rtnl_set_sk_err(info->nl_net, RTNLGRP_IPV4_ROUTE, err); } -/* Return the first fib alias matching TOS with - * priority less than or equal to PRIO. 
- */ -struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio) -{ - if (fah) { - struct fib_alias *fa; - list_for_each_entry(fa, fah, fa_list) { - if (fa->fa_tos > tos) - continue; - if (fa->fa_info->fib_priority >= prio || - fa->fa_tos < tos) - return fa; - } - } - return NULL; -} - static int fib_detect_death(struct fib_info *fi, int order, struct fib_info **last_resort, int *last_idx, int dflt) @@ -859,7 +842,16 @@ struct fib_info *fib_create_info(struct fib_config *cfg) if (type > RTAX_MAX) goto err_inval; - val = nla_get_u32(nla); + if (type == RTAX_CC_ALGO) { + char tmp[TCP_CA_NAME_MAX]; + + nla_strlcpy(tmp, nla, sizeof(tmp)); + val = tcp_ca_get_key_by_name(tmp); + if (val == TCP_CA_UNSPEC) + goto err_inval; + } else { + val = nla_get_u32(nla); + } if (type == RTAX_ADVMSS && val > 65535 - 40) val = 65535 - 40; if (type == RTAX_MTU && val > 65535 - 15) @@ -1081,7 +1073,8 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, nla_nest_end(skb, mp); } #endif - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 18bcaf2ff2fd..3daf0224ff2e 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -83,28 +83,33 @@ #define MAX_STAT_DEPTH 32 -#define KEYLENGTH (8*sizeof(t_key)) +#define KEYLENGTH (8*sizeof(t_key)) +#define KEY_MAX ((t_key)~0) typedef unsigned int t_key; -#define T_TNODE 0 -#define T_LEAF 1 -#define NODE_TYPE_MASK 0x1UL -#define NODE_TYPE(node) ((node)->parent & NODE_TYPE_MASK) +#define IS_TNODE(n) ((n)->bits) +#define IS_LEAF(n) (!(n)->bits) -#define IS_TNODE(n) (!(n->parent & T_LEAF)) -#define IS_LEAF(n) (n->parent & T_LEAF) +#define get_index(_key, _kv) (((_key) ^ (_kv)->key) >> (_kv)->pos) -struct rt_trie_node { - unsigned long parent; - t_key key; -}; - -struct leaf { - unsigned long parent; +struct tnode { t_key key; - struct hlist_head list; + unsigned char bits; /* 2log(KEYLENGTH) bits needed */ + unsigned char pos; /* 2log(KEYLENGTH) bits needed */ + unsigned char slen; + struct tnode __rcu *parent; struct rcu_head rcu; + union { + /* The fields in this struct are valid if bits > 0 (TNODE) */ + struct { + t_key empty_children; /* KEYLENGTH bits needed */ + t_key full_children; /* KEYLENGTH bits needed */ + struct tnode __rcu *child[0]; + }; + /* This list pointer if valid if bits == 0 (LEAF) */ + struct hlist_head list; + }; }; struct leaf_info { @@ -115,20 +120,6 @@ struct leaf_info { struct rcu_head rcu; }; -struct tnode { - unsigned long parent; - t_key key; - unsigned char pos; /* 2log(KEYLENGTH) bits needed */ - unsigned char bits; /* 2log(KEYLENGTH) bits needed */ - unsigned int full_children; /* KEYLENGTH bits needed */ - unsigned int empty_children; /* KEYLENGTH bits needed */ - union { - struct rcu_head rcu; - struct tnode *tnode_free; - }; - struct rt_trie_node __rcu *child[0]; -}; - #ifdef CONFIG_IP_FIB_TRIE_STATS struct trie_use_stats { unsigned int gets; @@ -151,19 +142,13 @@ struct trie_stat { }; struct trie { - struct rt_trie_node __rcu *trie; + struct tnode __rcu *trie; #ifdef CONFIG_IP_FIB_TRIE_STATS - struct trie_use_stats stats; + struct trie_use_stats __percpu *stats; #endif }; -static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *n, - int wasfull); -static struct rt_trie_node *resize(struct trie *t, struct tnode *tn); -static struct tnode *inflate(struct trie *t, struct tnode *tn); -static struct tnode *halve(struct trie *t, struct tnode *tn); -/* tnodes to free after 
resize(); protected by RTNL */ -static struct tnode *tnode_free_head; +static void resize(struct trie *t, struct tnode *tn); static size_t tnode_free_size; /* @@ -176,170 +161,101 @@ static const int sync_pages = 128; static struct kmem_cache *fn_alias_kmem __read_mostly; static struct kmem_cache *trie_leaf_kmem __read_mostly; -/* - * caller must hold RTNL - */ -static inline struct tnode *node_parent(const struct rt_trie_node *node) -{ - unsigned long parent; - - parent = rcu_dereference_index_check(node->parent, lockdep_rtnl_is_held()); +/* caller must hold RTNL */ +#define node_parent(n) rtnl_dereference((n)->parent) - return (struct tnode *)(parent & ~NODE_TYPE_MASK); -} +/* caller must hold RCU read lock or RTNL */ +#define node_parent_rcu(n) rcu_dereference_rtnl((n)->parent) -/* - * caller must hold RCU read lock or RTNL - */ -static inline struct tnode *node_parent_rcu(const struct rt_trie_node *node) +/* wrapper for rcu_assign_pointer */ +static inline void node_set_parent(struct tnode *n, struct tnode *tp) { - unsigned long parent; - - parent = rcu_dereference_index_check(node->parent, rcu_read_lock_held() || - lockdep_rtnl_is_held()); - - return (struct tnode *)(parent & ~NODE_TYPE_MASK); + if (n) + rcu_assign_pointer(n->parent, tp); } -/* Same as rcu_assign_pointer - * but that macro() assumes that value is a pointer. +#define NODE_INIT_PARENT(n, p) RCU_INIT_POINTER((n)->parent, p) + +/* This provides us with the number of children in this node, in the case of a + * leaf this will return 0 meaning none of the children are accessible. */ -static inline void node_set_parent(struct rt_trie_node *node, struct tnode *ptr) +static inline unsigned long tnode_child_length(const struct tnode *tn) { - smp_wmb(); - node->parent = (unsigned long)ptr | NODE_TYPE(node); + return (1ul << tn->bits) & ~(1ul); } -/* - * caller must hold RTNL - */ -static inline struct rt_trie_node *tnode_get_child(const struct tnode *tn, unsigned int i) +/* caller must hold RTNL */ +static inline struct tnode *tnode_get_child(const struct tnode *tn, + unsigned long i) { - BUG_ON(i >= 1U << tn->bits); - return rtnl_dereference(tn->child[i]); } -/* - * caller must hold RCU read lock or RTNL - */ -static inline struct rt_trie_node *tnode_get_child_rcu(const struct tnode *tn, unsigned int i) +/* caller must hold RCU read lock or RTNL */ +static inline struct tnode *tnode_get_child_rcu(const struct tnode *tn, + unsigned long i) { - BUG_ON(i >= 1U << tn->bits); - return rcu_dereference_rtnl(tn->child[i]); } -static inline int tnode_child_length(const struct tnode *tn) -{ - return 1 << tn->bits; -} - -static inline t_key mask_pfx(t_key k, unsigned int l) -{ - return (l == 0) ? 0 : k >> (KEYLENGTH-l) << (KEYLENGTH-l); -} - -static inline t_key tkey_extract_bits(t_key a, unsigned int offset, unsigned int bits) -{ - if (offset < KEYLENGTH) - return ((t_key)(a << offset)) >> (KEYLENGTH - bits); - else - return 0; -} - -static inline int tkey_equals(t_key a, t_key b) -{ - return a == b; -} - -static inline int tkey_sub_equals(t_key a, int offset, int bits, t_key b) -{ - if (bits == 0 || offset >= KEYLENGTH) - return 1; - bits = bits > KEYLENGTH ? KEYLENGTH : bits; - return ((a ^ b) << offset) >> (KEYLENGTH - bits) == 0; -} - -static inline int tkey_mismatch(t_key a, int offset, t_key b) -{ - t_key diff = a ^ b; - int i = offset; - - if (!diff) - return 0; - while ((diff << i) >> (KEYLENGTH-1) == 0) - i++; - return i; -} - -/* - To understand this stuff, an understanding of keys and all their bits is - necessary. 
Every node in the trie has a key associated with it, but not - all of the bits in that key are significant. - - Consider a node 'n' and its parent 'tp'. - - If n is a leaf, every bit in its key is significant. Its presence is - necessitated by path compression, since during a tree traversal (when - searching for a leaf - unless we are doing an insertion) we will completely - ignore all skipped bits we encounter. Thus we need to verify, at the end of - a potentially successful search, that we have indeed been walking the - correct key path. - - Note that we can never "miss" the correct key in the tree if present by - following the wrong path. Path compression ensures that segments of the key - that are the same for all keys with a given prefix are skipped, but the - skipped part *is* identical for each node in the subtrie below the skipped - bit! trie_insert() in this implementation takes care of that - note the - call to tkey_sub_equals() in trie_insert(). - - if n is an internal node - a 'tnode' here, the various parts of its key - have many different meanings. - - Example: - _________________________________________________________________ - | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C | - ----------------------------------------------------------------- - 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 - - _________________________________________________________________ - | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u | - ----------------------------------------------------------------- - 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 - - tp->pos = 7 - tp->bits = 3 - n->pos = 15 - n->bits = 4 - - First, let's just ignore the bits that come before the parent tp, that is - the bits from 0 to (tp->pos-1). They are *known* but at this point we do - not use them for anything. - - The bits from (tp->pos) to (tp->pos + tp->bits - 1) - "N", above - are the - index into the parent's child array. That is, they will be used to find - 'n' among tp's children. - - The bits from (tp->pos + tp->bits) to (n->pos - 1) - "S" - are skipped bits - for the node n. - - All the bits we have seen so far are significant to the node n. The rest - of the bits are really not needed or indeed known in n->key. - - The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into - n's child array, and will of course be different for each child. - - - The rest of the bits, from (n->pos + n->bits) onward, are completely unknown - at this point. - -*/ - -static inline void check_tnode(const struct tnode *tn) -{ - WARN_ON(tn && tn->pos+tn->bits > 32); -} +/* To understand this stuff, an understanding of keys and all their bits is + * necessary. Every node in the trie has a key associated with it, but not + * all of the bits in that key are significant. + * + * Consider a node 'n' and its parent 'tp'. + * + * If n is a leaf, every bit in its key is significant. Its presence is + * necessitated by path compression, since during a tree traversal (when + * searching for a leaf - unless we are doing an insertion) we will completely + * ignore all skipped bits we encounter. Thus we need to verify, at the end of + * a potentially successful search, that we have indeed been walking the + * correct key path. + * + * Note that we can never "miss" the correct key in the tree if present by + * following the wrong path. 
Path compression ensures that segments of the key + * that are the same for all keys with a given prefix are skipped, but the + * skipped part *is* identical for each node in the subtrie below the skipped + * bit! trie_insert() in this implementation takes care of that. + * + * if n is an internal node - a 'tnode' here, the various parts of its key + * have many different meanings. + * + * Example: + * _________________________________________________________________ + * | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C | + * ----------------------------------------------------------------- + * 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 + * + * _________________________________________________________________ + * | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u | + * ----------------------------------------------------------------- + * 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 + * + * tp->pos = 22 + * tp->bits = 3 + * n->pos = 13 + * n->bits = 4 + * + * First, let's just ignore the bits that come before the parent tp, that is + * the bits from (tp->pos + tp->bits) to 31. They are *known* but at this + * point we do not use them for anything. + * + * The bits from (tp->pos) to (tp->pos + tp->bits - 1) - "N", above - are the + * index into the parent's child array. That is, they will be used to find + * 'n' among tp's children. + * + * The bits from (n->pos + n->bits) to (tn->pos - 1) - "S" - are skipped bits + * for the node n. + * + * All the bits we have seen so far are significant to the node n. The rest + * of the bits are really not needed or indeed known in n->key. + * + * The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into + * n's child array, and will of course be different for each child. + * + * The rest of the bits, from 0 to (n->pos + n->bits), are completely unknown + * at this point. 
+ */ static const int halve_threshold = 25; static const int inflate_threshold = 50; @@ -357,17 +273,23 @@ static inline void alias_free_mem_rcu(struct fib_alias *fa) call_rcu(&fa->rcu, __alias_free_mem); } -static void __leaf_free_rcu(struct rcu_head *head) -{ - struct leaf *l = container_of(head, struct leaf, rcu); - kmem_cache_free(trie_leaf_kmem, l); -} +#define TNODE_KMALLOC_MAX \ + ilog2((PAGE_SIZE - sizeof(struct tnode)) / sizeof(struct tnode *)) -static inline void free_leaf(struct leaf *l) +static void __node_free_rcu(struct rcu_head *head) { - call_rcu(&l->rcu, __leaf_free_rcu); + struct tnode *n = container_of(head, struct tnode, rcu); + + if (IS_LEAF(n)) + kmem_cache_free(trie_leaf_kmem, n); + else if (n->bits <= TNODE_KMALLOC_MAX) + kfree(n); + else + vfree(n); } +#define node_free(n) call_rcu(&n->rcu, __node_free_rcu) + static inline void free_leaf_info(struct leaf_info *leaf) { kfree_rcu(leaf, rcu); @@ -381,56 +303,31 @@ static struct tnode *tnode_alloc(size_t size) return vzalloc(size); } -static void __tnode_free_rcu(struct rcu_head *head) -{ - struct tnode *tn = container_of(head, struct tnode, rcu); - size_t size = sizeof(struct tnode) + - (sizeof(struct rt_trie_node *) << tn->bits); - - if (size <= PAGE_SIZE) - kfree(tn); - else - vfree(tn); -} - -static inline void tnode_free(struct tnode *tn) -{ - if (IS_LEAF(tn)) - free_leaf((struct leaf *) tn); - else - call_rcu(&tn->rcu, __tnode_free_rcu); -} - -static void tnode_free_safe(struct tnode *tn) +static inline void empty_child_inc(struct tnode *n) { - BUG_ON(IS_LEAF(tn)); - tn->tnode_free = tnode_free_head; - tnode_free_head = tn; - tnode_free_size += sizeof(struct tnode) + - (sizeof(struct rt_trie_node *) << tn->bits); + ++n->empty_children ? : ++n->full_children; } -static void tnode_free_flush(void) +static inline void empty_child_dec(struct tnode *n) { - struct tnode *tn; - - while ((tn = tnode_free_head)) { - tnode_free_head = tn->tnode_free; - tn->tnode_free = NULL; - tnode_free(tn); - } - - if (tnode_free_size >= PAGE_SIZE * sync_pages) { - tnode_free_size = 0; - synchronize_rcu(); - } + n->empty_children-- ? : n->full_children--; } -static struct leaf *leaf_new(void) +static struct tnode *leaf_new(t_key key) { - struct leaf *l = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL); + struct tnode *l = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL); if (l) { - l->parent = T_LEAF; + l->parent = NULL; + /* set key and pos to reflect full key value + * any trailing zeros in the key should be ignored + * as the nodes are searched + */ + l->key = key; + l->slen = 0; + l->pos = 0; + /* set bits to 0 indicating we are not a tnode */ + l->bits = 0; + INIT_HLIST_HEAD(&l->list); } return l; @@ -449,462 +346,530 @@ static struct leaf_info *leaf_info_new(int plen) static struct tnode *tnode_new(t_key key, int pos, int bits) { - size_t sz = sizeof(struct tnode) + (sizeof(struct rt_trie_node *) << bits); + size_t sz = offsetof(struct tnode, child[1ul << bits]); struct tnode *tn = tnode_alloc(sz); + unsigned int shift = pos + bits; + + /* verify bits and pos their msb bits clear and values are valid */ + BUG_ON(!bits || (shift > KEYLENGTH)); if (tn) { - tn->parent = T_TNODE; + tn->parent = NULL; + tn->slen = pos; tn->pos = pos; tn->bits = bits; - tn->key = key; - tn->full_children = 0; - tn->empty_children = 1<<bits; + tn->key = (shift < KEYLENGTH) ? 
(key >> shift) << shift : 0; + if (bits == KEYLENGTH) + tn->full_children = 1; + else + tn->empty_children = 1ul << bits; } pr_debug("AT %p s=%zu %zu\n", tn, sizeof(struct tnode), - sizeof(struct rt_trie_node *) << bits); + sizeof(struct tnode *) << bits); return tn; } -/* - * Check whether a tnode 'n' is "full", i.e. it is an internal node +/* Check whether a tnode 'n' is "full", i.e. it is an internal node * and no bits are skipped. See discussion in dyntree paper p. 6 */ - -static inline int tnode_full(const struct tnode *tn, const struct rt_trie_node *n) +static inline int tnode_full(const struct tnode *tn, const struct tnode *n) { - if (n == NULL || IS_LEAF(n)) - return 0; - - return ((struct tnode *) n)->pos == tn->pos + tn->bits; + return n && ((n->pos + n->bits) == tn->pos) && IS_TNODE(n); } -static inline void put_child(struct tnode *tn, int i, - struct rt_trie_node *n) -{ - tnode_put_child_reorg(tn, i, n, -1); -} - - /* - * Add a child at position i overwriting the old value. - * Update the value of full_children and empty_children. - */ - -static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *n, - int wasfull) +/* Add a child at position i overwriting the old value. + * Update the value of full_children and empty_children. + */ +static void put_child(struct tnode *tn, unsigned long i, struct tnode *n) { - struct rt_trie_node *chi = rtnl_dereference(tn->child[i]); - int isfull; + struct tnode *chi = tnode_get_child(tn, i); + int isfull, wasfull; - BUG_ON(i >= 1<<tn->bits); + BUG_ON(i >= tnode_child_length(tn)); - /* update emptyChildren */ + /* update emptyChildren, overflow into fullChildren */ if (n == NULL && chi != NULL) - tn->empty_children++; - else if (n != NULL && chi == NULL) - tn->empty_children--; + empty_child_inc(tn); + if (n != NULL && chi == NULL) + empty_child_dec(tn); /* update fullChildren */ - if (wasfull == -1) - wasfull = tnode_full(tn, chi); - + wasfull = tnode_full(tn, chi); isfull = tnode_full(tn, n); + if (wasfull && !isfull) tn->full_children--; else if (!wasfull && isfull) tn->full_children++; - if (n) - node_set_parent(n, tn); + if (n && (tn->slen < n->slen)) + tn->slen = n->slen; rcu_assign_pointer(tn->child[i], n); } -#define MAX_WORK 10 -static struct rt_trie_node *resize(struct trie *t, struct tnode *tn) +static void update_children(struct tnode *tn) { - int i; - struct tnode *old_tn; - int inflate_threshold_use; - int halve_threshold_use; - int max_work; + unsigned long i; - if (!tn) - return NULL; + /* update all of the child parent pointers */ + for (i = tnode_child_length(tn); i;) { + struct tnode *inode = tnode_get_child(tn, --i); - pr_debug("In tnode_resize %p inflate_threshold=%d threshold=%d\n", - tn, inflate_threshold, halve_threshold); + if (!inode) + continue; - /* No children */ - if (tn->empty_children == tnode_child_length(tn)) { - tnode_free_safe(tn); - return NULL; + /* Either update the children of a tnode that + * already belongs to us or update the child + * to point to ourselves. + */ + if (node_parent(inode) == tn) + update_children(inode); + else + node_set_parent(inode, tn); } - /* One child */ - if (tn->empty_children == tnode_child_length(tn) - 1) - goto one_child; - /* - * Double as long as the resulting node has a number of - * nonempty nodes that are above the threshold. 
- */ - - /* - * From "Implementing a dynamic compressed trie" by Stefan Nilsson of - * the Helsinki University of Technology and Matti Tikkanen of Nokia - * Telecommunications, page 6: - * "A node is doubled if the ratio of non-empty children to all - * children in the *doubled* node is at least 'high'." - * - * 'high' in this instance is the variable 'inflate_threshold'. It - * is expressed as a percentage, so we multiply it with - * tnode_child_length() and instead of multiplying by 2 (since the - * child array will be doubled by inflate()) and multiplying - * the left-hand side by 100 (to handle the percentage thing) we - * multiply the left-hand side by 50. - * - * The left-hand side may look a bit weird: tnode_child_length(tn) - * - tn->empty_children is of course the number of non-null children - * in the current node. tn->full_children is the number of "full" - * children, that is non-null tnodes with a skip value of 0. - * All of those will be doubled in the resulting inflated tnode, so - * we just count them one extra time here. - * - * A clearer way to write this would be: - * - * to_be_doubled = tn->full_children; - * not_to_be_doubled = tnode_child_length(tn) - tn->empty_children - - * tn->full_children; - * - * new_child_length = tnode_child_length(tn) * 2; - * - * new_fill_factor = 100 * (not_to_be_doubled + 2*to_be_doubled) / - * new_child_length; - * if (new_fill_factor >= inflate_threshold) - * - * ...and so on, tho it would mess up the while () loop. - * - * anyway, - * 100 * (not_to_be_doubled + 2*to_be_doubled) / new_child_length >= - * inflate_threshold - * - * avoid a division: - * 100 * (not_to_be_doubled + 2*to_be_doubled) >= - * inflate_threshold * new_child_length - * - * expand not_to_be_doubled and to_be_doubled, and shorten: - * 100 * (tnode_child_length(tn) - tn->empty_children + - * tn->full_children) >= inflate_threshold * new_child_length - * - * expand new_child_length: - * 100 * (tnode_child_length(tn) - tn->empty_children + - * tn->full_children) >= - * inflate_threshold * tnode_child_length(tn) * 2 - * - * shorten again: - * 50 * (tn->full_children + tnode_child_length(tn) - - * tn->empty_children) >= inflate_threshold * - * tnode_child_length(tn) - * - */ +} - check_tnode(tn); +static inline void put_child_root(struct tnode *tp, struct trie *t, + t_key key, struct tnode *n) +{ + if (tp) + put_child(tp, get_index(key, tp), n); + else + rcu_assign_pointer(t->trie, n); +} - /* Keep root node larger */ +static inline void tnode_free_init(struct tnode *tn) +{ + tn->rcu.next = NULL; +} - if (!node_parent((struct rt_trie_node *)tn)) { - inflate_threshold_use = inflate_threshold_root; - halve_threshold_use = halve_threshold_root; - } else { - inflate_threshold_use = inflate_threshold; - halve_threshold_use = halve_threshold; - } +static inline void tnode_free_append(struct tnode *tn, struct tnode *n) +{ + n->rcu.next = tn->rcu.next; + tn->rcu.next = &n->rcu; +} - max_work = MAX_WORK; - while ((tn->full_children > 0 && max_work-- && - 50 * (tn->full_children + tnode_child_length(tn) - - tn->empty_children) - >= inflate_threshold_use * tnode_child_length(tn))) { +static void tnode_free(struct tnode *tn) +{ + struct callback_head *head = &tn->rcu; - old_tn = tn; - tn = inflate(t, tn); + while (head) { + head = head->next; + tnode_free_size += offsetof(struct tnode, child[1 << tn->bits]); + node_free(tn); - if (IS_ERR(tn)) { - tn = old_tn; -#ifdef CONFIG_IP_FIB_TRIE_STATS - t->stats.resize_node_skipped++; -#endif - break; - } + tn = container_of(head, struct 
tnode, rcu); } - check_tnode(tn); - - /* Return if at least one inflate is run */ - if (max_work != MAX_WORK) - return (struct rt_trie_node *) tn; - - /* - * Halve as long as the number of empty children in this - * node is above threshold. - */ - - max_work = MAX_WORK; - while (tn->bits > 1 && max_work-- && - 100 * (tnode_child_length(tn) - tn->empty_children) < - halve_threshold_use * tnode_child_length(tn)) { - - old_tn = tn; - tn = halve(t, tn); - if (IS_ERR(tn)) { - tn = old_tn; -#ifdef CONFIG_IP_FIB_TRIE_STATS - t->stats.resize_node_skipped++; -#endif - break; - } + if (tnode_free_size >= PAGE_SIZE * sync_pages) { + tnode_free_size = 0; + synchronize_rcu(); } +} +static void replace(struct trie *t, struct tnode *oldtnode, struct tnode *tn) +{ + struct tnode *tp = node_parent(oldtnode); + unsigned long i; - /* Only one child remains */ - if (tn->empty_children == tnode_child_length(tn) - 1) { -one_child: - for (i = 0; i < tnode_child_length(tn); i++) { - struct rt_trie_node *n; - - n = rtnl_dereference(tn->child[i]); - if (!n) - continue; - - /* compress one level */ + /* setup the parent pointer out of and back into this node */ + NODE_INIT_PARENT(tn, tp); + put_child_root(tp, t, tn->key, tn); - node_set_parent(n, NULL); - tnode_free_safe(tn); - return n; - } - } - return (struct rt_trie_node *) tn; -} + /* update all of the child parent pointers */ + update_children(tn); + /* all pointers should be clean so we are done */ + tnode_free(oldtnode); -static void tnode_clean_free(struct tnode *tn) -{ - int i; - struct tnode *tofree; + /* resize children now that oldtnode is freed */ + for (i = tnode_child_length(tn); i;) { + struct tnode *inode = tnode_get_child(tn, --i); - for (i = 0; i < tnode_child_length(tn); i++) { - tofree = (struct tnode *)rtnl_dereference(tn->child[i]); - if (tofree) - tnode_free(tofree); + /* resize child node */ + if (tnode_full(tn, inode)) + resize(t, inode); } - tnode_free(tn); } -static struct tnode *inflate(struct trie *t, struct tnode *tn) +static int inflate(struct trie *t, struct tnode *oldtnode) { - struct tnode *oldtnode = tn; - int olen = tnode_child_length(tn); - int i; + struct tnode *tn; + unsigned long i; + t_key m; pr_debug("In inflate\n"); - tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits + 1); - + tn = tnode_new(oldtnode->key, oldtnode->pos - 1, oldtnode->bits + 1); if (!tn) - return ERR_PTR(-ENOMEM); - - /* - * Preallocate and store tnodes before the actual work so we - * don't get into an inconsistent state if memory allocation - * fails. In case of failure we return the oldnode and inflate - * of tnode is ignored. 
- */ - - for (i = 0; i < olen; i++) { - struct tnode *inode; - - inode = (struct tnode *) tnode_get_child(oldtnode, i); - if (inode && - IS_TNODE(inode) && - inode->pos == oldtnode->pos + oldtnode->bits && - inode->bits > 1) { - struct tnode *left, *right; - t_key m = ~0U << (KEYLENGTH - 1) >> inode->pos; - - left = tnode_new(inode->key&(~m), inode->pos + 1, - inode->bits - 1); - if (!left) - goto nomem; - - right = tnode_new(inode->key|m, inode->pos + 1, - inode->bits - 1); - - if (!right) { - tnode_free(left); - goto nomem; - } + return -ENOMEM; - put_child(tn, 2*i, (struct rt_trie_node *) left); - put_child(tn, 2*i+1, (struct rt_trie_node *) right); - } - } + /* prepare oldtnode to be freed */ + tnode_free_init(oldtnode); - for (i = 0; i < olen; i++) { - struct tnode *inode; - struct rt_trie_node *node = tnode_get_child(oldtnode, i); - struct tnode *left, *right; - int size, j; + /* Assemble all of the pointers in our cluster, in this case that + * represents all of the pointers out of our allocated nodes that + * point to existing tnodes and the links between our allocated + * nodes. + */ + for (i = tnode_child_length(oldtnode), m = 1u << tn->pos; i;) { + struct tnode *inode = tnode_get_child(oldtnode, --i); + struct tnode *node0, *node1; + unsigned long j, k; /* An empty child */ - if (node == NULL) + if (inode == NULL) continue; /* A leaf or an internal node with skipped bits */ - - if (IS_LEAF(node) || ((struct tnode *) node)->pos > - tn->pos + tn->bits - 1) { - put_child(tn, - tkey_extract_bits(node->key, oldtnode->pos, oldtnode->bits + 1), - node); + if (!tnode_full(oldtnode, inode)) { + put_child(tn, get_index(inode->key, tn), inode); continue; } - /* An internal node with two children */ - inode = (struct tnode *) node; + /* drop the node in the old tnode free list */ + tnode_free_append(oldtnode, inode); + /* An internal node with two children */ if (inode->bits == 1) { - put_child(tn, 2*i, rtnl_dereference(inode->child[0])); - put_child(tn, 2*i+1, rtnl_dereference(inode->child[1])); - - tnode_free_safe(inode); + put_child(tn, 2 * i + 1, tnode_get_child(inode, 1)); + put_child(tn, 2 * i, tnode_get_child(inode, 0)); continue; } - /* An internal node with more than two children */ - /* We will replace this node 'inode' with two new - * ones, 'left' and 'right', each with half of the + * ones, 'node0' and 'node1', each with half of the * original children. The two new nodes will have * a position one bit further down the key and this * means that the "significant" part of their keys * (see the discussion near the top of this file) * will differ by one bit, which will be "0" in - * left's key and "1" in right's key. Since we are + * node0's key and "1" in node1's key. Since we are * moving the key position by one step, the bit that * we are moving away from - the bit at position - * (inode->pos) - is the one that will differ between - * left and right. So... we synthesize that bit in the - * two new keys. - * The mask 'm' below will be a single "one" bit at - * the position (inode->pos) + * (tn->pos) - is the one that will differ between + * node0 and node1. So... we synthesize that bit in the + * two new keys. 
*/ + node1 = tnode_new(inode->key | m, inode->pos, inode->bits - 1); + if (!node1) + goto nomem; + node0 = tnode_new(inode->key, inode->pos, inode->bits - 1); + + tnode_free_append(tn, node1); + if (!node0) + goto nomem; + tnode_free_append(tn, node0); + + /* populate child pointers in new nodes */ + for (k = tnode_child_length(inode), j = k / 2; j;) { + put_child(node1, --j, tnode_get_child(inode, --k)); + put_child(node0, j, tnode_get_child(inode, j)); + put_child(node1, --j, tnode_get_child(inode, --k)); + put_child(node0, j, tnode_get_child(inode, j)); + } - /* Use the old key, but set the new significant - * bit to zero. - */ + /* link new nodes to parent */ + NODE_INIT_PARENT(node1, tn); + NODE_INIT_PARENT(node0, tn); + + /* link parent to nodes */ + put_child(tn, 2 * i + 1, node1); + put_child(tn, 2 * i, node0); + } + + /* setup the parent pointers into and out of this node */ + replace(t, oldtnode, tn); + + return 0; +nomem: + /* all pointers should be clean so we are done */ + tnode_free(tn); + return -ENOMEM; +} + +static int halve(struct trie *t, struct tnode *oldtnode) +{ + struct tnode *tn; + unsigned long i; + + pr_debug("In halve\n"); - left = (struct tnode *) tnode_get_child(tn, 2*i); - put_child(tn, 2*i, NULL); + tn = tnode_new(oldtnode->key, oldtnode->pos + 1, oldtnode->bits - 1); + if (!tn) + return -ENOMEM; - BUG_ON(!left); + /* prepare oldtnode to be freed */ + tnode_free_init(oldtnode); - right = (struct tnode *) tnode_get_child(tn, 2*i+1); - put_child(tn, 2*i+1, NULL); + /* Assemble all of the pointers in our cluster, in this case that + * represents all of the pointers out of our allocated nodes that + * point to existing tnodes and the links between our allocated + * nodes. + */ + for (i = tnode_child_length(oldtnode); i;) { + struct tnode *node1 = tnode_get_child(oldtnode, --i); + struct tnode *node0 = tnode_get_child(oldtnode, --i); + struct tnode *inode; - BUG_ON(!right); + /* At least one of the children is empty */ + if (!node1 || !node0) { + put_child(tn, i / 2, node1 ? 
: node0); + continue; + } - size = tnode_child_length(left); - for (j = 0; j < size; j++) { - put_child(left, j, rtnl_dereference(inode->child[j])); - put_child(right, j, rtnl_dereference(inode->child[j + size])); + /* Two nonempty children */ + inode = tnode_new(node0->key, oldtnode->pos, 1); + if (!inode) { + tnode_free(tn); + return -ENOMEM; } - put_child(tn, 2*i, resize(t, left)); - put_child(tn, 2*i+1, resize(t, right)); + tnode_free_append(tn, inode); + + /* initialize pointers out of node */ + put_child(inode, 1, node1); + put_child(inode, 0, node0); + NODE_INIT_PARENT(inode, tn); - tnode_free_safe(inode); + /* link parent to node */ + put_child(tn, i / 2, inode); } - tnode_free_safe(oldtnode); - return tn; -nomem: - tnode_clean_free(tn); - return ERR_PTR(-ENOMEM); + + /* setup the parent pointers into and out of this node */ + replace(t, oldtnode, tn); + + return 0; } -static struct tnode *halve(struct trie *t, struct tnode *tn) +static void collapse(struct trie *t, struct tnode *oldtnode) { - struct tnode *oldtnode = tn; - struct rt_trie_node *left, *right; - int i; - int olen = tnode_child_length(tn); + struct tnode *n, *tp; + unsigned long i; - pr_debug("In halve\n"); + /* scan the tnode looking for that one child that might still exist */ + for (n = NULL, i = tnode_child_length(oldtnode); !n && i;) + n = tnode_get_child(oldtnode, --i); - tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits - 1); + /* compress one level */ + tp = node_parent(oldtnode); + put_child_root(tp, t, oldtnode->key, n); + node_set_parent(n, tp); - if (!tn) - return ERR_PTR(-ENOMEM); + /* drop dead node */ + node_free(oldtnode); +} - /* - * Preallocate and store tnodes before the actual work so we - * don't get into an inconsistent state if memory allocation - * fails. In case of failure we return the oldnode and halve - * of tnode is ignored. +static unsigned char update_suffix(struct tnode *tn) +{ + unsigned char slen = tn->pos; + unsigned long stride, i; + + /* search though the list of children looking for nodes that might + * have a suffix greater than the one we currently have. This is + * why we start with a stride of 2 since a stride of 1 would + * represent the nodes with suffix length equal to tn->pos */ + for (i = 0, stride = 0x2ul ; i < tnode_child_length(tn); i += stride) { + struct tnode *n = tnode_get_child(tn, i); - for (i = 0; i < olen; i += 2) { - left = tnode_get_child(oldtnode, i); - right = tnode_get_child(oldtnode, i+1); + if (!n || (n->slen <= slen)) + continue; - /* Two nonempty children */ - if (left && right) { - struct tnode *newn; + /* update stride and slen based on new value */ + stride <<= (n->slen - slen); + slen = n->slen; + i &= ~(stride - 1); - newn = tnode_new(left->key, tn->pos + tn->bits, 1); + /* if slen covers all but the last bit we can stop here + * there will be nothing longer than that since only node + * 0 and 1 << (bits - 1) could have that as their suffix + * length. + */ + if ((slen + 1) >= (tn->pos + tn->bits)) + break; + } - if (!newn) - goto nomem; + tn->slen = slen; - put_child(tn, i/2, (struct rt_trie_node *)newn); - } + return slen; +} - } +/* From "Implementing a dynamic compressed trie" by Stefan Nilsson of + * the Helsinki University of Technology and Matti Tikkanen of Nokia + * Telecommunications, page 6: + * "A node is doubled if the ratio of non-empty children to all + * children in the *doubled* node is at least 'high'." + * + * 'high' in this instance is the variable 'inflate_threshold'. 
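As a worked instance (assuming the usual in-tree default of 50 for inflate_threshold; the actual value is set elsewhere in this file): a node with 16 child slots, 6 of them empty and 4 of them full, gives 50 * (16 - 6 + 4) = 700 against 50 * 16 = 800, so it is left alone, while the same node with only 2 empty slots gives 50 * (16 - 2 + 4) = 900 >= 800 and inflate() is attempted. How aggressively nodes are doubled is therefore governed entirely by inflate_threshold.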
It + * is expressed as a percentage, so we multiply it with + * tnode_child_length() and instead of multiplying by 2 (since the + * child array will be doubled by inflate()) and multiplying + * the left-hand side by 100 (to handle the percentage thing) we + * multiply the left-hand side by 50. + * + * The left-hand side may look a bit weird: tnode_child_length(tn) + * - tn->empty_children is of course the number of non-null children + * in the current node. tn->full_children is the number of "full" + * children, that is non-null tnodes with a skip value of 0. + * All of those will be doubled in the resulting inflated tnode, so + * we just count them one extra time here. + * + * A clearer way to write this would be: + * + * to_be_doubled = tn->full_children; + * not_to_be_doubled = tnode_child_length(tn) - tn->empty_children - + * tn->full_children; + * + * new_child_length = tnode_child_length(tn) * 2; + * + * new_fill_factor = 100 * (not_to_be_doubled + 2*to_be_doubled) / + * new_child_length; + * if (new_fill_factor >= inflate_threshold) + * + * ...and so on, tho it would mess up the while () loop. + * + * anyway, + * 100 * (not_to_be_doubled + 2*to_be_doubled) / new_child_length >= + * inflate_threshold + * + * avoid a division: + * 100 * (not_to_be_doubled + 2*to_be_doubled) >= + * inflate_threshold * new_child_length + * + * expand not_to_be_doubled and to_be_doubled, and shorten: + * 100 * (tnode_child_length(tn) - tn->empty_children + + * tn->full_children) >= inflate_threshold * new_child_length + * + * expand new_child_length: + * 100 * (tnode_child_length(tn) - tn->empty_children + + * tn->full_children) >= + * inflate_threshold * tnode_child_length(tn) * 2 + * + * shorten again: + * 50 * (tn->full_children + tnode_child_length(tn) - + * tn->empty_children) >= inflate_threshold * + * tnode_child_length(tn) + * + */ +static bool should_inflate(const struct tnode *tp, const struct tnode *tn) +{ + unsigned long used = tnode_child_length(tn); + unsigned long threshold = used; - for (i = 0; i < olen; i += 2) { - struct tnode *newBinNode; + /* Keep root node larger */ + threshold *= tp ? inflate_threshold : inflate_threshold_root; + used -= tn->empty_children; + used += tn->full_children; - left = tnode_get_child(oldtnode, i); - right = tnode_get_child(oldtnode, i+1); + /* if bits == KEYLENGTH then pos = 0, and will fail below */ - /* At least one of the children is empty */ - if (left == NULL) { - if (right == NULL) /* Both are empty */ - continue; - put_child(tn, i/2, right); - continue; + return (used > 1) && tn->pos && ((50 * used) >= threshold); +} + +static bool should_halve(const struct tnode *tp, const struct tnode *tn) +{ + unsigned long used = tnode_child_length(tn); + unsigned long threshold = used; + + /* Keep root node larger */ + threshold *= tp ? 
halve_threshold : halve_threshold_root; + used -= tn->empty_children; + + /* if bits == KEYLENGTH then used = 100% on wrap, and will fail below */ + + return (used > 1) && (tn->bits > 1) && ((100 * used) < threshold); +} + +static bool should_collapse(const struct tnode *tn) +{ + unsigned long used = tnode_child_length(tn); + + used -= tn->empty_children; + + /* account for bits == KEYLENGTH case */ + if ((tn->bits == KEYLENGTH) && tn->full_children) + used -= KEY_MAX; + + /* One child or none, time to drop us from the trie */ + return used < 2; +} + +#define MAX_WORK 10 +static void resize(struct trie *t, struct tnode *tn) +{ + struct tnode *tp = node_parent(tn); + struct tnode __rcu **cptr; + int max_work = MAX_WORK; + + pr_debug("In tnode_resize %p inflate_threshold=%d threshold=%d\n", + tn, inflate_threshold, halve_threshold); + + /* track the tnode via the pointer from the parent instead of + * doing it ourselves. This way we can let RCU fully do its + * thing without us interfering + */ + cptr = tp ? &tp->child[get_index(tn->key, tp)] : &t->trie; + BUG_ON(tn != rtnl_dereference(*cptr)); + + /* Double as long as the resulting node has a number of + * nonempty nodes that are above the threshold. + */ + while (should_inflate(tp, tn) && max_work) { + if (inflate(t, tn)) { +#ifdef CONFIG_IP_FIB_TRIE_STATS + this_cpu_inc(t->stats->resize_node_skipped); +#endif + break; } - if (right == NULL) { - put_child(tn, i/2, left); - continue; + max_work--; + tn = rtnl_dereference(*cptr); + } + + /* Return if at least one inflate is run */ + if (max_work != MAX_WORK) + return; + + /* Halve as long as the number of empty children in this + * node is above threshold. + */ + while (should_halve(tp, tn) && max_work) { + if (halve(t, tn)) { +#ifdef CONFIG_IP_FIB_TRIE_STATS + this_cpu_inc(t->stats->resize_node_skipped); +#endif + break; } - /* Two nonempty children */ - newBinNode = (struct tnode *) tnode_get_child(tn, i/2); - put_child(tn, i/2, NULL); - put_child(newBinNode, 0, left); - put_child(newBinNode, 1, right); - put_child(tn, i/2, resize(t, newBinNode)); + max_work--; + tn = rtnl_dereference(*cptr); + } + + /* Only one child remains */ + if (should_collapse(tn)) { + collapse(t, tn); + return; + } + + /* Return if at least one deflate was run */ + if (max_work != MAX_WORK) + return; + + /* push the suffix length to the parent node */ + if (tn->slen > tn->pos) { + unsigned char slen = update_suffix(tn); + + if (tp && (slen > tp->slen)) + tp->slen = slen; } - tnode_free_safe(oldtnode); - return tn; -nomem: - tnode_clean_free(tn); - return ERR_PTR(-ENOMEM); } /* readside must use rcu_read_lock currently dump routines via get_fa_head and dump */ -static struct leaf_info *find_leaf_info(struct leaf *l, int plen) +static struct leaf_info *find_leaf_info(struct tnode *l, int plen) { struct hlist_head *head = &l->list; struct leaf_info *li; @@ -916,7 +881,7 @@ static struct leaf_info *find_leaf_info(struct leaf *l, int plen) return NULL; } -static inline struct list_head *get_fa_head(struct leaf *l, int plen) +static inline struct list_head *get_fa_head(struct tnode *l, int plen) { struct leaf_info *li = find_leaf_info(l, plen); @@ -926,8 +891,51 @@ static inline struct list_head *get_fa_head(struct leaf *l, int plen) return &li->falh; } -static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new) +static void leaf_pull_suffix(struct tnode *l) +{ + struct tnode *tp = node_parent(l); + + while (tp && (tp->slen > tp->pos) && (tp->slen > l->slen)) { + if (update_suffix(tp) > l->slen) + 
break; + tp = node_parent(tp); + } +} + +static void leaf_push_suffix(struct tnode *l) +{ + struct tnode *tn = node_parent(l); + + /* if this is a new leaf then tn will be NULL and we can sort + * out parent suffix lengths as a part of trie_rebalance + */ + while (tn && (tn->slen < l->slen)) { + tn->slen = l->slen; + tn = node_parent(tn); + } +} + +static void remove_leaf_info(struct tnode *l, struct leaf_info *old) { + /* record the location of the previous list_info entry */ + struct hlist_node **pprev = old->hlist.pprev; + struct leaf_info *li = hlist_entry(pprev, typeof(*li), hlist.next); + + /* remove the leaf info from the list */ + hlist_del_rcu(&old->hlist); + + /* only access li if it is pointing at the last valid hlist_node */ + if (hlist_empty(&l->list) || (*pprev)) + return; + + /* update the trie with the latest suffix length */ + l->slen = KEYLENGTH - li->plen; + leaf_pull_suffix(l); +} + +static void insert_leaf_info(struct tnode *l, struct leaf_info *new) +{ + struct hlist_head *head = &l->list; struct leaf_info *li = NULL, *last = NULL; if (hlist_empty(head)) { @@ -944,218 +952,174 @@ static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new) else hlist_add_before_rcu(&new->hlist, &li->hlist); } + + /* if we added to the tail node then we need to update slen */ + if (l->slen < (KEYLENGTH - new->plen)) { + l->slen = KEYLENGTH - new->plen; + leaf_push_suffix(l); + } } /* rcu_read_lock needs to be hold by caller from readside */ +static struct tnode *fib_find_node(struct trie *t, u32 key) +{ + struct tnode *n = rcu_dereference_rtnl(t->trie); + + while (n) { + unsigned long index = get_index(key, n); + + /* This bit of code is a bit tricky but it combines multiple + * checks into a single check. The prefix consists of the + * prefix plus zeros for the bits in the cindex. The index + * is the difference between the key and this value. From + * this we can actually derive several pieces of data. + * if (index & (~0ul << bits)) + * we have a mismatch in skip bits and failed + * else + * we know the value is cindex + */ + if (index & (~0ul << n->bits)) + return NULL; -static struct leaf * -fib_find_node(struct trie *t, u32 key) -{ - int pos; - struct tnode *tn; - struct rt_trie_node *n; + /* we have found a leaf. Prefixes have already been compared */ + if (IS_LEAF(n)) + break; - pos = 0; - n = rcu_dereference_rtnl(t->trie); + n = tnode_get_child_rcu(n, index); + } - while (n != NULL && NODE_TYPE(n) == T_TNODE) { - tn = (struct tnode *) n; + return n; +} - check_tnode(tn); +/* Return the first fib alias matching TOS with + * priority less than or equal to PRIO. + */ +static struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio) +{ + struct fib_alias *fa; - if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) { - pos = tn->pos + tn->bits; - n = tnode_get_child_rcu(tn, - tkey_extract_bits(key, - tn->pos, - tn->bits)); - } else - break; - } - /* Case we have found a leaf. 
Compare prefixes */ + if (!fah) + return NULL; - if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) - return (struct leaf *)n; + list_for_each_entry(fa, fah, fa_list) { + if (fa->fa_tos > tos) + continue; + if (fa->fa_info->fib_priority >= prio || fa->fa_tos < tos) + return fa; + } return NULL; } static void trie_rebalance(struct trie *t, struct tnode *tn) { - int wasfull; - t_key cindex, key; struct tnode *tp; - key = tn->key; - - while (tn != NULL && (tp = node_parent((struct rt_trie_node *)tn)) != NULL) { - cindex = tkey_extract_bits(key, tp->pos, tp->bits); - wasfull = tnode_full(tp, tnode_get_child(tp, cindex)); - tn = (struct tnode *)resize(t, tn); - - tnode_put_child_reorg(tp, cindex, - (struct rt_trie_node *)tn, wasfull); - - tp = node_parent((struct rt_trie_node *) tn); - if (!tp) - rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); - - tnode_free_flush(); - if (!tp) - break; + while ((tp = node_parent(tn)) != NULL) { + resize(t, tn); tn = tp; } /* Handle last (top) tnode */ if (IS_TNODE(tn)) - tn = (struct tnode *)resize(t, tn); - - rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); - tnode_free_flush(); + resize(t, tn); } /* only used from updater-side */ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) { - int pos, newpos; - struct tnode *tp = NULL, *tn = NULL; - struct rt_trie_node *n; - struct leaf *l; - int missbit; struct list_head *fa_head = NULL; + struct tnode *l, *n, *tp = NULL; struct leaf_info *li; - t_key cindex; - pos = 0; + li = leaf_info_new(plen); + if (!li) + return NULL; + fa_head = &li->falh; + n = rtnl_dereference(t->trie); /* If we point to NULL, stop. Either the tree is empty and we should * just put a new leaf in if, or we have reached an empty child slot, * and we should just put our new leaf in that. - * If we point to a T_TNODE, check if it matches our key. Note that - * a T_TNODE might be skipping any number of bits - its 'pos' need - * not be the parent's 'pos'+'bits'! - * - * If it does match the current key, get pos/bits from it, extract - * the index from our key, push the T_TNODE and walk the tree. - * - * If it doesn't, we have to replace it with a new T_TNODE. * - * If we point to a T_LEAF, it might or might not have the same key - * as we do. If it does, just change the value, update the T_LEAF's - * value, and return it. - * If it doesn't, we need to replace it with a T_TNODE. + * If we hit a node with a key that does't match then we should stop + * and create a new tnode to replace that node and insert ourselves + * and the other node into the new tnode. */ - - while (n != NULL && NODE_TYPE(n) == T_TNODE) { - tn = (struct tnode *) n; - - check_tnode(tn); - - if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) { - tp = tn; - pos = tn->pos + tn->bits; - n = tnode_get_child(tn, - tkey_extract_bits(key, - tn->pos, - tn->bits)); - - BUG_ON(n && node_parent(n) != tn); - } else + while (n) { + unsigned long index = get_index(key, n); + + /* This bit of code is a bit tricky but it combines multiple + * checks into a single check. The prefix consists of the + * prefix plus zeros for the "bits" in the prefix. The index + * is the difference between the key and this value. From + * this we can actually derive several pieces of data. 
+ * if !(index >> bits) + * we know the value is child index + * else + * we have a mismatch in skip bits and failed + */ + if (index >> n->bits) break; - } - /* - * n ----> NULL, LEAF or TNODE - * - * tp is n's (parent) ----> NULL or TNODE - */ - - BUG_ON(tp && IS_LEAF(tp)); - - /* Case 1: n is a leaf. Compare prefixes */ - - if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) { - l = (struct leaf *) n; - li = leaf_info_new(plen); - - if (!li) - return NULL; + /* we have found a leaf. Prefixes have already been compared */ + if (IS_LEAF(n)) { + /* Case 1: n is a leaf, and prefixes match*/ + insert_leaf_info(n, li); + return fa_head; + } - fa_head = &li->falh; - insert_leaf_info(&l->list, li); - goto done; + tp = n; + n = tnode_get_child_rcu(n, index); } - l = leaf_new(); - if (!l) - return NULL; - - l->key = key; - li = leaf_info_new(plen); - - if (!li) { - free_leaf(l); + l = leaf_new(key); + if (!l) { + free_leaf_info(li); return NULL; } - fa_head = &li->falh; - insert_leaf_info(&l->list, li); - - if (t->trie && n == NULL) { - /* Case 2: n is NULL, and will just insert a new leaf */ + insert_leaf_info(l, li); - node_set_parent((struct rt_trie_node *)l, tp); - - cindex = tkey_extract_bits(key, tp->pos, tp->bits); - put_child(tp, cindex, (struct rt_trie_node *)l); - } else { - /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */ - /* - * Add a new tnode here - * first tnode need some special handling - */ - - if (n) { - pos = tp ? tp->pos+tp->bits : 0; - newpos = tkey_mismatch(key, pos, n->key); - tn = tnode_new(n->key, newpos, 1); - } else { - newpos = 0; - tn = tnode_new(key, newpos, 1); /* First tnode */ - } + /* Case 2: n is a LEAF or a TNODE and the key doesn't match. + * + * Add a new tnode here + * first tnode need some special handling + * leaves us in position for handling as case 3 + */ + if (n) { + struct tnode *tn; + tn = tnode_new(key, __fls(key ^ n->key), 1); if (!tn) { free_leaf_info(li); - free_leaf(l); + node_free(l); return NULL; } - node_set_parent((struct rt_trie_node *)tn, tp); + /* initialize routes out of node */ + NODE_INIT_PARENT(tn, tp); + put_child(tn, get_index(key, tn) ^ 1, n); - missbit = tkey_extract_bits(key, newpos, 1); - put_child(tn, missbit, (struct rt_trie_node *)l); - put_child(tn, 1-missbit, n); - - if (tp) { - cindex = tkey_extract_bits(key, tp->pos, tp->bits); - put_child(tp, cindex, (struct rt_trie_node *)tn); - } else { - rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); - } + /* start adding routes into the node */ + put_child_root(tp, t, key, tn); + node_set_parent(n, tn); + /* parent now has a NULL spot where the leaf can go */ tp = tn; } - if (tp && tp->pos + tp->bits > 32) - pr_warn("fib_trie tp=%p pos=%d, bits=%d, key=%0x plen=%d\n", - tp, tp->pos, tp->bits, key, plen); - - /* Rebalance the trie */ + /* Case 3: n is NULL, and will just insert a new leaf */ + if (tp) { + NODE_INIT_PARENT(l, tp); + put_child(tp, get_index(key, tp), l); + trie_rebalance(t, tp); + } else { + rcu_assign_pointer(t->trie, l); + } - trie_rebalance(t, tp); -done: return fa_head; } @@ -1172,7 +1136,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) u8 tos = cfg->fc_tos; u32 key, mask; int err; - struct leaf *l; + struct tnode *l; if (plen > 32) return -EINVAL; @@ -1329,18 +1293,130 @@ err: return err; } +static inline t_key prefix_mismatch(t_key key, struct tnode *n) +{ + t_key prefix = n->key; + + return (key ^ prefix) & (prefix | -prefix); +} + /* should be called with rcu_read_lock */ -static int check_leaf(struct 
fib_table *tb, struct trie *t, struct leaf *l, - t_key key, const struct flowi4 *flp, - struct fib_result *res, int fib_flags) +int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, + struct fib_result *res, int fib_flags) { + struct trie *t = (struct trie *)tb->tb_data; +#ifdef CONFIG_IP_FIB_TRIE_STATS + struct trie_use_stats __percpu *stats = t->stats; +#endif + const t_key key = ntohl(flp->daddr); + struct tnode *n, *pn; struct leaf_info *li; - struct hlist_head *hhead = &l->list; + t_key cindex; + + n = rcu_dereference(t->trie); + if (!n) + return -EAGAIN; + +#ifdef CONFIG_IP_FIB_TRIE_STATS + this_cpu_inc(stats->gets); +#endif + + pn = n; + cindex = 0; + + /* Step 1: Travel to the longest prefix match in the trie */ + for (;;) { + unsigned long index = get_index(key, n); + + /* This bit of code is a bit tricky but it combines multiple + * checks into a single check. The prefix consists of the + * prefix plus zeros for the "bits" in the prefix. The index + * is the difference between the key and this value. From + * this we can actually derive several pieces of data. + * if (index & (~0ul << bits)) + * we have a mismatch in skip bits and failed + * else + * we know the value is cindex + */ + if (index & (~0ul << n->bits)) + break; + + /* we have found a leaf. Prefixes have already been compared */ + if (IS_LEAF(n)) + goto found; + + /* only record pn and cindex if we are going to be chopping + * bits later. Otherwise we are just wasting cycles. + */ + if (n->slen > n->pos) { + pn = n; + cindex = index; + } + + n = tnode_get_child_rcu(n, index); + if (unlikely(!n)) + goto backtrace; + } + + /* Step 2: Sort out leaves and begin backtracing for longest prefix */ + for (;;) { + /* record the pointer where our next node pointer is stored */ + struct tnode __rcu **cptr = n->child; + + /* This test verifies that none of the bits that differ + * between the key and the prefix exist in the region of + * the lsb and higher in the prefix. + */ + if (unlikely(prefix_mismatch(key, n)) || (n->slen == n->pos)) + goto backtrace; + + /* exit out and process leaf */ + if (unlikely(IS_LEAF(n))) + break; + + /* Don't bother recording parent info. Since we are in + * prefix match mode we will have to come back to wherever + * we started this traversal anyway + */ + + while ((n = rcu_dereference(*cptr)) == NULL) { +backtrace: +#ifdef CONFIG_IP_FIB_TRIE_STATS + if (!n) + this_cpu_inc(stats->null_node_hit); +#endif + /* If we are at cindex 0 there are no more bits for + * us to strip at this level so we must ascend back + * up one level to see if there are any more bits to + * be stripped there. 
+ */ + while (!cindex) { + t_key pkey = pn->key; + + pn = node_parent_rcu(pn); + if (unlikely(!pn)) + return -EAGAIN; +#ifdef CONFIG_IP_FIB_TRIE_STATS + this_cpu_inc(stats->backtrack); +#endif + /* Get Child's index */ + cindex = get_index(pkey, pn); + } + + /* strip the least significant bit from the cindex */ + cindex &= cindex - 1; + + /* grab pointer for next child node */ + cptr = &pn->child[cindex]; + } + } - hlist_for_each_entry_rcu(li, hhead, hlist) { +found: + /* Step 3: Process the leaf, if that fails fall back to backtracing */ + hlist_for_each_entry_rcu(li, &n->list, hlist) { struct fib_alias *fa; - if (l->key != (key & li->mask_plen)) + if ((key ^ n->key) & li->mask_plen) continue; list_for_each_entry_rcu(fa, &li->falh, fa_list) { @@ -1355,9 +1431,9 @@ static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l, continue; fib_alias_accessed(fa); err = fib_props[fa->fa_type].error; - if (err) { + if (unlikely(err < 0)) { #ifdef CONFIG_IP_FIB_TRIE_STATS - t->stats.semantic_match_passed++; + this_cpu_inc(stats->semantic_match_passed); #endif return err; } @@ -1371,241 +1447,48 @@ static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l, if (flp->flowi4_oif && flp->flowi4_oif != nh->nh_oif) continue; -#ifdef CONFIG_IP_FIB_TRIE_STATS - t->stats.semantic_match_passed++; -#endif + if (!(fib_flags & FIB_LOOKUP_NOREF)) + atomic_inc(&fi->fib_clntref); + res->prefixlen = li->plen; res->nh_sel = nhsel; res->type = fa->fa_type; - res->scope = fa->fa_info->fib_scope; + res->scope = fi->fib_scope; res->fi = fi; res->table = tb; res->fa_head = &li->falh; - if (!(fib_flags & FIB_LOOKUP_NOREF)) - atomic_inc(&fi->fib_clntref); - return 0; - } - } - -#ifdef CONFIG_IP_FIB_TRIE_STATS - t->stats.semantic_match_miss++; -#endif - } - - return 1; -} - -int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, - struct fib_result *res, int fib_flags) -{ - struct trie *t = (struct trie *) tb->tb_data; - int ret; - struct rt_trie_node *n; - struct tnode *pn; - unsigned int pos, bits; - t_key key = ntohl(flp->daddr); - unsigned int chopped_off; - t_key cindex = 0; - unsigned int current_prefix_length = KEYLENGTH; - struct tnode *cn; - t_key pref_mismatch; - - rcu_read_lock(); - - n = rcu_dereference(t->trie); - if (!n) - goto failed; - #ifdef CONFIG_IP_FIB_TRIE_STATS - t->stats.gets++; + this_cpu_inc(stats->semantic_match_passed); #endif - - /* Just a leaf? */ - if (IS_LEAF(n)) { - ret = check_leaf(tb, t, (struct leaf *)n, key, flp, res, fib_flags); - goto found; - } - - pn = (struct tnode *) n; - chopped_off = 0; - - while (pn) { - pos = pn->pos; - bits = pn->bits; - - if (!chopped_off) - cindex = tkey_extract_bits(mask_pfx(key, current_prefix_length), - pos, bits); - - n = tnode_get_child_rcu(pn, cindex); - - if (n == NULL) { -#ifdef CONFIG_IP_FIB_TRIE_STATS - t->stats.null_node_hit++; -#endif - goto backtrace; - } - - if (IS_LEAF(n)) { - ret = check_leaf(tb, t, (struct leaf *)n, key, flp, res, fib_flags); - if (ret > 0) - goto backtrace; - goto found; - } - - cn = (struct tnode *)n; - - /* - * It's a tnode, and we can do some extra checks here if we - * like, to avoid descending into a dead-end branch. - * This tnode is in the parent's child array at index - * key[p_pos..p_pos+p_bits] but potentially with some bits - * chopped off, so in reality the index may be just a - * subprefix, padded with zero at the end. 
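The new fib_table_lookup() above replaces all of this per-step prefix bookkeeping with a single xor-and-shift test per node. A minimal stand-alone sketch of that test (get_index() is introduced earlier in this patch and is assumed here to be (key ^ n->key) >> n->pos; the node and keys below are invented):

#include <stdio.h>
#include <stdint.h>

typedef uint32_t t_key;

/* reduced to the two fields the check needs */
struct node {
	t_key key;
	unsigned char pos;	/* bit offset of the child index */
	unsigned char bits;	/* number of index bits */
};

/* assumed to match the get_index() helper added by this patch */
static unsigned long get_index(t_key key, const struct node *n)
{
	return (unsigned long)(key ^ n->key) >> n->pos;
}

int main(void)
{
	struct node n = { .key = 0xc0a80000, .pos = 14, .bits = 2 };	/* 192.168.0.0/16, 2 index bits */
	t_key keys[] = { 0xc0a86401,	/* matches the node's prefix */
			 0xc0a90001 };	/* differs in a skipped bit  */
	int i;

	for (i = 0; i < 2; i++) {
		unsigned long index = get_index(keys[i], &n);

		if (index & (~0ul << n->bits))
			printf("0x%08x: skip-bit mismatch, not under this node\n",
			       (unsigned int)keys[i]);
		else
			printf("0x%08x: descend into child slot %lu\n",
			       (unsigned int)keys[i], index);
	}
	return 0;
}

If the shifted difference fits inside n->bits the key belongs under this node and the low bits are the child index; any higher difference means one of the skipped prefix bits disagrees. On a miss the new lookup then strips the least significant set bit from cindex (cindex &= cindex - 1) so the next probe covers a shorter prefix within the same parent.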
- * We can also take a look at any skipped bits in this - * tnode - everything up to p_pos is supposed to be ok, - * and the non-chopped bits of the index (se previous - * paragraph) are also guaranteed ok, but the rest is - * considered unknown. - * - * The skipped bits are key[pos+bits..cn->pos]. - */ - - /* If current_prefix_length < pos+bits, we are already doing - * actual prefix matching, which means everything from - * pos+(bits-chopped_off) onward must be zero along some - * branch of this subtree - otherwise there is *no* valid - * prefix present. Here we can only check the skipped - * bits. Remember, since we have already indexed into the - * parent's child array, we know that the bits we chopped of - * *are* zero. - */ - - /* NOTA BENE: Checking only skipped bits - for the new node here */ - - if (current_prefix_length < pos+bits) { - if (tkey_extract_bits(cn->key, current_prefix_length, - cn->pos - current_prefix_length) - || !(cn->child[0])) - goto backtrace; - } - - /* - * If chopped_off=0, the index is fully validated and we - * only need to look at the skipped bits for this, the new, - * tnode. What we actually want to do is to find out if - * these skipped bits match our key perfectly, or if we will - * have to count on finding a matching prefix further down, - * because if we do, we would like to have some way of - * verifying the existence of such a prefix at this point. - */ - - /* The only thing we can do at this point is to verify that - * any such matching prefix can indeed be a prefix to our - * key, and if the bits in the node we are inspecting that - * do not match our key are not ZERO, this cannot be true. - * Thus, find out where there is a mismatch (before cn->pos) - * and verify that all the mismatching bits are zero in the - * new tnode's key. - */ - - /* - * Note: We aren't very concerned about the piece of - * the key that precede pn->pos+pn->bits, since these - * have already been checked. The bits after cn->pos - * aren't checked since these are by definition - * "unknown" at this point. Thus, what we want to see - * is if we are about to enter the "prefix matching" - * state, and in that case verify that the skipped - * bits that will prevail throughout this subtree are - * zero, as they have to be if we are to find a - * matching prefix. - */ - - pref_mismatch = mask_pfx(cn->key ^ key, cn->pos); - - /* - * In short: If skipped bits in this node do not match - * the search key, enter the "prefix matching" - * state.directly. - */ - if (pref_mismatch) { - /* fls(x) = __fls(x) + 1 */ - int mp = KEYLENGTH - __fls(pref_mismatch) - 1; - - if (tkey_extract_bits(cn->key, mp, cn->pos - mp) != 0) - goto backtrace; - - if (current_prefix_length >= cn->pos) - current_prefix_length = mp; + return err; + } } - pn = (struct tnode *)n; /* Descend */ - chopped_off = 0; - continue; - -backtrace: - chopped_off++; - - /* As zero don't change the child key (cindex) */ - while ((chopped_off <= pn->bits) - && !(cindex & (1<<(chopped_off-1)))) - chopped_off++; - - /* Decrease current_... with bits chopped off */ - if (current_prefix_length > pn->pos + pn->bits - chopped_off) - current_prefix_length = pn->pos + pn->bits - - chopped_off; - - /* - * Either we do the actual chop off according or if we have - * chopped off all bits in this tnode walk up to our parent. 
- */ - - if (chopped_off <= pn->bits) { - cindex &= ~(1 << (chopped_off-1)); - } else { - struct tnode *parent = node_parent_rcu((struct rt_trie_node *) pn); - if (!parent) - goto failed; - - /* Get Child's index */ - cindex = tkey_extract_bits(pn->key, parent->pos, parent->bits); - pn = parent; - chopped_off = 0; - #ifdef CONFIG_IP_FIB_TRIE_STATS - t->stats.backtrack++; + this_cpu_inc(stats->semantic_match_miss); #endif - goto backtrace; - } } -failed: - ret = 1; -found: - rcu_read_unlock(); - return ret; + goto backtrace; } EXPORT_SYMBOL_GPL(fib_table_lookup); /* * Remove the leaf and return parent. */ -static void trie_leaf_remove(struct trie *t, struct leaf *l) +static void trie_leaf_remove(struct trie *t, struct tnode *l) { - struct tnode *tp = node_parent((struct rt_trie_node *) l); + struct tnode *tp = node_parent(l); pr_debug("entering trie_leaf_remove(%p)\n", l); if (tp) { - t_key cindex = tkey_extract_bits(l->key, tp->pos, tp->bits); - put_child(tp, cindex, NULL); + put_child(tp, get_index(l->key, tp), NULL); trie_rebalance(t, tp); - } else + } else { RCU_INIT_POINTER(t->trie, NULL); + } - free_leaf(l); + node_free(l); } /* @@ -1619,7 +1502,7 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg) u8 tos = cfg->fc_tos; struct fib_alias *fa, *fa_to_delete; struct list_head *fa_head; - struct leaf *l; + struct tnode *l; struct leaf_info *li; if (plen > 32) @@ -1684,7 +1567,7 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg) tb->tb_num_default--; if (list_empty(fa_head)) { - hlist_del_rcu(&li->hlist); + remove_leaf_info(l, li); free_leaf_info(li); } @@ -1717,12 +1600,13 @@ static int trie_flush_list(struct list_head *head) return found; } -static int trie_flush_leaf(struct leaf *l) +static int trie_flush_leaf(struct tnode *l) { int found = 0; struct hlist_head *lih = &l->list; struct hlist_node *tmp; struct leaf_info *li = NULL; + unsigned char plen = KEYLENGTH; hlist_for_each_entry_safe(li, tmp, lih, hlist) { found += trie_flush_list(&li->falh); @@ -1730,8 +1614,14 @@ static int trie_flush_leaf(struct leaf *l) if (list_empty(&li->falh)) { hlist_del_rcu(&li->hlist); free_leaf_info(li); + continue; } + + plen = li->plen; } + + l->slen = KEYLENGTH - plen; + return found; } @@ -1739,63 +1629,57 @@ static int trie_flush_leaf(struct leaf *l) * Scan for the next right leaf starting at node p->child[idx] * Since we have back pointer, no recursion necessary. */ -static struct leaf *leaf_walk_rcu(struct tnode *p, struct rt_trie_node *c) +static struct tnode *leaf_walk_rcu(struct tnode *p, struct tnode *c) { do { - t_key idx; + unsigned long idx = c ? 
idx = get_index(c->key, p) + 1 : 0; - if (c) - idx = tkey_extract_bits(c->key, p->pos, p->bits) + 1; - else - idx = 0; - - while (idx < 1u << p->bits) { + while (idx < tnode_child_length(p)) { c = tnode_get_child_rcu(p, idx++); if (!c) continue; if (IS_LEAF(c)) - return (struct leaf *) c; + return c; /* Rescan start scanning in new node */ - p = (struct tnode *) c; + p = c; idx = 0; } /* Node empty, walk back up to parent */ - c = (struct rt_trie_node *) p; + c = p; } while ((p = node_parent_rcu(c)) != NULL); return NULL; /* Root of trie */ } -static struct leaf *trie_firstleaf(struct trie *t) +static struct tnode *trie_firstleaf(struct trie *t) { - struct tnode *n = (struct tnode *)rcu_dereference_rtnl(t->trie); + struct tnode *n = rcu_dereference_rtnl(t->trie); if (!n) return NULL; if (IS_LEAF(n)) /* trie is just a leaf */ - return (struct leaf *) n; + return n; return leaf_walk_rcu(n, NULL); } -static struct leaf *trie_nextleaf(struct leaf *l) +static struct tnode *trie_nextleaf(struct tnode *l) { - struct rt_trie_node *c = (struct rt_trie_node *) l; - struct tnode *p = node_parent_rcu(c); + struct tnode *p = node_parent_rcu(l); if (!p) return NULL; /* trie with just one leaf */ - return leaf_walk_rcu(p, c); + return leaf_walk_rcu(p, l); } -static struct leaf *trie_leafindex(struct trie *t, int index) +static struct tnode *trie_leafindex(struct trie *t, int index) { - struct leaf *l = trie_firstleaf(t); + struct tnode *l = trie_firstleaf(t); while (l && index-- > 0) l = trie_nextleaf(l); @@ -1810,19 +1694,28 @@ static struct leaf *trie_leafindex(struct trie *t, int index) int fib_table_flush(struct fib_table *tb) { struct trie *t = (struct trie *) tb->tb_data; - struct leaf *l, *ll = NULL; + struct tnode *l, *ll = NULL; int found = 0; for (l = trie_firstleaf(t); l; l = trie_nextleaf(l)) { found += trie_flush_leaf(l); - if (ll && hlist_empty(&ll->list)) - trie_leaf_remove(t, ll); + if (ll) { + if (hlist_empty(&ll->list)) + trie_leaf_remove(t, ll); + else + leaf_pull_suffix(ll); + } + ll = l; } - if (ll && hlist_empty(&ll->list)) - trie_leaf_remove(t, ll); + if (ll) { + if (hlist_empty(&ll->list)) + trie_leaf_remove(t, ll); + else + leaf_pull_suffix(ll); + } pr_debug("trie_flush found=%d\n", found); return found; @@ -1830,6 +1723,11 @@ int fib_table_flush(struct fib_table *tb) void fib_free_table(struct fib_table *tb) { +#ifdef CONFIG_IP_FIB_TRIE_STATS + struct trie *t = (struct trie *)tb->tb_data; + + free_percpu(t->stats); +#endif /* CONFIG_IP_FIB_TRIE_STATS */ kfree(tb); } @@ -1870,7 +1768,7 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, return skb->len; } -static int fn_trie_dump_leaf(struct leaf *l, struct fib_table *tb, +static int fn_trie_dump_leaf(struct tnode *l, struct fib_table *tb, struct sk_buff *skb, struct netlink_callback *cb) { struct leaf_info *li; @@ -1906,7 +1804,7 @@ static int fn_trie_dump_leaf(struct leaf *l, struct fib_table *tb, int fib_table_dump(struct fib_table *tb, struct sk_buff *skb, struct netlink_callback *cb) { - struct leaf *l; + struct tnode *l; struct trie *t = (struct trie *) tb->tb_data; t_key key = cb->args[2]; int count = cb->args[3]; @@ -1952,7 +1850,7 @@ void __init fib_trie_init(void) 0, SLAB_PANIC, NULL); trie_leaf_kmem = kmem_cache_create("ip_fib_trie", - max(sizeof(struct leaf), + max(sizeof(struct tnode), sizeof(struct leaf_info)), 0, SLAB_PANIC, NULL); } @@ -1973,7 +1871,14 @@ struct fib_table *fib_trie_table(u32 id) tb->tb_num_default = 0; t = (struct trie *) tb->tb_data; - memset(t, 0, sizeof(*t)); + 
RCU_INIT_POINTER(t->trie, NULL); +#ifdef CONFIG_IP_FIB_TRIE_STATS + t->stats = alloc_percpu(struct trie_use_stats); + if (!t->stats) { + kfree(tb); + tb = NULL; + } +#endif return tb; } @@ -1988,10 +1893,10 @@ struct fib_trie_iter { unsigned int depth; }; -static struct rt_trie_node *fib_trie_get_next(struct fib_trie_iter *iter) +static struct tnode *fib_trie_get_next(struct fib_trie_iter *iter) { + unsigned long cindex = iter->index; struct tnode *tn = iter->tnode; - unsigned int cindex = iter->index; struct tnode *p; /* A single entry routing table */ @@ -2001,8 +1906,8 @@ static struct rt_trie_node *fib_trie_get_next(struct fib_trie_iter *iter) pr_debug("get_next iter={node=%p index=%d depth=%d}\n", iter->tnode, iter->index, iter->depth); rescan: - while (cindex < (1<<tn->bits)) { - struct rt_trie_node *n = tnode_get_child_rcu(tn, cindex); + while (cindex < tnode_child_length(tn)) { + struct tnode *n = tnode_get_child_rcu(tn, cindex); if (n) { if (IS_LEAF(n)) { @@ -2010,7 +1915,7 @@ rescan: iter->index = cindex + 1; } else { /* push down one level */ - iter->tnode = (struct tnode *) n; + iter->tnode = n; iter->index = 0; ++iter->depth; } @@ -2021,9 +1926,9 @@ rescan: } /* Current node exhausted, pop back up */ - p = node_parent_rcu((struct rt_trie_node *)tn); + p = node_parent_rcu(tn); if (p) { - cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1; + cindex = get_index(tn->key, p) + 1; tn = p; --iter->depth; goto rescan; @@ -2033,10 +1938,10 @@ rescan: return NULL; } -static struct rt_trie_node *fib_trie_get_first(struct fib_trie_iter *iter, +static struct tnode *fib_trie_get_first(struct fib_trie_iter *iter, struct trie *t) { - struct rt_trie_node *n; + struct tnode *n; if (!t) return NULL; @@ -2046,7 +1951,7 @@ static struct rt_trie_node *fib_trie_get_first(struct fib_trie_iter *iter, return NULL; if (IS_TNODE(n)) { - iter->tnode = (struct tnode *) n; + iter->tnode = n; iter->index = 0; iter->depth = 1; } else { @@ -2060,7 +1965,7 @@ static struct rt_trie_node *fib_trie_get_first(struct fib_trie_iter *iter, static void trie_collect_stats(struct trie *t, struct trie_stat *s) { - struct rt_trie_node *n; + struct tnode *n; struct fib_trie_iter iter; memset(s, 0, sizeof(*s)); @@ -2068,7 +1973,6 @@ static void trie_collect_stats(struct trie *t, struct trie_stat *s) rcu_read_lock(); for (n = fib_trie_get_first(&iter, t); n; n = fib_trie_get_next(&iter)) { if (IS_LEAF(n)) { - struct leaf *l = (struct leaf *)n; struct leaf_info *li; s->leaves++; @@ -2076,19 +1980,13 @@ static void trie_collect_stats(struct trie *t, struct trie_stat *s) if (iter.depth > s->maxdepth) s->maxdepth = iter.depth; - hlist_for_each_entry_rcu(li, &l->list, hlist) + hlist_for_each_entry_rcu(li, &n->list, hlist) ++s->prefixes; } else { - const struct tnode *tn = (const struct tnode *) n; - int i; - s->tnodes++; - if (tn->bits < MAX_STAT_DEPTH) - s->nodesizes[tn->bits]++; - - for (i = 0; i < (1<<tn->bits); i++) - if (!tn->child[i]) - s->nullpointers++; + if (n->bits < MAX_STAT_DEPTH) + s->nodesizes[n->bits]++; + s->nullpointers += n->empty_children; } } rcu_read_unlock(); @@ -2111,7 +2009,7 @@ static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat) seq_printf(seq, "\tMax depth: %u\n", stat->maxdepth); seq_printf(seq, "\tLeaves: %u\n", stat->leaves); - bytes = sizeof(struct leaf) * stat->leaves; + bytes = sizeof(struct tnode) * stat->leaves; seq_printf(seq, "\tPrefixes: %u\n", stat->prefixes); bytes += sizeof(struct leaf_info) * stat->prefixes; @@ -2132,25 +2030,38 @@ static void 
trie_show_stats(struct seq_file *seq, struct trie_stat *stat) seq_putc(seq, '\n'); seq_printf(seq, "\tPointers: %u\n", pointers); - bytes += sizeof(struct rt_trie_node *) * pointers; + bytes += sizeof(struct tnode *) * pointers; seq_printf(seq, "Null ptrs: %u\n", stat->nullpointers); seq_printf(seq, "Total size: %u kB\n", (bytes + 1023) / 1024); } #ifdef CONFIG_IP_FIB_TRIE_STATS static void trie_show_usage(struct seq_file *seq, - const struct trie_use_stats *stats) + const struct trie_use_stats __percpu *stats) { + struct trie_use_stats s = { 0 }; + int cpu; + + /* loop through all of the CPUs and gather up the stats */ + for_each_possible_cpu(cpu) { + const struct trie_use_stats *pcpu = per_cpu_ptr(stats, cpu); + + s.gets += pcpu->gets; + s.backtrack += pcpu->backtrack; + s.semantic_match_passed += pcpu->semantic_match_passed; + s.semantic_match_miss += pcpu->semantic_match_miss; + s.null_node_hit += pcpu->null_node_hit; + s.resize_node_skipped += pcpu->resize_node_skipped; + } + seq_printf(seq, "\nCounters:\n---------\n"); - seq_printf(seq, "gets = %u\n", stats->gets); - seq_printf(seq, "backtracks = %u\n", stats->backtrack); + seq_printf(seq, "gets = %u\n", s.gets); + seq_printf(seq, "backtracks = %u\n", s.backtrack); seq_printf(seq, "semantic match passed = %u\n", - stats->semantic_match_passed); - seq_printf(seq, "semantic match miss = %u\n", - stats->semantic_match_miss); - seq_printf(seq, "null node hit= %u\n", stats->null_node_hit); - seq_printf(seq, "skipped node resize = %u\n\n", - stats->resize_node_skipped); + s.semantic_match_passed); + seq_printf(seq, "semantic match miss = %u\n", s.semantic_match_miss); + seq_printf(seq, "null node hit= %u\n", s.null_node_hit); + seq_printf(seq, "skipped node resize = %u\n\n", s.resize_node_skipped); } #endif /* CONFIG_IP_FIB_TRIE_STATS */ @@ -2173,7 +2084,7 @@ static int fib_triestat_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "Basic info: size of leaf:" " %Zd bytes, size of tnode: %Zd bytes.\n", - sizeof(struct leaf), sizeof(struct tnode)); + sizeof(struct tnode), sizeof(struct tnode)); for (h = 0; h < FIB_TABLE_HASHSZ; h++) { struct hlist_head *head = &net->ipv4.fib_table_hash[h]; @@ -2191,7 +2102,7 @@ static int fib_triestat_seq_show(struct seq_file *seq, void *v) trie_collect_stats(t, &stat); trie_show_stats(seq, &stat); #ifdef CONFIG_IP_FIB_TRIE_STATS - trie_show_usage(seq, &t->stats); + trie_show_usage(seq, t->stats); #endif } } @@ -2212,7 +2123,7 @@ static const struct file_operations fib_triestat_fops = { .release = single_release_net, }; -static struct rt_trie_node *fib_trie_get_idx(struct seq_file *seq, loff_t pos) +static struct tnode *fib_trie_get_idx(struct seq_file *seq, loff_t pos) { struct fib_trie_iter *iter = seq->private; struct net *net = seq_file_net(seq); @@ -2224,7 +2135,7 @@ static struct rt_trie_node *fib_trie_get_idx(struct seq_file *seq, loff_t pos) struct fib_table *tb; hlist_for_each_entry_rcu(tb, head, tb_hlist) { - struct rt_trie_node *n; + struct tnode *n; for (n = fib_trie_get_first(iter, (struct trie *) tb->tb_data); @@ -2253,7 +2164,7 @@ static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos) struct fib_table *tb = iter->tb; struct hlist_node *tb_node; unsigned int h; - struct rt_trie_node *n; + struct tnode *n; ++*pos; /* next node in same table */ @@ -2339,29 +2250,26 @@ static inline const char *rtn_type(char *buf, size_t len, unsigned int t) static int fib_trie_seq_show(struct seq_file *seq, void *v) { const struct fib_trie_iter *iter = seq->private; - struct rt_trie_node 
*n = v; + struct tnode *n = v; if (!node_parent_rcu(n)) fib_table_print(seq, iter->tb); if (IS_TNODE(n)) { - struct tnode *tn = (struct tnode *) n; - __be32 prf = htonl(mask_pfx(tn->key, tn->pos)); + __be32 prf = htonl(n->key); seq_indent(seq, iter->depth-1); - seq_printf(seq, " +-- %pI4/%d %d %d %d\n", - &prf, tn->pos, tn->bits, tn->full_children, - tn->empty_children); - + seq_printf(seq, " +-- %pI4/%zu %u %u %u\n", + &prf, KEYLENGTH - n->pos - n->bits, n->bits, + n->full_children, n->empty_children); } else { - struct leaf *l = (struct leaf *) n; struct leaf_info *li; - __be32 val = htonl(l->key); + __be32 val = htonl(n->key); seq_indent(seq, iter->depth); seq_printf(seq, " |-- %pI4\n", &val); - hlist_for_each_entry_rcu(li, &l->list, hlist) { + hlist_for_each_entry_rcu(li, &n->list, hlist) { struct fib_alias *fa; list_for_each_entry_rcu(fa, &li->falh, fa_list) { @@ -2411,9 +2319,9 @@ struct fib_route_iter { t_key key; }; -static struct leaf *fib_route_get_idx(struct fib_route_iter *iter, loff_t pos) +static struct tnode *fib_route_get_idx(struct fib_route_iter *iter, loff_t pos) { - struct leaf *l = NULL; + struct tnode *l = NULL; struct trie *t = iter->main_trie; /* use cache location of last found key */ @@ -2458,7 +2366,7 @@ static void *fib_route_seq_start(struct seq_file *seq, loff_t *pos) static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct fib_route_iter *iter = seq->private; - struct leaf *l = v; + struct tnode *l = v; ++*pos; if (v == SEQ_START_TOKEN) { @@ -2504,7 +2412,7 @@ static unsigned int fib_flag_trans(int type, __be32 mask, const struct fib_info */ static int fib_route_seq_show(struct seq_file *seq, void *v) { - struct leaf *l = v; + struct tnode *l = v; struct leaf_info *li; if (v == SEQ_START_TOKEN) { diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index b986298a7ba3..ff069f6597ac 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -22,14 +22,18 @@ static LIST_HEAD(fou_list); struct fou { struct socket *sock; u8 protocol; + u8 flags; u16 port; struct udp_offload udp_offloads; struct list_head list; }; +#define FOU_F_REMCSUM_NOPARTIAL BIT(0) + struct fou_cfg { u16 type; u8 protocol; + u8 flags; struct udp_port_cfg udp_config; }; @@ -64,32 +68,20 @@ static int fou_udp_recv(struct sock *sk, struct sk_buff *skb) } static struct guehdr *gue_remcsum(struct sk_buff *skb, struct guehdr *guehdr, - void *data, size_t hdrlen, u8 ipproto) + void *data, size_t hdrlen, u8 ipproto, + bool nopartial) { __be16 *pd = data; size_t start = ntohs(pd[0]); size_t offset = ntohs(pd[1]); size_t plen = hdrlen + max_t(size_t, offset + sizeof(u16), start); - __wsum delta; - - if (skb->remcsum_offload) { - /* Already processed in GRO path */ - skb->remcsum_offload = 0; - return guehdr; - } if (!pskb_may_pull(skb, plen)) return NULL; guehdr = (struct guehdr *)&udp_hdr(skb)[1]; - if (unlikely(skb->ip_summed != CHECKSUM_COMPLETE)) - __skb_checksum_complete(skb); - - delta = remcsum_adjust((void *)guehdr + hdrlen, - skb->csum, start, offset); - - /* Adjust skb->csum since we changed the packet */ - skb->csum = csum_add(skb->csum, delta); + skb_remcsum_process(skb, (void *)guehdr + hdrlen, + start, offset, nopartial); return guehdr; } @@ -150,7 +142,9 @@ static int gue_udp_recv(struct sock *sk, struct sk_buff *skb) if (flags & GUE_PFLAG_REMCSUM) { guehdr = gue_remcsum(skb, guehdr, data + doffset, - hdrlen, guehdr->proto_ctype); + hdrlen, guehdr->proto_ctype, + !!(fou->flags & + FOU_F_REMCSUM_NOPARTIAL)); if (!guehdr) goto drop; @@ -174,7 +168,8 @@ drop: } static struct 
sk_buff **fou_gro_receive(struct sk_buff **head, - struct sk_buff *skb) + struct sk_buff *skb, + struct udp_offload *uoff) { const struct net_offload *ops; struct sk_buff **pp = NULL; @@ -195,7 +190,8 @@ out_unlock: return pp; } -static int fou_gro_complete(struct sk_buff *skb, int nhoff) +static int fou_gro_complete(struct sk_buff *skb, int nhoff, + struct udp_offload *uoff) { const struct net_offload *ops; u8 proto = NAPI_GRO_CB(skb)->proto; @@ -220,16 +216,16 @@ out_unlock: static struct guehdr *gue_gro_remcsum(struct sk_buff *skb, unsigned int off, struct guehdr *guehdr, void *data, - size_t hdrlen, u8 ipproto) + size_t hdrlen, u8 ipproto, + struct gro_remcsum *grc, bool nopartial) { __be16 *pd = data; size_t start = ntohs(pd[0]); size_t offset = ntohs(pd[1]); size_t plen = hdrlen + max_t(size_t, offset + sizeof(u16), start); - __wsum delta; if (skb->remcsum_offload) - return guehdr; + return NULL; if (!NAPI_GRO_CB(skb)->csum_valid) return NULL; @@ -241,12 +237,8 @@ static struct guehdr *gue_gro_remcsum(struct sk_buff *skb, unsigned int off, return NULL; } - delta = remcsum_adjust((void *)guehdr + hdrlen, - NAPI_GRO_CB(skb)->csum, start, offset); - - /* Adjust skb->csum since we changed the packet */ - skb->csum = csum_add(skb->csum, delta); - NAPI_GRO_CB(skb)->csum = csum_add(NAPI_GRO_CB(skb)->csum, delta); + skb_gro_remcsum_process(skb, (void *)guehdr + hdrlen, + start, offset, grc, nopartial); skb->remcsum_offload = 1; @@ -254,7 +246,8 @@ static struct guehdr *gue_gro_remcsum(struct sk_buff *skb, unsigned int off, } static struct sk_buff **gue_gro_receive(struct sk_buff **head, - struct sk_buff *skb) + struct sk_buff *skb, + struct udp_offload *uoff) { const struct net_offload **offloads; const struct net_offload *ops; @@ -265,6 +258,10 @@ static struct sk_buff **gue_gro_receive(struct sk_buff **head, void *data; u16 doffset = 0; int flush = 1; + struct fou *fou = container_of(uoff, struct fou, udp_offloads); + struct gro_remcsum grc; + + skb_gro_remcsum_init(&grc); off = skb_gro_offset(skb); len = off + sizeof(*guehdr); @@ -306,7 +303,9 @@ static struct sk_buff **gue_gro_receive(struct sk_buff **head, if (flags & GUE_PFLAG_REMCSUM) { guehdr = gue_gro_remcsum(skb, off, guehdr, data + doffset, hdrlen, - guehdr->proto_ctype); + guehdr->proto_ctype, &grc, + !!(fou->flags & + FOU_F_REMCSUM_NOPARTIAL)); if (!guehdr) goto out; @@ -356,11 +355,13 @@ out_unlock: rcu_read_unlock(); out: NAPI_GRO_CB(skb)->flush |= flush; + skb_gro_remcsum_cleanup(skb, &grc); return pp; } -static int gue_gro_complete(struct sk_buff *skb, int nhoff) +static int gue_gro_complete(struct sk_buff *skb, int nhoff, + struct udp_offload *uoff) { const struct net_offload **offloads; struct guehdr *guehdr = (struct guehdr *)(skb->data + nhoff); @@ -465,6 +466,7 @@ static int fou_create(struct net *net, struct fou_cfg *cfg, sk = sock->sk; + fou->flags = cfg->flags; fou->port = cfg->udp_config.local_udp_port; /* Initial for fou type */ @@ -490,7 +492,7 @@ static int fou_create(struct net *net, struct fou_cfg *cfg, sk->sk_user_data = fou; fou->sock = sock; - udp_set_convert_csum(sk, true); + inet_inc_convert_csum(sk); sk->sk_allocation = GFP_ATOMIC; @@ -551,6 +553,7 @@ static struct nla_policy fou_nl_policy[FOU_ATTR_MAX + 1] = { [FOU_ATTR_AF] = { .type = NLA_U8, }, [FOU_ATTR_IPPROTO] = { .type = NLA_U8, }, [FOU_ATTR_TYPE] = { .type = NLA_U8, }, + [FOU_ATTR_REMCSUM_NOPARTIAL] = { .type = NLA_FLAG, }, }; static int parse_nl_config(struct genl_info *info, @@ -581,6 +584,9 @@ static int parse_nl_config(struct genl_info *info, 
if (info->attrs[FOU_ATTR_TYPE]) cfg->type = nla_get_u8(info->attrs[FOU_ATTR_TYPE]); + if (info->attrs[FOU_ATTR_REMCSUM_NOPARTIAL]) + cfg->flags |= FOU_F_REMCSUM_NOPARTIAL; + return 0; } diff --git a/net/ipv4/geneve.c b/net/ipv4/geneve.c index 95e47c97585e..5a4828ba05ad 100644 --- a/net/ipv4/geneve.c +++ b/net/ipv4/geneve.c @@ -17,7 +17,7 @@ #include <linux/errno.h> #include <linux/slab.h> #include <linux/skbuff.h> -#include <linux/rculist.h> +#include <linux/list.h> #include <linux/netdevice.h> #include <linux/in.h> #include <linux/ip.h> @@ -26,8 +26,8 @@ #include <linux/etherdevice.h> #include <linux/if_ether.h> #include <linux/if_vlan.h> -#include <linux/hash.h> #include <linux/ethtool.h> +#include <linux/mutex.h> #include <net/arp.h> #include <net/ndisc.h> #include <net/ip.h> @@ -50,38 +50,30 @@ #include <net/ip6_checksum.h> #endif -#define PORT_HASH_BITS 8 -#define PORT_HASH_SIZE (1<<PORT_HASH_BITS) +/* Protects sock_list and refcounts. */ +static DEFINE_MUTEX(geneve_mutex); /* per-network namespace private data for this module */ struct geneve_net { - struct hlist_head sock_list[PORT_HASH_SIZE]; - spinlock_t sock_lock; /* Protects sock_list */ + struct list_head sock_list; }; static int geneve_net_id; -static struct workqueue_struct *geneve_wq; - static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb) { return (struct genevehdr *)(udp_hdr(skb) + 1); } -static struct hlist_head *gs_head(struct net *net, __be16 port) +static struct geneve_sock *geneve_find_sock(struct net *net, + sa_family_t family, __be16 port) { struct geneve_net *gn = net_generic(net, geneve_net_id); - - return &gn->sock_list[hash_32(ntohs(port), PORT_HASH_BITS)]; -} - -/* Find geneve socket based on network namespace and UDP port */ -static struct geneve_sock *geneve_find_sock(struct net *net, __be16 port) -{ struct geneve_sock *gs; - hlist_for_each_entry_rcu(gs, gs_head(net, port), hlist) { - if (inet_sk(gs->sock->sk)->inet_sport == port) + list_for_each_entry(gs, &gn->sock_list, list) { + if (inet_sk(gs->sock->sk)->inet_sport == port && + inet_sk(gs->sock->sk)->sk.sk_family == family) return gs; } @@ -115,21 +107,25 @@ int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt, struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, __be16 src_port, __be16 dst_port, __be16 tun_flags, u8 vni[3], u8 opt_len, u8 *opt, - bool xnet) + bool csum, bool xnet) { struct genevehdr *gnvh; int min_headroom; int err; - skb = udp_tunnel_handle_offloads(skb, !gs->sock->sk->sk_no_check_tx); + skb = udp_tunnel_handle_offloads(skb, csum); + if (IS_ERR(skb)) + return PTR_ERR(skb); min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len + GENEVE_BASE_HLEN + opt_len + sizeof(struct iphdr) - + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0); + + (skb_vlan_tag_present(skb) ? 
VLAN_HLEN : 0); err = skb_cow_head(skb, min_headroom); - if (unlikely(err)) + if (unlikely(err)) { + kfree_skb(skb); return err; + } skb = vlan_hwaccel_push_inside(skb); if (unlikely(!skb)) @@ -140,11 +136,107 @@ int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt, skb_set_inner_protocol(skb, htons(ETH_P_TEB)); - return udp_tunnel_xmit_skb(gs->sock, rt, skb, src, dst, - tos, ttl, df, src_port, dst_port, xnet); + return udp_tunnel_xmit_skb(rt, skb, src, dst, + tos, ttl, df, src_port, dst_port, xnet, + !csum); } EXPORT_SYMBOL_GPL(geneve_xmit_skb); +static int geneve_hlen(struct genevehdr *gh) +{ + return sizeof(*gh) + gh->opt_len * 4; +} + +static struct sk_buff **geneve_gro_receive(struct sk_buff **head, + struct sk_buff *skb, + struct udp_offload *uoff) +{ + struct sk_buff *p, **pp = NULL; + struct genevehdr *gh, *gh2; + unsigned int hlen, gh_len, off_gnv; + const struct packet_offload *ptype; + __be16 type; + int flush = 1; + + off_gnv = skb_gro_offset(skb); + hlen = off_gnv + sizeof(*gh); + gh = skb_gro_header_fast(skb, off_gnv); + if (skb_gro_header_hard(skb, hlen)) { + gh = skb_gro_header_slow(skb, hlen, off_gnv); + if (unlikely(!gh)) + goto out; + } + + if (gh->ver != GENEVE_VER || gh->oam) + goto out; + gh_len = geneve_hlen(gh); + + hlen = off_gnv + gh_len; + if (skb_gro_header_hard(skb, hlen)) { + gh = skb_gro_header_slow(skb, hlen, off_gnv); + if (unlikely(!gh)) + goto out; + } + + flush = 0; + + for (p = *head; p; p = p->next) { + if (!NAPI_GRO_CB(p)->same_flow) + continue; + + gh2 = (struct genevehdr *)(p->data + off_gnv); + if (gh->opt_len != gh2->opt_len || + memcmp(gh, gh2, gh_len)) { + NAPI_GRO_CB(p)->same_flow = 0; + continue; + } + } + + type = gh->proto_type; + + rcu_read_lock(); + ptype = gro_find_receive_by_type(type); + if (ptype == NULL) { + flush = 1; + goto out_unlock; + } + + skb_gro_pull(skb, gh_len); + skb_gro_postpull_rcsum(skb, gh, gh_len); + pp = ptype->callbacks.gro_receive(head, skb); + +out_unlock: + rcu_read_unlock(); +out: + NAPI_GRO_CB(skb)->flush |= flush; + + return pp; +} + +static int geneve_gro_complete(struct sk_buff *skb, int nhoff, + struct udp_offload *uoff) +{ + struct genevehdr *gh; + struct packet_offload *ptype; + __be16 type; + int gh_len; + int err = -ENOSYS; + + udp_tunnel_gro_complete(skb, nhoff); + + gh = (struct genevehdr *)(skb->data + nhoff); + gh_len = geneve_hlen(gh); + type = gh->proto_type; + + rcu_read_lock(); + ptype = gro_find_complete_by_type(type); + if (ptype != NULL) + err = ptype->callbacks.gro_complete(skb, nhoff + gh_len); + + rcu_read_unlock(); + return err; +} + static void geneve_notify_add_rx_port(struct geneve_sock *gs) { struct sock *sk = gs->sock->sk; @@ -210,15 +302,6 @@ error: return 1; } -static void geneve_del_work(struct work_struct *work) -{ - struct geneve_sock *gs = container_of(work, struct geneve_sock, - del_work); - - udp_tunnel_sock_release(gs->sock); - kfree_rcu(gs, rcu); -} - static struct socket *geneve_create_sock(struct net *net, bool ipv6, __be16 port) { @@ -259,8 +342,6 @@ static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port, if (!gs) return ERR_PTR(-ENOMEM); - INIT_WORK(&gs->del_work, geneve_del_work); - sock = geneve_create_sock(net, ipv6, port); if (IS_ERR(sock)) { kfree(gs); @@ -268,19 +349,15 @@ static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port, } gs->sock = sock; - atomic_set(&gs->refcnt, 1); + gs->refcnt = 1; gs->rcv = rcv; gs->rcv_data = data; /* Initialize the geneve udp offloads structure */ gs->udp_offloads.port = port; - 
gs->udp_offloads.callbacks.gro_receive = NULL; - gs->udp_offloads.callbacks.gro_complete = NULL; - - spin_lock(&gn->sock_lock); - hlist_add_head_rcu(&gs->hlist, gs_head(net, port)); + gs->udp_offloads.callbacks.gro_receive = geneve_gro_receive; + gs->udp_offloads.callbacks.gro_complete = geneve_gro_complete; geneve_notify_add_rx_port(gs); - spin_unlock(&gn->sock_lock); /* Mark socket as an encapsulation socket */ tunnel_cfg.sk_user_data = gs; @@ -289,6 +366,8 @@ static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port, tunnel_cfg.encap_destroy = NULL; setup_udp_tunnel_sock(net, sock, &tunnel_cfg); + list_add(&gs->list, &gn->sock_list); + return gs; } @@ -296,25 +375,21 @@ struct geneve_sock *geneve_sock_add(struct net *net, __be16 port, geneve_rcv_t *rcv, void *data, bool no_share, bool ipv6) { - struct geneve_net *gn = net_generic(net, geneve_net_id); struct geneve_sock *gs; - gs = geneve_socket_create(net, port, rcv, data, ipv6); - if (!IS_ERR(gs)) - return gs; - - if (no_share) /* Return error if sharing is not allowed. */ - return ERR_PTR(-EINVAL); + mutex_lock(&geneve_mutex); - spin_lock(&gn->sock_lock); - gs = geneve_find_sock(net, port); - if (gs && ((gs->rcv != rcv) || - !atomic_add_unless(&gs->refcnt, 1, 0))) + gs = geneve_find_sock(net, ipv6 ? AF_INET6 : AF_INET, port); + if (gs) { + if (!no_share && gs->rcv == rcv) + gs->refcnt++; + else gs = ERR_PTR(-EBUSY); - spin_unlock(&gn->sock_lock); + } else { + gs = geneve_socket_create(net, port, rcv, data, ipv6); + } - if (!gs) - gs = ERR_PTR(-EINVAL); + mutex_unlock(&geneve_mutex); return gs; } @@ -322,37 +397,32 @@ EXPORT_SYMBOL_GPL(geneve_sock_add); void geneve_sock_release(struct geneve_sock *gs) { - struct net *net = sock_net(gs->sock->sk); - struct geneve_net *gn = net_generic(net, geneve_net_id); + mutex_lock(&geneve_mutex); - if (!atomic_dec_and_test(&gs->refcnt)) - return; + if (--gs->refcnt) + goto unlock; - spin_lock(&gn->sock_lock); - hlist_del_rcu(&gs->hlist); + list_del(&gs->list); geneve_notify_del_rx_port(gs); - spin_unlock(&gn->sock_lock); + udp_tunnel_sock_release(gs->sock); + kfree_rcu(gs, rcu); - queue_work(geneve_wq, &gs->del_work); +unlock: + mutex_unlock(&geneve_mutex); } EXPORT_SYMBOL_GPL(geneve_sock_release); static __net_init int geneve_init_net(struct net *net) { struct geneve_net *gn = net_generic(net, geneve_net_id); - unsigned int h; - spin_lock_init(&gn->sock_lock); - - for (h = 0; h < PORT_HASH_SIZE; ++h) - INIT_HLIST_HEAD(&gn->sock_list[h]); + INIT_LIST_HEAD(&gn->sock_list); return 0; } static struct pernet_operations geneve_net_ops = { .init = geneve_init_net, - .exit = NULL, .id = &geneve_net_id, .size = sizeof(struct geneve_net), }; @@ -361,10 +431,6 @@ static int __init geneve_init_module(void) { int rc; - geneve_wq = alloc_workqueue("geneve", 0, 0); - if (!geneve_wq) - return -ENOMEM; - rc = register_pernet_subsys(&geneve_net_ops); if (rc) return rc; @@ -373,11 +439,10 @@ static int __init geneve_init_module(void) return 0; } -late_initcall(geneve_init_module); +module_init(geneve_init_module); static void __exit geneve_cleanup_module(void) { - destroy_workqueue(geneve_wq); unregister_pernet_subsys(&geneve_net_ops); } module_exit(geneve_cleanup_module); diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 36f5584d93c5..5e564014a0b7 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -205,7 +205,7 @@ static const struct icmp_control icmp_pointers[NR_ICMP_TYPES+1]; */ static struct sock *icmp_sk(struct net *net) { - return net->ipv4.icmp_sk[smp_processor_id()]; + return 
*this_cpu_ptr(net->ipv4.icmp_sk); } static inline struct sock *icmp_xmit_lock(struct net *net) @@ -1140,8 +1140,8 @@ static void __net_exit icmp_sk_exit(struct net *net) int i; for_each_possible_cpu(i) - inet_ctl_sock_destroy(net->ipv4.icmp_sk[i]); - kfree(net->ipv4.icmp_sk); + inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.icmp_sk, i)); + free_percpu(net->ipv4.icmp_sk); net->ipv4.icmp_sk = NULL; } @@ -1149,9 +1149,8 @@ static int __net_init icmp_sk_init(struct net *net) { int i, err; - net->ipv4.icmp_sk = - kzalloc(nr_cpu_ids * sizeof(struct sock *), GFP_KERNEL); - if (net->ipv4.icmp_sk == NULL) + net->ipv4.icmp_sk = alloc_percpu(struct sock *); + if (!net->ipv4.icmp_sk) return -ENOMEM; for_each_possible_cpu(i) { @@ -1162,7 +1161,7 @@ static int __net_init icmp_sk_init(struct net *net) if (err < 0) goto fail; - net->ipv4.icmp_sk[i] = sk; + *per_cpu_ptr(net->ipv4.icmp_sk, i) = sk; /* Enough space for 2 64K ICMP packets, including * sk_buff/skb_shared_info struct overhead. @@ -1203,8 +1202,8 @@ static int __net_init icmp_sk_init(struct net *net) fail: for_each_possible_cpu(i) - inet_ctl_sock_destroy(net->ipv4.icmp_sk[i]); - kfree(net->ipv4.icmp_sk); + inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.icmp_sk, i)); + free_percpu(net->ipv4.icmp_sk); return err; } diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index e34dccbc4d70..81751f12645f 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -203,7 +203,8 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, icsk->icsk_ca_ops->get_info(sk, ext, skb); out: - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; errout: nlmsg_cancel(skb, nlh); @@ -271,7 +272,8 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, } #endif - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; } static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, @@ -758,7 +760,8 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, } #endif - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; } static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 3a83ce5efa80..787b3c294ce6 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -129,7 +129,8 @@ int ip_forward(struct sk_buff *skb) * We now generate an ICMP HOST REDIRECT giving the route * we calculated. 
*/ - if (rt->rt_flags&RTCF_DOREDIRECT && !opt->srr && !skb_sec_path(skb)) + if (IPCB(skb)->flags & IPSKB_DOREDIRECT && !opt->srr && + !skb_sec_path(skb)) ip_rt_send_redirect(skb); skb->priority = rt_tos2priority(iph->tos); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 4f4bf5b99686..6207275fc749 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -659,12 +659,12 @@ static bool ipgre_netlink_encap_parms(struct nlattr *data[], if (data[IFLA_GRE_ENCAP_SPORT]) { ret = true; - ipencap->sport = nla_get_u16(data[IFLA_GRE_ENCAP_SPORT]); + ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]); } if (data[IFLA_GRE_ENCAP_DPORT]) { ret = true; - ipencap->dport = nla_get_u16(data[IFLA_GRE_ENCAP_DPORT]); + ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]); } return ret; @@ -673,6 +673,7 @@ static bool ipgre_netlink_encap_parms(struct nlattr *data[], static int gre_tap_init(struct net_device *dev) { __gre_tunnel_init(dev); + dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; return ip_tunnel_init(dev); } @@ -785,10 +786,10 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev) if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE, t->encap.type) || - nla_put_u16(skb, IFLA_GRE_ENCAP_SPORT, - t->encap.sport) || - nla_put_u16(skb, IFLA_GRE_ENCAP_DPORT, - t->encap.dport) || + nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT, + t->encap.sport) || + nla_put_be16(skb, IFLA_GRE_ENCAP_DPORT, + t->encap.dport) || nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS, t->encap.flags)) goto nla_put_failure; @@ -828,6 +829,7 @@ static struct rtnl_link_ops ipgre_link_ops __read_mostly = { .dellink = ip_tunnel_dellink, .get_size = ipgre_get_size, .fill_info = ipgre_fill_info, + .get_link_net = ip_tunnel_get_link_net, }; static struct rtnl_link_ops ipgre_tap_ops __read_mostly = { @@ -842,6 +844,7 @@ static struct rtnl_link_ops ipgre_tap_ops __read_mostly = { .dellink = ip_tunnel_dellink, .get_size = ipgre_get_size, .fill_info = ipgre_fill_info, + .get_link_net = ip_tunnel_get_link_net, }; static int __net_init ipgre_tap_init_net(struct net *net) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index b50861b22b6b..d68199d9b2b0 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -755,13 +755,11 @@ ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk struct msghdr *msg = from; if (skb->ip_summed == CHECKSUM_PARTIAL) { - /* XXX: stripping const */ - if (memcpy_fromiovecend(to, (struct iovec *)msg->msg_iter.iov, offset, len) < 0) + if (copy_from_iter(to, len, &msg->msg_iter) != len) return -EFAULT; } else { __wsum csum = 0; - /* XXX: stripping const */ - if (csum_partial_copy_fromiovecend(to, (struct iovec *)msg->msg_iter.iov, offset, len, &csum) < 0) + if (csum_and_copy_from_iter(to, len, &csum, &msg->msg_iter) != len) return -EFAULT; skb->csum = csum_block_add(skb->csum, csum, odd); } @@ -1506,23 +1504,8 @@ static int ip_reply_glue_bits(void *dptr, char *to, int offset, /* * Generic function to send a packet as reply to another packet. * Used to send some TCP resets/acks so far. - * - * Use a fake percpu inet socket to avoid false sharing and contention. 
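The fake per-CPU inet_sock removed just below is replaced by real per-namespace, per-CPU control sockets, and ip_send_unicast_reply() now takes the socket from its caller. A minimal caller-side sketch, mirroring the tcp_ipv4.c hunks later in this diff (ctl_sk is just a local name used for illustration):

struct sock *ctl_sk = *this_cpu_ptr(net->ipv4.tcp_sk);	/* allocated with alloc_percpu() in the tcp_sk_init() hunk below */

ip_send_unicast_reply(ctl_sk, skb, &TCP_SKB_CB(skb)->header.h4.opt,
		      ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
		      &arg, arg.iov[0].iov_len);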
*/ -static DEFINE_PER_CPU(struct inet_sock, unicast_sock) = { - .sk = { - .__sk_common = { - .skc_refcnt = ATOMIC_INIT(1), - }, - .sk_wmem_alloc = ATOMIC_INIT(1), - .sk_allocation = GFP_ATOMIC, - .sk_flags = (1UL << SOCK_USE_WRITE_QUEUE), - }, - .pmtudisc = IP_PMTUDISC_WANT, - .uc_ttl = -1, -}; - -void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, +void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, const struct ip_options *sopt, __be32 daddr, __be32 saddr, const struct ip_reply_arg *arg, @@ -1532,9 +1515,8 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, struct ipcm_cookie ipc; struct flowi4 fl4; struct rtable *rt = skb_rtable(skb); + struct net *net = sock_net(sk); struct sk_buff *nskb; - struct sock *sk; - struct inet_sock *inet; int err; if (__ip_options_echo(&replyopts.opt.opt, skb, sopt)) @@ -1565,15 +1547,11 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, if (IS_ERR(rt)) return; - inet = &get_cpu_var(unicast_sock); + inet_sk(sk)->tos = arg->tos; - inet->tos = arg->tos; - sk = &inet->sk; sk->sk_priority = skb->priority; sk->sk_protocol = ip_hdr(skb)->protocol; sk->sk_bound_dev_if = arg->bound_dev_if; - sock_net_set(sk, net); - __skb_queue_head_init(&sk->sk_write_queue); sk->sk_sndbuf = sysctl_wmem_default; err = ip_append_data(sk, &fl4, ip_reply_glue_bits, arg->iov->iov_base, len, 0, &ipc, &rt, MSG_DONTWAIT); @@ -1589,13 +1567,10 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, arg->csumoffset) = csum_fold(csum_add(nskb->csum, arg->csum)); nskb->ip_summed = CHECKSUM_NONE; - skb_orphan(nskb); skb_set_queue_mapping(nskb, skb_get_queue_mapping(skb)); ip_push_pending_frames(sk, &fl4); } out: - put_cpu_var(unicast_sock); - ip_rt_put(rt); } diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 8a89c738b7a3..31d8c71986b4 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -37,6 +37,7 @@ #include <net/route.h> #include <net/xfrm.h> #include <net/compat.h> +#include <net/checksum.h> #if IS_ENABLED(CONFIG_IPV6) #include <net/transp_v6.h> #endif @@ -45,14 +46,6 @@ #include <linux/errqueue.h> #include <asm/uaccess.h> -#define IP_CMSG_PKTINFO 1 -#define IP_CMSG_TTL 2 -#define IP_CMSG_TOS 4 -#define IP_CMSG_RECVOPTS 8 -#define IP_CMSG_RETOPTS 16 -#define IP_CMSG_PASSSEC 32 -#define IP_CMSG_ORIGDSTADDR 64 - /* * SOL_IP control messages. 
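The ip_sockglue.c hunks that follow add an IP_CHECKSUM socket option and ancillary message handing the CHECKSUM_COMPLETE value of a received datagram to userspace. A hedged userspace sketch of consuming it, assuming the installed headers already define IP_CHECKSUM (this sketch is not part of the patch):

#include <string.h>
#include <stdint.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>

/* request the checksum cmsg on fd, then pull it out of one received datagram */
static ssize_t recv_with_checksum(int fd, void *buf, size_t len, uint32_t *csum)
{
	char cbuf[CMSG_SPACE(sizeof(uint32_t))];
	struct iovec iov = { .iov_base = buf, .iov_len = len };
	struct msghdr msg = {
		.msg_iov = &iov, .msg_iovlen = 1,
		.msg_control = cbuf, .msg_controllen = sizeof(cbuf),
	};
	int one = 1;
	ssize_t n;

	setsockopt(fd, SOL_IP, IP_CHECKSUM, &one, sizeof(one));

	n = recvmsg(fd, &msg, 0);
	if (n < 0)
		return n;

	for (struct cmsghdr *c = CMSG_FIRSTHDR(&msg); c; c = CMSG_NXTHDR(&msg, c))
		if (c->cmsg_level == SOL_IP && c->cmsg_type == IP_CHECKSUM)
			memcpy(csum, CMSG_DATA(c), sizeof(*csum));	/* 32-bit partial sum over the payload */

	return n;
}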
*/ @@ -104,6 +97,20 @@ static void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb) put_cmsg(msg, SOL_IP, IP_RETOPTS, opt->optlen, opt->__data); } +static void ip_cmsg_recv_checksum(struct msghdr *msg, struct sk_buff *skb, + int offset) +{ + __wsum csum = skb->csum; + + if (skb->ip_summed != CHECKSUM_COMPLETE) + return; + + if (offset != 0) + csum = csum_sub(csum, csum_partial(skb->data, offset, 0)); + + put_cmsg(msg, SOL_IP, IP_CHECKSUM, sizeof(__wsum), &csum); +} + static void ip_cmsg_recv_security(struct msghdr *msg, struct sk_buff *skb) { char *secdata; @@ -144,47 +151,73 @@ static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb) put_cmsg(msg, SOL_IP, IP_ORIGDSTADDR, sizeof(sin), &sin); } -void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb) +void ip_cmsg_recv_offset(struct msghdr *msg, struct sk_buff *skb, + int offset) { struct inet_sock *inet = inet_sk(skb->sk); unsigned int flags = inet->cmsg_flags; /* Ordered by supposed usage frequency */ - if (flags & 1) + if (flags & IP_CMSG_PKTINFO) { ip_cmsg_recv_pktinfo(msg, skb); - if ((flags >>= 1) == 0) - return; - if (flags & 1) + flags &= ~IP_CMSG_PKTINFO; + if (!flags) + return; + } + + if (flags & IP_CMSG_TTL) { ip_cmsg_recv_ttl(msg, skb); - if ((flags >>= 1) == 0) - return; - if (flags & 1) + flags &= ~IP_CMSG_TTL; + if (!flags) + return; + } + + if (flags & IP_CMSG_TOS) { ip_cmsg_recv_tos(msg, skb); - if ((flags >>= 1) == 0) - return; - if (flags & 1) + flags &= ~IP_CMSG_TOS; + if (!flags) + return; + } + + if (flags & IP_CMSG_RECVOPTS) { ip_cmsg_recv_opts(msg, skb); - if ((flags >>= 1) == 0) - return; - if (flags & 1) + flags &= ~IP_CMSG_RECVOPTS; + if (!flags) + return; + } + + if (flags & IP_CMSG_RETOPTS) { ip_cmsg_recv_retopts(msg, skb); - if ((flags >>= 1) == 0) - return; - if (flags & 1) + flags &= ~IP_CMSG_RETOPTS; + if (!flags) + return; + } + + if (flags & IP_CMSG_PASSSEC) { ip_cmsg_recv_security(msg, skb); - if ((flags >>= 1) == 0) - return; - if (flags & 1) + flags &= ~IP_CMSG_PASSSEC; + if (!flags) + return; + } + + if (flags & IP_CMSG_ORIGDSTADDR) { ip_cmsg_recv_dstaddr(msg, skb); + flags &= ~IP_CMSG_ORIGDSTADDR; + if (!flags) + return; + } + + if (flags & IP_CMSG_CHECKSUM) + ip_cmsg_recv_checksum(msg, skb, offset); } -EXPORT_SYMBOL(ip_cmsg_recv); +EXPORT_SYMBOL(ip_cmsg_recv_offset); int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc, bool allow_ipv6) @@ -450,7 +483,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) serr = SKB_EXT_ERR(skb); - if (sin) { + if (sin && skb->len) { sin->sin_family = AF_INET; sin->sin_addr.s_addr = *(__be32 *)(skb_network_header(skb) + serr->addr_offset); @@ -461,17 +494,14 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err)); sin = &errhdr.offender; - sin->sin_family = AF_UNSPEC; - - if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP || - ipv4_pktinfo_prepare_errqueue(sk, skb, serr->ee.ee_origin)) { - struct inet_sock *inet = inet_sk(sk); + memset(sin, 0, sizeof(*sin)); + if (skb->len && + (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP || + ipv4_pktinfo_prepare_errqueue(sk, skb, serr->ee.ee_origin))) { sin->sin_family = AF_INET; sin->sin_addr.s_addr = ip_hdr(skb)->saddr; - sin->sin_port = 0; - memset(&sin->sin_zero, 0, sizeof(sin->sin_zero)); - if (inet->cmsg_flags) + if (inet_sk(sk)->cmsg_flags) ip_cmsg_recv(msg, skb); } @@ -522,6 +552,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, case 
IP_MULTICAST_ALL: case IP_MULTICAST_LOOP: case IP_RECVORIGDSTADDR: + case IP_CHECKSUM: if (optlen >= sizeof(int)) { if (get_user(val, (int __user *) optval)) return -EFAULT; @@ -619,6 +650,19 @@ static int do_ip_setsockopt(struct sock *sk, int level, else inet->cmsg_flags &= ~IP_CMSG_ORIGDSTADDR; break; + case IP_CHECKSUM: + if (val) { + if (!(inet->cmsg_flags & IP_CMSG_CHECKSUM)) { + inet_inc_convert_csum(sk); + inet->cmsg_flags |= IP_CMSG_CHECKSUM; + } + } else { + if (inet->cmsg_flags & IP_CMSG_CHECKSUM) { + inet_dec_convert_csum(sk); + inet->cmsg_flags &= ~IP_CMSG_CHECKSUM; + } + } + break; case IP_TOS: /* This sets both TOS and Precedence */ if (sk->sk_type == SOCK_STREAM) { val &= ~INET_ECN_MASK; @@ -1222,6 +1266,9 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, case IP_RECVORIGDSTADDR: val = (inet->cmsg_flags & IP_CMSG_ORIGDSTADDR) != 0; break; + case IP_CHECKSUM: + val = (inet->cmsg_flags & IP_CMSG_CHECKSUM) != 0; + break; case IP_TOS: val = inet->tos; break; diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index d3e447936720..2cd08280c77b 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -972,6 +972,14 @@ void ip_tunnel_dellink(struct net_device *dev, struct list_head *head) } EXPORT_SYMBOL_GPL(ip_tunnel_dellink); +struct net *ip_tunnel_get_link_net(const struct net_device *dev) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + + return tunnel->net; +} +EXPORT_SYMBOL(ip_tunnel_get_link_net); + int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id, struct rtnl_link_ops *ops, char *devname) { diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 1a7e979e80ba..94efe148181c 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -531,6 +531,7 @@ static struct rtnl_link_ops vti_link_ops __read_mostly = { .dellink = ip_tunnel_dellink, .get_size = vti_get_size, .fill_info = vti_fill_info, + .get_link_net = ip_tunnel_get_link_net, }; static int __init vti_init(void) diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 7fa18bc7e47f..b26376ef87f6 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -209,9 +209,9 @@ static int __init ic_open_devs(void) last = &ic_first_dev; rtnl_lock(); - /* bring loopback device up first */ + /* bring loopback and DSA master network devices up first */ for_each_netdev(&init_net, dev) { - if (!(dev->flags & IFF_LOOPBACK)) + if (!(dev->flags & IFF_LOOPBACK) && !netdev_uses_dsa(dev)) continue; if (dev_change_flags(dev, dev->flags | IFF_UP) < 0) pr_err("IP-Config: Failed to open %s\n", dev->name); @@ -306,7 +306,7 @@ static void __init ic_close_devs(void) while ((d = next)) { next = d->next; dev = d->dev; - if (dev != ic_dev) { + if (dev != ic_dev && !netdev_uses_dsa(dev)) { DBG(("IP-Config: Downing %s\n", dev->name)); dev_change_flags(dev, d->flags); } diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 40403114f00a..915d215a7d14 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -366,12 +366,12 @@ static bool ipip_netlink_encap_parms(struct nlattr *data[], if (data[IFLA_IPTUN_ENCAP_SPORT]) { ret = true; - ipencap->sport = nla_get_u16(data[IFLA_IPTUN_ENCAP_SPORT]); + ipencap->sport = nla_get_be16(data[IFLA_IPTUN_ENCAP_SPORT]); } if (data[IFLA_IPTUN_ENCAP_DPORT]) { ret = true; - ipencap->dport = nla_get_u16(data[IFLA_IPTUN_ENCAP_DPORT]); + ipencap->dport = nla_get_be16(data[IFLA_IPTUN_ENCAP_DPORT]); } return ret; @@ -460,10 +460,10 @@ static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev) if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE, tunnel->encap.type) || - 
nla_put_u16(skb, IFLA_IPTUN_ENCAP_SPORT, - tunnel->encap.sport) || - nla_put_u16(skb, IFLA_IPTUN_ENCAP_DPORT, - tunnel->encap.dport) || + nla_put_be16(skb, IFLA_IPTUN_ENCAP_SPORT, + tunnel->encap.sport) || + nla_put_be16(skb, IFLA_IPTUN_ENCAP_DPORT, + tunnel->encap.dport) || nla_put_u16(skb, IFLA_IPTUN_ENCAP_FLAGS, tunnel->encap.flags)) goto nla_put_failure; @@ -498,6 +498,7 @@ static struct rtnl_link_ops ipip_link_ops __read_mostly = { .dellink = ip_tunnel_dellink, .get_size = ipip_get_size, .fill_info = ipip_fill_info, + .get_link_net = ip_tunnel_get_link_net, }; static struct xfrm_tunnel ipip_handler __read_mostly = { diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index c8034587859d..9d78427652d2 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -2290,7 +2290,8 @@ static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, if (err < 0 && err != -ENOENT) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); diff --git a/net/ipv4/netfilter/nft_redir_ipv4.c b/net/ipv4/netfilter/nft_redir_ipv4.c index ff2d23d8c87a..6ecfce63201a 100644 --- a/net/ipv4/netfilter/nft_redir_ipv4.c +++ b/net/ipv4/netfilter/nft_redir_ipv4.c @@ -27,10 +27,10 @@ static void nft_redir_ipv4_eval(const struct nft_expr *expr, memset(&mr, 0, sizeof(mr)); if (priv->sreg_proto_min) { - mr.range[0].min.all = (__force __be16) - data[priv->sreg_proto_min].data[0]; - mr.range[0].max.all = (__force __be16) - data[priv->sreg_proto_max].data[0]; + mr.range[0].min.all = + *(__be16 *)&data[priv->sreg_proto_min].data[0]; + mr.range[0].max.all = + *(__be16 *)&data[priv->sreg_proto_max].data[0]; mr.range[0].flags |= NF_NAT_RANGE_PROTO_SPECIFIED; } diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index c0d82f78d364..e9f66e1cda50 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -599,18 +599,18 @@ int ping_getfrag(void *from, char *to, struct pingfakehdr *pfh = (struct pingfakehdr *)from; if (offset == 0) { - if (fraglen < sizeof(struct icmphdr)) + fraglen -= sizeof(struct icmphdr); + if (fraglen < 0) BUG(); - if (csum_partial_copy_fromiovecend(to + sizeof(struct icmphdr), - pfh->iov, 0, fraglen - sizeof(struct icmphdr), - &pfh->wcheck)) + if (csum_and_copy_from_iter(to + sizeof(struct icmphdr), + fraglen, &pfh->wcheck, + &pfh->msg->msg_iter) != fraglen) return -EFAULT; } else if (offset < sizeof(struct icmphdr)) { BUG(); } else { - if (csum_partial_copy_fromiovecend - (to, pfh->iov, offset - sizeof(struct icmphdr), - fraglen, &pfh->wcheck)) + if (csum_and_copy_from_iter(to, fraglen, &pfh->wcheck, + &pfh->msg->msg_iter) != fraglen) return -EFAULT; } @@ -811,8 +811,7 @@ back_from_confirm: pfh.icmph.checksum = 0; pfh.icmph.un.echo.id = inet->inet_sport; pfh.icmph.un.echo.sequence = user_icmph.un.echo.sequence; - /* XXX: stripping const */ - pfh.iov = (struct iovec *)msg->msg_iter.iov; + pfh.msg = msg; pfh.wcheck = 0; pfh.family = AF_INET; @@ -966,8 +965,11 @@ bool ping_rcv(struct sk_buff *skb) sk = ping_lookup(net, skb, ntohs(icmph->un.echo.id)); if (sk != NULL) { + struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); + pr_debug("rcv on socket %p\n", sk); - ping_queue_rcv_skb(sk, skb_get(skb)); + if (skb2) + ping_queue_rcv_skb(sk, skb2); sock_put(sk); return true; } diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 8f9cd200ce20..d8953ef0770c 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -292,6 +292,12 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPHystartTrainCwnd", LINUX_MIB_TCPHYSTARTTRAINCWND), 
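The six TCPACKSkipped* entries added just below expose how often the new per-socket out-of-window ACK rate limit (sysctl tcp_invalid_ratelimit, added to sysctl_net_ipv4.c later in this section) suppressed a duplicate or challenge ACK. A simplified sketch of the kind of check that bumps them; the real helper is tcp_oow_rate_limited(), whose body is not among the hunks shown here:

/* sketch only, not the exact kernel helper */
static bool oow_rate_limited_sketch(struct net *net, int mib_idx,
				    u32 *last_oow_ack_time)
{
	if (*last_oow_ack_time) {
		s32 elapsed = (s32)(tcp_time_stamp - *last_oow_ack_time);

		/* sysctl_tcp_invalid_ratelimit is stored in jiffies
		 * (proc_dointvec_ms_jiffies), so this compares like units
		 */
		if (elapsed >= 0 && elapsed < sysctl_tcp_invalid_ratelimit) {
			NET_INC_STATS_BH(net, mib_idx);	/* one of the counters below */
			return true;			/* skip this ACK */
		}
	}

	*last_oow_ack_time = tcp_time_stamp;
	return false;					/* OK to send */
}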
SNMP_MIB_ITEM("TCPHystartDelayDetect", LINUX_MIB_TCPHYSTARTDELAYDETECT), SNMP_MIB_ITEM("TCPHystartDelayCwnd", LINUX_MIB_TCPHYSTARTDELAYCWND), + SNMP_MIB_ITEM("TCPACKSkippedSynRecv", LINUX_MIB_TCPACKSKIPPEDSYNRECV), + SNMP_MIB_ITEM("TCPACKSkippedPAWS", LINUX_MIB_TCPACKSKIPPEDPAWS), + SNMP_MIB_ITEM("TCPACKSkippedSeq", LINUX_MIB_TCPACKSKIPPEDSEQ), + SNMP_MIB_ITEM("TCPACKSkippedFinWait2", LINUX_MIB_TCPACKSKIPPEDFINWAIT2), + SNMP_MIB_ITEM("TCPACKSkippedTimeWait", LINUX_MIB_TCPACKSKIPPEDTIMEWAIT), + SNMP_MIB_ITEM("TCPACKSkippedChallenge", LINUX_MIB_TCPACKSKIPPEDCHALLENGE), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 0bb68df5055d..f027a708b7e0 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -337,7 +337,7 @@ int raw_rcv(struct sock *sk, struct sk_buff *skb) } static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, - void *from, size_t length, + struct msghdr *msg, size_t length, struct rtable **rtp, unsigned int flags) { @@ -382,7 +382,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, skb->transport_header = skb->network_header; err = -EFAULT; - if (memcpy_fromiovecend((void *)iph, from, 0, length)) + if (memcpy_from_msg(iph, msg, length)) goto error_free; iphlen = iph->ihl * 4; @@ -625,8 +625,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, back_from_confirm: if (inet->hdrincl) - /* XXX: stripping const */ - err = raw_send_hdrinc(sk, &fl4, (struct iovec *)msg->msg_iter.iov, len, + err = raw_send_hdrinc(sk, &fl4, msg, len, &rt, msg->msg_flags); else { diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 6a2155b02602..ad5064362c5c 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -966,6 +966,9 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) if (dst->dev->mtu < mtu) return; + if (rt->rt_pmtu && rt->rt_pmtu < mtu) + return; + if (mtu < ip_rt_min_pmtu) mtu = ip_rt_min_pmtu; @@ -1325,14 +1328,22 @@ static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt) return ret; } -static DEFINE_SPINLOCK(rt_uncached_lock); -static LIST_HEAD(rt_uncached_list); +struct uncached_list { + spinlock_t lock; + struct list_head head; +}; + +static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt_uncached_list); static void rt_add_uncached_list(struct rtable *rt) { - spin_lock_bh(&rt_uncached_lock); - list_add_tail(&rt->rt_uncached, &rt_uncached_list); - spin_unlock_bh(&rt_uncached_lock); + struct uncached_list *ul = raw_cpu_ptr(&rt_uncached_list); + + rt->rt_uncached_list = ul; + + spin_lock_bh(&ul->lock); + list_add_tail(&rt->rt_uncached, &ul->head); + spin_unlock_bh(&ul->lock); } static void ipv4_dst_destroy(struct dst_entry *dst) @@ -1340,27 +1351,32 @@ static void ipv4_dst_destroy(struct dst_entry *dst) struct rtable *rt = (struct rtable *) dst; if (!list_empty(&rt->rt_uncached)) { - spin_lock_bh(&rt_uncached_lock); + struct uncached_list *ul = rt->rt_uncached_list; + + spin_lock_bh(&ul->lock); list_del(&rt->rt_uncached); - spin_unlock_bh(&rt_uncached_lock); + spin_unlock_bh(&ul->lock); } } void rt_flush_dev(struct net_device *dev) { - if (!list_empty(&rt_uncached_list)) { - struct net *net = dev_net(dev); - struct rtable *rt; + struct net *net = dev_net(dev); + struct rtable *rt; + int cpu; + + for_each_possible_cpu(cpu) { + struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu); - spin_lock_bh(&rt_uncached_lock); - list_for_each_entry(rt, &rt_uncached_list, rt_uncached) { + spin_lock_bh(&ul->lock); + list_for_each_entry(rt, &ul->head, rt_uncached) { if (rt->dst.dev != 
dev) continue; rt->dst.dev = net->loopback_dev; dev_hold(rt->dst.dev); dev_put(dev); } - spin_unlock_bh(&rt_uncached_lock); + spin_unlock_bh(&ul->lock); } } @@ -1554,11 +1570,10 @@ static int __mkroute_input(struct sk_buff *skb, do_cache = res->fi && !itag; if (out_dev == in_dev && err && IN_DEV_TX_REDIRECTS(out_dev) && + skb->protocol == htons(ETH_P_IP) && (IN_DEV_SHARED_MEDIA(out_dev) || - inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res)))) { - flags |= RTCF_DOREDIRECT; - do_cache = false; - } + inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res)))) + IPCB(skb)->flags |= IPSKB_DOREDIRECT; if (skb->protocol != htons(ETH_P_IP)) { /* Not IP (i.e. ARP). Do not create route, if it is @@ -2303,6 +2318,8 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED; if (rt->rt_flags & RTCF_NOTIFY) r->rtm_flags |= RTM_F_NOTIFY; + if (IPCB(skb)->flags & IPSKB_DOREDIRECT) + r->rtm_flags |= RTCF_DOREDIRECT; if (nla_put_be32(skb, RTA_DST, dst)) goto nla_put_failure; @@ -2377,7 +2394,8 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, error) < 0) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); @@ -2469,7 +2487,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) err = rt_fill_info(net, dst, src, &fl4, skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, RTM_NEWROUTE, 0, 0); - if (err <= 0) + if (err < 0) goto errout_free; err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); @@ -2717,6 +2735,7 @@ struct ip_rt_acct __percpu *ip_rt_acct __read_mostly; int __init ip_rt_init(void) { int rc = 0; + int cpu; ip_idents = kmalloc(IP_IDENTS_SZ * sizeof(*ip_idents), GFP_KERNEL); if (!ip_idents) @@ -2724,6 +2743,12 @@ int __init ip_rt_init(void) prandom_bytes(ip_idents, IP_IDENTS_SZ * sizeof(*ip_idents)); + for_each_possible_cpu(cpu) { + struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu); + + INIT_LIST_HEAD(&ul->head); + spin_lock_init(&ul->lock); + } #ifdef CONFIG_IP_ROUTE_CLASSID ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct)); if (!ip_rt_acct) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index e0ee384a448f..d151539da8e6 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -604,20 +604,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_tcp_congestion_control, }, { - .procname = "tcp_mtu_probing", - .data = &sysctl_tcp_mtu_probing, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "tcp_base_mss", - .data = &sysctl_tcp_base_mss, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { .procname = "tcp_workaround_signed_windows", .data = &sysctl_tcp_workaround_signed_windows, .maxlen = sizeof(int), @@ -729,6 +715,13 @@ static struct ctl_table ipv4_table[] = { .extra2 = &one, }, { + .procname = "tcp_invalid_ratelimit", + .data = &sysctl_tcp_invalid_ratelimit, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_ms_jiffies, + }, + { .procname = "icmp_msgs_per_sec", .data = &sysctl_icmp_msgs_per_sec, .maxlen = sizeof(int), @@ -876,6 +869,20 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "tcp_mtu_probing", + .data = &init_net.ipv4.sysctl_tcp_mtu_probing, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = 
proc_dointvec, + }, + { + .procname = "tcp_base_mss", + .data = &init_net.ipv4.sysctl_tcp_base_mss, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { } }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 3075723c729b..9d72a0fcd928 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1067,11 +1067,10 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t size) { - const struct iovec *iov; struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; - int iovlen, flags, err, copied = 0; - int mss_now = 0, size_goal, copied_syn = 0, offset = 0; + int flags, err, copied = 0; + int mss_now = 0, size_goal, copied_syn = 0; bool sg; long timeo; @@ -1084,7 +1083,6 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, goto out; else if (err) goto out_err; - offset = copied_syn; } timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); @@ -1118,8 +1116,6 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, mss_now = tcp_send_mss(sk, &size_goal, flags); /* Ok commence sending. */ - iovlen = msg->msg_iter.nr_segs; - iov = msg->msg_iter.iov; copied = 0; err = -EPIPE; @@ -1128,151 +1124,134 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, sg = !!(sk->sk_route_caps & NETIF_F_SG); - while (--iovlen >= 0) { - size_t seglen = iov->iov_len; - unsigned char __user *from = iov->iov_base; + while (iov_iter_count(&msg->msg_iter)) { + int copy = 0; + int max = size_goal; - iov++; - if (unlikely(offset > 0)) { /* Skip bytes copied in SYN */ - if (offset >= seglen) { - offset -= seglen; - continue; - } - seglen -= offset; - from += offset; - offset = 0; + skb = tcp_write_queue_tail(sk); + if (tcp_send_head(sk)) { + if (skb->ip_summed == CHECKSUM_NONE) + max = mss_now; + copy = max - skb->len; } - while (seglen > 0) { - int copy = 0; - int max = size_goal; - - skb = tcp_write_queue_tail(sk); - if (tcp_send_head(sk)) { - if (skb->ip_summed == CHECKSUM_NONE) - max = mss_now; - copy = max - skb->len; - } - - if (copy <= 0) { + if (copy <= 0) { new_segment: - /* Allocate new segment. If the interface is SG, - * allocate skb fitting to single page. - */ - if (!sk_stream_memory_free(sk)) - goto wait_for_sndbuf; + /* Allocate new segment. If the interface is SG, + * allocate skb fitting to single page. + */ + if (!sk_stream_memory_free(sk)) + goto wait_for_sndbuf; - skb = sk_stream_alloc_skb(sk, - select_size(sk, sg), - sk->sk_allocation); - if (!skb) - goto wait_for_memory; + skb = sk_stream_alloc_skb(sk, + select_size(sk, sg), + sk->sk_allocation); + if (!skb) + goto wait_for_memory; - /* - * Check whether we can use HW checksum. - */ - if (sk->sk_route_caps & NETIF_F_ALL_CSUM) - skb->ip_summed = CHECKSUM_PARTIAL; + /* + * Check whether we can use HW checksum. + */ + if (sk->sk_route_caps & NETIF_F_ALL_CSUM) + skb->ip_summed = CHECKSUM_PARTIAL; - skb_entail(sk, skb); - copy = size_goal; - max = size_goal; + skb_entail(sk, skb); + copy = size_goal; + max = size_goal; - /* All packets are restored as if they have - * already been sent. skb_mstamp isn't set to - * avoid wrong rtt estimation. - */ - if (tp->repair) - TCP_SKB_CB(skb)->sacked |= TCPCB_REPAIRED; - } + /* All packets are restored as if they have + * already been sent. skb_mstamp isn't set to + * avoid wrong rtt estimation. + */ + if (tp->repair) + TCP_SKB_CB(skb)->sacked |= TCPCB_REPAIRED; + } - /* Try to append data to the end of skb. 
*/ - if (copy > seglen) - copy = seglen; - - /* Where to copy to? */ - if (skb_availroom(skb) > 0) { - /* We have some space in skb head. Superb! */ - copy = min_t(int, copy, skb_availroom(skb)); - err = skb_add_data_nocache(sk, skb, from, copy); - if (err) - goto do_fault; - } else { - bool merge = true; - int i = skb_shinfo(skb)->nr_frags; - struct page_frag *pfrag = sk_page_frag(sk); - - if (!sk_page_frag_refill(sk, pfrag)) - goto wait_for_memory; - - if (!skb_can_coalesce(skb, i, pfrag->page, - pfrag->offset)) { - if (i == MAX_SKB_FRAGS || !sg) { - tcp_mark_push(tp, skb); - goto new_segment; - } - merge = false; - } + /* Try to append data to the end of skb. */ + if (copy > iov_iter_count(&msg->msg_iter)) + copy = iov_iter_count(&msg->msg_iter); + + /* Where to copy to? */ + if (skb_availroom(skb) > 0) { + /* We have some space in skb head. Superb! */ + copy = min_t(int, copy, skb_availroom(skb)); + err = skb_add_data_nocache(sk, skb, &msg->msg_iter, copy); + if (err) + goto do_fault; + } else { + bool merge = true; + int i = skb_shinfo(skb)->nr_frags; + struct page_frag *pfrag = sk_page_frag(sk); + + if (!sk_page_frag_refill(sk, pfrag)) + goto wait_for_memory; - copy = min_t(int, copy, pfrag->size - pfrag->offset); - - if (!sk_wmem_schedule(sk, copy)) - goto wait_for_memory; - - err = skb_copy_to_page_nocache(sk, from, skb, - pfrag->page, - pfrag->offset, - copy); - if (err) - goto do_error; - - /* Update the skb. */ - if (merge) { - skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); - } else { - skb_fill_page_desc(skb, i, pfrag->page, - pfrag->offset, copy); - get_page(pfrag->page); + if (!skb_can_coalesce(skb, i, pfrag->page, + pfrag->offset)) { + if (i == MAX_SKB_FRAGS || !sg) { + tcp_mark_push(tp, skb); + goto new_segment; } - pfrag->offset += copy; + merge = false; } - if (!copied) - TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH; + copy = min_t(int, copy, pfrag->size - pfrag->offset); - tp->write_seq += copy; - TCP_SKB_CB(skb)->end_seq += copy; - tcp_skb_pcount_set(skb, 0); + if (!sk_wmem_schedule(sk, copy)) + goto wait_for_memory; - from += copy; - copied += copy; - if ((seglen -= copy) == 0 && iovlen == 0) { - tcp_tx_timestamp(sk, skb); - goto out; + err = skb_copy_to_page_nocache(sk, &msg->msg_iter, skb, + pfrag->page, + pfrag->offset, + copy); + if (err) + goto do_error; + + /* Update the skb. 
*/ + if (merge) { + skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); + } else { + skb_fill_page_desc(skb, i, pfrag->page, + pfrag->offset, copy); + get_page(pfrag->page); } + pfrag->offset += copy; + } - if (skb->len < max || (flags & MSG_OOB) || unlikely(tp->repair)) - continue; + if (!copied) + TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH; + + tp->write_seq += copy; + TCP_SKB_CB(skb)->end_seq += copy; + tcp_skb_pcount_set(skb, 0); + + copied += copy; + if (!iov_iter_count(&msg->msg_iter)) { + tcp_tx_timestamp(sk, skb); + goto out; + } - if (forced_push(tp)) { - tcp_mark_push(tp, skb); - __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_PUSH); - } else if (skb == tcp_send_head(sk)) - tcp_push_one(sk, mss_now); + if (skb->len < max || (flags & MSG_OOB) || unlikely(tp->repair)) continue; + if (forced_push(tp)) { + tcp_mark_push(tp, skb); + __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_PUSH); + } else if (skb == tcp_send_head(sk)) + tcp_push_one(sk, mss_now); + continue; + wait_for_sndbuf: - set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); wait_for_memory: - if (copied) - tcp_push(sk, flags & ~MSG_MORE, mss_now, - TCP_NAGLE_PUSH, size_goal); + if (copied) + tcp_push(sk, flags & ~MSG_MORE, mss_now, + TCP_NAGLE_PUSH, size_goal); - if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) - goto do_error; + if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) + goto do_error; - mss_now = tcp_send_mss(sk, &size_goal, flags); - } + mss_now = tcp_send_mss(sk, &size_goal, flags); } out: diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c index bb395d46a389..c037644eafb7 100644 --- a/net/ipv4/tcp_bic.c +++ b/net/ipv4/tcp_bic.c @@ -150,7 +150,7 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) tcp_slow_start(tp, acked); else { bictcp_update(ca, tp->snd_cwnd); - tcp_cong_avoid_ai(tp, ca->cnt); + tcp_cong_avoid_ai(tp, ca->cnt, 1); } } diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 27ead0dd16bc..d694088214cd 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -13,6 +13,7 @@ #include <linux/types.h> #include <linux/list.h> #include <linux/gfp.h> +#include <linux/jhash.h> #include <net/tcp.h> static DEFINE_SPINLOCK(tcp_cong_list_lock); @@ -31,6 +32,34 @@ static struct tcp_congestion_ops *tcp_ca_find(const char *name) return NULL; } +/* Must be called with rcu lock held */ +static const struct tcp_congestion_ops *__tcp_ca_find_autoload(const char *name) +{ + const struct tcp_congestion_ops *ca = tcp_ca_find(name); +#ifdef CONFIG_MODULES + if (!ca && capable(CAP_NET_ADMIN)) { + rcu_read_unlock(); + request_module("tcp_%s", name); + rcu_read_lock(); + ca = tcp_ca_find(name); + } +#endif + return ca; +} + +/* Simple linear search, not much in here. */ +struct tcp_congestion_ops *tcp_ca_find_key(u32 key) +{ + struct tcp_congestion_ops *e; + + list_for_each_entry_rcu(e, &tcp_cong_list, list) { + if (e->key == key) + return e; + } + + return NULL; +} + /* * Attach new congestion control algorithm to the list * of available options. 
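The registration hunk below hashes each algorithm's name into a 32-bit key, and tcp_ca_get_key_by_name()/tcp_ca_get_name_by_key() (added a little further down) translate between name and key, so a route metric (RTAX_CC_ALGO, consumed by tcp_ca_openreq_child() near the end of this diff) can pin a congestion control per destination. A small, illustrative-only use of the new helpers:

char buf[TCP_CA_NAME_MAX];
u32 key = tcp_ca_get_key_by_name("cubic");	/* TCP_CA_UNSPEC if unknown and not loadable */

if (key != TCP_CA_UNSPEC && tcp_ca_get_name_by_key(key, buf))
	pr_debug("cc key %u maps back to %s\n", key, buf);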
@@ -45,9 +74,12 @@ int tcp_register_congestion_control(struct tcp_congestion_ops *ca) return -EINVAL; } + ca->key = jhash(ca->name, sizeof(ca->name), strlen(ca->name)); + spin_lock(&tcp_cong_list_lock); - if (tcp_ca_find(ca->name)) { - pr_notice("%s already registered\n", ca->name); + if (ca->key == TCP_CA_UNSPEC || tcp_ca_find_key(ca->key)) { + pr_notice("%s already registered or non-unique key\n", + ca->name); ret = -EEXIST; } else { list_add_tail_rcu(&ca->list, &tcp_cong_list); @@ -70,9 +102,50 @@ void tcp_unregister_congestion_control(struct tcp_congestion_ops *ca) spin_lock(&tcp_cong_list_lock); list_del_rcu(&ca->list); spin_unlock(&tcp_cong_list_lock); + + /* Wait for outstanding readers to complete before the + * module gets removed entirely. + * + * A try_module_get() should fail by now as our module is + * in "going" state since no refs are held anymore and + * module_exit() handler being called. + */ + synchronize_rcu(); } EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control); +u32 tcp_ca_get_key_by_name(const char *name) +{ + const struct tcp_congestion_ops *ca; + u32 key; + + might_sleep(); + + rcu_read_lock(); + ca = __tcp_ca_find_autoload(name); + key = ca ? ca->key : TCP_CA_UNSPEC; + rcu_read_unlock(); + + return key; +} +EXPORT_SYMBOL_GPL(tcp_ca_get_key_by_name); + +char *tcp_ca_get_name_by_key(u32 key, char *buffer) +{ + const struct tcp_congestion_ops *ca; + char *ret = NULL; + + rcu_read_lock(); + ca = tcp_ca_find_key(key); + if (ca) + ret = strncpy(buffer, ca->name, + TCP_CA_NAME_MAX); + rcu_read_unlock(); + + return ret; +} +EXPORT_SYMBOL_GPL(tcp_ca_get_name_by_key); + /* Assign choice of congestion control. */ void tcp_assign_congestion_control(struct sock *sk) { @@ -107,6 +180,18 @@ void tcp_init_congestion_control(struct sock *sk) icsk->icsk_ca_ops->init(sk); } +static void tcp_reinit_congestion_control(struct sock *sk, + const struct tcp_congestion_ops *ca) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + + tcp_cleanup_congestion_control(sk); + icsk->icsk_ca_ops = ca; + + if (sk->sk_state != TCP_CLOSE && icsk->icsk_ca_ops->init) + icsk->icsk_ca_ops->init(sk); +} + /* Manage refcounts on socket close. */ void tcp_cleanup_congestion_control(struct sock *sk) { @@ -241,42 +326,26 @@ out: int tcp_set_congestion_control(struct sock *sk, const char *name) { struct inet_connection_sock *icsk = inet_csk(sk); - struct tcp_congestion_ops *ca; + const struct tcp_congestion_ops *ca; int err = 0; - rcu_read_lock(); - ca = tcp_ca_find(name); + if (icsk->icsk_ca_dst_locked) + return -EPERM; - /* no change asking for existing value */ + rcu_read_lock(); + ca = __tcp_ca_find_autoload(name); + /* No change asking for existing value */ if (ca == icsk->icsk_ca_ops) goto out; - -#ifdef CONFIG_MODULES - /* not found attempt to autoload module */ - if (!ca && capable(CAP_NET_ADMIN)) { - rcu_read_unlock(); - request_module("tcp_%s", name); - rcu_read_lock(); - ca = tcp_ca_find(name); - } -#endif if (!ca) err = -ENOENT; - else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))) err = -EPERM; - else if (!try_module_get(ca->owner)) err = -EBUSY; - - else { - tcp_cleanup_congestion_control(sk); - icsk->icsk_ca_ops = ca; - - if (sk->sk_state != TCP_CLOSE && icsk->icsk_ca_ops->init) - icsk->icsk_ca_ops->init(sk); - } + else + tcp_reinit_congestion_control(sk, ca); out: rcu_read_unlock(); return err; @@ -291,26 +360,32 @@ int tcp_set_congestion_control(struct sock *sk, const char *name) * ABC caps N to 2. 
Slow start exits when cwnd grows over ssthresh and * returns the leftover acks to adjust cwnd in congestion avoidance mode. */ -void tcp_slow_start(struct tcp_sock *tp, u32 acked) +u32 tcp_slow_start(struct tcp_sock *tp, u32 acked) { u32 cwnd = tp->snd_cwnd + acked; if (cwnd > tp->snd_ssthresh) cwnd = tp->snd_ssthresh + 1; + acked -= cwnd - tp->snd_cwnd; tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp); + + return acked; } EXPORT_SYMBOL_GPL(tcp_slow_start); -/* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd (or alternative w) */ -void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w) +/* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd (or alternative w), + * for every packet that was ACKed. + */ +void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked) { + tp->snd_cwnd_cnt += acked; if (tp->snd_cwnd_cnt >= w) { - if (tp->snd_cwnd < tp->snd_cwnd_clamp) - tp->snd_cwnd++; - tp->snd_cwnd_cnt = 0; - } else { - tp->snd_cwnd_cnt++; + u32 delta = tp->snd_cwnd_cnt / w; + + tp->snd_cwnd_cnt -= delta * w; + tp->snd_cwnd += delta; } + tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_cwnd_clamp); } EXPORT_SYMBOL_GPL(tcp_cong_avoid_ai); @@ -329,11 +404,13 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked) return; /* In "safe" area, increase. */ - if (tp->snd_cwnd <= tp->snd_ssthresh) - tcp_slow_start(tp, acked); + if (tp->snd_cwnd <= tp->snd_ssthresh) { + acked = tcp_slow_start(tp, acked); + if (!acked) + return; + } /* In dangerous area, increase slowly. */ - else - tcp_cong_avoid_ai(tp, tp->snd_cwnd); + tcp_cong_avoid_ai(tp, tp->snd_cwnd, acked); } EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid); diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 6b6002416a73..4b276d1ed980 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -93,9 +93,7 @@ struct bictcp { u32 epoch_start; /* beginning of an epoch */ u32 ack_cnt; /* number of acks */ u32 tcp_cwnd; /* estimated tcp cwnd */ -#define ACK_RATIO_SHIFT 4 -#define ACK_RATIO_LIMIT (32u << ACK_RATIO_SHIFT) - u16 delayed_ack; /* estimate the ratio of Packets/ACKs << 4 */ + u16 unused; u8 sample_cnt; /* number of samples to decide curr_rtt */ u8 found; /* the exit point is found? */ u32 round_start; /* beginning of each round */ @@ -114,7 +112,6 @@ static inline void bictcp_reset(struct bictcp *ca) ca->bic_K = 0; ca->delay_min = 0; ca->epoch_start = 0; - ca->delayed_ack = 2 << ACK_RATIO_SHIFT; ca->ack_cnt = 0; ca->tcp_cwnd = 0; ca->found = 0; @@ -205,23 +202,30 @@ static u32 cubic_root(u64 a) /* * Compute congestion window to use. */ -static inline void bictcp_update(struct bictcp *ca, u32 cwnd) +static inline void bictcp_update(struct bictcp *ca, u32 cwnd, u32 acked) { u32 delta, bic_target, max_cnt; u64 offs, t; - ca->ack_cnt++; /* count the number of ACKs */ + ca->ack_cnt += acked; /* count the number of ACKed packets */ if (ca->last_cwnd == cwnd && (s32)(tcp_time_stamp - ca->last_time) <= HZ / 32) return; + /* The CUBIC function can update ca->cnt at most once per jiffy. + * On all cwnd reduction events, ca->epoch_start is set to 0, + * which will force a recalculation of ca->cnt. 
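Because cong_avoid callbacks are now told exactly how many packets each ACK covered, CUBIC drops its delayed-ACK ratio estimate and tcp_cong_avoid_ai() (reworked in the tcp_cong.c hunk above) can grow cwnd in bulk. A worked example of the new accumulate-then-divide arithmetic, as a sketch:

/* sketch: w == snd_cwnd == 10 and a stretch ACK newly covers 25 packets */
tp->snd_cwnd_cnt += 25;			/* credits accumulate to 25 */
delta = tp->snd_cwnd_cnt / 10;		/* two full increments earned */
tp->snd_cwnd_cnt -= delta * 10;		/* 5 credits carried over */
tp->snd_cwnd += delta;			/* cwnd grows by 2 in a single call */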
+ */ + if (ca->epoch_start && tcp_time_stamp == ca->last_time) + goto tcp_friendliness; + ca->last_cwnd = cwnd; ca->last_time = tcp_time_stamp; if (ca->epoch_start == 0) { ca->epoch_start = tcp_time_stamp; /* record beginning */ - ca->ack_cnt = 1; /* start counting */ + ca->ack_cnt = acked; /* start counting */ ca->tcp_cwnd = cwnd; /* syn with cubic */ if (ca->last_max_cwnd <= cwnd) { @@ -283,6 +287,7 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) if (ca->last_max_cwnd == 0 && ca->cnt > 20) ca->cnt = 20; /* increase cwnd 5% per RTT */ +tcp_friendliness: /* TCP Friendly */ if (tcp_friendliness) { u32 scale = beta_scale; @@ -301,7 +306,6 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) } } - ca->cnt = (ca->cnt << ACK_RATIO_SHIFT) / ca->delayed_ack; if (ca->cnt == 0) /* cannot be zero */ ca->cnt = 1; } @@ -317,11 +321,12 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) if (tp->snd_cwnd <= tp->snd_ssthresh) { if (hystart && after(ack, ca->end_seq)) bictcp_hystart_reset(sk); - tcp_slow_start(tp, acked); - } else { - bictcp_update(ca, tp->snd_cwnd); - tcp_cong_avoid_ai(tp, ca->cnt); + acked = tcp_slow_start(tp, acked); + if (!acked) + return; } + bictcp_update(ca, tp->snd_cwnd, acked); + tcp_cong_avoid_ai(tp, ca->cnt, acked); } static u32 bictcp_recalc_ssthresh(struct sock *sk) @@ -411,20 +416,10 @@ static void hystart_update(struct sock *sk, u32 delay) */ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us) { - const struct inet_connection_sock *icsk = inet_csk(sk); const struct tcp_sock *tp = tcp_sk(sk); struct bictcp *ca = inet_csk_ca(sk); u32 delay; - if (icsk->icsk_ca_state == TCP_CA_Open) { - u32 ratio = ca->delayed_ack; - - ratio -= ca->delayed_ack >> ACK_RATIO_SHIFT; - ratio += cnt; - - ca->delayed_ack = clamp(ratio, 1U, ACK_RATIO_LIMIT); - } - /* Some calls are for duplicates without timetamps */ if (rtt_us < 0) return; diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 815c85e3b1e0..ea82fd492c1b 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -134,6 +134,7 @@ static bool tcp_fastopen_create_child(struct sock *sk, struct tcp_sock *tp; struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; struct sock *child; + u32 end_seq; req->num_retrans = 0; req->num_timeout = 0; @@ -185,20 +186,35 @@ static bool tcp_fastopen_create_child(struct sock *sk, /* Queue the data carried in the SYN packet. We need to first * bump skb's refcnt because the caller will attempt to free it. + * Note that IPv6 might also have used skb_get() trick + * in tcp_v6_conn_request() to keep this SYN around (treq->pktopts) + * So we need to eventually get a clone of the packet, + * before inserting it in sk_receive_queue. * * XXX (TFO) - we honor a zero-payload TFO request for now, * (any reason not to?) but no need to queue the skb since * there is no data. How about SYN+FIN? 
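The fast open change just below stops queueing the SYN-data skb directly when it may still be shared (IPv6 keeps an extra reference via skb_get() for treq->pktopts) and takes a private copy first. The rule it applies, as a sketch:

struct sk_buff *skb2 = unlikely(skb_shared(skb)) ? skb_clone(skb, GFP_ATOMIC)
						 : skb_get(skb);
if (skb2) {
	/* only a privately owned copy goes onto the child's receive queue */
	skb_dst_drop(skb2);
	__skb_pull(skb2, tcp_hdrlen(skb));
	skb_set_owner_r(skb2, child);
	__skb_queue_tail(&child->sk_receive_queue, skb2);
}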
*/ - if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1) { - skb = skb_get(skb); - skb_dst_drop(skb); - __skb_pull(skb, tcp_hdr(skb)->doff * 4); - skb_set_owner_r(skb, child); - __skb_queue_tail(&child->sk_receive_queue, skb); - tp->syn_data_acked = 1; + end_seq = TCP_SKB_CB(skb)->end_seq; + if (end_seq != TCP_SKB_CB(skb)->seq + 1) { + struct sk_buff *skb2; + + if (unlikely(skb_shared(skb))) + skb2 = skb_clone(skb, GFP_ATOMIC); + else + skb2 = skb_get(skb); + + if (likely(skb2)) { + skb_dst_drop(skb2); + __skb_pull(skb2, tcp_hdrlen(skb)); + skb_set_owner_r(skb2, child); + __skb_queue_tail(&child->sk_receive_queue, skb2); + tp->syn_data_acked = 1; + } else { + end_seq = TCP_SKB_CB(skb)->seq + 1; + } } - tcp_rsk(req)->rcv_nxt = tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; + tcp_rsk(req)->rcv_nxt = tp->rcv_nxt = end_seq; sk->sk_data_ready(sk); bh_unlock_sock(child); sock_put(child); @@ -255,6 +271,9 @@ bool tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, struct tcp_fastopen_cookie valid_foc = { .len = -1 }; bool syn_data = TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1; + if (foc->len == 0) /* Client requests a cookie */ + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENCOOKIEREQD); + if (!((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) && (syn_data || foc->len >= 0) && tcp_fastopen_queue_check(sk))) { @@ -265,7 +284,8 @@ bool tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, if (syn_data && (sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD)) goto fastopen; - if (tcp_fastopen_cookie_gen(req, skb, &valid_foc) && + if (foc->len >= 0 && /* Client presents or requests a cookie */ + tcp_fastopen_cookie_gen(req, skb, &valid_foc) && foc->len == TCP_FASTOPEN_COOKIE_SIZE && foc->len == valid_foc.len && !memcmp(foc->val, valid_foc.val, foc->len)) { @@ -284,11 +304,10 @@ fastopen: LINUX_MIB_TCPFASTOPENPASSIVE); return true; } - } + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVEFAIL); + } else if (foc->len > 0) /* Client presents an invalid cookie */ + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVEFAIL); - NET_INC_STATS_BH(sock_net(sk), foc->len ? - LINUX_MIB_TCPFASTOPENPASSIVEFAIL : - LINUX_MIB_TCPFASTOPENCOOKIEREQD); *foc = valid_foc; return false; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 075ab4d5af5e..8fdd27b17306 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -100,6 +100,7 @@ int sysctl_tcp_thin_dupack __read_mostly; int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; int sysctl_tcp_early_retrans __read_mostly = 3; +int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2; #define FLAG_DATA 0x01 /* Incoming frame contained data. */ #define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. 
*/ @@ -3183,8 +3184,10 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, tp->fackets_out -= min(pkts_acked, tp->fackets_out); - if (ca_ops->pkts_acked) - ca_ops->pkts_acked(sk, pkts_acked, ca_seq_rtt_us); + if (ca_ops->pkts_acked) { + long rtt_us = min_t(ulong, ca_seq_rtt_us, sack_rtt_us); + ca_ops->pkts_acked(sk, pkts_acked, rtt_us); + } } else if (skb && rtt_update && sack_rtt_us >= 0 && sack_rtt_us > skb_mstamp_us_delta(&now, &skb->skb_mstamp)) { @@ -3319,13 +3322,22 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32 } /* RFC 5961 7 [ACK Throttling] */ -static void tcp_send_challenge_ack(struct sock *sk) +static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb) { /* unprotected vars, we dont care of overwrites */ static u32 challenge_timestamp; static unsigned int challenge_count; - u32 now = jiffies / HZ; + struct tcp_sock *tp = tcp_sk(sk); + u32 now; + + /* First check our per-socket dupack rate limit. */ + if (tcp_oow_rate_limited(sock_net(sk), skb, + LINUX_MIB_TCPACKSKIPPEDCHALLENGE, + &tp->last_oow_ack_time)) + return; + /* Then check the check host-wide RFC 5961 rate limit. */ + now = jiffies / HZ; if (now != challenge_timestamp) { challenge_timestamp = now; challenge_count = 0; @@ -3358,34 +3370,34 @@ static void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq) } /* This routine deals with acks during a TLP episode. + * We mark the end of a TLP episode on receiving TLP dupack or when + * ack is after tlp_high_seq. * Ref: loss detection algorithm in draft-dukkipati-tcpm-tcp-loss-probe. */ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag) { struct tcp_sock *tp = tcp_sk(sk); - bool is_tlp_dupack = (ack == tp->tlp_high_seq) && - !(flag & (FLAG_SND_UNA_ADVANCED | - FLAG_NOT_DUP | FLAG_DATA_SACKED)); - /* Mark the end of TLP episode on receiving TLP dupack or when - * ack is after tlp_high_seq. - */ - if (is_tlp_dupack) { - tp->tlp_high_seq = 0; + if (before(ack, tp->tlp_high_seq)) return; - } - if (after(ack, tp->tlp_high_seq)) { + if (flag & FLAG_DSACKING_ACK) { + /* This DSACK means original and TLP probe arrived; no loss */ + tp->tlp_high_seq = 0; + } else if (after(ack, tp->tlp_high_seq)) { + /* ACK advances: there was a loss, so reduce cwnd. Reset + * tlp_high_seq in tcp_init_cwnd_reduction() + */ + tcp_init_cwnd_reduction(sk); + tcp_set_ca_state(sk, TCP_CA_CWR); + tcp_end_cwnd_reduction(sk); + tcp_try_keep_open(sk); + NET_INC_STATS_BH(sock_net(sk), + LINUX_MIB_TCPLOSSPROBERECOVERY); + } else if (!(flag & (FLAG_SND_UNA_ADVANCED | + FLAG_NOT_DUP | FLAG_DATA_SACKED))) { + /* Pure dupack: original and TLP probe arrived; no loss */ tp->tlp_high_seq = 0; - /* Don't reduce cwnd if DSACK arrives for TLP retrans. 
*/ - if (!(flag & FLAG_DSACKING_ACK)) { - tcp_init_cwnd_reduction(sk); - tcp_set_ca_state(sk, TCP_CA_CWR); - tcp_end_cwnd_reduction(sk); - tcp_try_keep_open(sk); - NET_INC_STATS_BH(sock_net(sk), - LINUX_MIB_TCPLOSSPROBERECOVERY); - } } } @@ -3421,7 +3433,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if (before(ack, prior_snd_una)) { /* RFC 5961 5.2 [Blind Data Injection Attack].[Mitigation] */ if (before(ack, prior_snd_una - tp->max_window)) { - tcp_send_challenge_ack(sk); + tcp_send_challenge_ack(sk, skb); return -1; } goto old_ack; @@ -4990,7 +5002,10 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, tcp_paws_discard(sk, skb)) { if (!th->rst) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); - tcp_send_dupack(sk, skb); + if (!tcp_oow_rate_limited(sock_net(sk), skb, + LINUX_MIB_TCPACKSKIPPEDPAWS, + &tp->last_oow_ack_time)) + tcp_send_dupack(sk, skb); goto discard; } /* Reset is accepted even if it did not pass PAWS. */ @@ -5007,7 +5022,10 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, if (!th->rst) { if (th->syn) goto syn_challenge; - tcp_send_dupack(sk, skb); + if (!tcp_oow_rate_limited(sock_net(sk), skb, + LINUX_MIB_TCPACKSKIPPEDSEQ, + &tp->last_oow_ack_time)) + tcp_send_dupack(sk, skb); } goto discard; } @@ -5023,7 +5041,7 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) tcp_reset(sk); else - tcp_send_challenge_ack(sk); + tcp_send_challenge_ack(sk, skb); goto discard; } @@ -5037,7 +5055,7 @@ syn_challenge: if (syn_inerr) TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNCHALLENGE); - tcp_send_challenge_ack(sk); + tcp_send_challenge_ack(sk, skb); goto discard; } @@ -5870,10 +5888,9 @@ static inline void pr_drop_req(struct request_sock *req, __u16 port, int family) * TCP ECN negotiation. * * Exception: tcp_ca wants ECN. This is required for DCTCP - * congestion control; it requires setting ECT on all packets, - * including SYN. We inverse the test in this case: If our - * local socket wants ECN, but peer only set ece/cwr (but not - * ECT in IP header) its probably a non-DCTCP aware sender. + * congestion control: Linux DCTCP asserts ECT on all packets, + * including SYN, which is most optimal solution; however, + * others, such as FreeBSD do not. 
*/ static void tcp_ecn_create_request(struct request_sock *req, const struct sk_buff *skb, @@ -5883,18 +5900,15 @@ static void tcp_ecn_create_request(struct request_sock *req, const struct tcphdr *th = tcp_hdr(skb); const struct net *net = sock_net(listen_sk); bool th_ecn = th->ece && th->cwr; - bool ect, need_ecn, ecn_ok; + bool ect, ecn_ok; if (!th_ecn) return; ect = !INET_ECN_is_not_ect(TCP_SKB_CB(skb)->ip_dsfield); - need_ecn = tcp_ca_needs_ecn(listen_sk); ecn_ok = net->ipv4.sysctl_tcp_ecn || dst_feature(dst, RTAX_FEATURE_ECN); - if (!ect && !need_ecn && ecn_ok) - inet_rsk(req)->ecn_ok = 1; - else if (ect && need_ecn) + if ((!ect && ecn_ok) || tcp_ca_needs_ecn(listen_sk)) inet_rsk(req)->ecn_ok = 1; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a3f72d7fc06c..5a2dfed4783b 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -683,7 +683,8 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) arg.bound_dev_if = sk->sk_bound_dev_if; arg.tos = ip_hdr(skb)->tos; - ip_send_unicast_reply(net, skb, &TCP_SKB_CB(skb)->header.h4.opt, + ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), + skb, &TCP_SKB_CB(skb)->header.h4.opt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len); @@ -767,7 +768,8 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, if (oif) arg.bound_dev_if = oif; arg.tos = tos; - ip_send_unicast_reply(net, skb, &TCP_SKB_CB(skb)->header.h4.opt, + ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), + skb, &TCP_SKB_CB(skb)->header.h4.opt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len); @@ -1340,6 +1342,8 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, } sk_setup_caps(newsk, dst); + tcp_ca_openreq_child(newsk, dst); + tcp_sync_mss(newsk, dst_mtu(dst)); newtp->advmss = dst_metric_advmss(dst); if (tcp_sk(sk)->rx_opt.user_mss && @@ -2428,14 +2432,40 @@ struct proto tcp_prot = { }; EXPORT_SYMBOL(tcp_prot); +static void __net_exit tcp_sk_exit(struct net *net) +{ + int cpu; + + for_each_possible_cpu(cpu) + inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu)); + free_percpu(net->ipv4.tcp_sk); +} + static int __net_init tcp_sk_init(struct net *net) { + int res, cpu; + + net->ipv4.tcp_sk = alloc_percpu(struct sock *); + if (!net->ipv4.tcp_sk) + return -ENOMEM; + + for_each_possible_cpu(cpu) { + struct sock *sk; + + res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW, + IPPROTO_TCP, net); + if (res) + goto fail; + *per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk; + } net->ipv4.sysctl_tcp_ecn = 2; + net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS; return 0; -} -static void __net_exit tcp_sk_exit(struct net *net) -{ +fail: + tcp_sk_exit(net); + + return res; } static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list) diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c index 272327134a1b..2379c1b4efb2 100644 --- a/net/ipv4/tcp_memcontrol.c +++ b/net/ipv4/tcp_memcontrol.c @@ -47,6 +47,10 @@ void tcp_destroy_cgroup(struct mem_cgroup *memcg) return; percpu_counter_destroy(&cg_proto->sockets_allocated); + + if (test_bit(MEMCG_SOCK_ACTIVATED, &cg_proto->flags)) + static_key_slow_dec(&memcg_socket_limit_enabled); + } EXPORT_SYMBOL(tcp_destroy_cgroup); @@ -120,7 +124,7 @@ static ssize_t tcp_cgroup_write(struct kernfs_open_file *of, switch (of_cft(of)->private) { case RES_LIMIT: /* see memcontrol.c */ - ret = page_counter_memparse(buf, &nr_pages); + ret = page_counter_memparse(buf, "-1", &nr_pages); if (ret) break; mutex_lock(&tcp_limit_mutex); diff --git 
a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index ed9c9a91851c..e5f41bd5ec1b 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -886,7 +886,8 @@ static int tcp_metrics_dump_info(struct sk_buff *skb, if (tcp_metrics_fill_info(skb, tm) < 0) goto nla_put_failure; - return genlmsg_end(skb, hdr); + genlmsg_end(skb, hdr); + return 0; nla_put_failure: genlmsg_cancel(skb, hdr); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 63d2680b65db..dd11ac7798c6 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -58,6 +58,25 @@ static bool tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win) return seq == e_win && seq == end_seq; } +static enum tcp_tw_status +tcp_timewait_check_oow_rate_limit(struct inet_timewait_sock *tw, + const struct sk_buff *skb, int mib_idx) +{ + struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); + + if (!tcp_oow_rate_limited(twsk_net(tw), skb, mib_idx, + &tcptw->tw_last_oow_ack_time)) { + /* Send ACK. Note, we do not put the bucket, + * it will be released by caller. + */ + return TCP_TW_ACK; + } + + /* We are rate-limiting, so just release the tw sock and drop skb. */ + inet_twsk_put(tw); + return TCP_TW_SUCCESS; +} + /* * * Main purpose of TIME-WAIT state is to close connection gracefully, * when one of ends sits in LAST-ACK or CLOSING retransmitting FIN @@ -116,7 +135,8 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq, tcptw->tw_rcv_nxt, tcptw->tw_rcv_nxt + tcptw->tw_rcv_wnd)) - return TCP_TW_ACK; + return tcp_timewait_check_oow_rate_limit( + tw, skb, LINUX_MIB_TCPACKSKIPPEDFINWAIT2); if (th->rst) goto kill; @@ -250,10 +270,8 @@ kill: inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN, TCP_TIMEWAIT_LEN); - /* Send ACK. Note, we do not put the bucket, - * it will be released by caller. - */ - return TCP_TW_ACK; + return tcp_timewait_check_oow_rate_limit( + tw, skb, LINUX_MIB_TCPACKSKIPPEDTIMEWAIT); } inet_twsk_put(tw); return TCP_TW_SUCCESS; @@ -289,6 +307,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) tcptw->tw_ts_recent = tp->rx_opt.ts_recent; tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp; tcptw->tw_ts_offset = tp->tsoffset; + tcptw->tw_last_oow_ack_time = 0; #if IS_ENABLED(CONFIG_IPV6) if (tw->tw_family == PF_INET6) { @@ -399,6 +418,32 @@ static void tcp_ecn_openreq_child(struct tcp_sock *tp, tp->ecn_flags = inet_rsk(req)->ecn_ok ? TCP_ECN_OK : 0; } +void tcp_ca_openreq_child(struct sock *sk, const struct dst_entry *dst) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + u32 ca_key = dst_metric(dst, RTAX_CC_ALGO); + bool ca_got_dst = false; + + if (ca_key != TCP_CA_UNSPEC) { + const struct tcp_congestion_ops *ca; + + rcu_read_lock(); + ca = tcp_ca_find_key(ca_key); + if (likely(ca && try_module_get(ca->owner))) { + icsk->icsk_ca_dst_locked = tcp_ca_dst_locked(dst); + icsk->icsk_ca_ops = ca; + ca_got_dst = true; + } + rcu_read_unlock(); + } + + if (!ca_got_dst && !try_module_get(icsk->icsk_ca_ops->owner)) + tcp_assign_congestion_control(sk); + + tcp_set_ca_state(sk, TCP_CA_Open); +} +EXPORT_SYMBOL_GPL(tcp_ca_openreq_child); + /* This is not only more efficient than what we used to do, it eliminates * a lot of code duplication between IPv4/IPv6 SYN recv processing. 
-DaveM * @@ -441,6 +486,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, tcp_enable_early_retrans(newtp); newtp->tlp_high_seq = 0; newtp->lsndtime = treq->snt_synack; + newtp->last_oow_ack_time = 0; newtp->total_retrans = req->num_retrans; /* So many TCP implementations out there (incorrectly) count the @@ -451,10 +497,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newtp->snd_cwnd = TCP_INIT_CWND; newtp->snd_cwnd_cnt = 0; - if (!try_module_get(newicsk->icsk_ca_ops->owner)) - tcp_assign_congestion_control(newsk); - - tcp_set_ca_state(newsk, TCP_CA_Open); tcp_init_xmit_timers(newsk); __skb_queue_head_init(&newtp->out_of_order_queue); newtp->write_seq = newtp->pushed_seq = treq->snt_isn + 1; @@ -583,7 +625,11 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, * Reset timer after retransmitting SYNACK, similar to * the idea of fast retransmit in recovery. */ - if (!inet_rtx_syn_ack(sk, req)) + if (!tcp_oow_rate_limited(sock_net(sk), skb, + LINUX_MIB_TCPACKSKIPPEDSYNRECV, + &tcp_rsk(req)->last_oow_ack_time) && + + !inet_rtx_syn_ack(sk, req)) req->expires = min(TCP_TIMEOUT_INIT << req->num_timeout, TCP_RTO_MAX) + jiffies; return NULL; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 7f18262e2326..a2a796c5536b 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -59,9 +59,6 @@ int sysctl_tcp_limit_output_bytes __read_mostly = 131072; */ int sysctl_tcp_tso_win_divisor __read_mostly = 3; -int sysctl_tcp_mtu_probing __read_mostly = 0; -int sysctl_tcp_base_mss __read_mostly = TCP_BASE_MSS; - /* By default, RFC2861 behavior. */ int sysctl_tcp_slow_start_after_idle __read_mostly = 1; @@ -948,7 +945,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, skb_orphan(skb); skb->sk = sk; - skb->destructor = tcp_wfree; + skb->destructor = skb_is_tcp_pure_ack(skb) ? sock_wfree : tcp_wfree; skb_set_hash_from_sk(skb, sk); atomic_add(skb->truesize, &sk->sk_wmem_alloc); @@ -1350,11 +1347,12 @@ void tcp_mtup_init(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); + struct net *net = sock_net(sk); - icsk->icsk_mtup.enabled = sysctl_tcp_mtu_probing > 1; + icsk->icsk_mtup.enabled = net->ipv4.sysctl_tcp_mtu_probing > 1; icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp + sizeof(struct tcphdr) + icsk->icsk_af_ops->net_header_len; - icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, sysctl_tcp_base_mss); + icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, net->ipv4.sysctl_tcp_base_mss); icsk->icsk_mtup.probe_size = 0; } EXPORT_SYMBOL(tcp_mtup_init); @@ -2019,7 +2017,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now))) break; - if (tso_segs == 1) { + if (tso_segs == 1 || !max_segs) { if (unlikely(!tcp_nagle_test(tp, skb, mss_now, (tcp_skb_is_last(sk, skb) ? 
nonagle : TCP_NAGLE_PUSH)))) @@ -2032,7 +2030,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, } limit = mss_now; - if (tso_segs > 1 && !tcp_urg_mode(tp)) + if (tso_segs > 1 && max_segs && !tcp_urg_mode(tp)) limit = tcp_mss_split_point(sk, skb, mss_now, min_t(unsigned int, cwnd_quota, @@ -2939,6 +2937,25 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, } EXPORT_SYMBOL(tcp_make_synack); +static void tcp_ca_dst_init(struct sock *sk, const struct dst_entry *dst) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + const struct tcp_congestion_ops *ca; + u32 ca_key = dst_metric(dst, RTAX_CC_ALGO); + + if (ca_key == TCP_CA_UNSPEC) + return; + + rcu_read_lock(); + ca = tcp_ca_find_key(ca_key); + if (likely(ca && try_module_get(ca->owner))) { + module_put(icsk->icsk_ca_ops->owner); + icsk->icsk_ca_dst_locked = tcp_ca_dst_locked(dst); + icsk->icsk_ca_ops = ca; + } + rcu_read_unlock(); +} + /* Do all connect socket setups that can be done AF independent. */ static void tcp_connect_init(struct sock *sk) { @@ -2964,6 +2981,8 @@ static void tcp_connect_init(struct sock *sk) tcp_mtup_init(sk); tcp_sync_mss(sk, dst_mtu(dst)); + tcp_ca_dst_init(sk, dst); + if (!tp->window_clamp) tp->window_clamp = dst_metric(dst, RTAX_WINDOW); tp->advmss = dst_metric_advmss(dst); @@ -3034,7 +3053,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) { struct tcp_sock *tp = tcp_sk(sk); struct tcp_fastopen_request *fo = tp->fastopen_req; - int syn_loss = 0, space, err = 0; + int syn_loss = 0, space, err = 0, copied; unsigned long last_syn_loss = 0; struct sk_buff *syn_data; @@ -3072,11 +3091,16 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) goto fallback; syn_data->ip_summed = CHECKSUM_PARTIAL; memcpy(syn_data->cb, syn->cb, sizeof(syn->cb)); - if (unlikely(memcpy_fromiovecend(skb_put(syn_data, space), - fo->data->msg_iter.iov, 0, space))) { + copied = copy_from_iter(skb_put(syn_data, space), space, + &fo->data->msg_iter); + if (unlikely(!copied)) { kfree_skb(syn_data); goto fallback; } + if (copied != space) { + skb_trim(syn_data, copied); + space = copied; + } /* No more data pending in inet_wait_for_connect() */ if (space == fo->size) @@ -3244,6 +3268,14 @@ void tcp_send_ack(struct sock *sk) skb_reserve(buff, MAX_TCP_HEADER); tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPHDR_ACK); + /* We do not want pure acks influencing TCP Small Queues or fq/pacing + * too much. + * SKB_TRUESIZE(max(1 .. 66, MAX_TCP_HEADER)) is unfortunately ~784 + * We also avoid tcp_wfree() overhead (cache line miss accessing + * tp->tsq_flags) by using regular sock_wfree() + */ + skb_set_tcp_pure_ack(buff); + /* Send it off, this clears delayed acks for us. 
*/ skb_mstamp_get(&buff->skb_mstamp); tcp_transmit_skb(sk, buff, 0, sk_gfp_atomic(sk, GFP_ATOMIC)); diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c index 6824afb65d93..333bcb2415ff 100644 --- a/net/ipv4/tcp_scalable.c +++ b/net/ipv4/tcp_scalable.c @@ -25,7 +25,8 @@ static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked) if (tp->snd_cwnd <= tp->snd_ssthresh) tcp_slow_start(tp, acked); else - tcp_cong_avoid_ai(tp, min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT)); + tcp_cong_avoid_ai(tp, min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT), + 1); } static u32 tcp_scalable_ssthresh(struct sock *sk) diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 1829c7fbc77e..0732b787904e 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -101,17 +101,20 @@ static int tcp_orphan_retries(struct sock *sk, int alive) static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk) { + struct net *net = sock_net(sk); + /* Black hole detection */ - if (sysctl_tcp_mtu_probing) { + if (net->ipv4.sysctl_tcp_mtu_probing) { if (!icsk->icsk_mtup.enabled) { icsk->icsk_mtup.enabled = 1; tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); } else { + struct net *net = sock_net(sk); struct tcp_sock *tp = tcp_sk(sk); int mss; mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1; - mss = min(sysctl_tcp_base_mss, mss); + mss = min(net->ipv4.sysctl_tcp_base_mss, mss); mss = max(mss, 68 - tp->tcp_header_len); icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss); tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c index a4d2d2d88dca..112151eeee45 100644 --- a/net/ipv4/tcp_veno.c +++ b/net/ipv4/tcp_veno.c @@ -159,7 +159,7 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked) /* In the "non-congestive state", increase cwnd * every rtt. */ - tcp_cong_avoid_ai(tp, tp->snd_cwnd); + tcp_cong_avoid_ai(tp, tp->snd_cwnd, 1); } else { /* In the "congestive state", increase cwnd * every other rtt. diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c index cd7273218598..17d35662930d 100644 --- a/net/ipv4/tcp_yeah.c +++ b/net/ipv4/tcp_yeah.c @@ -92,7 +92,7 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked) } else { /* Reno */ - tcp_cong_avoid_ai(tp, tp->snd_cwnd); + tcp_cong_avoid_ai(tp, tp->snd_cwnd, 1); } /* The key players are v_vegas.beg_snd_una and v_beg_snd_nxt. 
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 13b4dcf86ef6..97ef1f8b7be8 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1329,7 +1329,7 @@ try_again: *addr_len = sizeof(*sin); } if (inet->cmsg_flags) - ip_cmsg_recv(msg, skb); + ip_cmsg_recv_offset(msg, skb, sizeof(struct udphdr)); err = copied; if (flags & MSG_TRUNC) @@ -1806,7 +1806,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, if (sk != NULL) { int ret; - if (udp_sk(sk)->convert_csum && uh->check && !IS_UDPLITE(sk)) + if (inet_get_convert_csum(sk) && uh->check && !IS_UDPLITE(sk)) skb_checksum_try_convert(skb, IPPROTO_UDP, uh->check, inet_compute_pseudo); diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index 7927db0a9279..4a000f1dd757 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -99,11 +99,13 @@ static void udp_dump(struct udp_table *table, struct sk_buff *skb, struct netlin s_slot = cb->args[0]; num = s_num = cb->args[1]; - for (slot = s_slot; slot <= table->mask; num = s_num = 0, slot++) { + for (slot = s_slot; slot <= table->mask; s_num = 0, slot++) { struct sock *sk; struct hlist_nulls_node *node; struct udp_hslot *hslot = &table->hash[slot]; + num = 0; + if (hlist_nulls_empty(&hslot->head)) continue; diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index d3e537ef6b7f..4915d8284a86 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -339,7 +339,8 @@ unflush: skb_gro_pull(skb, sizeof(struct udphdr)); /* pull encapsulating udp header */ skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr)); NAPI_GRO_CB(skb)->proto = uo_priv->offload->ipproto; - pp = uo_priv->offload->callbacks.gro_receive(head, skb); + pp = uo_priv->offload->callbacks.gro_receive(head, skb, + uo_priv->offload); out_unlock: rcu_read_unlock(); @@ -395,10 +396,19 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff) if (uo_priv != NULL) { NAPI_GRO_CB(skb)->proto = uo_priv->offload->ipproto; - err = uo_priv->offload->callbacks.gro_complete(skb, nhoff + sizeof(struct udphdr)); + err = uo_priv->offload->callbacks.gro_complete(skb, + nhoff + sizeof(struct udphdr), + uo_priv->offload); } rcu_read_unlock(); + + if (skb->remcsum_offload) + skb_shinfo(skb)->gso_type |= SKB_GSO_TUNNEL_REMCSUM; + + skb->encapsulation = 1; + skb_set_inner_mac_header(skb, nhoff + sizeof(struct udphdr)); + return err; } @@ -407,9 +417,13 @@ static int udp4_gro_complete(struct sk_buff *skb, int nhoff) const struct iphdr *iph = ip_hdr(skb); struct udphdr *uh = (struct udphdr *)(skb->data + nhoff); - if (uh->check) + if (uh->check) { + skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM; uh->check = ~udp_v4_check(skb->len - nhoff, iph->saddr, iph->daddr, 0); + } else { + skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL; + } return udp_gro_complete(skb, nhoff); } diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c index 1671263e5fa0..c83b35485056 100644 --- a/net/ipv4/udp_tunnel.c +++ b/net/ipv4/udp_tunnel.c @@ -63,7 +63,7 @@ void setup_udp_tunnel_sock(struct net *net, struct socket *sock, inet_sk(sk)->mc_loop = 0; /* Enable CHECKSUM_UNNECESSARY to CHECKSUM_COMPLETE conversion */ - udp_set_convert_csum(sk, true); + inet_inc_convert_csum(sk); rcu_assign_sk_user_data(sk, cfg->sk_user_data); @@ -75,10 +75,10 @@ void setup_udp_tunnel_sock(struct net *net, struct socket *sock, } EXPORT_SYMBOL_GPL(setup_udp_tunnel_sock); -int udp_tunnel_xmit_skb(struct socket *sock, struct rtable *rt, - struct sk_buff *skb, __be32 src, __be32 dst, - __u8 tos, __u8 ttl, __be16 df, __be16 src_port, - __be16 dst_port, bool xnet) +int 
udp_tunnel_xmit_skb(struct rtable *rt, struct sk_buff *skb, + __be32 src, __be32 dst, __u8 tos, __u8 ttl, + __be16 df, __be16 src_port, __be16 dst_port, + bool xnet, bool nocheck) { struct udphdr *uh; @@ -90,9 +90,9 @@ int udp_tunnel_xmit_skb(struct socket *sock, struct rtable *rt, uh->source = src_port; uh->len = htons(skb->len); - udp_set_csum(sock->sk->sk_no_check_tx, skb, src, dst, skb->len); + udp_set_csum(nocheck, skb, src, dst, skb->len); - return iptunnel_xmit(sock->sk, rt, skb, src, dst, IPPROTO_UDP, + return iptunnel_xmit(skb->sk, rt, skb, src, dst, IPPROTO_UDP, tos, ttl, df, xnet); } EXPORT_SYMBOL_GPL(udp_tunnel_xmit_skb); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index f7c8bbeb27b7..98e4a63d72bb 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -201,6 +201,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .disable_ipv6 = 0, .accept_dad = 1, .suppress_frag_ndisc = 1, + .accept_ra_mtu = 1, }; static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { @@ -238,6 +239,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .disable_ipv6 = 0, .accept_dad = 1, .suppress_frag_ndisc = 1, + .accept_ra_mtu = 1, }; /* Check if a valid qdisc is available */ @@ -489,7 +491,8 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex, nla_put_s32(skb, NETCONFA_PROXY_NEIGH, devconf->proxy_ndp) < 0) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); @@ -619,7 +622,7 @@ static int inet6_netconf_dump_devconf(struct sk_buff *skb, cb->nlh->nlmsg_seq, RTM_NEWNETCONF, NLM_F_MULTI, - -1) <= 0) { + -1) < 0) { rcu_read_unlock(); goto done; } @@ -635,7 +638,7 @@ cont: NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNETCONF, NLM_F_MULTI, - -1) <= 0) + -1) < 0) goto done; else h++; @@ -646,7 +649,7 @@ cont: NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNETCONF, NLM_F_MULTI, - -1) <= 0) + -1) < 0) goto done; else h++; @@ -1519,15 +1522,30 @@ static int ipv6_count_addresses(struct inet6_dev *idev) int ipv6_chk_addr(struct net *net, const struct in6_addr *addr, const struct net_device *dev, int strict) { + return ipv6_chk_addr_and_flags(net, addr, dev, strict, IFA_F_TENTATIVE); +} +EXPORT_SYMBOL(ipv6_chk_addr); + +int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr, + const struct net_device *dev, int strict, + u32 banned_flags) +{ struct inet6_ifaddr *ifp; unsigned int hash = inet6_addr_hash(addr); + u32 ifp_flags; rcu_read_lock_bh(); hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) { if (!net_eq(dev_net(ifp->idev->dev), net)) continue; + /* Decouple optimistic from tentative for evaluation here. + * Ban optimistic addresses explicitly, when required. + */ + ifp_flags = (ifp->flags&IFA_F_OPTIMISTIC) + ? 
(ifp->flags&~IFA_F_TENTATIVE) + : ifp->flags; if (ipv6_addr_equal(&ifp->addr, addr) && - !(ifp->flags&IFA_F_TENTATIVE) && + !(ifp_flags&banned_flags) && (dev == NULL || ifp->idev->dev == dev || !(ifp->scope&(IFA_LINK|IFA_HOST) || strict))) { rcu_read_unlock_bh(); @@ -1538,7 +1556,7 @@ int ipv6_chk_addr(struct net *net, const struct in6_addr *addr, rcu_read_unlock_bh(); return 0; } -EXPORT_SYMBOL(ipv6_chk_addr); +EXPORT_SYMBOL(ipv6_chk_addr_and_flags); static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, struct net_device *dev) @@ -4047,7 +4065,8 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, if (nla_put_u32(skb, IFA_FLAGS, ifa->flags) < 0) goto error; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; error: nlmsg_cancel(skb, nlh); @@ -4076,7 +4095,8 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca, return -EMSGSIZE; } - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; } static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca, @@ -4101,7 +4121,8 @@ static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca, return -EMSGSIZE; } - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; } enum addr_type_t { @@ -4134,7 +4155,7 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb, cb->nlh->nlmsg_seq, RTM_NEWADDR, NLM_F_MULTI); - if (err <= 0) + if (err < 0) break; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); } @@ -4151,7 +4172,7 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb, cb->nlh->nlmsg_seq, RTM_GETMULTICAST, NLM_F_MULTI); - if (err <= 0) + if (err < 0) break; } break; @@ -4166,7 +4187,7 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb, cb->nlh->nlmsg_seq, RTM_GETANYCAST, NLM_F_MULTI); - if (err <= 0) + if (err < 0) break; } break; @@ -4209,7 +4230,7 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, goto cont; if (in6_dump_addrs(idev, skb, cb, type, - s_ip_idx, &ip_idx) <= 0) + s_ip_idx, &ip_idx) < 0) goto done; cont: idx++; @@ -4376,6 +4397,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_NDISC_NOTIFY] = cnf->ndisc_notify; array[DEVCONF_SUPPRESS_FRAG_NDISC] = cnf->suppress_frag_ndisc; array[DEVCONF_ACCEPT_RA_FROM_LOCAL] = cnf->accept_ra_from_local; + array[DEVCONF_ACCEPT_RA_MTU] = cnf->accept_ra_mtu; } static inline size_t inet6_ifla6_size(void) @@ -4572,6 +4594,22 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token) return 0; } +static const struct nla_policy inet6_af_policy[IFLA_INET6_MAX + 1] = { + [IFLA_INET6_ADDR_GEN_MODE] = { .type = NLA_U8 }, + [IFLA_INET6_TOKEN] = { .len = sizeof(struct in6_addr) }, +}; + +static int inet6_validate_link_af(const struct net_device *dev, + const struct nlattr *nla) +{ + struct nlattr *tb[IFLA_INET6_MAX + 1]; + + if (dev && !__in6_dev_get(dev)) + return -EAFNOSUPPORT; + + return nla_parse_nested(tb, IFLA_INET6_MAX, nla, inet6_af_policy); +} + static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla) { int err = -EINVAL; @@ -4638,7 +4676,8 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, goto nla_put_failure; nla_nest_end(skb, protoinfo); - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); @@ -4670,7 +4709,7 @@ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) if (inet6_fill_ifinfo(skb, idev, 
NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, - RTM_NEWLINK, NLM_F_MULTI) <= 0) + RTM_NEWLINK, NLM_F_MULTI) < 0) goto out; cont: idx++; @@ -4747,7 +4786,8 @@ static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev, ci.valid_time = ntohl(pinfo->valid); if (nla_put(skb, PREFIX_CACHEINFO, sizeof(ci), &ci)) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); @@ -5253,6 +5293,13 @@ static struct addrconf_sysctl_table .proc_handler = proc_dointvec, }, { + .procname = "accept_ra_mtu", + .data = &ipv6_devconf.accept_ra_mtu, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { /* sentinel */ } }, @@ -5389,10 +5436,11 @@ static struct pernet_operations addrconf_ops = { .exit = addrconf_exit_net, }; -static struct rtnl_af_ops inet6_ops = { +static struct rtnl_af_ops inet6_ops __read_mostly = { .family = AF_INET6, .fill_link_af = inet6_fill_link_af, .get_link_af_size = inet6_get_link_af_size, + .validate_link_af = inet6_validate_link_af, .set_link_af = inet6_set_link_af, }; diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index fd0dc47f471d..e43e79d0a612 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -490,7 +490,8 @@ static int ip6addrlbl_fill(struct sk_buff *skb, return -EMSGSIZE; } - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; } static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb) @@ -510,7 +511,7 @@ static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb) cb->nlh->nlmsg_seq, RTM_NEWADDRLABEL, NLM_F_MULTI); - if (err <= 0) + if (err < 0) break; } idx++; diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 100c589a2a6c..c215be70cac0 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -369,7 +369,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) serr = SKB_EXT_ERR(skb); - if (sin) { + if (sin && skb->len) { const unsigned char *nh = skb_network_header(skb); sin->sin6_family = AF_INET6; sin->sin6_flowinfo = 0; @@ -393,11 +393,9 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err)); sin = &errhdr.offender; - sin->sin6_family = AF_UNSPEC; - if (serr->ee.ee_origin != SO_EE_ORIGIN_LOCAL) { + memset(sin, 0, sizeof(*sin)); + if (serr->ee.ee_origin != SO_EE_ORIGIN_LOCAL && skb->len) { sin->sin6_family = AF_INET6; - sin->sin6_flowinfo = 0; - sin->sin6_port = 0; if (np->rxopt.all) { if (serr->ee.ee_origin != SO_EE_ORIGIN_ICMP && serr->ee.ee_origin != SO_EE_ORIGIN_ICMP6) @@ -412,12 +410,9 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) ipv6_iface_scope_id(&sin->sin6_addr, IP6CB(skb)->iif); } else { - struct inet_sock *inet = inet_sk(sk); - ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &sin->sin6_addr); - sin->sin6_scope_id = 0; - if (inet->cmsg_flags) + if (inet_sk(sk)->cmsg_flags) ip_cmsg_recv(msg, skb); } } diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index d674152b6ede..a5e95199585e 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -427,7 +427,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) * Dest addr check */ - if ((addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST)) { + if (addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST) { if (type != ICMPV6_PKT_TOOBIG && !(type == ICMPV6_PARAMPROB && code == ICMPV6_UNK_OPTION && diff --git a/net/ipv6/ip6_fib.c 
b/net/ipv6/ip6_fib.c index b2d1838897c9..263ef4143bff 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -277,7 +277,6 @@ static int fib6_dump_node(struct fib6_walker *w) w->leaf = rt; return 1; } - WARN_ON(res == 0); } w->leaf = NULL; return 0; @@ -630,33 +629,59 @@ static bool rt6_qualify_for_ecmp(struct rt6_info *rt) RTF_GATEWAY; } -static int fib6_commit_metrics(struct dst_entry *dst, - struct nlattr *mx, int mx_len) +static void fib6_copy_metrics(u32 *mp, const struct mx6_config *mxc) { - struct nlattr *nla; - int remaining; - u32 *mp; + int i; + + for (i = 0; i < RTAX_MAX; i++) { + if (test_bit(i, mxc->mx_valid)) + mp[i] = mxc->mx[i]; + } +} + +static int fib6_commit_metrics(struct dst_entry *dst, struct mx6_config *mxc) +{ + if (!mxc->mx) + return 0; if (dst->flags & DST_HOST) { - mp = dst_metrics_write_ptr(dst); - } else { - mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_ATOMIC); - if (!mp) + u32 *mp = dst_metrics_write_ptr(dst); + + if (unlikely(!mp)) return -ENOMEM; - dst_init_metrics(dst, mp, 0); - } - nla_for_each_attr(nla, mx, mx_len, remaining) { - int type = nla_type(nla); + fib6_copy_metrics(mp, mxc); + } else { + dst_init_metrics(dst, mxc->mx, false); - if (type) { - if (type > RTAX_MAX) - return -EINVAL; + /* We've stolen mx now. */ + mxc->mx = NULL; + } - mp[type - 1] = nla_get_u32(nla); + return 0; +} + +static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn, + struct net *net) +{ + if (atomic_read(&rt->rt6i_ref) != 1) { + /* This route is used as dummy address holder in some split + * nodes. It is not leaked, but it still holds other resources, + * which must be released in time. So, scan ascendant nodes + * and replace dummy references to this route with references + * to still alive ones. + */ + while (fn) { + if (!(fn->fn_flags & RTN_RTINFO) && fn->leaf == rt) { + fn->leaf = fib6_find_prefix(net, fn); + atomic_inc(&fn->leaf->rt6i_ref); + rt6_release(rt); + } + fn = fn->parent; } + /* No more references are possible at this point. 
*/ + BUG_ON(atomic_read(&rt->rt6i_ref) != 1); } - return 0; } /* @@ -664,7 +689,7 @@ static int fib6_commit_metrics(struct dst_entry *dst, */ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, - struct nl_info *info, struct nlattr *mx, int mx_len) + struct nl_info *info, struct mx6_config *mxc) { struct rt6_info *iter = NULL; struct rt6_info **ins; @@ -773,11 +798,10 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, pr_warn("NLM_F_CREATE should be set when creating new route\n"); add: - if (mx) { - err = fib6_commit_metrics(&rt->dst, mx, mx_len); - if (err) - return err; - } + err = fib6_commit_metrics(&rt->dst, mxc); + if (err) + return err; + rt->dst.rt6_next = iter; *ins = rt; rt->rt6i_node = fn; @@ -797,21 +821,22 @@ add: pr_warn("NLM_F_REPLACE set, but no existing node found!\n"); return -ENOENT; } - if (mx) { - err = fib6_commit_metrics(&rt->dst, mx, mx_len); - if (err) - return err; - } + + err = fib6_commit_metrics(&rt->dst, mxc); + if (err) + return err; + *ins = rt; rt->rt6i_node = fn; rt->dst.rt6_next = iter->dst.rt6_next; atomic_inc(&rt->rt6i_ref); inet6_rt_notify(RTM_NEWROUTE, rt, info); - rt6_release(iter); if (!(fn->fn_flags & RTN_RTINFO)) { info->nl_net->ipv6.rt6_stats->fib_route_nodes++; fn->fn_flags |= RTN_RTINFO; } + fib6_purge_rt(iter, fn, info->nl_net); + rt6_release(iter); } return 0; @@ -838,8 +863,8 @@ void fib6_force_start_gc(struct net *net) * with source addr info in sub-trees */ -int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info, - struct nlattr *mx, int mx_len) +int fib6_add(struct fib6_node *root, struct rt6_info *rt, + struct nl_info *info, struct mx6_config *mxc) { struct fib6_node *fn, *pn = NULL; int err = -ENOMEM; @@ -934,7 +959,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info, } #endif - err = fib6_add_rt2node(fn, rt, info, mx, mx_len); + err = fib6_add_rt2node(fn, rt, info, mxc); if (!err) { fib6_start_gc(info->nl_net, rt); if (!(rt->rt6i_flags & RTF_CACHE)) @@ -1322,24 +1347,7 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp, fn = fib6_repair_tree(net, fn); } - if (atomic_read(&rt->rt6i_ref) != 1) { - /* This route is used as dummy address holder in some split - * nodes. It is not leaked, but it still holds other resources, - * which must be released in time. So, scan ascendant nodes - * and replace dummy references to this route with references - * to still alive ones. - */ - while (fn) { - if (!(fn->fn_flags & RTN_RTINFO) && fn->leaf == rt) { - fn->leaf = fib6_find_prefix(net, fn); - atomic_inc(&fn->leaf->rt6i_ref); - rt6_release(rt); - } - fn = fn->parent; - } - /* No more references are possible at this point. 
*/ - BUG_ON(atomic_read(&rt->rt6i_ref) != 1); - } + fib6_purge_rt(rt, fn, net); inet6_rt_notify(RTM_DELROUTE, rt, info); rt6_release(rt); diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 2f780cba6e12..f45d6db50a45 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -172,7 +172,7 @@ static void __net_exit ip6_fl_purge(struct net *net) { int i; - spin_lock(&ip6_fl_lock); + spin_lock_bh(&ip6_fl_lock); for (i = 0; i <= FL_HASH_MASK; i++) { struct ip6_flowlabel *fl; struct ip6_flowlabel __rcu **flp; @@ -190,7 +190,7 @@ static void __net_exit ip6_fl_purge(struct net *net) flp = &fl->next; } } - spin_unlock(&ip6_fl_lock); + spin_unlock_bh(&ip6_fl_lock); } static struct ip6_flowlabel *fl_intern(struct net *net, diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 13cda4c6313b..bc28b7d42a6d 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -417,7 +417,7 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (code == ICMPV6_HDR_FIELD) teli = ip6_tnl_parse_tlv_enc_lim(skb, skb->data); - if (teli && teli == info - 2) { + if (teli && teli == be32_to_cpu(info) - 2) { tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli]; if (tel->encap_limit == 0) { net_warn_ratelimited("%s: Too small encapsulation limit or routing loop in tunnel!\n", @@ -429,7 +429,7 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } break; case ICMPV6_PKT_TOOBIG: - mtu = info - offset; + mtu = be32_to_cpu(info) - offset; if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; t->dev->mtu = mtu; @@ -1662,6 +1662,7 @@ static struct rtnl_link_ops ip6gre_link_ops __read_mostly = { .dellink = ip6gre_dellink, .get_size = ip6gre_get_size, .fill_info = ip6gre_fill_info, + .get_link_net = ip6_tnl_get_link_net, }; static struct rtnl_link_ops ip6gre_tap_ops __read_mostly = { @@ -1675,6 +1676,7 @@ static struct rtnl_link_ops ip6gre_tap_ops __read_mostly = { .changelink = ip6gre_changelink, .get_size = ip6gre_get_size, .fill_info = ip6gre_fill_info, + .get_link_net = ip6_tnl_get_link_net, }; /* diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index ce69a12ae48c..7deebf102cba 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -537,20 +537,6 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from) skb_copy_secmark(to, from); } -static void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt) -{ - static u32 ip6_idents_hashrnd __read_mostly; - u32 hash, id; - - net_get_random_once(&ip6_idents_hashrnd, sizeof(ip6_idents_hashrnd)); - - hash = __ipv6_addr_jhash(&rt->rt6i_dst.addr, ip6_idents_hashrnd); - hash = __ipv6_addr_jhash(&rt->rt6i_src.addr, hash); - - id = ip_idents_reserve(hash, 1); - fhdr->identification = htonl(id); -} - int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) { struct sk_buff *frag; @@ -1041,6 +1027,7 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow); static inline int ip6_ufo_append_data(struct sock *sk, + struct sk_buff_head *queue, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, int length, int hh_len, int fragheaderlen, @@ -1056,7 +1043,7 @@ static inline int ip6_ufo_append_data(struct sock *sk, * device, so create one single skb packet containing complete * udp datagram */ - skb = skb_peek_tail(&sk->sk_write_queue); + skb = skb_peek_tail(queue); if (skb == NULL) { skb = sock_alloc_send_skb(sk, hh_len + fragheaderlen + transhdrlen + 20, @@ 
-1079,7 +1066,7 @@ static inline int ip6_ufo_append_data(struct sock *sk, skb->protocol = htons(ETH_P_IPV6); skb->csum = 0; - __skb_queue_tail(&sk->sk_write_queue, skb); + __skb_queue_tail(queue, skb); } else if (skb_is_gso(skb)) { goto append; } @@ -1135,99 +1122,106 @@ static void ip6_append_data_mtu(unsigned int *mtu, } } -int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, - int offset, int len, int odd, struct sk_buff *skb), - void *from, int length, int transhdrlen, - int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6, - struct rt6_info *rt, unsigned int flags, int dontfrag) +static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, + struct inet6_cork *v6_cork, + int hlimit, int tclass, struct ipv6_txoptions *opt, + struct rt6_info *rt, struct flowi6 *fl6) { - struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); - struct inet_cork *cork; + unsigned int mtu; + + /* + * setup for corking + */ + if (opt) { + if (WARN_ON(v6_cork->opt)) + return -EINVAL; + + v6_cork->opt = kzalloc(opt->tot_len, sk->sk_allocation); + if (unlikely(v6_cork->opt == NULL)) + return -ENOBUFS; + + v6_cork->opt->tot_len = opt->tot_len; + v6_cork->opt->opt_flen = opt->opt_flen; + v6_cork->opt->opt_nflen = opt->opt_nflen; + + v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt, + sk->sk_allocation); + if (opt->dst0opt && !v6_cork->opt->dst0opt) + return -ENOBUFS; + + v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt, + sk->sk_allocation); + if (opt->dst1opt && !v6_cork->opt->dst1opt) + return -ENOBUFS; + + v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt, + sk->sk_allocation); + if (opt->hopopt && !v6_cork->opt->hopopt) + return -ENOBUFS; + + v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt, + sk->sk_allocation); + if (opt->srcrt && !v6_cork->opt->srcrt) + return -ENOBUFS; + + /* need source address above miyazawa*/ + } + dst_hold(&rt->dst); + cork->base.dst = &rt->dst; + cork->fl.u.ip6 = *fl6; + v6_cork->hop_limit = hlimit; + v6_cork->tclass = tclass; + if (rt->dst.flags & DST_XFRM_TUNNEL) + mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? + rt->dst.dev->mtu : dst_mtu(&rt->dst); + else + mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? 
+ rt->dst.dev->mtu : dst_mtu(rt->dst.path); + if (np->frag_size < mtu) { + if (np->frag_size) + mtu = np->frag_size; + } + cork->base.fragsize = mtu; + if (dst_allfrag(rt->dst.path)) + cork->base.flags |= IPCORK_ALLFRAG; + cork->base.length = 0; + + return 0; +} + +static int __ip6_append_data(struct sock *sk, + struct flowi6 *fl6, + struct sk_buff_head *queue, + struct inet_cork *cork, + struct inet6_cork *v6_cork, + struct page_frag *pfrag, + int getfrag(void *from, char *to, int offset, + int len, int odd, struct sk_buff *skb), + void *from, int length, int transhdrlen, + unsigned int flags, int dontfrag) +{ struct sk_buff *skb, *skb_prev = NULL; unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu; - int exthdrlen; - int dst_exthdrlen; + int exthdrlen = 0; + int dst_exthdrlen = 0; int hh_len; int copy; int err; int offset = 0; __u8 tx_flags = 0; u32 tskey = 0; + struct rt6_info *rt = (struct rt6_info *)cork->dst; + struct ipv6_txoptions *opt = v6_cork->opt; + int csummode = CHECKSUM_NONE; - if (flags&MSG_PROBE) - return 0; - cork = &inet->cork.base; - if (skb_queue_empty(&sk->sk_write_queue)) { - /* - * setup for corking - */ - if (opt) { - if (WARN_ON(np->cork.opt)) - return -EINVAL; - - np->cork.opt = kzalloc(opt->tot_len, sk->sk_allocation); - if (unlikely(np->cork.opt == NULL)) - return -ENOBUFS; - - np->cork.opt->tot_len = opt->tot_len; - np->cork.opt->opt_flen = opt->opt_flen; - np->cork.opt->opt_nflen = opt->opt_nflen; - - np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt, - sk->sk_allocation); - if (opt->dst0opt && !np->cork.opt->dst0opt) - return -ENOBUFS; - - np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt, - sk->sk_allocation); - if (opt->dst1opt && !np->cork.opt->dst1opt) - return -ENOBUFS; - - np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt, - sk->sk_allocation); - if (opt->hopopt && !np->cork.opt->hopopt) - return -ENOBUFS; - - np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt, - sk->sk_allocation); - if (opt->srcrt && !np->cork.opt->srcrt) - return -ENOBUFS; - - /* need source address above miyazawa*/ - } - dst_hold(&rt->dst); - cork->dst = &rt->dst; - inet->cork.fl.u.ip6 = *fl6; - np->cork.hop_limit = hlimit; - np->cork.tclass = tclass; - if (rt->dst.flags & DST_XFRM_TUNNEL) - mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? - rt->dst.dev->mtu : dst_mtu(&rt->dst); - else - mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? - rt->dst.dev->mtu : dst_mtu(rt->dst.path); - if (np->frag_size < mtu) { - if (np->frag_size) - mtu = np->frag_size; - } - cork->fragsize = mtu; - if (dst_allfrag(rt->dst.path)) - cork->flags |= IPCORK_ALLFRAG; - cork->length = 0; - exthdrlen = (opt ? opt->opt_flen : 0); - length += exthdrlen; - transhdrlen += exthdrlen; + skb = skb_peek_tail(queue); + if (!skb) { + exthdrlen = opt ? opt->opt_flen : 0; dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len; - } else { - rt = (struct rt6_info *)cork->dst; - fl6 = &inet->cork.fl.u.ip6; - opt = np->cork.opt; - transhdrlen = 0; - exthdrlen = 0; - dst_exthdrlen = 0; - mtu = cork->fragsize; } + + mtu = cork->fragsize; orig_mtu = mtu; hh_len = LL_RESERVED_SPACE(rt->dst.dev); @@ -1276,6 +1270,14 @@ emsgsize: tskey = sk->sk_tskey++; } + /* If this is the first and only packet and device + * supports checksum offloading, let's use it. + */ + if (!skb && sk->sk_protocol == IPPROTO_UDP && + length + fragheaderlen < mtu && + rt->dst.dev->features & NETIF_F_V6_CSUM && + !exthdrlen) + csummode = CHECKSUM_PARTIAL; /* * Let's try using as much space as possible. * Use MTU if total length of the message fits into the MTU. 
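The hunk above adds an early csummode decision to __ip6_append_data(): a single UDP datagram that fits in one fragment, carries no extension headers and leaves via a NETIF_F_V6_CSUM device is marked CHECKSUM_PARTIAL so the NIC computes the UDP checksum. The same test, factored into a standalone helper purely for illustration — the helper name and factoring are made up, the kernel keeps this inline:

/* Illustrative helper only; the condition mirrors the one added to
 * __ip6_append_data() above, with each clause annotated.
 */
static int ip6_initial_csummode(const struct sk_buff *queue_tail,
                                const struct sock *sk,
                                const struct rt6_info *rt,
                                int length, int fragheaderlen,
                                unsigned int mtu, int exthdrlen)
{
        if (!queue_tail &&                        /* first packet of the cork queue */
            sk->sk_protocol == IPPROTO_UDP &&     /* plain UDP only */
            length + fragheaderlen < mtu &&       /* fits in a single fragment */
            (rt->dst.dev->features & NETIF_F_V6_CSUM) &&
            !exthdrlen)                           /* no IPv6 extension headers */
                return CHECKSUM_PARTIAL;

        return CHECKSUM_NONE;                     /* fall back to software checksum */
}
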
@@ -1292,13 +1294,12 @@ emsgsize: * --yoshfuji */ - skb = skb_peek_tail(&sk->sk_write_queue); cork->length += length; if (((length > mtu) || (skb && skb_is_gso(skb))) && (sk->sk_protocol == IPPROTO_UDP) && (rt->dst.dev->features & NETIF_F_UFO)) { - err = ip6_ufo_append_data(sk, getfrag, from, length, + err = ip6_ufo_append_data(sk, queue, getfrag, from, length, hh_len, fragheaderlen, transhdrlen, mtu, flags, rt); if (err) @@ -1389,7 +1390,7 @@ alloc_new_skb: * Fill in the control structures */ skb->protocol = htons(ETH_P_IPV6); - skb->ip_summed = CHECKSUM_NONE; + skb->ip_summed = csummode; skb->csum = 0; /* reserve for fragmentation and ipsec header */ skb_reserve(skb, hh_len + sizeof(struct frag_hdr) + @@ -1439,7 +1440,7 @@ alloc_new_skb: /* * Put the packet on the pending queue */ - __skb_queue_tail(&sk->sk_write_queue, skb); + __skb_queue_tail(queue, skb); continue; } @@ -1458,7 +1459,6 @@ alloc_new_skb: } } else { int i = skb_shinfo(skb)->nr_frags; - struct page_frag *pfrag = sk_page_frag(sk); err = -ENOMEM; if (!sk_page_frag_refill(sk, pfrag)) @@ -1501,43 +1501,81 @@ error: IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); return err; } + +int ip6_append_data(struct sock *sk, + int getfrag(void *from, char *to, int offset, int len, + int odd, struct sk_buff *skb), + void *from, int length, int transhdrlen, int hlimit, + int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6, + struct rt6_info *rt, unsigned int flags, int dontfrag) +{ + struct inet_sock *inet = inet_sk(sk); + struct ipv6_pinfo *np = inet6_sk(sk); + int exthdrlen; + int err; + + if (flags&MSG_PROBE) + return 0; + if (skb_queue_empty(&sk->sk_write_queue)) { + /* + * setup for corking + */ + err = ip6_setup_cork(sk, &inet->cork, &np->cork, hlimit, + tclass, opt, rt, fl6); + if (err) + return err; + + exthdrlen = (opt ? 
opt->opt_flen : 0); + length += exthdrlen; + transhdrlen += exthdrlen; + } else { + fl6 = &inet->cork.fl.u.ip6; + transhdrlen = 0; + } + + return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base, + &np->cork, sk_page_frag(sk), getfrag, + from, length, transhdrlen, flags, dontfrag); +} EXPORT_SYMBOL_GPL(ip6_append_data); -static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np) +static void ip6_cork_release(struct inet_cork_full *cork, + struct inet6_cork *v6_cork) { - if (np->cork.opt) { - kfree(np->cork.opt->dst0opt); - kfree(np->cork.opt->dst1opt); - kfree(np->cork.opt->hopopt); - kfree(np->cork.opt->srcrt); - kfree(np->cork.opt); - np->cork.opt = NULL; + if (v6_cork->opt) { + kfree(v6_cork->opt->dst0opt); + kfree(v6_cork->opt->dst1opt); + kfree(v6_cork->opt->hopopt); + kfree(v6_cork->opt->srcrt); + kfree(v6_cork->opt); + v6_cork->opt = NULL; } - if (inet->cork.base.dst) { - dst_release(inet->cork.base.dst); - inet->cork.base.dst = NULL; - inet->cork.base.flags &= ~IPCORK_ALLFRAG; + if (cork->base.dst) { + dst_release(cork->base.dst); + cork->base.dst = NULL; + cork->base.flags &= ~IPCORK_ALLFRAG; } - memset(&inet->cork.fl, 0, sizeof(inet->cork.fl)); + memset(&cork->fl, 0, sizeof(cork->fl)); } -int ip6_push_pending_frames(struct sock *sk) +struct sk_buff *__ip6_make_skb(struct sock *sk, + struct sk_buff_head *queue, + struct inet_cork_full *cork, + struct inet6_cork *v6_cork) { struct sk_buff *skb, *tmp_skb; struct sk_buff **tail_skb; struct in6_addr final_dst_buf, *final_dst = &final_dst_buf; - struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct net *net = sock_net(sk); struct ipv6hdr *hdr; - struct ipv6_txoptions *opt = np->cork.opt; - struct rt6_info *rt = (struct rt6_info *)inet->cork.base.dst; - struct flowi6 *fl6 = &inet->cork.fl.u.ip6; + struct ipv6_txoptions *opt = v6_cork->opt; + struct rt6_info *rt = (struct rt6_info *)cork->base.dst; + struct flowi6 *fl6 = &cork->fl.u.ip6; unsigned char proto = fl6->flowi6_proto; - int err = 0; - skb = __skb_dequeue(&sk->sk_write_queue); + skb = __skb_dequeue(queue); if (skb == NULL) goto out; tail_skb = &(skb_shinfo(skb)->frag_list); @@ -1545,7 +1583,7 @@ int ip6_push_pending_frames(struct sock *sk) /* move skb->data to ip header from ext header */ if (skb->data < skb_network_header(skb)) __skb_pull(skb, skb_network_offset(skb)); - while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) { + while ((tmp_skb = __skb_dequeue(queue)) != NULL) { __skb_pull(tmp_skb, skb_network_header_len(skb)); *tail_skb = tmp_skb; tail_skb = &(tmp_skb->next); @@ -1570,10 +1608,10 @@ int ip6_push_pending_frames(struct sock *sk) skb_reset_network_header(skb); hdr = ipv6_hdr(skb); - ip6_flow_hdr(hdr, np->cork.tclass, + ip6_flow_hdr(hdr, v6_cork->tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel, np->autoflowlabel)); - hdr->hop_limit = np->cork.hop_limit; + hdr->hop_limit = v6_cork->hop_limit; hdr->nexthdr = proto; hdr->saddr = fl6->saddr; hdr->daddr = *final_dst; @@ -1590,34 +1628,104 @@ int ip6_push_pending_frames(struct sock *sk) ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); } + ip6_cork_release(cork, v6_cork); +out: + return skb; +} + +int ip6_send_skb(struct sk_buff *skb) +{ + struct net *net = sock_net(skb->sk); + struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); + int err; + err = ip6_local_out(skb); if (err) { if (err > 0) err = net_xmit_errno(err); if (err) - goto error; + IP6_INC_STATS(net, rt->rt6i_idev, + IPSTATS_MIB_OUTDISCARDS); } -out: - ip6_cork_release(inet, np); return 
err; -error: - IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); - goto out; +} + +int ip6_push_pending_frames(struct sock *sk) +{ + struct sk_buff *skb; + + skb = ip6_finish_skb(sk); + if (!skb) + return 0; + + return ip6_send_skb(skb); } EXPORT_SYMBOL_GPL(ip6_push_pending_frames); -void ip6_flush_pending_frames(struct sock *sk) +static void __ip6_flush_pending_frames(struct sock *sk, + struct sk_buff_head *queue, + struct inet_cork_full *cork, + struct inet6_cork *v6_cork) { struct sk_buff *skb; - while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) { + while ((skb = __skb_dequeue_tail(queue)) != NULL) { if (skb_dst(skb)) IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_OUTDISCARDS); kfree_skb(skb); } - ip6_cork_release(inet_sk(sk), inet6_sk(sk)); + ip6_cork_release(cork, v6_cork); +} + +void ip6_flush_pending_frames(struct sock *sk) +{ + __ip6_flush_pending_frames(sk, &sk->sk_write_queue, + &inet_sk(sk)->cork, &inet6_sk(sk)->cork); } EXPORT_SYMBOL_GPL(ip6_flush_pending_frames); + +struct sk_buff *ip6_make_skb(struct sock *sk, + int getfrag(void *from, char *to, int offset, + int len, int odd, struct sk_buff *skb), + void *from, int length, int transhdrlen, + int hlimit, int tclass, + struct ipv6_txoptions *opt, struct flowi6 *fl6, + struct rt6_info *rt, unsigned int flags, + int dontfrag) +{ + struct inet_cork_full cork; + struct inet6_cork v6_cork; + struct sk_buff_head queue; + int exthdrlen = (opt ? opt->opt_flen : 0); + int err; + + if (flags & MSG_PROBE) + return NULL; + + __skb_queue_head_init(&queue); + + cork.base.flags = 0; + cork.base.addr = 0; + cork.base.opt = NULL; + v6_cork.opt = NULL; + err = ip6_setup_cork(sk, &cork, &v6_cork, hlimit, tclass, opt, rt, fl6); + if (err) + return ERR_PTR(err); + + if (dontfrag < 0) + dontfrag = inet6_sk(sk)->dontfrag; + + err = __ip6_append_data(sk, fl6, &queue, &cork.base, &v6_cork, + ¤t->task_frag, getfrag, from, + length + exthdrlen, transhdrlen + exthdrlen, + flags, dontfrag); + if (err) { + __ip6_flush_pending_frames(sk, &queue, &cork, &v6_cork); + return ERR_PTR(err); + } + + return __ip6_make_skb(sk, &queue, &cork, &v6_cork); +} diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 92b3da571980..266a264ec212 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1760,6 +1760,14 @@ nla_put_failure: return -EMSGSIZE; } +struct net *ip6_tnl_get_link_net(const struct net_device *dev) +{ + struct ip6_tnl *tunnel = netdev_priv(dev); + + return tunnel->net; +} +EXPORT_SYMBOL(ip6_tnl_get_link_net); + static const struct nla_policy ip6_tnl_policy[IFLA_IPTUN_MAX + 1] = { [IFLA_IPTUN_LINK] = { .type = NLA_U32 }, [IFLA_IPTUN_LOCAL] = { .len = sizeof(struct in6_addr) }, @@ -1783,6 +1791,7 @@ static struct rtnl_link_ops ip6_link_ops __read_mostly = { .dellink = ip6_tnl_dellink, .get_size = ip6_tnl_get_size, .fill_info = ip6_tnl_fill_info, + .get_link_net = ip6_tnl_get_link_net, }; static struct xfrm6_tunnel ip4ip6_handler __read_mostly = { diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c index 8db6c98fe218..32d9b268e7d8 100644 --- a/net/ipv6/ip6_udp_tunnel.c +++ b/net/ipv6/ip6_udp_tunnel.c @@ -62,14 +62,14 @@ error: } EXPORT_SYMBOL_GPL(udp_sock_create6); -int udp_tunnel6_xmit_skb(struct socket *sock, struct dst_entry *dst, - struct sk_buff *skb, struct net_device *dev, - struct in6_addr *saddr, struct in6_addr *daddr, - __u8 prio, __u8 ttl, __be16 src_port, __be16 dst_port) +int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sk_buff *skb, + struct net_device *dev, 
struct in6_addr *saddr, + struct in6_addr *daddr, + __u8 prio, __u8 ttl, __be16 src_port, + __be16 dst_port, bool nocheck) { struct udphdr *uh; struct ipv6hdr *ip6h; - struct sock *sk = sock->sk; __skb_push(skb, sizeof(*uh)); skb_reset_transport_header(skb); @@ -85,7 +85,7 @@ int udp_tunnel6_xmit_skb(struct socket *sock, struct dst_entry *dst, | IPSKB_REROUTED); skb_dst_set(skb, dst); - udp6_set_csum(udp_get_no_check6_tx(sk), skb, saddr, daddr, skb->len); + udp6_set_csum(nocheck, skb, saddr, daddr, skb->len); __skb_push(skb, sizeof(*ip6h)); skb_reset_network_header(skb); diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index ace10d0b3aac..5fb9e212eca8 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -1016,6 +1016,7 @@ static struct rtnl_link_ops vti6_link_ops __read_mostly = { .changelink = vti6_changelink, .get_size = vti6_get_size, .fill_info = vti6_fill_info, + .get_link_net = ip6_tnl_get_link_net, }; static void __net_exit vti6_destroy_tunnels(struct vti6_net *ip6n) diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 722669754bbf..34b682617f50 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -2388,7 +2388,8 @@ static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, if (err < 0 && err != -ENOENT) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 66980d8d98d1..8d766d9100cb 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -996,13 +996,9 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, lock_sock(sk); skb = np->pktoptions; if (skb) - atomic_inc(&skb->users); - release_sock(sk); - - if (skb) { ip6_datagram_recv_ctl(sk, &msg, skb); - kfree_skb(skb); - } else { + release_sock(sk); + if (!skb) { if (np->rxopt.bits.rxinfo) { struct in6_pktinfo src_info; src_info.ipi6_ifindex = np->mcast_oif ? 
np->mcast_oif : diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 682866777d53..471ed24aabae 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -655,7 +655,9 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb) struct in6_addr *target = (struct in6_addr *)&neigh->primary_key; int probes = atomic_read(&neigh->probes); - if (skb && ipv6_chk_addr(dev_net(dev), &ipv6_hdr(skb)->saddr, dev, 1)) + if (skb && ipv6_chk_addr_and_flags(dev_net(dev), &ipv6_hdr(skb)->saddr, + dev, 1, + IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)) saddr = &ipv6_hdr(skb)->saddr; probes -= NEIGH_VAR(neigh->parms, UCAST_PROBES); if (probes < 0) { @@ -1348,7 +1350,7 @@ skip_routeinfo: } } - if (ndopts.nd_opts_mtu) { + if (ndopts.nd_opts_mtu && in6_dev->cnf.accept_ra_mtu) { __be32 n; u32 mtu; diff --git a/net/ipv6/netfilter/nft_redir_ipv6.c b/net/ipv6/netfilter/nft_redir_ipv6.c index 2433a6bfb191..11820b6b3613 100644 --- a/net/ipv6/netfilter/nft_redir_ipv6.c +++ b/net/ipv6/netfilter/nft_redir_ipv6.c @@ -27,10 +27,10 @@ static void nft_redir_ipv6_eval(const struct nft_expr *expr, memset(&range, 0, sizeof(range)); if (priv->sreg_proto_min) { - range.min_proto.all = (__force __be16) - data[priv->sreg_proto_min].data[0]; - range.max_proto.all = (__force __be16) - data[priv->sreg_proto_max].data[0]; + range.min_proto.all = + *(__be16 *)&data[priv->sreg_proto_min].data[0]; + range.max_proto.all = + *(__be16 *)&data[priv->sreg_proto_max].data[0]; range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; } diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index 97f41a3e68d9..74581f706c4d 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -9,6 +9,25 @@ #include <net/addrconf.h> #include <net/secure_seq.h> +static u32 __ipv6_select_ident(u32 hashrnd, struct in6_addr *dst, + struct in6_addr *src) +{ + u32 hash, id; + + hash = __ipv6_addr_jhash(dst, hashrnd); + hash = __ipv6_addr_jhash(src, hash); + + /* Treat id of 0 as unset and if we get 0 back from ip_idents_reserve, + * set the hight order instead thus minimizing possible future + * collisions. + */ + id = ip_idents_reserve(hash, 1); + if (unlikely(!id)) + id = 1 << 31; + + return id; +} + /* This function exists only for tap drivers that must support broken * clients requesting UFO without specifying an IPv6 fragment ID. 
* @@ -22,7 +41,7 @@ void ipv6_proxy_select_ident(struct sk_buff *skb) static u32 ip6_proxy_idents_hashrnd __read_mostly; struct in6_addr buf[2]; struct in6_addr *addrs; - u32 hash, id; + u32 id; addrs = skb_header_pointer(skb, skb_network_offset(skb) + @@ -34,14 +53,25 @@ void ipv6_proxy_select_ident(struct sk_buff *skb) net_get_random_once(&ip6_proxy_idents_hashrnd, sizeof(ip6_proxy_idents_hashrnd)); - hash = __ipv6_addr_jhash(&addrs[1], ip6_proxy_idents_hashrnd); - hash = __ipv6_addr_jhash(&addrs[0], hash); - - id = ip_idents_reserve(hash, 1); + id = __ipv6_select_ident(ip6_proxy_idents_hashrnd, + &addrs[1], &addrs[0]); skb_shinfo(skb)->ip6_frag_id = htonl(id); } EXPORT_SYMBOL_GPL(ipv6_proxy_select_ident); +void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt) +{ + static u32 ip6_idents_hashrnd __read_mostly; + u32 id; + + net_get_random_once(&ip6_idents_hashrnd, sizeof(ip6_idents_hashrnd)); + + id = __ipv6_select_ident(ip6_idents_hashrnd, &rt->rt6i_dst.addr, + &rt->rt6i_src.addr); + fhdr->identification = htonl(id); +} +EXPORT_SYMBOL(ipv6_select_ident); + int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) { u16 offset = sizeof(struct ipv6hdr); diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 2d3148378a1f..bd46f736f61d 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -163,8 +163,7 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, pfh.icmph.checksum = 0; pfh.icmph.un.echo.id = inet->inet_sport; pfh.icmph.un.echo.sequence = user_icmph.icmp6_sequence; - /* XXX: stripping const */ - pfh.iov = (struct iovec *)msg->msg_iter.iov; + pfh.msg = msg; pfh.wcheck = 0; pfh.family = AF_INET6; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index ee25631f8c29..dae7f1a1e464 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -609,7 +609,7 @@ out: return err; } -static int rawv6_send_hdrinc(struct sock *sk, void *from, int length, +static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, struct flowi6 *fl6, struct dst_entry **dstp, unsigned int flags) { @@ -648,7 +648,7 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length, skb->ip_summed = CHECKSUM_NONE; skb->transport_header = skb->network_header; - err = memcpy_fromiovecend((void *)iph, from, 0, length); + err = memcpy_from_msg(iph, msg, length); if (err) goto error_fault; @@ -886,8 +886,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, back_from_confirm: if (inet->hdrincl) - /* XXX: stripping const */ - err = rawv6_send_hdrinc(sk, (struct iovec *)msg->msg_iter.iov, len, &fl6, &dst, msg->msg_flags); + err = rawv6_send_hdrinc(sk, msg, len, &fl6, &dst, msg->msg_flags); else { lock_sock(sk); err = ip6_append_data(sk, raw6_getfrag, &rfv, diff --git a/net/ipv6/route.c b/net/ipv6/route.c index c91083156edb..4688bd4d7f59 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -141,7 +141,7 @@ static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old) u32 *p = NULL; if (!(rt->dst.flags & DST_HOST)) - return NULL; + return dst_cow_metrics_generic(dst, old); peer = rt6_get_peer_create(rt); if (peer) { @@ -499,7 +499,7 @@ static void rt6_probe_deferred(struct work_struct *w) addrconf_addr_solict_mult(&work->target, &mcaddr); ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL); dev_put(work->dev); - kfree(w); + kfree(work); } static void rt6_probe(struct rt6_info *rt) @@ -853,14 +853,14 @@ EXPORT_SYMBOL(rt6_lookup); */ static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info, - struct nlattr *mx, int mx_len) + struct 
mx6_config *mxc) { int err; struct fib6_table *table; table = rt->rt6i_table; write_lock_bh(&table->tb6_lock); - err = fib6_add(&table->tb6_root, rt, info, mx, mx_len); + err = fib6_add(&table->tb6_root, rt, info, mxc); write_unlock_bh(&table->tb6_lock); return err; @@ -868,10 +868,10 @@ static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info, int ip6_ins_rt(struct rt6_info *rt) { - struct nl_info info = { - .nl_net = dev_net(rt->dst.dev), - }; - return __ip6_ins_rt(rt, &info, NULL, 0); + struct nl_info info = { .nl_net = dev_net(rt->dst.dev), }; + struct mx6_config mxc = { .mx = NULL, }; + + return __ip6_ins_rt(rt, &info, &mxc); } static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, @@ -1160,12 +1160,9 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, struct net *net = dev_net(dst->dev); rt6->rt6i_flags |= RTF_MODIFIED; - if (mtu < IPV6_MIN_MTU) { - u32 features = dst_metric(dst, RTAX_FEATURES); + if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; - features |= RTAX_FEATURE_ALLFRAG; - dst_metric_set(dst, RTAX_FEATURES, features); - } + dst_metric_set(dst, RTAX_MTU, mtu); rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires); } @@ -1245,12 +1242,16 @@ restart: rt = net->ipv6.ip6_null_entry; else if (rt->dst.error) { rt = net->ipv6.ip6_null_entry; - } else if (rt == net->ipv6.ip6_null_entry) { + goto out; + } + + if (rt == net->ipv6.ip6_null_entry) { fn = fib6_backtrack(fn, &fl6->saddr); if (fn) goto restart; } +out: dst_hold(&rt->dst); read_unlock_bh(&table->tb6_lock); @@ -1470,9 +1471,51 @@ out: return entries > rt_max_size; } -/* - * - */ +static int ip6_convert_metrics(struct mx6_config *mxc, + const struct fib6_config *cfg) +{ + struct nlattr *nla; + int remaining; + u32 *mp; + + if (cfg->fc_mx == NULL) + return 0; + + mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL); + if (unlikely(!mp)) + return -ENOMEM; + + nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { + int type = nla_type(nla); + + if (type) { + u32 val; + + if (unlikely(type > RTAX_MAX)) + goto err; + if (type == RTAX_CC_ALGO) { + char tmp[TCP_CA_NAME_MAX]; + + nla_strlcpy(tmp, nla, sizeof(tmp)); + val = tcp_ca_get_key_by_name(tmp); + if (val == TCP_CA_UNSPEC) + goto err; + } else { + val = nla_get_u32(nla); + } + + mp[type - 1] = val; + __set_bit(type - 1, mxc->mx_valid); + } + } + + mxc->mx = mp; + + return 0; + err: + kfree(mp); + return -EINVAL; +} int ip6_route_add(struct fib6_config *cfg) { @@ -1482,6 +1525,7 @@ int ip6_route_add(struct fib6_config *cfg) struct net_device *dev = NULL; struct inet6_dev *idev = NULL; struct fib6_table *table; + struct mx6_config mxc = { .mx = NULL, }; int addr_type; if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128) @@ -1677,8 +1721,14 @@ install_route: cfg->fc_nlinfo.nl_net = dev_net(dev); - return __ip6_ins_rt(rt, &cfg->fc_nlinfo, cfg->fc_mx, cfg->fc_mx_len); + err = ip6_convert_metrics(&mxc, cfg); + if (err) + goto out; + + err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc); + kfree(mxc.mx); + return err; out: if (dev) dev_put(dev); @@ -2534,7 +2584,8 @@ static inline size_t rt6_nlmsg_size(void) + nla_total_size(4) /* RTA_OIF */ + nla_total_size(4) /* RTA_PRIORITY */ + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */ - + nla_total_size(sizeof(struct rta_cacheinfo)); + + nla_total_size(sizeof(struct rta_cacheinfo)) + + nla_total_size(TCP_CA_NAME_MAX); /* RTAX_CC_ALGO */ } static int rt6_fill_node(struct net *net, @@ -2675,7 +2726,8 @@ static int rt6_fill_node(struct net *net, if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, 
rt->dst.error) < 0) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 213546bd6d5d..e4cbd5798eba 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1506,12 +1506,12 @@ static bool ipip6_netlink_encap_parms(struct nlattr *data[], if (data[IFLA_IPTUN_ENCAP_SPORT]) { ret = true; - ipencap->sport = nla_get_u16(data[IFLA_IPTUN_ENCAP_SPORT]); + ipencap->sport = nla_get_be16(data[IFLA_IPTUN_ENCAP_SPORT]); } if (data[IFLA_IPTUN_ENCAP_DPORT]) { ret = true; - ipencap->dport = nla_get_u16(data[IFLA_IPTUN_ENCAP_DPORT]); + ipencap->dport = nla_get_be16(data[IFLA_IPTUN_ENCAP_DPORT]); } return ret; @@ -1707,9 +1707,9 @@ static int ipip6_fill_info(struct sk_buff *skb, const struct net_device *dev) if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE, tunnel->encap.type) || - nla_put_u16(skb, IFLA_IPTUN_ENCAP_SPORT, + nla_put_be16(skb, IFLA_IPTUN_ENCAP_SPORT, tunnel->encap.sport) || - nla_put_u16(skb, IFLA_IPTUN_ENCAP_DPORT, + nla_put_be16(skb, IFLA_IPTUN_ENCAP_DPORT, tunnel->encap.dport) || nla_put_u16(skb, IFLA_IPTUN_ENCAP_FLAGS, tunnel->encap.flags)) @@ -1763,6 +1763,7 @@ static struct rtnl_link_ops sit_link_ops __read_mostly = { .get_size = ipip6_get_size, .fill_info = ipip6_fill_info, .dellink = ipip6_dellink, + .get_link_net = ip_tunnel_get_link_net, }; static struct xfrm_tunnel sit_handler __read_mostly = { diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 5ff87805258e..5d46832c6f72 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1199,6 +1199,8 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen + newnp->opt->opt_flen); + tcp_ca_openreq_child(newsk, dst); + tcp_sync_mss(newsk, dst_mtu(dst)); newtp->advmss = dst_metric_advmss(dst); if (tcp_sk(sk)->rx_opt.user_mss && @@ -1387,6 +1389,28 @@ ipv6_pktoptions: return 0; } +static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr, + const struct tcphdr *th) +{ + /* This is tricky: we move IP6CB at its correct location into + * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because + * _decode_session6() uses IP6CB(). + * barrier() makes sure compiler won't play aliasing games. + */ + memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb), + sizeof(struct inet6_skb_parm)); + barrier(); + + TCP_SKB_CB(skb)->seq = ntohl(th->seq); + TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + + skb->len - th->doff*4); + TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); + TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th); + TCP_SKB_CB(skb)->tcp_tw_isn = 0; + TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr); + TCP_SKB_CB(skb)->sacked = 0; +} + static int tcp_v6_rcv(struct sk_buff *skb) { const struct tcphdr *th; @@ -1418,24 +1442,9 @@ static int tcp_v6_rcv(struct sk_buff *skb) th = tcp_hdr(skb); hdr = ipv6_hdr(skb); - /* This is tricky : We move IPCB at its correct location into TCP_SKB_CB() - * barrier() makes sure compiler wont play fool^Waliasing games. 
- */ - memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb), - sizeof(struct inet6_skb_parm)); - barrier(); - - TCP_SKB_CB(skb)->seq = ntohl(th->seq); - TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + - skb->len - th->doff*4); - TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); - TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th); - TCP_SKB_CB(skb)->tcp_tw_isn = 0; - TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr); - TCP_SKB_CB(skb)->sacked = 0; sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest, - tcp_v6_iif(skb)); + inet6_iif(skb)); if (!sk) goto no_tcp_socket; @@ -1451,6 +1460,8 @@ process: if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_and_relse; + tcp_v6_fill_cb(skb, hdr, th); + #ifdef CONFIG_TCP_MD5SIG if (tcp_v6_inbound_md5_hash(sk, skb)) goto discard_and_relse; @@ -1482,6 +1493,8 @@ no_tcp_socket: if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) goto discard_it; + tcp_v6_fill_cb(skb, hdr, th); + if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) { csum_error: TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS); @@ -1505,6 +1518,8 @@ do_time_wait: goto discard_it; } + tcp_v6_fill_cb(skb, hdr, th); + if (skb->len < (th->doff<<2)) { inet_twsk_put(inet_twsk(sk)); goto bad_packet; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 189dc4ae3eca..d048d46779fc 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -909,7 +909,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, goto csum_error; } - if (udp_sk(sk)->convert_csum && uh->check && !IS_UDPLITE(sk)) + if (inet_get_convert_csum(sk) && uh->check && !IS_UDPLITE(sk)) skb_checksum_try_convert(skb, IPPROTO_UDP, uh->check, ip6_compute_pseudo); @@ -990,9 +990,10 @@ static void udp6_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, { unsigned int offset; struct udphdr *uh = udp_hdr(skb); + struct sk_buff *frags = skb_shinfo(skb)->frag_list; __wsum csum = 0; - if (skb_queue_len(&sk->sk_write_queue) == 1) { + if (!frags) { /* Only one fragment on the socket. */ skb->csum_start = skb_transport_header(skb) - skb->head; skb->csum_offset = offsetof(struct udphdr, check); @@ -1008,9 +1009,9 @@ static void udp6_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, skb->ip_summed = CHECKSUM_NONE; - skb_queue_walk(&sk->sk_write_queue, skb) { - csum = csum_add(csum, skb->csum); - } + do { + csum = csum_add(csum, frags->csum); + } while ((frags = frags->next)); uh->check = csum_ipv6_magic(saddr, daddr, len, IPPROTO_UDP, csum); @@ -1023,26 +1024,15 @@ static void udp6_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, * Sending */ -static int udp_v6_push_pending_frames(struct sock *sk) +static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6) { - struct sk_buff *skb; + struct sock *sk = skb->sk; struct udphdr *uh; - struct udp_sock *up = udp_sk(sk); - struct inet_sock *inet = inet_sk(sk); - struct flowi6 *fl6; int err = 0; int is_udplite = IS_UDPLITE(sk); __wsum csum = 0; - - if (up->pending == AF_INET) - return udp_push_pending_frames(sk); - - fl6 = &inet->cork.fl.u.ip6; - - /* Grab the skbuff where UDP header space exists. 
*/ - skb = skb_peek(&sk->sk_write_queue); - if (skb == NULL) - goto out; + int offset = skb_transport_offset(skb); + int len = skb->len - offset; /* * Create a UDP header @@ -1050,29 +1040,28 @@ static int udp_v6_push_pending_frames(struct sock *sk) uh = udp_hdr(skb); uh->source = fl6->fl6_sport; uh->dest = fl6->fl6_dport; - uh->len = htons(up->len); + uh->len = htons(len); uh->check = 0; if (is_udplite) - csum = udplite_csum_outgoing(sk, skb); - else if (up->no_check6_tx) { /* UDP csum disabled */ + csum = udplite_csum(skb); + else if (udp_sk(sk)->no_check6_tx) { /* UDP csum disabled */ skb->ip_summed = CHECKSUM_NONE; goto send; } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ - udp6_hwcsum_outgoing(sk, skb, &fl6->saddr, &fl6->daddr, - up->len); + udp6_hwcsum_outgoing(sk, skb, &fl6->saddr, &fl6->daddr, len); goto send; } else - csum = udp_csum_outgoing(sk, skb); + csum = udp_csum(skb); /* add protocol-dependent pseudo-header */ uh->check = csum_ipv6_magic(&fl6->saddr, &fl6->daddr, - up->len, fl6->flowi6_proto, csum); + len, fl6->flowi6_proto, csum); if (uh->check == 0) uh->check = CSUM_MANGLED_0; send: - err = ip6_push_pending_frames(sk); + err = ip6_send_skb(skb); if (err) { if (err == -ENOBUFS && !inet6_sk(sk)->recverr) { UDP6_INC_STATS_USER(sock_net(sk), @@ -1082,6 +1071,30 @@ send: } else UDP6_INC_STATS_USER(sock_net(sk), UDP_MIB_OUTDATAGRAMS, is_udplite); + return err; +} + +static int udp_v6_push_pending_frames(struct sock *sk) +{ + struct sk_buff *skb; + struct udp_sock *up = udp_sk(sk); + struct flowi6 fl6; + int err = 0; + + if (up->pending == AF_INET) + return udp_push_pending_frames(sk); + + /* ip6_finish_skb will release the cork, so make a copy of + * fl6 here. + */ + fl6 = inet_sk(sk)->cork.fl.u.ip6; + + skb = ip6_finish_skb(sk); + if (!skb) + goto out; + + err = udp_v6_send_skb(skb, &fl6); + out: up->len = 0; up->pending = 0; @@ -1164,6 +1177,7 @@ do_udp_sendmsg: if (len > INT_MAX - sizeof(struct udphdr)) return -EMSGSIZE; + getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; if (up->pending) { /* * There are pending frames. @@ -1294,6 +1308,20 @@ do_udp_sendmsg: goto do_confirm; back_from_confirm: + /* Lockless fast path for the non-corking case */ + if (!corkreq) { + struct sk_buff *skb; + + skb = ip6_make_skb(sk, getfrag, msg, ulen, + sizeof(struct udphdr), hlimit, tclass, opt, + &fl6, (struct rt6_info *)dst, + msg->msg_flags, dontfrag); + err = PTR_ERR(skb); + if (!IS_ERR_OR_NULL(skb)) + err = udp_v6_send_skb(skb, &fl6); + goto release_dst; + } + lock_sock(sk); if (unlikely(up->pending)) { /* The socket is already corked while preparing it. */ @@ -1311,7 +1339,6 @@ do_append_data: if (dontfrag < 0) dontfrag = np->dontfrag; up->len += ulen; - getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; err = ip6_append_data(sk, getfrag, msg, ulen, sizeof(struct udphdr), hlimit, tclass, opt, &fl6, (struct rt6_info *)dst, @@ -1323,6 +1350,11 @@ do_append_data: else if (unlikely(skb_queue_empty(&sk->sk_write_queue))) up->pending = 0; + if (err > 0) + err = np->recverr ? net_xmit_errno(err) : 0; + release_sock(sk); + +release_dst: if (dst) { if (connected) { ip6_dst_store(sk, dst, @@ -1339,9 +1371,6 @@ do_append_data: dst = NULL; } - if (err > 0) - err = np->recverr ? 
net_xmit_errno(err) : 0; - release_sock(sk); out: dst_release(dst); fl6_sock_release(flowlabel); diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index b6aa8ed18257..ab889bb16b3c 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -52,6 +52,10 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss); + /* Set the IPv6 fragment id if not set yet */ + if (!skb_shinfo(skb)->ip6_frag_id) + ipv6_proxy_select_ident(skb); + segs = NULL; goto out; } @@ -108,7 +112,11 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen); fptr->nexthdr = nexthdr; fptr->reserved = 0; - fptr->identification = skb_shinfo(skb)->ip6_frag_id; + if (skb_shinfo(skb)->ip6_frag_id) + fptr->identification = skb_shinfo(skb)->ip6_frag_id; + else + ipv6_select_ident(fptr, + (struct rt6_info *)skb_dst(skb)); /* Fragment the skb. ipv6 header and the remaining fields of the * fragment header are updated in ipv6_gso_segment() @@ -153,9 +161,13 @@ static int udp6_gro_complete(struct sk_buff *skb, int nhoff) const struct ipv6hdr *ipv6h = ipv6_hdr(skb); struct udphdr *uh = (struct udphdr *)(skb->data + nhoff); - if (uh->check) + if (uh->check) { + skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM; uh->check = ~udp_v6_check(skb->len - nhoff, &ipv6h->saddr, &ipv6h->daddr, 0); + } else { + skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL; + } return udp_gro_complete(skb, nhoff); } diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 5f983644373a..48bf5a06847b 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -130,12 +130,18 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) { struct flowi6 *fl6 = &fl->u.ip6; int onlyproto = 0; - u16 offset = skb_network_header_len(skb); const struct ipv6hdr *hdr = ipv6_hdr(skb); + u16 offset = sizeof(*hdr); struct ipv6_opt_hdr *exthdr; const unsigned char *nh = skb_network_header(skb); - u8 nexthdr = nh[IP6CB(skb)->nhoff]; + u16 nhoff = IP6CB(skb)->nhoff; int oif = 0; + u8 nexthdr; + + if (!nhoff) + nhoff = offsetof(struct ipv6hdr, nexthdr); + + nexthdr = nh[nhoff]; if (skb_dst(skb)) oif = skb_dst(skb)->dev->ifindex; diff --git a/net/irda/irlap.c b/net/irda/irlap.c index 7f2cafddfb6e..1cde711bcab5 100644 --- a/net/irda/irlap.c +++ b/net/irda/irlap.c @@ -533,7 +533,7 @@ void irlap_discovery_request(struct irlap_cb *self, discovery_t *discovery) info.discovery = discovery; /* sysctl_slot_timeout bounds are checked in irsysctl.c - Jean II */ - self->slot_timeout = sysctl_slot_timeout * HZ / 1000; + self->slot_timeout = msecs_to_jiffies(sysctl_slot_timeout); irlap_do_event(self, DISCOVERY_REQUEST, NULL, &info); } @@ -1015,13 +1015,15 @@ void irlap_apply_connection_parameters(struct irlap_cb *self, int now) * Or, this is how much we can keep the pf bit in primary mode. * Therefore, it must be lower or equal than our *OWN* max turn around. * Jean II */ - self->poll_timeout = self->qos_tx.max_turn_time.value * HZ / 1000; + self->poll_timeout = msecs_to_jiffies( + self->qos_tx.max_turn_time.value); /* The Final timeout applies only to the primary station. * It defines the maximum time the primary wait (mostly in RECV mode) * for an answer from the secondary station before polling it again. 
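The irlap timeout conversions in this hunk replace open-coded "value * HZ / 1000" arithmetic with msecs_to_jiffies(). A minimal user-space sketch of why the helper is preferable; the HZ value below is an assumption for illustration, and the real kernel helper additionally guards against oversized inputs:

#include <stdio.h>

#define HZ 100	/* assumed tick rate; kernels commonly use 100, 250 or 1000 */

/* old style used in irlap.c: integer division truncates, so a short
 * millisecond timeout can collapse to zero jiffies on low-HZ kernels */
static unsigned long to_jiffies_truncating(unsigned int ms)
{
	return (unsigned long)ms * HZ / 1000;
}

/* msecs_to_jiffies() rounds up, so a non-zero delay never becomes zero;
 * this formula sketches that rounding for the common HZ values */
static unsigned long to_jiffies_rounding_up(unsigned int ms)
{
	return ((unsigned long)ms * HZ + 999) / 1000;
}

int main(void)
{
	unsigned int ms = 5;	/* e.g. a short slot timeout */

	printf("%u ms -> %lu (truncated) vs %lu (rounded up) jiffies\n",
	       ms, to_jiffies_truncating(ms), to_jiffies_rounding_up(ms));
	return 0;
}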
* Therefore, it must be greater or equal than our *PARTNER* * max turn around time - Jean II */ - self->final_timeout = self->qos_rx.max_turn_time.value * HZ / 1000; + self->final_timeout = msecs_to_jiffies( + self->qos_rx.max_turn_time.value); /* The Watchdog Bit timeout applies only to the secondary station. * It defines the maximum time the secondary wait (mostly in RECV mode) * for poll from the primary station before getting annoyed. diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index 0ac907adb2f4..b4e923f77954 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -40,6 +40,18 @@ static struct genl_family l2tp_nl_family = { .netnsok = true, }; +static const struct genl_multicast_group l2tp_multicast_group[] = { + { + .name = L2TP_GENL_MCGROUP, + }, +}; + +static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, + int flags, struct l2tp_tunnel *tunnel, u8 cmd); +static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, + int flags, struct l2tp_session *session, + u8 cmd); + /* Accessed under genl lock */ static const struct l2tp_nl_cmd_ops *l2tp_nl_cmd_ops[__L2TP_PWTYPE_MAX]; @@ -97,6 +109,52 @@ out: return ret; } +static int l2tp_tunnel_notify(struct genl_family *family, + struct genl_info *info, + struct l2tp_tunnel *tunnel, + u8 cmd) +{ + struct sk_buff *msg; + int ret; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + ret = l2tp_nl_tunnel_send(msg, info->snd_portid, info->snd_seq, + NLM_F_ACK, tunnel, cmd); + + if (ret >= 0) + return genlmsg_multicast_allns(family, msg, 0, 0, GFP_ATOMIC); + + nlmsg_free(msg); + + return ret; +} + +static int l2tp_session_notify(struct genl_family *family, + struct genl_info *info, + struct l2tp_session *session, + u8 cmd) +{ + struct sk_buff *msg; + int ret; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + ret = l2tp_nl_session_send(msg, info->snd_portid, info->snd_seq, + NLM_F_ACK, session, cmd); + + if (ret >= 0) + return genlmsg_multicast_allns(family, msg, 0, 0, GFP_ATOMIC); + + nlmsg_free(msg); + + return ret; +} + static int l2tp_nl_cmd_tunnel_create(struct sk_buff *skb, struct genl_info *info) { u32 tunnel_id; @@ -188,6 +246,9 @@ static int l2tp_nl_cmd_tunnel_create(struct sk_buff *skb, struct genl_info *info break; } + if (ret >= 0) + ret = l2tp_tunnel_notify(&l2tp_nl_family, info, + tunnel, L2TP_CMD_TUNNEL_CREATE); out: return ret; } @@ -211,6 +272,9 @@ static int l2tp_nl_cmd_tunnel_delete(struct sk_buff *skb, struct genl_info *info goto out; } + l2tp_tunnel_notify(&l2tp_nl_family, info, + tunnel, L2TP_CMD_TUNNEL_DELETE); + (void) l2tp_tunnel_delete(tunnel); out: @@ -239,12 +303,15 @@ static int l2tp_nl_cmd_tunnel_modify(struct sk_buff *skb, struct genl_info *info if (info->attrs[L2TP_ATTR_DEBUG]) tunnel->debug = nla_get_u32(info->attrs[L2TP_ATTR_DEBUG]); + ret = l2tp_tunnel_notify(&l2tp_nl_family, info, + tunnel, L2TP_CMD_TUNNEL_MODIFY); + out: return ret; } static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int flags, - struct l2tp_tunnel *tunnel) + struct l2tp_tunnel *tunnel, u8 cmd) { void *hdr; struct nlattr *nest; @@ -254,8 +321,7 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla struct ipv6_pinfo *np = NULL; #endif - hdr = genlmsg_put(skb, portid, seq, &l2tp_nl_family, flags, - L2TP_CMD_TUNNEL_GET); + hdr = genlmsg_put(skb, portid, seq, &l2tp_nl_family, flags, cmd); if (!hdr) return -EMSGSIZE; @@ -324,7 +390,8 @@ static int 
l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla } out: - return genlmsg_end(skb, hdr); + genlmsg_end(skb, hdr); + return 0; nla_put_failure: genlmsg_cancel(skb, hdr); @@ -359,7 +426,7 @@ static int l2tp_nl_cmd_tunnel_get(struct sk_buff *skb, struct genl_info *info) } ret = l2tp_nl_tunnel_send(msg, info->snd_portid, info->snd_seq, - NLM_F_ACK, tunnel); + NLM_F_ACK, tunnel, L2TP_CMD_TUNNEL_GET); if (ret < 0) goto err_out; @@ -385,7 +452,7 @@ static int l2tp_nl_cmd_tunnel_dump(struct sk_buff *skb, struct netlink_callback if (l2tp_nl_tunnel_send(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - tunnel) <= 0) + tunnel, L2TP_CMD_TUNNEL_GET) < 0) goto out; ti++; @@ -539,6 +606,13 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf ret = (*l2tp_nl_cmd_ops[cfg.pw_type]->session_create)(net, tunnel_id, session_id, peer_session_id, &cfg); + if (ret >= 0) { + session = l2tp_session_find(net, tunnel, session_id); + if (session) + ret = l2tp_session_notify(&l2tp_nl_family, info, session, + L2TP_CMD_SESSION_CREATE); + } + out: return ret; } @@ -555,6 +629,9 @@ static int l2tp_nl_cmd_session_delete(struct sk_buff *skb, struct genl_info *inf goto out; } + l2tp_session_notify(&l2tp_nl_family, info, + session, L2TP_CMD_SESSION_DELETE); + pw_type = session->pwtype; if (pw_type < __L2TP_PWTYPE_MAX) if (l2tp_nl_cmd_ops[pw_type] && l2tp_nl_cmd_ops[pw_type]->session_delete) @@ -601,12 +678,15 @@ static int l2tp_nl_cmd_session_modify(struct sk_buff *skb, struct genl_info *inf if (info->attrs[L2TP_ATTR_MRU]) session->mru = nla_get_u16(info->attrs[L2TP_ATTR_MRU]); + ret = l2tp_session_notify(&l2tp_nl_family, info, + session, L2TP_CMD_SESSION_MODIFY); + out: return ret; } static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int flags, - struct l2tp_session *session) + struct l2tp_session *session, u8 cmd) { void *hdr; struct nlattr *nest; @@ -615,7 +695,7 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int fl sk = tunnel->sock; - hdr = genlmsg_put(skb, portid, seq, &l2tp_nl_family, flags, L2TP_CMD_SESSION_GET); + hdr = genlmsg_put(skb, portid, seq, &l2tp_nl_family, flags, cmd); if (!hdr) return -EMSGSIZE; @@ -673,7 +753,8 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int fl goto nla_put_failure; nla_nest_end(skb, nest); - return genlmsg_end(skb, hdr); + genlmsg_end(skb, hdr); + return 0; nla_put_failure: genlmsg_cancel(skb, hdr); @@ -699,7 +780,7 @@ static int l2tp_nl_cmd_session_get(struct sk_buff *skb, struct genl_info *info) } ret = l2tp_nl_session_send(msg, info->snd_portid, info->snd_seq, - 0, session); + 0, session, L2TP_CMD_SESSION_GET); if (ret < 0) goto err_out; @@ -737,7 +818,7 @@ static int l2tp_nl_cmd_session_dump(struct sk_buff *skb, struct netlink_callback if (l2tp_nl_session_send(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - session) <= 0) + session, L2TP_CMD_SESSION_GET) < 0) break; si++; @@ -896,7 +977,9 @@ EXPORT_SYMBOL_GPL(l2tp_nl_unregister_ops); static int l2tp_nl_init(void) { pr_info("L2TP netlink interface\n"); - return genl_register_family_with_ops(&l2tp_nl_family, l2tp_nl_ops); + return genl_register_family_with_ops_groups(&l2tp_nl_family, + l2tp_nl_ops, + l2tp_multicast_group); } static void l2tp_nl_cleanup(void) diff --git a/net/llc/sysctl_net_llc.c b/net/llc/sysctl_net_llc.c index 612a5ddaf93b..799bafc2af39 100644 --- a/net/llc/sysctl_net_llc.c +++ b/net/llc/sysctl_net_llc.c @@ -18,28 +18,28 @@ static struct ctl_table 
llc2_timeout_table[] = { { .procname = "ack", .data = &sysctl_llc2_ack_timeout, - .maxlen = sizeof(long), + .maxlen = sizeof(sysctl_llc2_ack_timeout), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "busy", .data = &sysctl_llc2_busy_timeout, - .maxlen = sizeof(long), + .maxlen = sizeof(sysctl_llc2_busy_timeout), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "p", .data = &sysctl_llc2_p_timeout, - .maxlen = sizeof(long), + .maxlen = sizeof(sysctl_llc2_p_timeout), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "rej", .data = &sysctl_llc2_rej_timeout, - .maxlen = sizeof(long), + .maxlen = sizeof(sysctl_llc2_rej_timeout), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig index 75cc6801a431..64a012a0c6e5 100644 --- a/net/mac80211/Kconfig +++ b/net/mac80211/Kconfig @@ -5,6 +5,7 @@ config MAC80211 select CRYPTO_ARC4 select CRYPTO_AES select CRYPTO_CCM + select CRYPTO_GCM select CRC32 select AVERAGE ---help--- diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile index e53671b1105e..3275f01881be 100644 --- a/net/mac80211/Makefile +++ b/net/mac80211/Makefile @@ -15,7 +15,9 @@ mac80211-y := \ michael.o \ tkip.o \ aes_ccm.o \ + aes_gcm.o \ aes_cmac.o \ + aes_gmac.o \ cfg.o \ ethtool.o \ rx.o \ diff --git a/net/mac80211/aes_ccm.c b/net/mac80211/aes_ccm.c index 09d9caaec591..7869bb40acaa 100644 --- a/net/mac80211/aes_ccm.c +++ b/net/mac80211/aes_ccm.c @@ -20,7 +20,8 @@ #include "aes_ccm.h" void ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, - u8 *data, size_t data_len, u8 *mic) + u8 *data, size_t data_len, u8 *mic, + size_t mic_len) { struct scatterlist assoc, pt, ct[2]; @@ -35,7 +36,7 @@ void ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, sg_init_one(&assoc, &aad[2], be16_to_cpup((__be16 *)aad)); sg_init_table(ct, 2); sg_set_buf(&ct[0], data, data_len); - sg_set_buf(&ct[1], mic, IEEE80211_CCMP_MIC_LEN); + sg_set_buf(&ct[1], mic, mic_len); aead_request_set_tfm(aead_req, tfm); aead_request_set_assoc(aead_req, &assoc, assoc.length); @@ -45,7 +46,8 @@ void ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, } int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, - u8 *data, size_t data_len, u8 *mic) + u8 *data, size_t data_len, u8 *mic, + size_t mic_len) { struct scatterlist assoc, pt, ct[2]; char aead_req_data[sizeof(struct aead_request) + @@ -62,17 +64,18 @@ int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, sg_init_one(&assoc, &aad[2], be16_to_cpup((__be16 *)aad)); sg_init_table(ct, 2); sg_set_buf(&ct[0], data, data_len); - sg_set_buf(&ct[1], mic, IEEE80211_CCMP_MIC_LEN); + sg_set_buf(&ct[1], mic, mic_len); aead_request_set_tfm(aead_req, tfm); aead_request_set_assoc(aead_req, &assoc, assoc.length); - aead_request_set_crypt(aead_req, ct, &pt, - data_len + IEEE80211_CCMP_MIC_LEN, b_0); + aead_request_set_crypt(aead_req, ct, &pt, data_len + mic_len, b_0); return crypto_aead_decrypt(aead_req); } -struct crypto_aead *ieee80211_aes_key_setup_encrypt(const u8 key[]) +struct crypto_aead *ieee80211_aes_key_setup_encrypt(const u8 key[], + size_t key_len, + size_t mic_len) { struct crypto_aead *tfm; int err; @@ -81,9 +84,9 @@ struct crypto_aead *ieee80211_aes_key_setup_encrypt(const u8 key[]) if (IS_ERR(tfm)) return tfm; - err = crypto_aead_setkey(tfm, key, WLAN_KEY_LEN_CCMP); + err = crypto_aead_setkey(tfm, key, key_len); if (!err) - err = crypto_aead_setauthsize(tfm, 
IEEE80211_CCMP_MIC_LEN); + err = crypto_aead_setauthsize(tfm, mic_len); if (!err) return tfm; diff --git a/net/mac80211/aes_ccm.h b/net/mac80211/aes_ccm.h index 2c7ab1948a2e..6a73d1e4d186 100644 --- a/net/mac80211/aes_ccm.h +++ b/net/mac80211/aes_ccm.h @@ -12,11 +12,15 @@ #include <linux/crypto.h> -struct crypto_aead *ieee80211_aes_key_setup_encrypt(const u8 key[]); +struct crypto_aead *ieee80211_aes_key_setup_encrypt(const u8 key[], + size_t key_len, + size_t mic_len); void ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, - u8 *data, size_t data_len, u8 *mic); + u8 *data, size_t data_len, u8 *mic, + size_t mic_len); int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, - u8 *data, size_t data_len, u8 *mic); + u8 *data, size_t data_len, u8 *mic, + size_t mic_len); void ieee80211_aes_key_free(struct crypto_aead *tfm); #endif /* AES_CCM_H */ diff --git a/net/mac80211/aes_cmac.c b/net/mac80211/aes_cmac.c index 9b9009f99551..4192806be3d3 100644 --- a/net/mac80211/aes_cmac.c +++ b/net/mac80211/aes_cmac.c @@ -18,8 +18,8 @@ #include "key.h" #include "aes_cmac.h" -#define AES_CMAC_KEY_LEN 16 #define CMAC_TLEN 8 /* CMAC TLen = 64 bits (8 octets) */ +#define CMAC_TLEN_256 16 /* CMAC TLen = 128 bits (16 octets) */ #define AAD_LEN 20 @@ -35,9 +35,9 @@ static void gf_mulx(u8 *pad) pad[AES_BLOCK_SIZE - 1] ^= 0x87; } - -static void aes_128_cmac_vector(struct crypto_cipher *tfm, size_t num_elem, - const u8 *addr[], const size_t *len, u8 *mac) +static void aes_cmac_vector(struct crypto_cipher *tfm, size_t num_elem, + const u8 *addr[], const size_t *len, u8 *mac, + size_t mac_len) { u8 cbc[AES_BLOCK_SIZE], pad[AES_BLOCK_SIZE]; const u8 *pos, *end; @@ -88,7 +88,7 @@ static void aes_128_cmac_vector(struct crypto_cipher *tfm, size_t num_elem, for (i = 0; i < AES_BLOCK_SIZE; i++) pad[i] ^= cbc[i]; crypto_cipher_encrypt_one(tfm, pad, pad); - memcpy(mac, pad, CMAC_TLEN); + memcpy(mac, pad, mac_len); } @@ -107,17 +107,35 @@ void ieee80211_aes_cmac(struct crypto_cipher *tfm, const u8 *aad, addr[2] = zero; len[2] = CMAC_TLEN; - aes_128_cmac_vector(tfm, 3, addr, len, mic); + aes_cmac_vector(tfm, 3, addr, len, mic, CMAC_TLEN); } +void ieee80211_aes_cmac_256(struct crypto_cipher *tfm, const u8 *aad, + const u8 *data, size_t data_len, u8 *mic) +{ + const u8 *addr[3]; + size_t len[3]; + u8 zero[CMAC_TLEN_256]; + + memset(zero, 0, CMAC_TLEN_256); + addr[0] = aad; + len[0] = AAD_LEN; + addr[1] = data; + len[1] = data_len - CMAC_TLEN_256; + addr[2] = zero; + len[2] = CMAC_TLEN_256; + + aes_cmac_vector(tfm, 3, addr, len, mic, CMAC_TLEN_256); +} -struct crypto_cipher *ieee80211_aes_cmac_key_setup(const u8 key[]) +struct crypto_cipher *ieee80211_aes_cmac_key_setup(const u8 key[], + size_t key_len) { struct crypto_cipher *tfm; tfm = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC); if (!IS_ERR(tfm)) - crypto_cipher_setkey(tfm, key, AES_CMAC_KEY_LEN); + crypto_cipher_setkey(tfm, key, key_len); return tfm; } diff --git a/net/mac80211/aes_cmac.h b/net/mac80211/aes_cmac.h index 0ce6487af795..3702041f44fd 100644 --- a/net/mac80211/aes_cmac.h +++ b/net/mac80211/aes_cmac.h @@ -11,9 +11,12 @@ #include <linux/crypto.h> -struct crypto_cipher *ieee80211_aes_cmac_key_setup(const u8 key[]); +struct crypto_cipher *ieee80211_aes_cmac_key_setup(const u8 key[], + size_t key_len); void ieee80211_aes_cmac(struct crypto_cipher *tfm, const u8 *aad, const u8 *data, size_t data_len, u8 *mic); +void ieee80211_aes_cmac_256(struct crypto_cipher *tfm, const u8 *aad, + const u8 *data, size_t data_len, u8 *mic); void 
ieee80211_aes_cmac_key_free(struct crypto_cipher *tfm); #endif /* AES_CMAC_H */ diff --git a/net/mac80211/aes_gcm.c b/net/mac80211/aes_gcm.c new file mode 100644 index 000000000000..c2bf6698d738 --- /dev/null +++ b/net/mac80211/aes_gcm.c @@ -0,0 +1,95 @@ +/* + * Copyright 2014-2015, Qualcomm Atheros, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/crypto.h> +#include <linux/err.h> +#include <crypto/aes.h> + +#include <net/mac80211.h> +#include "key.h" +#include "aes_gcm.h" + +void ieee80211_aes_gcm_encrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad, + u8 *data, size_t data_len, u8 *mic) +{ + struct scatterlist assoc, pt, ct[2]; + + char aead_req_data[sizeof(struct aead_request) + + crypto_aead_reqsize(tfm)] + __aligned(__alignof__(struct aead_request)); + struct aead_request *aead_req = (void *)aead_req_data; + + memset(aead_req, 0, sizeof(aead_req_data)); + + sg_init_one(&pt, data, data_len); + sg_init_one(&assoc, &aad[2], be16_to_cpup((__be16 *)aad)); + sg_init_table(ct, 2); + sg_set_buf(&ct[0], data, data_len); + sg_set_buf(&ct[1], mic, IEEE80211_GCMP_MIC_LEN); + + aead_request_set_tfm(aead_req, tfm); + aead_request_set_assoc(aead_req, &assoc, assoc.length); + aead_request_set_crypt(aead_req, &pt, ct, data_len, j_0); + + crypto_aead_encrypt(aead_req); +} + +int ieee80211_aes_gcm_decrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad, + u8 *data, size_t data_len, u8 *mic) +{ + struct scatterlist assoc, pt, ct[2]; + char aead_req_data[sizeof(struct aead_request) + + crypto_aead_reqsize(tfm)] + __aligned(__alignof__(struct aead_request)); + struct aead_request *aead_req = (void *)aead_req_data; + + if (data_len == 0) + return -EINVAL; + + memset(aead_req, 0, sizeof(aead_req_data)); + + sg_init_one(&pt, data, data_len); + sg_init_one(&assoc, &aad[2], be16_to_cpup((__be16 *)aad)); + sg_init_table(ct, 2); + sg_set_buf(&ct[0], data, data_len); + sg_set_buf(&ct[1], mic, IEEE80211_GCMP_MIC_LEN); + + aead_request_set_tfm(aead_req, tfm); + aead_request_set_assoc(aead_req, &assoc, assoc.length); + aead_request_set_crypt(aead_req, ct, &pt, + data_len + IEEE80211_GCMP_MIC_LEN, j_0); + + return crypto_aead_decrypt(aead_req); +} + +struct crypto_aead *ieee80211_aes_gcm_key_setup_encrypt(const u8 key[], + size_t key_len) +{ + struct crypto_aead *tfm; + int err; + + tfm = crypto_alloc_aead("gcm(aes)", 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(tfm)) + return tfm; + + err = crypto_aead_setkey(tfm, key, key_len); + if (!err) + err = crypto_aead_setauthsize(tfm, IEEE80211_GCMP_MIC_LEN); + if (!err) + return tfm; + + crypto_free_aead(tfm); + return ERR_PTR(err); +} + +void ieee80211_aes_gcm_key_free(struct crypto_aead *tfm) +{ + crypto_free_aead(tfm); +} diff --git a/net/mac80211/aes_gcm.h b/net/mac80211/aes_gcm.h new file mode 100644 index 000000000000..1347fda6b76a --- /dev/null +++ b/net/mac80211/aes_gcm.h @@ -0,0 +1,22 @@ +/* + * Copyright 2014-2015, Qualcomm Atheros, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#ifndef AES_GCM_H +#define AES_GCM_H + +#include <linux/crypto.h> + +void ieee80211_aes_gcm_encrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad, + u8 *data, size_t data_len, u8 *mic); +int ieee80211_aes_gcm_decrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad, + u8 *data, size_t data_len, u8 *mic); +struct crypto_aead *ieee80211_aes_gcm_key_setup_encrypt(const u8 key[], + size_t key_len); +void ieee80211_aes_gcm_key_free(struct crypto_aead *tfm); + +#endif /* AES_GCM_H */ diff --git a/net/mac80211/aes_gmac.c b/net/mac80211/aes_gmac.c new file mode 100644 index 000000000000..1c72edcb0083 --- /dev/null +++ b/net/mac80211/aes_gmac.c @@ -0,0 +1,84 @@ +/* + * AES-GMAC for IEEE 802.11 BIP-GMAC-128 and BIP-GMAC-256 + * Copyright 2015, Qualcomm Atheros, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/crypto.h> +#include <linux/err.h> +#include <crypto/aes.h> + +#include <net/mac80211.h> +#include "key.h" +#include "aes_gmac.h" + +#define GMAC_MIC_LEN 16 +#define GMAC_NONCE_LEN 12 +#define AAD_LEN 20 + +int ieee80211_aes_gmac(struct crypto_aead *tfm, const u8 *aad, u8 *nonce, + const u8 *data, size_t data_len, u8 *mic) +{ + struct scatterlist sg[3], ct[1]; + char aead_req_data[sizeof(struct aead_request) + + crypto_aead_reqsize(tfm)] + __aligned(__alignof__(struct aead_request)); + struct aead_request *aead_req = (void *)aead_req_data; + u8 zero[GMAC_MIC_LEN], iv[AES_BLOCK_SIZE]; + + if (data_len < GMAC_MIC_LEN) + return -EINVAL; + + memset(aead_req, 0, sizeof(aead_req_data)); + + memset(zero, 0, GMAC_MIC_LEN); + sg_init_table(sg, 3); + sg_set_buf(&sg[0], aad, AAD_LEN); + sg_set_buf(&sg[1], data, data_len - GMAC_MIC_LEN); + sg_set_buf(&sg[2], zero, GMAC_MIC_LEN); + + memcpy(iv, nonce, GMAC_NONCE_LEN); + memset(iv + GMAC_NONCE_LEN, 0, sizeof(iv) - GMAC_NONCE_LEN); + iv[AES_BLOCK_SIZE - 1] = 0x01; + + sg_init_table(ct, 1); + sg_set_buf(&ct[0], mic, GMAC_MIC_LEN); + + aead_request_set_tfm(aead_req, tfm); + aead_request_set_assoc(aead_req, sg, AAD_LEN + data_len); + aead_request_set_crypt(aead_req, NULL, ct, 0, iv); + + crypto_aead_encrypt(aead_req); + + return 0; +} + +struct crypto_aead *ieee80211_aes_gmac_key_setup(const u8 key[], + size_t key_len) +{ + struct crypto_aead *tfm; + int err; + + tfm = crypto_alloc_aead("gcm(aes)", 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(tfm)) + return tfm; + + err = crypto_aead_setkey(tfm, key, key_len); + if (!err) + return tfm; + if (!err) + err = crypto_aead_setauthsize(tfm, GMAC_MIC_LEN); + + crypto_free_aead(tfm); + return ERR_PTR(err); +} + +void ieee80211_aes_gmac_key_free(struct crypto_aead *tfm) +{ + crypto_free_aead(tfm); +} diff --git a/net/mac80211/aes_gmac.h b/net/mac80211/aes_gmac.h new file mode 100644 index 000000000000..d328204d73a8 --- /dev/null +++ b/net/mac80211/aes_gmac.h @@ -0,0 +1,20 @@ +/* + * Copyright 2015, Qualcomm Atheros, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#ifndef AES_GMAC_H +#define AES_GMAC_H + +#include <linux/crypto.h> + +struct crypto_aead *ieee80211_aes_gmac_key_setup(const u8 key[], + size_t key_len); +int ieee80211_aes_gmac(struct crypto_aead *tfm, const u8 *aad, u8 *nonce, + const u8 *data, size_t data_len, u8 *mic); +void ieee80211_aes_gmac_key_free(struct crypto_aead *tfm); + +#endif /* AES_GMAC_H */ diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index e75d5c53e97b..dd4ff36c557a 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -162,8 +162,13 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, return -EINVAL; break; case WLAN_CIPHER_SUITE_CCMP: + case WLAN_CIPHER_SUITE_CCMP_256: case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_CMAC_256: + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: break; default: cs = ieee80211_cs_get(local, params->cipher, sdata->vif.type); @@ -348,6 +353,7 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, params.seq_len = 6; break; case WLAN_CIPHER_SUITE_CCMP: + case WLAN_CIPHER_SUITE_CCMP_256: pn64 = atomic64_read(&key->u.ccmp.tx_pn); seq[0] = pn64; seq[1] = pn64 >> 8; @@ -359,6 +365,7 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, params.seq_len = 6; break; case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_CMAC_256: pn64 = atomic64_read(&key->u.aes_cmac.tx_pn); seq[0] = pn64; seq[1] = pn64 >> 8; @@ -369,6 +376,30 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, params.seq = seq; params.seq_len = 6; break; + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + pn64 = atomic64_read(&key->u.aes_gmac.tx_pn); + seq[0] = pn64; + seq[1] = pn64 >> 8; + seq[2] = pn64 >> 16; + seq[3] = pn64 >> 24; + seq[4] = pn64 >> 32; + seq[5] = pn64 >> 40; + params.seq = seq; + params.seq_len = 6; + break; + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: + pn64 = atomic64_read(&key->u.gcmp.tx_pn); + seq[0] = pn64; + seq[1] = pn64 >> 8; + seq[2] = pn64 >> 16; + seq[3] = pn64 >> 24; + seq[4] = pn64 >> 32; + seq[5] = pn64 >> 40; + params.seq = seq; + params.seq_len = 6; + break; } params.key = key->conf.key; @@ -428,11 +459,13 @@ void sta_set_rate_info_tx(struct sta_info *sta, rinfo->legacy = DIV_ROUND_UP(brate, 1 << shift); } if (rate->flags & IEEE80211_TX_RC_40_MHZ_WIDTH) - rinfo->flags |= RATE_INFO_FLAGS_40_MHZ_WIDTH; - if (rate->flags & IEEE80211_TX_RC_80_MHZ_WIDTH) - rinfo->flags |= RATE_INFO_FLAGS_80_MHZ_WIDTH; - if (rate->flags & IEEE80211_TX_RC_160_MHZ_WIDTH) - rinfo->flags |= RATE_INFO_FLAGS_160_MHZ_WIDTH; + rinfo->bw = RATE_INFO_BW_40; + else if (rate->flags & IEEE80211_TX_RC_80_MHZ_WIDTH) + rinfo->bw = RATE_INFO_BW_80; + else if (rate->flags & IEEE80211_TX_RC_160_MHZ_WIDTH) + rinfo->bw = RATE_INFO_BW_160; + else + rinfo->bw = RATE_INFO_BW_20; if (rate->flags & IEEE80211_TX_RC_SHORT_GI) rinfo->flags |= RATE_INFO_FLAGS_SHORT_GI; } @@ -459,16 +492,21 @@ void sta_set_rate_info_rx(struct sta_info *sta, struct rate_info *rinfo) rinfo->legacy = DIV_ROUND_UP(brate, 1 << shift); } - if (sta->last_rx_rate_flag & RX_FLAG_40MHZ) - rinfo->flags |= RATE_INFO_FLAGS_40_MHZ_WIDTH; if (sta->last_rx_rate_flag & RX_FLAG_SHORT_GI) rinfo->flags |= RATE_INFO_FLAGS_SHORT_GI; - if (sta->last_rx_rate_vht_flag & RX_VHT_FLAG_80MHZ) - rinfo->flags |= RATE_INFO_FLAGS_80_MHZ_WIDTH; - if (sta->last_rx_rate_vht_flag & RX_VHT_FLAG_80P80MHZ) - rinfo->flags |= 
RATE_INFO_FLAGS_80P80_MHZ_WIDTH; - if (sta->last_rx_rate_vht_flag & RX_VHT_FLAG_160MHZ) - rinfo->flags |= RATE_INFO_FLAGS_160_MHZ_WIDTH; + + if (sta->last_rx_rate_flag & RX_FLAG_5MHZ) + rinfo->bw = RATE_INFO_BW_5; + else if (sta->last_rx_rate_flag & RX_FLAG_10MHZ) + rinfo->bw = RATE_INFO_BW_10; + else if (sta->last_rx_rate_flag & RX_FLAG_40MHZ) + rinfo->bw = RATE_INFO_BW_40; + else if (sta->last_rx_rate_vht_flag & RX_VHT_FLAG_80MHZ) + rinfo->bw = RATE_INFO_BW_80; + else if (sta->last_rx_rate_vht_flag & RX_VHT_FLAG_160MHZ) + rinfo->bw = RATE_INFO_BW_160; + else + rinfo->bw = RATE_INFO_BW_20; } static int ieee80211_dump_station(struct wiphy *wiphy, struct net_device *dev, @@ -678,7 +716,8 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, BSS_CHANGED_BEACON_ENABLED | BSS_CHANGED_BEACON | BSS_CHANGED_SSID | - BSS_CHANGED_P2P_PS; + BSS_CHANGED_P2P_PS | + BSS_CHANGED_TXPOWER; int err; old = sdata_dereference(sdata->u.ap.beacon, sdata); @@ -2102,6 +2141,8 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy, { struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata; + enum nl80211_tx_power_setting txp_type = type; + bool update_txp_type = false; if (wdev) { sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); @@ -2109,6 +2150,7 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy, switch (type) { case NL80211_TX_POWER_AUTOMATIC: sdata->user_power_level = IEEE80211_UNSET_POWER_LEVEL; + txp_type = NL80211_TX_POWER_LIMITED; break; case NL80211_TX_POWER_LIMITED: case NL80211_TX_POWER_FIXED: @@ -2118,7 +2160,12 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy, break; } - ieee80211_recalc_txpower(sdata); + if (txp_type != sdata->vif.bss_conf.txpower_type) { + update_txp_type = true; + sdata->vif.bss_conf.txpower_type = txp_type; + } + + ieee80211_recalc_txpower(sdata, update_txp_type); return 0; } @@ -2126,6 +2173,7 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy, switch (type) { case NL80211_TX_POWER_AUTOMATIC: local->user_power_level = IEEE80211_UNSET_POWER_LEVEL; + txp_type = NL80211_TX_POWER_LIMITED; break; case NL80211_TX_POWER_LIMITED: case NL80211_TX_POWER_FIXED: @@ -2136,10 +2184,14 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy, } mutex_lock(&local->iflist_mtx); - list_for_each_entry(sdata, &local->interfaces, list) + list_for_each_entry(sdata, &local->interfaces, list) { sdata->user_power_level = local->user_power_level; + if (txp_type != sdata->vif.bss_conf.txpower_type) + update_txp_type = true; + sdata->vif.bss_conf.txpower_type = txp_type; + } list_for_each_entry(sdata, &local->interfaces, list) - ieee80211_recalc_txpower(sdata); + ieee80211_recalc_txpower(sdata, update_txp_type); mutex_unlock(&local->iflist_mtx); return 0; @@ -2556,7 +2608,7 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local, /* if there's one pending or we're scanning, queue this one */ if (!list_empty(&local->roc_list) || - local->scanning || local->radar_detect_enabled) + local->scanning || ieee80211_is_radar_required(local)) goto out_check_combine; /* if not HW assist, just queue & schedule work */ @@ -3664,7 +3716,7 @@ static int ieee80211_del_tx_ts(struct wiphy *wiphy, struct net_device *dev, * queues. 
*/ synchronize_net(); - ieee80211_flush_queues(local, sdata); + ieee80211_flush_queues(local, sdata, false); /* restore the normal QoS parameters * (unconditionally to avoid races) diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index da1c12c34487..ff0d2db09df9 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -388,7 +388,7 @@ ieee80211_find_chanctx(struct ieee80211_local *local, return NULL; } -static bool ieee80211_is_radar_required(struct ieee80211_local *local) +bool ieee80211_is_radar_required(struct ieee80211_local *local) { struct ieee80211_sub_if_data *sdata; @@ -406,6 +406,34 @@ static bool ieee80211_is_radar_required(struct ieee80211_local *local) return false; } +static bool +ieee80211_chanctx_radar_required(struct ieee80211_local *local, + struct ieee80211_chanctx *ctx) +{ + struct ieee80211_chanctx_conf *conf = &ctx->conf; + struct ieee80211_sub_if_data *sdata; + bool required = false; + + lockdep_assert_held(&local->chanctx_mtx); + lockdep_assert_held(&local->mtx); + + rcu_read_lock(); + list_for_each_entry_rcu(sdata, &local->interfaces, list) { + if (!ieee80211_sdata_running(sdata)) + continue; + if (rcu_access_pointer(sdata->vif.chanctx_conf) != conf) + continue; + if (!sdata->radar_required) + continue; + + required = true; + break; + } + rcu_read_unlock(); + + return required; +} + static struct ieee80211_chanctx * ieee80211_alloc_chanctx(struct ieee80211_local *local, const struct cfg80211_chan_def *chandef, @@ -425,7 +453,7 @@ ieee80211_alloc_chanctx(struct ieee80211_local *local, ctx->conf.rx_chains_static = 1; ctx->conf.rx_chains_dynamic = 1; ctx->mode = mode; - ctx->conf.radar_enabled = ieee80211_is_radar_required(local); + ctx->conf.radar_enabled = false; ieee80211_recalc_chanctx_min_def(local, ctx); return ctx; @@ -567,16 +595,15 @@ static void ieee80211_recalc_radar_chanctx(struct ieee80211_local *local, bool radar_enabled; lockdep_assert_held(&local->chanctx_mtx); - /* for setting local->radar_detect_enabled */ + /* for ieee80211_is_radar_required */ lockdep_assert_held(&local->mtx); - radar_enabled = ieee80211_is_radar_required(local); + radar_enabled = ieee80211_chanctx_radar_required(local, chanctx); if (radar_enabled == chanctx->conf.radar_enabled) return; chanctx->conf.radar_enabled = radar_enabled; - local->radar_detect_enabled = chanctx->conf.radar_enabled; if (!local->use_chanctx) { local->hw.conf.radar_enabled = chanctx->conf.radar_enabled; @@ -628,7 +655,7 @@ out: } if (new_ctx && ieee80211_chanctx_num_assigned(local, new_ctx) > 0) { - ieee80211_recalc_txpower(sdata); + ieee80211_recalc_txpower(sdata, false); ieee80211_recalc_chanctx_min_def(local, new_ctx); } @@ -1360,7 +1387,7 @@ static int ieee80211_vif_use_reserved_switch(struct ieee80211_local *local) ieee80211_bss_info_change_notify(sdata, changed); - ieee80211_recalc_txpower(sdata); + ieee80211_recalc_txpower(sdata, false); } ieee80211_recalc_chanctx_chantype(local, ctx); diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 54a189f0393e..eeb0bbd69d98 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -303,8 +303,6 @@ static ssize_t hwflags_read(struct file *file, char __user *user_buf, sf += scnprintf(buf + sf, mxln - sf, "SUPPORTS_DYNAMIC_PS\n"); if (local->hw.flags & IEEE80211_HW_MFP_CAPABLE) sf += scnprintf(buf + sf, mxln - sf, "MFP_CAPABLE\n"); - if (local->hw.flags & IEEE80211_HW_SUPPORTS_UAPSD) - sf += scnprintf(buf + sf, mxln - sf, "SUPPORTS_UAPSD\n"); if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) sf += scnprintf(buf + sf, mxln 
- sf, "REPORTS_TX_ACK_STATUS\n"); diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c index 5523b94c7c90..71ac1b5f4da5 100644 --- a/net/mac80211/debugfs_key.c +++ b/net/mac80211/debugfs_key.c @@ -94,17 +94,33 @@ static ssize_t key_tx_spec_read(struct file *file, char __user *userbuf, key->u.tkip.tx.iv16); break; case WLAN_CIPHER_SUITE_CCMP: + case WLAN_CIPHER_SUITE_CCMP_256: pn = atomic64_read(&key->u.ccmp.tx_pn); len = scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n", (u8)(pn >> 40), (u8)(pn >> 32), (u8)(pn >> 24), (u8)(pn >> 16), (u8)(pn >> 8), (u8)pn); break; case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_CMAC_256: pn = atomic64_read(&key->u.aes_cmac.tx_pn); len = scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n", (u8)(pn >> 40), (u8)(pn >> 32), (u8)(pn >> 24), (u8)(pn >> 16), (u8)(pn >> 8), (u8)pn); break; + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + pn = atomic64_read(&key->u.aes_gmac.tx_pn); + len = scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n", + (u8)(pn >> 40), (u8)(pn >> 32), (u8)(pn >> 24), + (u8)(pn >> 16), (u8)(pn >> 8), (u8)pn); + break; + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: + pn = atomic64_read(&key->u.gcmp.tx_pn); + len = scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n", + (u8)(pn >> 40), (u8)(pn >> 32), (u8)(pn >> 24), + (u8)(pn >> 16), (u8)(pn >> 8), (u8)pn); + break; default: return 0; } @@ -134,6 +150,7 @@ static ssize_t key_rx_spec_read(struct file *file, char __user *userbuf, len = p - buf; break; case WLAN_CIPHER_SUITE_CCMP: + case WLAN_CIPHER_SUITE_CCMP_256: for (i = 0; i < IEEE80211_NUM_TIDS + 1; i++) { rpn = key->u.ccmp.rx_pn[i]; p += scnprintf(p, sizeof(buf)+buf-p, @@ -144,6 +161,7 @@ static ssize_t key_rx_spec_read(struct file *file, char __user *userbuf, len = p - buf; break; case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_CMAC_256: rpn = key->u.aes_cmac.rx_pn; p += scnprintf(p, sizeof(buf)+buf-p, "%02x%02x%02x%02x%02x%02x\n", @@ -151,6 +169,26 @@ static ssize_t key_rx_spec_read(struct file *file, char __user *userbuf, rpn[3], rpn[4], rpn[5]); len = p - buf; break; + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + rpn = key->u.aes_gmac.rx_pn; + p += scnprintf(p, sizeof(buf)+buf-p, + "%02x%02x%02x%02x%02x%02x\n", + rpn[0], rpn[1], rpn[2], + rpn[3], rpn[4], rpn[5]); + len = p - buf; + break; + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: + for (i = 0; i < IEEE80211_NUM_TIDS + 1; i++) { + rpn = key->u.gcmp.rx_pn[i]; + p += scnprintf(p, sizeof(buf)+buf-p, + "%02x%02x%02x%02x%02x%02x\n", + rpn[0], rpn[1], rpn[2], + rpn[3], rpn[4], rpn[5]); + } + len = p - buf; + break; default: return 0; } @@ -167,12 +205,23 @@ static ssize_t key_replays_read(struct file *file, char __user *userbuf, switch (key->conf.cipher) { case WLAN_CIPHER_SUITE_CCMP: + case WLAN_CIPHER_SUITE_CCMP_256: len = scnprintf(buf, sizeof(buf), "%u\n", key->u.ccmp.replays); break; case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_CMAC_256: len = scnprintf(buf, sizeof(buf), "%u\n", key->u.aes_cmac.replays); break; + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + len = scnprintf(buf, sizeof(buf), "%u\n", + key->u.aes_gmac.replays); + break; + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: + len = scnprintf(buf, sizeof(buf), "%u\n", key->u.gcmp.replays); + break; default: return 0; } @@ -189,9 +238,15 @@ static ssize_t key_icverrors_read(struct file *file, char 
__user *userbuf, switch (key->conf.cipher) { case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_CMAC_256: len = scnprintf(buf, sizeof(buf), "%u\n", key->u.aes_cmac.icverrors); break; + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + len = scnprintf(buf, sizeof(buf), "%u\n", + key->u.aes_gmac.icverrors); + break; default: return 0; } diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 2ebc9ead9695..fdeda17b8dd2 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -639,6 +639,21 @@ static inline void drv_sta_rate_tbl_update(struct ieee80211_local *local, trace_drv_return_void(local); } +static inline void drv_sta_statistics(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_sta *sta, + struct station_info *sinfo) +{ + sdata = get_bss_sdata(sdata); + if (!check_sdata_in_driver(sdata)) + return; + + trace_drv_sta_statistics(local, sdata, sta); + if (local->ops->sta_statistics) + local->ops->sta_statistics(&local->hw, &sdata->vif, sta, sinfo); + trace_drv_return_void(local); +} + static inline int drv_conf_tx(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, u16 ac, const struct ieee80211_tx_queue_params *params) @@ -966,21 +981,6 @@ drv_allow_buffered_frames(struct ieee80211_local *local, trace_drv_return_void(local); } -static inline int drv_get_rssi(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata, - struct ieee80211_sta *sta, - s8 *rssi_dbm) -{ - int ret; - - might_sleep(); - - ret = local->ops->get_rssi(&local->hw, &sdata->vif, sta, rssi_dbm); - trace_drv_get_rssi(local, sta, *rssi_dbm, ret); - - return ret; -} - static inline void drv_mgd_prepare_tx(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata) { diff --git a/net/mac80211/ethtool.c b/net/mac80211/ethtool.c index ebfc8091557b..52bcea6ad9e8 100644 --- a/net/mac80211/ethtool.c +++ b/net/mac80211/ethtool.c @@ -117,16 +117,16 @@ static void ieee80211_get_stats(struct net_device *dev, data[i++] = sta->sta_state; - if (sinfo.filled & STATION_INFO_TX_BITRATE) + if (sinfo.filled & BIT(NL80211_STA_INFO_TX_BITRATE)) data[i] = 100000 * cfg80211_calculate_bitrate(&sinfo.txrate); i++; - if (sinfo.filled & STATION_INFO_RX_BITRATE) + if (sinfo.filled & BIT(NL80211_STA_INFO_RX_BITRATE)) data[i] = 100000 * cfg80211_calculate_bitrate(&sinfo.rxrate); i++; - if (sinfo.filled & STATION_INFO_SIGNAL_AVG) + if (sinfo.filled & BIT(NL80211_STA_INFO_SIGNAL_AVG)) data[i] = (u8)sinfo.signal_avg; i++; } else { @@ -175,24 +175,24 @@ do_survey: data[i++] = (u8)survey.noise; else data[i++] = -1LL; - if (survey.filled & SURVEY_INFO_CHANNEL_TIME) - data[i++] = survey.channel_time; + if (survey.filled & SURVEY_INFO_TIME) + data[i++] = survey.time; else data[i++] = -1LL; - if (survey.filled & SURVEY_INFO_CHANNEL_TIME_BUSY) - data[i++] = survey.channel_time_busy; + if (survey.filled & SURVEY_INFO_TIME_BUSY) + data[i++] = survey.time_busy; else data[i++] = -1LL; - if (survey.filled & SURVEY_INFO_CHANNEL_TIME_EXT_BUSY) - data[i++] = survey.channel_time_ext_busy; + if (survey.filled & SURVEY_INFO_TIME_EXT_BUSY) + data[i++] = survey.time_ext_busy; else data[i++] = -1LL; - if (survey.filled & SURVEY_INFO_CHANNEL_TIME_RX) - data[i++] = survey.channel_time_rx; + if (survey.filled & SURVEY_INFO_TIME_RX) + data[i++] = survey.time_rx; else data[i++] = -1LL; - if (survey.filled & SURVEY_INFO_CHANNEL_TIME_TX) - data[i++] = survey.channel_time_tx; + if (survey.filled & SURVEY_INFO_TIME_TX) + data[i++] = 
survey.time_tx; else data[i++] = -1LL; diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 509bc157ce55..b606b53a49a7 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -1069,9 +1069,16 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, } if (sta && rates_updated) { - drv_sta_rc_update(local, sdata, &sta->sta, - IEEE80211_RC_SUPP_RATES_CHANGED); + u32 changed = IEEE80211_RC_SUPP_RATES_CHANGED; + u8 rx_nss = sta->sta.rx_nss; + + /* Force rx_nss recalculation */ + sta->sta.rx_nss = 0; rate_control_rate_init(sta); + if (sta->sta.rx_nss != rx_nss) + changed |= IEEE80211_RC_NSS_CHANGED; + + drv_sta_rc_update(local, sdata, &sta->sta, changed); } rcu_read_unlock(); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index cc6e964d9837..3afe36824703 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1168,8 +1168,6 @@ struct ieee80211_local { /* wowlan is enabled -- don't reconfig on resume */ bool wowlan; - /* DFS/radar detection is enabled */ - bool radar_detect_enabled; struct work_struct radar_detected_work; /* number of RX chains the hardware has */ @@ -1623,7 +1621,8 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local); void ieee80211_del_virtual_monitor(struct ieee80211_local *local); bool __ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata); -void ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata); +void ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata, + bool update_bss); static inline bool ieee80211_sdata_running(struct ieee80211_sub_if_data *sdata) { @@ -1704,6 +1703,7 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, struct ieee80211_supported_band *sband, const struct ieee80211_vht_cap *vht_cap_ie, struct sta_info *sta); +enum ieee80211_sta_rx_bandwidth ieee80211_sta_cap_rx_bw(struct sta_info *sta); enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct sta_info *sta); void ieee80211_sta_set_rx_nss(struct sta_info *sta); u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata, @@ -1752,7 +1752,8 @@ static inline int __ieee80211_resume(struct ieee80211_hw *hw) { struct ieee80211_local *local = hw_to_local(hw); - WARN(test_bit(SCAN_HW_SCANNING, &local->scanning), + WARN(test_bit(SCAN_HW_SCANNING, &local->scanning) && + !test_bit(SCAN_COMPLETED, &local->scanning), "%s: resume with hardware scan still in progress\n", wiphy_name(hw->wiphy)); @@ -1881,10 +1882,40 @@ void ieee80211_add_pending_skb(struct ieee80211_local *local, void ieee80211_add_pending_skbs(struct ieee80211_local *local, struct sk_buff_head *skbs); void ieee80211_flush_queues(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata); + struct ieee80211_sub_if_data *sdata, bool drop); void __ieee80211_flush_queues(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, - unsigned int queues); + unsigned int queues, bool drop); + +static inline bool ieee80211_can_run_worker(struct ieee80211_local *local) +{ + /* + * If quiescing is set, we are racing with __ieee80211_suspend. + * __ieee80211_suspend flushes the workers after setting quiescing, + * and we check quiescing / suspended before enqueing new workers. + * We should abort the worker to avoid the races below. 
+ */ + if (local->quiescing) + return false; + + /* + * We might already be suspended if the following scenario occurs: + * __ieee80211_suspend Control path + * + * if (local->quiescing) + * return; + * local->quiescing = true; + * flush_workqueue(); + * queue_work(...); + * local->suspended = true; + * local->quiescing = false; + * worker starts running... + */ + if (local->suspended) + return false; + + return true; +} void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, u16 transaction, u16 auth_alg, u16 status, @@ -1981,6 +2012,7 @@ void ieee80211_recalc_smps_chanctx(struct ieee80211_local *local, struct ieee80211_chanctx *chanctx); void ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local, struct ieee80211_chanctx *ctx); +bool ieee80211_is_radar_required(struct ieee80211_local *local); void ieee80211_dfs_cac_timer(unsigned long data); void ieee80211_dfs_cac_timer_work(struct work_struct *work); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 417355390873..81a27516813e 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -73,9 +73,10 @@ bool __ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata) return false; } -void ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata) +void ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata, + bool update_bss) { - if (__ieee80211_recalc_txpower(sdata)) + if (__ieee80211_recalc_txpower(sdata) || update_bss) ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_TXPOWER); } @@ -93,7 +94,7 @@ static u32 __ieee80211_idle_on(struct ieee80211_local *local) if (local->hw.conf.flags & IEEE80211_CONF_IDLE) return 0; - ieee80211_flush_queues(local, NULL); + ieee80211_flush_queues(local, NULL, false); local->hw.conf.flags |= IEEE80211_CONF_IDLE; return IEEE80211_CONF_CHANGE_IDLE; @@ -1169,12 +1170,7 @@ static void ieee80211_iface_work(struct work_struct *work) if (local->scanning) return; - /* - * ieee80211_queue_work() should have picked up most cases, - * here we'll pick the rest. 
- */ - if (WARN(local->suspended, - "interface work scheduled while going to suspend\n")) + if (!ieee80211_can_run_worker(local)) return; /* first process frames */ diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 0bb7038121ac..0825d76edcfc 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -24,6 +24,8 @@ #include "debugfs_key.h" #include "aes_ccm.h" #include "aes_cmac.h" +#include "aes_gmac.h" +#include "aes_gcm.h" /** @@ -90,7 +92,7 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key) { struct ieee80211_sub_if_data *sdata; struct sta_info *sta; - int ret; + int ret = -EOPNOTSUPP; might_sleep(); @@ -140,7 +142,8 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key) if (!ret) { key->flags |= KEY_FLAG_UPLOADED_TO_HARDWARE; - if (!(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC)) + if (!((key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC) || + (key->conf.flags & IEEE80211_KEY_FLAG_RESERVE_TAILROOM))) sdata->crypto_tx_tailroom_needed_cnt--; WARN_ON((key->conf.flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE) && @@ -149,7 +152,7 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key) return 0; } - if (ret != -ENOSPC && ret != -EOPNOTSUPP) + if (ret != -ENOSPC && ret != -EOPNOTSUPP && ret != 1) sdata_err(sdata, "failed to set key (%d, %pM) to hardware (%d)\n", key->conf.keyidx, @@ -161,8 +164,18 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key) case WLAN_CIPHER_SUITE_WEP104: case WLAN_CIPHER_SUITE_TKIP: case WLAN_CIPHER_SUITE_CCMP: + case WLAN_CIPHER_SUITE_CCMP_256: case WLAN_CIPHER_SUITE_AES_CMAC: - /* all of these we can do in software */ + case WLAN_CIPHER_SUITE_BIP_CMAC_256: + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: + /* all of these we can do in software - if driver can */ + if (ret == 1) + return 0; + if (key->local->hw.flags & IEEE80211_HW_SW_CRYPTO_CONTROL) + return -EINVAL; return 0; default: return -EINVAL; @@ -188,7 +201,8 @@ static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key) sta = key->sta; sdata = key->sdata; - if (!(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC)) + if (!((key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC) || + (key->conf.flags & IEEE80211_KEY_FLAG_RESERVE_TAILROOM))) increment_tailroom_need_count(sdata); ret = drv_set_key(key->local, DISABLE_KEY, sdata, @@ -380,7 +394,26 @@ ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, * Initialize AES key state here as an optimization so that * it does not need to be initialized for every packet. */ - key->u.ccmp.tfm = ieee80211_aes_key_setup_encrypt(key_data); + key->u.ccmp.tfm = ieee80211_aes_key_setup_encrypt( + key_data, key_len, IEEE80211_CCMP_MIC_LEN); + if (IS_ERR(key->u.ccmp.tfm)) { + err = PTR_ERR(key->u.ccmp.tfm); + kfree(key); + return ERR_PTR(err); + } + break; + case WLAN_CIPHER_SUITE_CCMP_256: + key->conf.iv_len = IEEE80211_CCMP_256_HDR_LEN; + key->conf.icv_len = IEEE80211_CCMP_256_MIC_LEN; + for (i = 0; seq && i < IEEE80211_NUM_TIDS + 1; i++) + for (j = 0; j < IEEE80211_CCMP_256_PN_LEN; j++) + key->u.ccmp.rx_pn[i][j] = + seq[IEEE80211_CCMP_256_PN_LEN - j - 1]; + /* Initialize AES key state here as an optimization so that + * it does not need to be initialized for every packet. 
+ */ + key->u.ccmp.tfm = ieee80211_aes_key_setup_encrypt( + key_data, key_len, IEEE80211_CCMP_256_MIC_LEN); if (IS_ERR(key->u.ccmp.tfm)) { err = PTR_ERR(key->u.ccmp.tfm); kfree(key); @@ -388,8 +421,12 @@ ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, } break; case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_CMAC_256: key->conf.iv_len = 0; - key->conf.icv_len = sizeof(struct ieee80211_mmie); + if (cipher == WLAN_CIPHER_SUITE_AES_CMAC) + key->conf.icv_len = sizeof(struct ieee80211_mmie); + else + key->conf.icv_len = sizeof(struct ieee80211_mmie_16); if (seq) for (j = 0; j < IEEE80211_CMAC_PN_LEN; j++) key->u.aes_cmac.rx_pn[j] = @@ -399,13 +436,51 @@ ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, * it does not need to be initialized for every packet. */ key->u.aes_cmac.tfm = - ieee80211_aes_cmac_key_setup(key_data); + ieee80211_aes_cmac_key_setup(key_data, key_len); if (IS_ERR(key->u.aes_cmac.tfm)) { err = PTR_ERR(key->u.aes_cmac.tfm); kfree(key); return ERR_PTR(err); } break; + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + key->conf.iv_len = 0; + key->conf.icv_len = sizeof(struct ieee80211_mmie_16); + if (seq) + for (j = 0; j < IEEE80211_GMAC_PN_LEN; j++) + key->u.aes_gmac.rx_pn[j] = + seq[IEEE80211_GMAC_PN_LEN - j - 1]; + /* Initialize AES key state here as an optimization so that + * it does not need to be initialized for every packet. + */ + key->u.aes_gmac.tfm = + ieee80211_aes_gmac_key_setup(key_data, key_len); + if (IS_ERR(key->u.aes_gmac.tfm)) { + err = PTR_ERR(key->u.aes_gmac.tfm); + kfree(key); + return ERR_PTR(err); + } + break; + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: + key->conf.iv_len = IEEE80211_GCMP_HDR_LEN; + key->conf.icv_len = IEEE80211_GCMP_MIC_LEN; + for (i = 0; seq && i < IEEE80211_NUM_TIDS + 1; i++) + for (j = 0; j < IEEE80211_GCMP_PN_LEN; j++) + key->u.gcmp.rx_pn[i][j] = + seq[IEEE80211_GCMP_PN_LEN - j - 1]; + /* Initialize AES key state here as an optimization so that + * it does not need to be initialized for every packet. + */ + key->u.gcmp.tfm = ieee80211_aes_gcm_key_setup_encrypt(key_data, + key_len); + if (IS_ERR(key->u.gcmp.tfm)) { + err = PTR_ERR(key->u.gcmp.tfm); + kfree(key); + return ERR_PTR(err); + } + break; default: if (cs) { size_t len = (seq_len > MAX_PN_LEN) ? 
@@ -427,10 +502,24 @@ ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, static void ieee80211_key_free_common(struct ieee80211_key *key) { - if (key->conf.cipher == WLAN_CIPHER_SUITE_CCMP) + switch (key->conf.cipher) { + case WLAN_CIPHER_SUITE_CCMP: + case WLAN_CIPHER_SUITE_CCMP_256: ieee80211_aes_key_free(key->u.ccmp.tfm); - if (key->conf.cipher == WLAN_CIPHER_SUITE_AES_CMAC) + break; + case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_CMAC_256: ieee80211_aes_cmac_key_free(key->u.aes_cmac.tfm); + break; + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + ieee80211_aes_gmac_key_free(key->u.aes_gmac.tfm); + break; + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: + ieee80211_aes_gcm_key_free(key->u.gcmp.tfm); + break; + } kzfree(key); } @@ -737,6 +826,7 @@ void ieee80211_get_key_tx_seq(struct ieee80211_key_conf *keyconf, seq->tkip.iv16 = key->u.tkip.tx.iv16; break; case WLAN_CIPHER_SUITE_CCMP: + case WLAN_CIPHER_SUITE_CCMP_256: pn64 = atomic64_read(&key->u.ccmp.tx_pn); seq->ccmp.pn[5] = pn64; seq->ccmp.pn[4] = pn64 >> 8; @@ -746,6 +836,7 @@ void ieee80211_get_key_tx_seq(struct ieee80211_key_conf *keyconf, seq->ccmp.pn[0] = pn64 >> 40; break; case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_CMAC_256: pn64 = atomic64_read(&key->u.aes_cmac.tx_pn); seq->ccmp.pn[5] = pn64; seq->ccmp.pn[4] = pn64 >> 8; @@ -754,6 +845,26 @@ void ieee80211_get_key_tx_seq(struct ieee80211_key_conf *keyconf, seq->ccmp.pn[1] = pn64 >> 32; seq->ccmp.pn[0] = pn64 >> 40; break; + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + pn64 = atomic64_read(&key->u.aes_gmac.tx_pn); + seq->ccmp.pn[5] = pn64; + seq->ccmp.pn[4] = pn64 >> 8; + seq->ccmp.pn[3] = pn64 >> 16; + seq->ccmp.pn[2] = pn64 >> 24; + seq->ccmp.pn[1] = pn64 >> 32; + seq->ccmp.pn[0] = pn64 >> 40; + break; + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: + pn64 = atomic64_read(&key->u.gcmp.tx_pn); + seq->gcmp.pn[5] = pn64; + seq->gcmp.pn[4] = pn64 >> 8; + seq->gcmp.pn[3] = pn64 >> 16; + seq->gcmp.pn[2] = pn64 >> 24; + seq->gcmp.pn[1] = pn64 >> 32; + seq->gcmp.pn[0] = pn64 >> 40; + break; default: WARN_ON(1); } @@ -776,6 +887,7 @@ void ieee80211_get_key_rx_seq(struct ieee80211_key_conf *keyconf, seq->tkip.iv16 = key->u.tkip.rx[tid].iv16; break; case WLAN_CIPHER_SUITE_CCMP: + case WLAN_CIPHER_SUITE_CCMP_256: if (WARN_ON(tid < -1 || tid >= IEEE80211_NUM_TIDS)) return; if (tid < 0) @@ -785,11 +897,29 @@ void ieee80211_get_key_rx_seq(struct ieee80211_key_conf *keyconf, memcpy(seq->ccmp.pn, pn, IEEE80211_CCMP_PN_LEN); break; case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_CMAC_256: if (WARN_ON(tid != 0)) return; pn = key->u.aes_cmac.rx_pn; memcpy(seq->aes_cmac.pn, pn, IEEE80211_CMAC_PN_LEN); break; + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + if (WARN_ON(tid != 0)) + return; + pn = key->u.aes_gmac.rx_pn; + memcpy(seq->aes_gmac.pn, pn, IEEE80211_GMAC_PN_LEN); + break; + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: + if (WARN_ON(tid < -1 || tid >= IEEE80211_NUM_TIDS)) + return; + if (tid < 0) + pn = key->u.gcmp.rx_pn[IEEE80211_NUM_TIDS]; + else + pn = key->u.gcmp.rx_pn[tid]; + memcpy(seq->gcmp.pn, pn, IEEE80211_GCMP_PN_LEN); + break; } } EXPORT_SYMBOL(ieee80211_get_key_rx_seq); @@ -808,6 +938,7 @@ void ieee80211_set_key_tx_seq(struct ieee80211_key_conf *keyconf, key->u.tkip.tx.iv16 = seq->tkip.iv16; break; case WLAN_CIPHER_SUITE_CCMP: + case WLAN_CIPHER_SUITE_CCMP_256: pn64 = 
(u64)seq->ccmp.pn[5] | ((u64)seq->ccmp.pn[4] << 8) | ((u64)seq->ccmp.pn[3] << 16) | @@ -817,6 +948,7 @@ void ieee80211_set_key_tx_seq(struct ieee80211_key_conf *keyconf, atomic64_set(&key->u.ccmp.tx_pn, pn64); break; case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_CMAC_256: pn64 = (u64)seq->aes_cmac.pn[5] | ((u64)seq->aes_cmac.pn[4] << 8) | ((u64)seq->aes_cmac.pn[3] << 16) | @@ -825,6 +957,26 @@ void ieee80211_set_key_tx_seq(struct ieee80211_key_conf *keyconf, ((u64)seq->aes_cmac.pn[0] << 40); atomic64_set(&key->u.aes_cmac.tx_pn, pn64); break; + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + pn64 = (u64)seq->aes_gmac.pn[5] | + ((u64)seq->aes_gmac.pn[4] << 8) | + ((u64)seq->aes_gmac.pn[3] << 16) | + ((u64)seq->aes_gmac.pn[2] << 24) | + ((u64)seq->aes_gmac.pn[1] << 32) | + ((u64)seq->aes_gmac.pn[0] << 40); + atomic64_set(&key->u.aes_gmac.tx_pn, pn64); + break; + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: + pn64 = (u64)seq->gcmp.pn[5] | + ((u64)seq->gcmp.pn[4] << 8) | + ((u64)seq->gcmp.pn[3] << 16) | + ((u64)seq->gcmp.pn[2] << 24) | + ((u64)seq->gcmp.pn[1] << 32) | + ((u64)seq->gcmp.pn[0] << 40); + atomic64_set(&key->u.gcmp.tx_pn, pn64); + break; default: WARN_ON(1); break; @@ -848,6 +1000,7 @@ void ieee80211_set_key_rx_seq(struct ieee80211_key_conf *keyconf, key->u.tkip.rx[tid].iv16 = seq->tkip.iv16; break; case WLAN_CIPHER_SUITE_CCMP: + case WLAN_CIPHER_SUITE_CCMP_256: if (WARN_ON(tid < -1 || tid >= IEEE80211_NUM_TIDS)) return; if (tid < 0) @@ -857,11 +1010,29 @@ void ieee80211_set_key_rx_seq(struct ieee80211_key_conf *keyconf, memcpy(pn, seq->ccmp.pn, IEEE80211_CCMP_PN_LEN); break; case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_CMAC_256: if (WARN_ON(tid != 0)) return; pn = key->u.aes_cmac.rx_pn; memcpy(pn, seq->aes_cmac.pn, IEEE80211_CMAC_PN_LEN); break; + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + if (WARN_ON(tid != 0)) + return; + pn = key->u.aes_gmac.rx_pn; + memcpy(pn, seq->aes_gmac.pn, IEEE80211_GMAC_PN_LEN); + break; + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: + if (WARN_ON(tid < -1 || tid >= IEEE80211_NUM_TIDS)) + return; + if (tid < 0) + pn = key->u.gcmp.rx_pn[IEEE80211_NUM_TIDS]; + else + pn = key->u.gcmp.rx_pn[tid]; + memcpy(pn, seq->gcmp.pn, IEEE80211_GCMP_PN_LEN); + break; default: WARN_ON(1); break; @@ -884,7 +1055,8 @@ void ieee80211_remove_key(struct ieee80211_key_conf *keyconf) if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) { key->flags &= ~KEY_FLAG_UPLOADED_TO_HARDWARE; - if (!(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC)) + if (!((key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC) || + (key->conf.flags & IEEE80211_KEY_FLAG_RESERVE_TAILROOM))) increment_tailroom_need_count(key->sdata); } diff --git a/net/mac80211/key.h b/net/mac80211/key.h index 19db68663d75..d57a9915494f 100644 --- a/net/mac80211/key.h +++ b/net/mac80211/key.h @@ -95,6 +95,24 @@ struct ieee80211_key { u32 icverrors; /* dot11RSNAStatsCMACICVErrors */ } aes_cmac; struct { + atomic64_t tx_pn; + u8 rx_pn[IEEE80211_GMAC_PN_LEN]; + struct crypto_aead *tfm; + u32 replays; /* dot11RSNAStatsCMACReplays */ + u32 icverrors; /* dot11RSNAStatsCMACICVErrors */ + } aes_gmac; + struct { + atomic64_t tx_pn; + /* Last received packet number. The first + * IEEE80211_NUM_TIDS counters are used with Data + * frames and the last counter is used with Robust + * Management frames. 
+ */ + u8 rx_pn[IEEE80211_NUM_TIDS + 1][IEEE80211_GCMP_PN_LEN]; + struct crypto_aead *tfm; + u32 replays; /* dot11RSNAStatsGCMPReplays */ + } gcmp; + struct { /* generic cipher scheme */ u8 rx_pn[IEEE80211_NUM_TIDS + 1][MAX_PN_LEN]; } gen; diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 6ab99da38db9..5e09d354c5a5 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -658,7 +658,6 @@ static int ieee80211_init_cipher_suites(struct ieee80211_local *local) bool have_wep = !(IS_ERR(local->wep_tx_tfm) || IS_ERR(local->wep_rx_tfm)); bool have_mfp = local->hw.flags & IEEE80211_HW_MFP_CAPABLE; - const struct ieee80211_cipher_scheme *cs = local->hw.cipher_schemes; int n_suites = 0, r = 0, w = 0; u32 *suites; static const u32 cipher_suites[] = { @@ -667,79 +666,109 @@ static int ieee80211_init_cipher_suites(struct ieee80211_local *local) WLAN_CIPHER_SUITE_WEP104, WLAN_CIPHER_SUITE_TKIP, WLAN_CIPHER_SUITE_CCMP, + WLAN_CIPHER_SUITE_CCMP_256, + WLAN_CIPHER_SUITE_GCMP, + WLAN_CIPHER_SUITE_GCMP_256, /* keep last -- depends on hw flags! */ - WLAN_CIPHER_SUITE_AES_CMAC + WLAN_CIPHER_SUITE_AES_CMAC, + WLAN_CIPHER_SUITE_BIP_CMAC_256, + WLAN_CIPHER_SUITE_BIP_GMAC_128, + WLAN_CIPHER_SUITE_BIP_GMAC_256, }; - /* Driver specifies the ciphers, we have nothing to do... */ - if (local->hw.wiphy->cipher_suites && have_wep) - return 0; + if (local->hw.flags & IEEE80211_HW_SW_CRYPTO_CONTROL || + local->hw.wiphy->cipher_suites) { + /* If the driver advertises, or doesn't support SW crypto, + * we only need to remove WEP if necessary. + */ + if (have_wep) + return 0; + + /* well if it has _no_ ciphers ... fine */ + if (!local->hw.wiphy->n_cipher_suites) + return 0; + + /* Driver provides cipher suites, but we need to exclude WEP */ + suites = kmemdup(local->hw.wiphy->cipher_suites, + sizeof(u32) * local->hw.wiphy->n_cipher_suites, + GFP_KERNEL); + if (!suites) + return -ENOMEM; + + for (r = 0; r < local->hw.wiphy->n_cipher_suites; r++) { + u32 suite = local->hw.wiphy->cipher_suites[r]; - /* Set up cipher suites if driver relies on mac80211 cipher defs */ - if (!local->hw.wiphy->cipher_suites && !cs) { + if (suite == WLAN_CIPHER_SUITE_WEP40 || + suite == WLAN_CIPHER_SUITE_WEP104) + continue; + suites[w++] = suite; + } + } else if (!local->hw.cipher_schemes) { + /* If the driver doesn't have cipher schemes, there's nothing + * else to do other than assign the (software supported and + * perhaps offloaded) cipher suites. 
+ */ local->hw.wiphy->cipher_suites = cipher_suites; local->hw.wiphy->n_cipher_suites = ARRAY_SIZE(cipher_suites); if (!have_mfp) - local->hw.wiphy->n_cipher_suites--; + local->hw.wiphy->n_cipher_suites -= 4; if (!have_wep) { local->hw.wiphy->cipher_suites += 2; local->hw.wiphy->n_cipher_suites -= 2; } + /* not dynamically allocated, so just return */ return 0; - } + } else { + const struct ieee80211_cipher_scheme *cs; - if (!local->hw.wiphy->cipher_suites) { - /* - * Driver specifies cipher schemes only - * We start counting ciphers defined by schemes, TKIP and CCMP + cs = local->hw.cipher_schemes; + + /* Driver specifies cipher schemes only (but not cipher suites + * including the schemes) + * + * We start counting ciphers defined by schemes, TKIP, CCMP, + * CCMP-256, GCMP, and GCMP-256 */ - n_suites = local->hw.n_cipher_schemes + 2; + n_suites = local->hw.n_cipher_schemes + 5; /* check if we have WEP40 and WEP104 */ if (have_wep) n_suites += 2; - /* check if we have AES_CMAC */ + /* check if we have AES_CMAC, BIP-CMAC-256, BIP-GMAC-128, + * BIP-GMAC-256 + */ if (have_mfp) - n_suites++; + n_suites += 4; suites = kmalloc(sizeof(u32) * n_suites, GFP_KERNEL); if (!suites) return -ENOMEM; suites[w++] = WLAN_CIPHER_SUITE_CCMP; + suites[w++] = WLAN_CIPHER_SUITE_CCMP_256; suites[w++] = WLAN_CIPHER_SUITE_TKIP; + suites[w++] = WLAN_CIPHER_SUITE_GCMP; + suites[w++] = WLAN_CIPHER_SUITE_GCMP_256; if (have_wep) { suites[w++] = WLAN_CIPHER_SUITE_WEP40; suites[w++] = WLAN_CIPHER_SUITE_WEP104; } - if (have_mfp) + if (have_mfp) { suites[w++] = WLAN_CIPHER_SUITE_AES_CMAC; + suites[w++] = WLAN_CIPHER_SUITE_BIP_CMAC_256; + suites[w++] = WLAN_CIPHER_SUITE_BIP_GMAC_128; + suites[w++] = WLAN_CIPHER_SUITE_BIP_GMAC_256; + } for (r = 0; r < local->hw.n_cipher_schemes; r++) suites[w++] = cs[r].cipher; - } else { - /* Driver provides cipher suites, but we need to exclude WEP */ - suites = kmemdup(local->hw.wiphy->cipher_suites, - sizeof(u32) * local->hw.wiphy->n_cipher_suites, - GFP_KERNEL); - if (!suites) - return -ENOMEM; - - for (r = 0; r < local->hw.wiphy->n_cipher_suites; r++) { - u32 suite = local->hw.wiphy->cipher_suites[r]; - - if (suite == WLAN_CIPHER_SUITE_WEP40 || - suite == WLAN_CIPHER_SUITE_WEP104) - continue; - suites[w++] = suite; - } } local->hw.wiphy->cipher_suites = suites; @@ -916,10 +945,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) } } - WARN((local->hw.flags & IEEE80211_HW_SUPPORTS_UAPSD) - && (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK), - "U-APSD not supported with HW_PS_NULLFUNC_STACK\n"); - /* * Calculate scan IE length -- we need this to alloc * memory and to subtract from the driver limit. 
It @@ -1045,10 +1070,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) ieee80211_max_network_latency; result = pm_qos_add_notifier(PM_QOS_NETWORK_LATENCY, &local->network_latency_notifier); - if (result) { - rtnl_lock(); + if (result) goto fail_pm_qos; - } #ifdef CONFIG_INET local->ifa_notifier.notifier_call = ieee80211_ifa_changed; @@ -1076,15 +1099,15 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) fail_ifa: pm_qos_remove_notifier(PM_QOS_NETWORK_LATENCY, &local->network_latency_notifier); - rtnl_lock(); #endif fail_pm_qos: - ieee80211_led_exit(local); + rtnl_lock(); + rate_control_deinitialize(local); ieee80211_remove_interfaces(local); fail_rate: rtnl_unlock(); + ieee80211_led_exit(local); ieee80211_wep_free(local); - sta_info_stop(local); destroy_workqueue(local->workqueue); fail_workqueue: wiphy_unregister(local->hw.wiphy); @@ -1180,6 +1203,8 @@ void ieee80211_free_hw(struct ieee80211_hw *hw) kfree(rcu_access_pointer(local->tx_latency)); + sta_info_stop(local); + wiphy_free(local->hw.wiphy); } EXPORT_SYMBOL(ieee80211_free_hw); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 2c36c4765f47..10ac6324c1d0 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -157,14 +157,18 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct cfg80211_chan_def vht_chandef; + struct ieee80211_sta_ht_cap sta_ht_cap; u32 ht_cfreq, ret; + memcpy(&sta_ht_cap, &sband->ht_cap, sizeof(sta_ht_cap)); + ieee80211_apply_htcap_overrides(sdata, &sta_ht_cap); + chandef->chan = channel; chandef->width = NL80211_CHAN_WIDTH_20_NOHT; chandef->center_freq1 = channel->center_freq; chandef->center_freq2 = 0; - if (!ht_cap || !ht_oper || !sband->ht_cap.ht_supported) { + if (!ht_cap || !ht_oper || !sta_ht_cap.ht_supported) { ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT; goto out; } @@ -198,7 +202,7 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, } /* check 40 MHz support, if we have it */ - if (sband->ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40) { + if (sta_ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40) { switch (ht_oper->ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET) { case IEEE80211_HT_PARAM_CHA_SEC_ABOVE: chandef->width = NL80211_CHAN_WIDTH_40; @@ -1054,8 +1058,6 @@ static void ieee80211_chswitch_post_beacon(struct ieee80211_sub_if_data *sdata) sdata->csa_block_tx = false; } - cfg80211_ch_switch_notify(sdata->dev, &sdata->reserved_chandef); - sdata->vif.csa_active = false; ifmgd->csa_waiting_bcn = false; @@ -1067,6 +1069,8 @@ static void ieee80211_chswitch_post_beacon(struct ieee80211_sub_if_data *sdata) &ifmgd->csa_connection_drop_work); return; } + + cfg80211_ch_switch_notify(sdata->dev, &sdata->reserved_chandef); } void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success) @@ -1284,8 +1288,11 @@ ieee80211_find_80211h_pwr_constr(struct ieee80211_sub_if_data *sdata, country_ie_len -= 3; } - if (have_chan_pwr) + if (have_chan_pwr && pwr_constr_elem) *pwr_reduction = *pwr_constr_elem; + else + *pwr_reduction = 0; + return have_chan_pwr; } @@ -1314,10 +1321,11 @@ static u32 ieee80211_handle_pwr_constr(struct ieee80211_sub_if_data *sdata, int chan_pwr = 0, pwr_reduction_80211h = 0; int pwr_level_cisco, pwr_level_80211h; int new_ap_level; + __le16 capab = mgmt->u.probe_resp.capab_info; - if (country_ie && pwr_constr_ie && - mgmt->u.probe_resp.capab_info & - cpu_to_le16(WLAN_CAPABILITY_SPECTRUM_MGMT)) { + if (country_ie && + (capab & cpu_to_le16(WLAN_CAPABILITY_SPECTRUM_MGMT) 
|| + capab & cpu_to_le16(WLAN_CAPABILITY_RADIO_MEASURE))) { has_80211h_pwr = ieee80211_find_80211h_pwr_constr( sdata, channel, country_ie, country_ie_len, pwr_constr_ie, &chan_pwr, &pwr_reduction_80211h); @@ -1596,7 +1604,7 @@ void ieee80211_dynamic_ps_enable_work(struct work_struct *work) } else { ieee80211_send_nullfunc(local, sdata, 1); /* Flush to get the tx status of nullfunc frame */ - ieee80211_flush_queues(local, sdata); + ieee80211_flush_queues(local, sdata, false); } } @@ -1643,7 +1651,7 @@ __ieee80211_sta_handle_tspec_ac_params(struct ieee80211_sub_if_data *sdata) { struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - bool ret; + bool ret = false; int ac; if (local->hw.queues < IEEE80211_NUM_ACS) @@ -2003,18 +2011,26 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, /* disable per-vif ps */ ieee80211_recalc_ps_vif(sdata); - /* flush out any pending frame (e.g. DELBA) before deauth/disassoc */ + /* make sure ongoing transmission finishes */ + synchronize_net(); + + /* + * drop any frame before deauth/disassoc, this can be data or + * management frame. Since we are disconnecting, we should not + * insist sending these frames which can take time and delay + * the disconnection and possible the roaming. + */ if (tx) - ieee80211_flush_queues(local, sdata); + ieee80211_flush_queues(local, sdata, true); /* deauthenticate/disassociate now */ if (tx || frame_buf) ieee80211_send_deauth_disassoc(sdata, ifmgd->bssid, stype, reason, tx, frame_buf); - /* flush out frame */ + /* flush out frame - make sure the deauth was actually sent */ if (tx) - ieee80211_flush_queues(local, sdata); + ieee80211_flush_queues(local, sdata, false); /* clear bssid only after building the needed mgmt frames */ memset(ifmgd->bssid, 0, ETH_ALEN); @@ -2440,6 +2456,12 @@ static void ieee80211_destroy_auth_data(struct ieee80211_sub_if_data *sdata, sdata_assert_lock(sdata); if (!assoc) { + /* + * we are not authenticated yet, the only timer that could be + * running is the timeout for the authentication response which + * which is not relevant anymore. + */ + del_timer_sync(&sdata->u.mgd.timer); sta_info_destroy_addr(sdata, auth_data->bss->bssid); memset(sdata->u.mgd.bssid, 0, ETH_ALEN); @@ -2747,6 +2769,12 @@ static void ieee80211_destroy_assoc_data(struct ieee80211_sub_if_data *sdata, sdata_assert_lock(sdata); if (!assoc) { + /* + * we are not associated yet, the only timer that could be + * running is the timeout for the association response which + * which is not relevant anymore. 
+ */ + del_timer_sync(&sdata->u.mgd.timer); sta_info_destroy_addr(sdata, assoc_data->bss->bssid); memset(sdata->u.mgd.bssid, 0, ETH_ALEN); @@ -4197,9 +4225,13 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_bss *bss = (void *)cbss->priv; struct sta_info *new_sta = NULL; - bool have_sta = false; + struct ieee80211_supported_band *sband; + struct ieee80211_sta_ht_cap sta_ht_cap; + bool have_sta = false, is_override = false; int err; + sband = local->hw.wiphy->bands[cbss->channel->band]; + if (WARN_ON(!ifmgd->auth_data && !ifmgd->assoc_data)) return -EINVAL; @@ -4214,25 +4246,32 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, if (!new_sta) return -ENOMEM; } + + memcpy(&sta_ht_cap, &sband->ht_cap, sizeof(sta_ht_cap)); + ieee80211_apply_htcap_overrides(sdata, &sta_ht_cap); + + is_override = (sta_ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40) != + (sband->ht_cap.cap & + IEEE80211_HT_CAP_SUP_WIDTH_20_40); + + if (new_sta || is_override) { + err = ieee80211_prep_channel(sdata, cbss); + if (err) { + if (new_sta) + sta_info_free(local, new_sta); + return -EINVAL; + } + } + if (new_sta) { u32 rates = 0, basic_rates = 0; bool have_higher_than_11mbit; int min_rate = INT_MAX, min_rate_index = -1; struct ieee80211_chanctx_conf *chanctx_conf; - struct ieee80211_supported_band *sband; const struct cfg80211_bss_ies *ies; - int shift; + int shift = ieee80211_vif_get_shift(&sdata->vif); u32 rate_flags; - sband = local->hw.wiphy->bands[cbss->channel->band]; - - err = ieee80211_prep_channel(sdata, cbss); - if (err) { - sta_info_free(local, new_sta); - return -EINVAL; - } - shift = ieee80211_vif_get_shift(&sdata->vif); - rcu_read_lock(); chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); if (WARN_ON(!chanctx_conf)) { @@ -4668,8 +4707,13 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, ifmgd->flags |= IEEE80211_STA_DISABLE_VHT; rcu_read_unlock(); + if (WARN((sdata->vif.driver_flags & IEEE80211_VIF_SUPPORTS_UAPSD) && + (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK), + "U-APSD not supported with HW_PS_NULLFUNC_STACK\n")) + sdata->vif.driver_flags &= ~IEEE80211_VIF_SUPPORTS_UAPSD; + if (bss->wmm_used && bss->uapsd_supported && - (sdata->local->hw.flags & IEEE80211_HW_SUPPORTS_UAPSD)) { + (sdata->vif.driver_flags & IEEE80211_VIF_SUPPORTS_UAPSD)) { assoc_data->uapsd = true; ifmgd->flags |= IEEE80211_STA_UAPSD_ENABLED; } else { diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index ff20b2ebdb30..683f0e3cb124 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -121,7 +121,7 @@ void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local) ieee80211_stop_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_OFFCHANNEL, false); - ieee80211_flush_queues(local, NULL); + ieee80211_flush_queues(local, NULL, false); mutex_lock(&local->iflist_mtx); list_for_each_entry(sdata, &local->interfaces, list) { @@ -398,7 +398,7 @@ void ieee80211_sw_roc_work(struct work_struct *work) ieee80211_roc_notify_destroy(roc, !roc->abort); if (started && !on_channel) { - ieee80211_flush_queues(local, NULL); + ieee80211_flush_queues(local, NULL, false); local->tmp_channel = NULL; ieee80211_hw_config(local, 0); diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index 4c5192e0d66c..ca405b6b686d 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -41,7 +41,7 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct 
cfg80211_wowlan *wowlan) /* flush out all packets */ synchronize_net(); - ieee80211_flush_queues(local, NULL); + ieee80211_flush_queues(local, NULL, true); local->quiescing = true; /* make quiescing visible to timers everywhere */ @@ -86,20 +86,6 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) } } - /* tear down aggregation sessions and remove STAs */ - mutex_lock(&local->sta_mtx); - list_for_each_entry(sta, &local->sta_list, list) { - if (sta->uploaded) { - enum ieee80211_sta_state state; - - state = sta->sta_state; - for (; state > IEEE80211_STA_NOTEXIST; state--) - WARN_ON(drv_sta_state(local, sta->sdata, sta, - state, state - 1)); - } - } - mutex_unlock(&local->sta_mtx); - /* remove all interfaces that were created in the driver */ list_for_each_entry(sdata, &local->interfaces, list) { if (!ieee80211_sdata_running(sdata)) @@ -111,6 +97,21 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) case NL80211_IFTYPE_STATION: ieee80211_mgd_quiesce(sdata); break; + case NL80211_IFTYPE_WDS: + /* tear down aggregation sessions and remove STAs */ + mutex_lock(&local->sta_mtx); + sta = sdata->u.wds.sta; + if (sta && sta->uploaded) { + enum ieee80211_sta_state state; + + state = sta->sta_state; + for (; state > IEEE80211_STA_NOTEXIST; state--) + WARN_ON(drv_sta_state(local, sta->sdata, + sta, state, + state - 1)); + } + mutex_unlock(&local->sta_mtx); + break; default: break; } diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c index d51f6b1c549b..7c86a002df95 100644 --- a/net/mac80211/rc80211_minstrel.c +++ b/net/mac80211/rc80211_minstrel.c @@ -263,12 +263,12 @@ static inline unsigned int minstrel_get_retry_count(struct minstrel_rate *mr, struct ieee80211_tx_info *info) { - unsigned int retry = mr->adjusted_retry_count; + u8 retry = mr->adjusted_retry_count; if (info->control.use_rts) - retry = max(2U, min(mr->stats.retry_count_rtscts, retry)); + retry = max_t(u8, 2, min(mr->stats.retry_count_rtscts, retry)); else if (info->control.use_cts_prot) - retry = max(2U, min(mr->retry_count_cts, retry)); + retry = max_t(u8, 2, min(mr->retry_count_cts, retry)); return retry; } diff --git a/net/mac80211/rc80211_minstrel.h b/net/mac80211/rc80211_minstrel.h index 97eca86a4af0..410efe620c57 100644 --- a/net/mac80211/rc80211_minstrel.h +++ b/net/mac80211/rc80211_minstrel.h @@ -33,8 +33,8 @@ minstrel_ewma(int old, int new, int weight) struct minstrel_rate_stats { /* current / last sampling period attempts/success counters */ - unsigned int attempts, last_attempts; - unsigned int success, last_success; + u16 attempts, last_attempts; + u16 success, last_success; /* total attempts/success counters */ u64 att_hist, succ_hist; @@ -46,8 +46,8 @@ struct minstrel_rate_stats { unsigned int cur_prob, probability; /* maximum retry counts */ - unsigned int retry_count; - unsigned int retry_count_rtscts; + u8 retry_count; + u8 retry_count_rtscts; u8 sample_skipped; bool retry_updated; @@ -55,14 +55,15 @@ struct minstrel_rate_stats { struct minstrel_rate { int bitrate; - int rix; + + s8 rix; + u8 retry_count_cts; + u8 adjusted_retry_count; unsigned int perfect_tx_time; unsigned int ack_time; int sample_limit; - unsigned int retry_count_cts; - unsigned int adjusted_retry_count; struct minstrel_rate_stats stats; }; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 683b10f46505..1101563357ea 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -272,7 +272,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, 
else if (rate && rate->flags & IEEE80211_RATE_ERP_G) channel_flags |= IEEE80211_CHAN_OFDM | IEEE80211_CHAN_2GHZ; else if (rate) - channel_flags |= IEEE80211_CHAN_OFDM | IEEE80211_CHAN_2GHZ; + channel_flags |= IEEE80211_CHAN_CCK | IEEE80211_CHAN_2GHZ; else channel_flags |= IEEE80211_CHAN_2GHZ; put_unaligned_le16(channel_flags, pos); @@ -361,9 +361,6 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, u16 known = local->hw.radiotap_vht_details; rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_VHT); - /* known field - how to handle 80+80? */ - if (status->vht_flag & RX_VHT_FLAG_80P80MHZ) - known &= ~IEEE80211_RADIOTAP_VHT_KNOWN_BANDWIDTH; put_unaligned_le16(known, pos); pos += 2; /* flags */ @@ -378,8 +375,6 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, /* bandwidth */ if (status->vht_flag & RX_VHT_FLAG_80MHZ) *pos++ = 4; - else if (status->vht_flag & RX_VHT_FLAG_80P80MHZ) - *pos++ = 0; /* marked not known above */ else if (status->vht_flag & RX_VHT_FLAG_160MHZ) *pos++ = 11; else if (status->flag & RX_FLAG_40MHZ) @@ -652,6 +647,7 @@ static int ieee80211_get_mmie_keyidx(struct sk_buff *skb) { struct ieee80211_mgmt *hdr = (struct ieee80211_mgmt *) skb->data; struct ieee80211_mmie *mmie; + struct ieee80211_mmie_16 *mmie16; if (skb->len < 24 + sizeof(*mmie) || !is_multicast_ether_addr(hdr->da)) return -1; @@ -661,11 +657,18 @@ static int ieee80211_get_mmie_keyidx(struct sk_buff *skb) mmie = (struct ieee80211_mmie *) (skb->data + skb->len - sizeof(*mmie)); - if (mmie->element_id != WLAN_EID_MMIE || - mmie->length != sizeof(*mmie) - 2) - return -1; - - return le16_to_cpu(mmie->key_id); + if (mmie->element_id == WLAN_EID_MMIE && + mmie->length == sizeof(*mmie) - 2) + return le16_to_cpu(mmie->key_id); + + mmie16 = (struct ieee80211_mmie_16 *) + (skb->data + skb->len - sizeof(*mmie16)); + if (skb->len >= 24 + sizeof(*mmie16) && + mmie16->element_id == WLAN_EID_MMIE && + mmie16->length == sizeof(*mmie16) - 2) + return le16_to_cpu(mmie16->key_id); + + return -1; } static int iwl80211_get_cs_keyid(const struct ieee80211_cipher_scheme *cs, @@ -1655,11 +1658,27 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) result = ieee80211_crypto_tkip_decrypt(rx); break; case WLAN_CIPHER_SUITE_CCMP: - result = ieee80211_crypto_ccmp_decrypt(rx); + result = ieee80211_crypto_ccmp_decrypt( + rx, IEEE80211_CCMP_MIC_LEN); + break; + case WLAN_CIPHER_SUITE_CCMP_256: + result = ieee80211_crypto_ccmp_decrypt( + rx, IEEE80211_CCMP_256_MIC_LEN); break; case WLAN_CIPHER_SUITE_AES_CMAC: result = ieee80211_crypto_aes_cmac_decrypt(rx); break; + case WLAN_CIPHER_SUITE_BIP_CMAC_256: + result = ieee80211_crypto_aes_cmac_256_decrypt(rx); + break; + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + result = ieee80211_crypto_aes_gmac_decrypt(rx); + break; + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: + result = ieee80211_crypto_gcmp_decrypt(rx); + break; default: result = ieee80211_crypto_hw_decrypt(rx); } @@ -1786,7 +1805,9 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) /* This is the first fragment of a new frame. 
*/ entry = ieee80211_reassemble_add(rx->sdata, frag, seq, rx->seqno_idx, &(rx->skb)); - if (rx->key && rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP && + if (rx->key && + (rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP || + rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP_256) && ieee80211_has_protected(fc)) { int queue = rx->security_idx; /* Store CCMP PN so that we can verify that the next @@ -1815,7 +1836,9 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) int i; u8 pn[IEEE80211_CCMP_PN_LEN], *rpn; int queue; - if (!rx->key || rx->key->conf.cipher != WLAN_CIPHER_SUITE_CCMP) + if (!rx->key || + (rx->key->conf.cipher != WLAN_CIPHER_SUITE_CCMP && + rx->key->conf.cipher != WLAN_CIPHER_SUITE_CCMP_256)) return RX_DROP_UNUSABLE; memcpy(pn, entry->last_pn, IEEE80211_CCMP_PN_LEN); for (i = IEEE80211_CCMP_PN_LEN - 1; i >= 0; i--) { @@ -2314,6 +2337,15 @@ ieee80211_rx_h_data(struct ieee80211_rx_data *rx) if (unlikely(!ieee80211_is_data_present(hdr->frame_control))) return RX_DROP_MONITOR; + if (rx->sta) { + /* The seqno index has the same property as needed + * for the rx_msdu field, i.e. it is IEEE80211_NUM_TIDS + * for non-QoS-data frames. Here we know it's a data + * frame, so count MSDUs. + */ + rx->sta->rx_msdu[rx->seqno_idx]++; + } + /* * Send unexpected-4addr-frame event to hostapd. For older versions, * also drop the frame to cooked monitor interfaces. @@ -2598,7 +2630,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) case WLAN_HT_ACTION_NOTIFY_CHANWIDTH: { struct ieee80211_supported_band *sband; u8 chanwidth = mgmt->u.action.u.ht_notify_cw.chanwidth; - enum ieee80211_sta_rx_bandwidth new_bw; + enum ieee80211_sta_rx_bandwidth max_bw, new_bw; /* If it doesn't support 40 MHz it can't change ... */ if (!(rx->sta->sta.ht_cap.cap & @@ -2606,13 +2638,18 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) goto handled; if (chanwidth == IEEE80211_HT_CHANWIDTH_20MHZ) - new_bw = IEEE80211_STA_RX_BW_20; + max_bw = IEEE80211_STA_RX_BW_20; else - new_bw = ieee80211_sta_cur_vht_bw(rx->sta); + max_bw = ieee80211_sta_cap_rx_bw(rx->sta); + + /* set cur_max_bandwidth and recalc sta bw */ + rx->sta->cur_max_bandwidth = max_bw; + new_bw = ieee80211_sta_cur_vht_bw(rx->sta); if (rx->sta->sta.bandwidth == new_bw) goto handled; + rx->sta->sta.bandwidth = new_bw; sband = rx->local->hw.wiphy->bands[status->band]; rate_control_rate_update(local, sband, rx->sta, diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index ae842678b629..05f0d711b6d8 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -416,7 +416,7 @@ static int ieee80211_start_sw_scan(struct ieee80211_local *local, ieee80211_offchannel_stop_vifs(local); /* ensure nullfunc is transmitted before leaving operating channel */ - ieee80211_flush_queues(local, NULL); + ieee80211_flush_queues(local, NULL, false); ieee80211_configure_filter(local); @@ -432,7 +432,7 @@ static int ieee80211_start_sw_scan(struct ieee80211_local *local, static bool ieee80211_can_scan(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata) { - if (local->radar_detect_enabled) + if (ieee80211_is_radar_required(local)) return false; if (!list_empty(&local->roc_list)) @@ -505,7 +505,7 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata, lockdep_assert_held(&local->mtx); - if (local->scan_req) + if (local->scan_req || ieee80211_is_radar_required(local)) return -EBUSY; if (!ieee80211_can_scan(local, sdata)) { @@ -805,7 +805,7 @@ static void ieee80211_scan_state_resume(struct ieee80211_local *local, 
ieee80211_offchannel_stop_vifs(local); if (local->ops->flush) { - ieee80211_flush_queues(local, NULL); + ieee80211_flush_queues(local, NULL, false); *next_delay = 0; } else *next_delay = HZ / 10; @@ -828,6 +828,11 @@ void ieee80211_scan_work(struct work_struct *work) mutex_lock(&local->mtx); + if (!ieee80211_can_run_worker(local)) { + aborted = true; + goto out_complete; + } + sdata = rcu_dereference_protected(local->scan_sdata, lockdep_is_held(&local->mtx)); scan_req = rcu_dereference_protected(local->scan_req, diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c index efeba56c913b..06e6ac8cc693 100644 --- a/net/mac80211/spectmgmt.c +++ b/net/mac80211/spectmgmt.c @@ -34,19 +34,15 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata, struct cfg80211_chan_def new_vht_chandef = {}; const struct ieee80211_sec_chan_offs_ie *sec_chan_offs; const struct ieee80211_wide_bw_chansw_ie *wide_bw_chansw_ie; - const struct ieee80211_ht_operation *ht_oper; int secondary_channel_offset = -1; sec_chan_offs = elems->sec_chan_offs; wide_bw_chansw_ie = elems->wide_bw_chansw_ie; - ht_oper = elems->ht_operation; if (sta_flags & (IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_40MHZ)) { sec_chan_offs = NULL; wide_bw_chansw_ie = NULL; - /* only used for bandwidth here */ - ht_oper = NULL; } if (sta_flags & IEEE80211_STA_DISABLE_VHT) diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index a42f5b2b024d..00ca8dcc2bcf 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -116,7 +116,6 @@ static void __cleanup_single_sta(struct sta_info *sta) clear_sta_flag(sta, WLAN_STA_PS_DELIVER); atomic_dec(&ps->num_sta_ps); - sta_info_recalc_tim(sta); } for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { @@ -625,7 +624,7 @@ static unsigned long ieee80211_tids_for_ac(int ac) } } -void sta_info_recalc_tim(struct sta_info *sta) +static void __sta_info_recalc_tim(struct sta_info *sta, bool ignore_pending) { struct ieee80211_local *local = sta->local; struct ps_data *ps; @@ -667,6 +666,9 @@ void sta_info_recalc_tim(struct sta_info *sta) if (ignore_for_tim == BIT(IEEE80211_NUM_ACS) - 1) ignore_for_tim = 0; + if (ignore_pending) + ignore_for_tim = BIT(IEEE80211_NUM_ACS) - 1; + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { unsigned long tids; @@ -695,7 +697,7 @@ void sta_info_recalc_tim(struct sta_info *sta) else __bss_tim_clear(ps->tim, id); - if (local->ops->set_tim) { + if (local->ops->set_tim && !WARN_ON(sta->dead)) { local->tim_in_locked_section = true; drv_set_tim(local, &sta->sta, indicate_tim); local->tim_in_locked_section = false; @@ -705,6 +707,11 @@ out_unlock: spin_unlock_bh(&local->tim_lock); } +void sta_info_recalc_tim(struct sta_info *sta) +{ + __sta_info_recalc_tim(sta, false); +} + static bool sta_info_buffer_expired(struct sta_info *sta, struct sk_buff *skb) { struct ieee80211_tx_info *info; @@ -874,6 +881,7 @@ static void __sta_info_destroy_part2(struct sta_info *sta) { struct ieee80211_local *local = sta->local; struct ieee80211_sub_if_data *sdata = sta->sdata; + struct station_info sinfo = {}; int ret; /* @@ -887,6 +895,9 @@ static void __sta_info_destroy_part2(struct sta_info *sta) /* now keys can no longer be reached */ ieee80211_free_sta_keys(local, sta); + /* disable TIM bit - last chance to tell driver */ + __sta_info_recalc_tim(sta, true); + sta->dead = true; local->num_sta--; @@ -908,7 +919,8 @@ static void __sta_info_destroy_part2(struct sta_info *sta) sta_dbg(sdata, "Removed STA %pM\n", sta->sta.addr); - cfg80211_del_sta(sdata->dev, sta->sta.addr, 
GFP_KERNEL); + sta_set_sinfo(sta, &sinfo); + cfg80211_del_sta_sinfo(sdata->dev, sta->sta.addr, &sinfo, GFP_KERNEL); rate_control_remove_sta_debugfs(sta); ieee80211_sta_debugfs_remove(sta); @@ -1243,10 +1255,11 @@ static void ieee80211_send_null_response(struct ieee80211_sub_if_data *sdata, * ends the poll/service period. */ info->flags |= IEEE80211_TX_CTL_NO_PS_BUFFER | - IEEE80211_TX_CTL_PS_RESPONSE | IEEE80211_TX_STATUS_EOSP | IEEE80211_TX_CTL_REQ_TX_STATUS; + info->control.flags |= IEEE80211_TX_CTRL_PS_RESPONSE; + if (call_driver) drv_allow_buffered_frames(local, sta, BIT(tid), 1, reason, false); @@ -1395,8 +1408,8 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta, * STA may still remain is PS mode after this frame * exchange. */ - info->flags |= IEEE80211_TX_CTL_NO_PS_BUFFER | - IEEE80211_TX_CTL_PS_RESPONSE; + info->flags |= IEEE80211_TX_CTL_NO_PS_BUFFER; + info->control.flags |= IEEE80211_TX_CTRL_PS_RESPONSE; /* * Use MoreData flag to indicate whether there are @@ -1743,7 +1756,6 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) struct ieee80211_local *local = sdata->local; struct rate_control_ref *ref = NULL; struct timespec uptime; - u64 packets = 0; u32 thr = 0; int i, ac; @@ -1752,49 +1764,90 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) sinfo->generation = sdata->local->sta_generation; - sinfo->filled = STATION_INFO_INACTIVE_TIME | - STATION_INFO_RX_BYTES64 | - STATION_INFO_TX_BYTES64 | - STATION_INFO_RX_PACKETS | - STATION_INFO_TX_PACKETS | - STATION_INFO_TX_RETRIES | - STATION_INFO_TX_FAILED | - STATION_INFO_TX_BITRATE | - STATION_INFO_RX_BITRATE | - STATION_INFO_RX_DROP_MISC | - STATION_INFO_BSS_PARAM | - STATION_INFO_CONNECTED_TIME | - STATION_INFO_STA_FLAGS | - STATION_INFO_BEACON_LOSS_COUNT; + /* do before driver, so beacon filtering drivers have a + * chance to e.g. 
just add the number of filtered beacons + * (or just modify the value entirely, of course) + */ + if (sdata->vif.type == NL80211_IFTYPE_STATION) + sinfo->rx_beacon = sdata->u.mgd.count_beacon_signal; + + drv_sta_statistics(local, sdata, &sta->sta, sinfo); + + sinfo->filled |= BIT(NL80211_STA_INFO_INACTIVE_TIME) | + BIT(NL80211_STA_INFO_STA_FLAGS) | + BIT(NL80211_STA_INFO_BSS_PARAM) | + BIT(NL80211_STA_INFO_CONNECTED_TIME) | + BIT(NL80211_STA_INFO_RX_DROP_MISC) | + BIT(NL80211_STA_INFO_BEACON_LOSS); ktime_get_ts(&uptime); sinfo->connected_time = uptime.tv_sec - sta->last_connected; - sinfo->inactive_time = jiffies_to_msecs(jiffies - sta->last_rx); - sinfo->tx_bytes = 0; - for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { - sinfo->tx_bytes += sta->tx_bytes[ac]; - packets += sta->tx_packets[ac]; + + if (!(sinfo->filled & (BIT(NL80211_STA_INFO_TX_BYTES64) | + BIT(NL80211_STA_INFO_TX_BYTES)))) { + sinfo->tx_bytes = 0; + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) + sinfo->tx_bytes += sta->tx_bytes[ac]; + sinfo->filled |= BIT(NL80211_STA_INFO_TX_BYTES64); + } + + if (!(sinfo->filled & BIT(NL80211_STA_INFO_TX_PACKETS))) { + sinfo->tx_packets = 0; + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) + sinfo->tx_packets += sta->tx_packets[ac]; + sinfo->filled |= BIT(NL80211_STA_INFO_TX_PACKETS); + } + + if (!(sinfo->filled & (BIT(NL80211_STA_INFO_RX_BYTES64) | + BIT(NL80211_STA_INFO_RX_BYTES)))) { + sinfo->rx_bytes = sta->rx_bytes; + sinfo->filled |= BIT(NL80211_STA_INFO_RX_BYTES64); + } + + if (!(sinfo->filled & BIT(NL80211_STA_INFO_RX_PACKETS))) { + sinfo->rx_packets = sta->rx_packets; + sinfo->filled |= BIT(NL80211_STA_INFO_RX_PACKETS); + } + + if (!(sinfo->filled & BIT(NL80211_STA_INFO_TX_RETRIES))) { + sinfo->tx_retries = sta->tx_retry_count; + sinfo->filled |= BIT(NL80211_STA_INFO_TX_RETRIES); } - sinfo->tx_packets = packets; - sinfo->rx_bytes = sta->rx_bytes; - sinfo->rx_packets = sta->rx_packets; - sinfo->tx_retries = sta->tx_retry_count; - sinfo->tx_failed = sta->tx_retry_failed; + + if (!(sinfo->filled & BIT(NL80211_STA_INFO_TX_FAILED))) { + sinfo->tx_failed = sta->tx_retry_failed; + sinfo->filled |= BIT(NL80211_STA_INFO_TX_FAILED); + } + sinfo->rx_dropped_misc = sta->rx_dropped; sinfo->beacon_loss_count = sta->beacon_loss_count; + if (sdata->vif.type == NL80211_IFTYPE_STATION && + !(sdata->vif.driver_flags & IEEE80211_VIF_BEACON_FILTER)) { + sinfo->filled |= BIT(NL80211_STA_INFO_BEACON_RX) | + BIT(NL80211_STA_INFO_BEACON_SIGNAL_AVG); + sinfo->rx_beacon_signal_avg = ieee80211_ave_rssi(&sdata->vif); + } + if ((sta->local->hw.flags & IEEE80211_HW_SIGNAL_DBM) || (sta->local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC)) { - sinfo->filled |= STATION_INFO_SIGNAL | STATION_INFO_SIGNAL_AVG; - if (!local->ops->get_rssi || - drv_get_rssi(local, sdata, &sta->sta, &sinfo->signal)) + if (!(sinfo->filled & BIT(NL80211_STA_INFO_SIGNAL))) { sinfo->signal = (s8)sta->last_signal; - sinfo->signal_avg = (s8) -ewma_read(&sta->avg_signal); + sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL); + } + + if (!(sinfo->filled & BIT(NL80211_STA_INFO_SIGNAL_AVG))) { + sinfo->signal_avg = (s8) -ewma_read(&sta->avg_signal); + sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL_AVG); + } } - if (sta->chains) { - sinfo->filled |= STATION_INFO_CHAIN_SIGNAL | - STATION_INFO_CHAIN_SIGNAL_AVG; + + if (sta->chains && + !(sinfo->filled & (BIT(NL80211_STA_INFO_CHAIN_SIGNAL) | + BIT(NL80211_STA_INFO_CHAIN_SIGNAL_AVG)))) { + sinfo->filled |= BIT(NL80211_STA_INFO_CHAIN_SIGNAL) | + BIT(NL80211_STA_INFO_CHAIN_SIGNAL_AVG); sinfo->chains = sta->chains; for (i = 0; i 
< ARRAY_SIZE(sinfo->chain_signal); i++) { @@ -1804,23 +1857,61 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) } } - sta_set_rate_info_tx(sta, &sta->last_tx_rate, &sinfo->txrate); - sta_set_rate_info_rx(sta, &sinfo->rxrate); + if (!(sinfo->filled & BIT(NL80211_STA_INFO_TX_BITRATE))) { + sta_set_rate_info_tx(sta, &sta->last_tx_rate, &sinfo->txrate); + sinfo->filled |= BIT(NL80211_STA_INFO_TX_BITRATE); + } + + if (!(sinfo->filled & BIT(NL80211_STA_INFO_RX_BITRATE))) { + sta_set_rate_info_rx(sta, &sinfo->rxrate); + sinfo->filled |= BIT(NL80211_STA_INFO_RX_BITRATE); + } + + sinfo->filled |= BIT(NL80211_STA_INFO_TID_STATS); + for (i = 0; i < IEEE80211_NUM_TIDS + 1; i++) { + struct cfg80211_tid_stats *tidstats = &sinfo->pertid[i]; + + if (!(tidstats->filled & BIT(NL80211_TID_STATS_RX_MSDU))) { + tidstats->filled |= BIT(NL80211_TID_STATS_RX_MSDU); + tidstats->rx_msdu = sta->rx_msdu[i]; + } + + if (!(tidstats->filled & BIT(NL80211_TID_STATS_TX_MSDU))) { + tidstats->filled |= BIT(NL80211_TID_STATS_TX_MSDU); + tidstats->tx_msdu = sta->tx_msdu[i]; + } + + if (!(tidstats->filled & + BIT(NL80211_TID_STATS_TX_MSDU_RETRIES)) && + local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) { + tidstats->filled |= + BIT(NL80211_TID_STATS_TX_MSDU_RETRIES); + tidstats->tx_msdu_retries = sta->tx_msdu_retries[i]; + } + + if (!(tidstats->filled & + BIT(NL80211_TID_STATS_TX_MSDU_FAILED)) && + local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) { + tidstats->filled |= + BIT(NL80211_TID_STATS_TX_MSDU_FAILED); + tidstats->tx_msdu_failed = sta->tx_msdu_failed[i]; + } + } if (ieee80211_vif_is_mesh(&sdata->vif)) { #ifdef CONFIG_MAC80211_MESH - sinfo->filled |= STATION_INFO_LLID | - STATION_INFO_PLID | - STATION_INFO_PLINK_STATE | - STATION_INFO_LOCAL_PM | - STATION_INFO_PEER_PM | - STATION_INFO_NONPEER_PM; + sinfo->filled |= BIT(NL80211_STA_INFO_LLID) | + BIT(NL80211_STA_INFO_PLID) | + BIT(NL80211_STA_INFO_PLINK_STATE) | + BIT(NL80211_STA_INFO_LOCAL_PM) | + BIT(NL80211_STA_INFO_PEER_PM) | + BIT(NL80211_STA_INFO_NONPEER_PM); sinfo->llid = sta->llid; sinfo->plid = sta->plid; sinfo->plink_state = sta->plink_state; if (test_sta_flag(sta, WLAN_STA_TOFFSET_KNOWN)) { - sinfo->filled |= STATION_INFO_T_OFFSET; + sinfo->filled |= BIT(NL80211_STA_INFO_T_OFFSET); sinfo->t_offset = sta->t_offset; } sinfo->local_pm = sta->local_pm; @@ -1869,7 +1960,7 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) thr = drv_get_expected_throughput(local, &sta->sta); if (thr != 0) { - sinfo->filled |= STATION_INFO_EXPECTED_THROUGHPUT; + sinfo->filled |= BIT(NL80211_STA_INFO_EXPECTED_THROUGHPUT); sinfo->expected_throughput = thr; } } diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 4f052bb2a5ad..925e68fe64c7 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -346,6 +346,14 @@ struct ieee80211_tx_latency_stat { * @cipher_scheme: optional cipher scheme for this station * @last_tdls_pkt_time: holds the time in jiffies of last TDLS pkt ACKed * @reserved_tid: reserved TID (if any, otherwise IEEE80211_TID_UNRESERVED) + * @tx_msdu: MSDUs transmitted to this station, using IEEE80211_NUM_TID + * entry for non-QoS frames + * @tx_msdu_retries: MSDU retries for transmissions to to this station, + * using IEEE80211_NUM_TID entry for non-QoS frames + * @tx_msdu_failed: MSDU failures for transmissions to to this station, + * using IEEE80211_NUM_TID entry for non-QoS frames + * @rx_msdu: MSDUs received from this station, using IEEE80211_NUM_TID + * entry for non-QoS frames */ struct 
sta_info { /* General information, mostly static */ @@ -416,6 +424,10 @@ struct sta_info { u32 last_rx_rate_vht_flag; u8 last_rx_rate_vht_nss; u16 tid_seq[IEEE80211_QOS_CTL_TID_MASK + 1]; + u64 tx_msdu[IEEE80211_NUM_TIDS + 1]; + u64 tx_msdu_retries[IEEE80211_NUM_TIDS + 1]; + u64 tx_msdu_failed[IEEE80211_NUM_TIDS + 1]; + u64 rx_msdu[IEEE80211_NUM_TIDS + 1]; /* * Aggregation information, locked with lock. diff --git a/net/mac80211/status.c b/net/mac80211/status.c index bb146f377ee4..e679b7c9b160 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -664,13 +664,15 @@ void ieee80211_tx_status_noskb(struct ieee80211_hw *hw, struct ieee80211_supported_band *sband; int retry_count; int rates_idx; - bool acked; + bool acked, noack_success; rates_idx = ieee80211_tx_get_rates(hw, info, &retry_count); sband = hw->wiphy->bands[info->band]; acked = !!(info->flags & IEEE80211_TX_STAT_ACK); + noack_success = !!(info->flags & IEEE80211_TX_STAT_NOACK_TRANSMITTED); + if (pubsta) { struct sta_info *sta; @@ -696,7 +698,7 @@ void ieee80211_tx_status_noskb(struct ieee80211_hw *hw, rate_control_tx_status_noskb(local, sband, sta, info); } - if (acked) { + if (acked || noack_success) { local->dot11TransmittedFrameCount++; if (!pubsta) local->dot11MulticastTransmittedFrameCount++; @@ -728,6 +730,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) struct ieee80211_bar *bar; int rtap_len; int shift = 0; + int tid = IEEE80211_NUM_TIDS; rates_idx = ieee80211_tx_get_rates(hw, info, &retry_count); @@ -771,7 +774,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) if ((info->flags & IEEE80211_TX_STAT_AMPDU_NO_BACK) && (ieee80211_is_data_qos(fc))) { - u16 tid, ssn; + u16 ssn; u8 *qc; qc = ieee80211_get_qos_ctl(hdr); @@ -780,10 +783,14 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) & IEEE80211_SCTL_SEQ); ieee80211_send_bar(&sta->sdata->vif, hdr->addr1, tid, ssn); + } else if (ieee80211_is_data_qos(fc)) { + u8 *qc = ieee80211_get_qos_ctl(hdr); + + tid = qc[0] & 0xf; } if (!acked && ieee80211_is_back_req(fc)) { - u16 tid, control; + u16 control; /* * BAR failed, store the last SSN and retry sending @@ -811,6 +818,12 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) if (!acked) sta->tx_retry_failed++; sta->tx_retry_count += retry_count; + + if (ieee80211_is_data_present(fc)) { + if (!acked) + sta->tx_msdu_failed[tid]++; + sta->tx_msdu_retries[tid] += retry_count; + } } rate_control_tx_status(local, sband, sta, skb); @@ -856,10 +869,11 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) * Fragments are passed to low-level drivers as separate skbs, so these * are actually fragments, not frames. Update frame counters only for * the first fragment of the frame. 
*/ - if (info->flags & IEEE80211_TX_STAT_ACK) { + if ((info->flags & IEEE80211_TX_STAT_ACK) || + (info->flags & IEEE80211_TX_STAT_NOACK_TRANSMITTED)) { if (ieee80211_is_first_frag(hdr->seq_ctrl)) { local->dot11TransmittedFrameCount++; - if (is_multicast_ether_addr(hdr->addr1)) + if (is_multicast_ether_addr(ieee80211_get_DA(hdr))) local->dot11MulticastTransmittedFrameCount++; if (retry_count > 0) local->dot11RetryCount++; diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index 55ddd77b865d..c9f9752217ac 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -68,17 +68,24 @@ ieee80211_tdls_add_subband(struct ieee80211_sub_if_data *sdata, ch = ieee80211_get_channel(sdata->local->hw.wiphy, i); if (ch) { /* we will be active on the channel */ - u32 flags = IEEE80211_CHAN_DISABLED | - IEEE80211_CHAN_NO_IR; cfg80211_chandef_create(&chandef, ch, - NL80211_CHAN_HT20); - if (cfg80211_chandef_usable(sdata->local->hw.wiphy, - &chandef, flags)) { + NL80211_CHAN_NO_HT); + if (cfg80211_reg_can_beacon(sdata->local->hw.wiphy, + &chandef, + sdata->wdev.iftype)) { ch_cnt++; + /* + * check if the next channel is also part of + * this allowed range + */ continue; } } + /* + * we've reached the end of a range, with allowed channels + * found + */ if (ch_cnt) { u8 *pos = skb_put(skb, 2); *pos++ = ieee80211_frequency_to_channel(subband_start); @@ -89,6 +96,15 @@ ieee80211_tdls_add_subband(struct ieee80211_sub_if_data *sdata, } } + /* all channels in the requested range are allowed - add them here */ + if (ch_cnt) { + u8 *pos = skb_put(skb, 2); + *pos++ = ieee80211_frequency_to_channel(subband_start); + *pos++ = ch_cnt; + + subband_cnt++; + } + return subband_cnt; } @@ -329,24 +345,24 @@ ieee80211_tdls_add_setup_start_ies(struct ieee80211_sub_if_data *sdata, */ sband = local->hw.wiphy->bands[band]; memcpy(&ht_cap, &sband->ht_cap, sizeof(ht_cap)); - if ((action_code == WLAN_TDLS_SETUP_REQUEST || - action_code == WLAN_TDLS_SETUP_RESPONSE) && - ht_cap.ht_supported && (!sta || sta->sta.ht_cap.ht_supported)) { - if (action_code == WLAN_TDLS_SETUP_REQUEST) { - ieee80211_apply_htcap_overrides(sdata, &ht_cap); - - /* disable SMPS in TDLS initiator */ - ht_cap.cap |= (WLAN_HT_CAP_SM_PS_DISABLED - << IEEE80211_HT_CAP_SM_PS_SHIFT); - } else { - /* disable SMPS in TDLS responder */ - sta->sta.ht_cap.cap |= - (WLAN_HT_CAP_SM_PS_DISABLED - << IEEE80211_HT_CAP_SM_PS_SHIFT); - - /* the peer caps are already intersected with our own */ - memcpy(&ht_cap, &sta->sta.ht_cap, sizeof(ht_cap)); - } + + if (action_code == WLAN_TDLS_SETUP_REQUEST && ht_cap.ht_supported) { + ieee80211_apply_htcap_overrides(sdata, &ht_cap); + + /* disable SMPS in TDLS initiator */ + ht_cap.cap |= WLAN_HT_CAP_SM_PS_DISABLED + << IEEE80211_HT_CAP_SM_PS_SHIFT; + + pos = skb_put(skb, sizeof(struct ieee80211_ht_cap) + 2); + ieee80211_ie_build_ht_cap(pos, &ht_cap, ht_cap.cap); + } else if (action_code == WLAN_TDLS_SETUP_RESPONSE && + ht_cap.ht_supported && sta->sta.ht_cap.ht_supported) { + /* disable SMPS in TDLS responder */ + sta->sta.ht_cap.cap |= WLAN_HT_CAP_SM_PS_DISABLED + << IEEE80211_HT_CAP_SM_PS_SHIFT; + + /* the peer caps are already intersected with our own */ + memcpy(&ht_cap, &sta->sta.ht_cap, sizeof(ht_cap)); pos = skb_put(skb, sizeof(struct ieee80211_ht_cap) + 2); ieee80211_ie_build_ht_cap(pos, &ht_cap, ht_cap.cap); @@ -836,7 +852,6 @@ ieee80211_tdls_prep_mgmt_packet(struct wiphy *wiphy, struct net_device *dev, */ if ((action_code == WLAN_TDLS_TEARDOWN) && (sdata->local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)) { - struct sta_info 
*sta = NULL; bool try_resend; /* Should we keep skb for possible resend */ /* If not sending directly to peer - no point in keeping skb */ @@ -912,7 +927,7 @@ ieee80211_tdls_mgmt_setup(struct wiphy *wiphy, struct net_device *dev, rcu_read_unlock(); } - ieee80211_flush_queues(local, sdata); + ieee80211_flush_queues(local, sdata, false); ret = ieee80211_tdls_prep_mgmt_packet(wiphy, dev, peer, action_code, dialog_token, status_code, @@ -952,7 +967,7 @@ ieee80211_tdls_mgmt_teardown(struct wiphy *wiphy, struct net_device *dev, */ ieee80211_stop_vif_queues(local, sdata, IEEE80211_QUEUE_STOP_REASON_TDLS_TEARDOWN); - ieee80211_flush_queues(local, sdata); + ieee80211_flush_queues(local, sdata, false); ret = ieee80211_tdls_prep_mgmt_packet(wiphy, dev, peer, action_code, dialog_token, status_code, @@ -1098,7 +1113,7 @@ int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev, */ tasklet_kill(&local->tx_pending_tasklet); /* flush a potentially queued teardown packet */ - ieee80211_flush_queues(local, sdata); + ieee80211_flush_queues(local, sdata, false); ret = sta_info_destroy_addr(sdata, peer); break; diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index 8e461a02c6a8..263a9561eb26 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -825,6 +825,13 @@ DECLARE_EVENT_CLASS(sta_event, ) ); +DEFINE_EVENT(sta_event, drv_sta_statistics, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_sta *sta), + TP_ARGS(local, sdata, sta) +); + DEFINE_EVENT(sta_event, drv_sta_add, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, @@ -1329,32 +1336,6 @@ DEFINE_EVENT(release_evt, drv_allow_buffered_frames, TP_ARGS(local, sta, tids, num_frames, reason, more_data) ); -TRACE_EVENT(drv_get_rssi, - TP_PROTO(struct ieee80211_local *local, struct ieee80211_sta *sta, - s8 rssi, int ret), - - TP_ARGS(local, sta, rssi, ret), - - TP_STRUCT__entry( - LOCAL_ENTRY - STA_ENTRY - __field(s8, rssi) - __field(int, ret) - ), - - TP_fast_assign( - LOCAL_ASSIGN; - STA_ASSIGN; - __entry->rssi = rssi; - __entry->ret = ret; - ), - - TP_printk( - LOCAL_PR_FMT STA_PR_FMT " rssi:%d ret:%d", - LOCAL_PR_ARG, STA_PR_ARG, __entry->rssi, __entry->ret - ) -); - DEFINE_EVENT(local_sdata_evt, drv_mgd_prepare_tx, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata), diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 058686a721a1..88a18ffe2975 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -626,6 +626,9 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) tx->key = NULL; break; case WLAN_CIPHER_SUITE_CCMP: + case WLAN_CIPHER_SUITE_CCMP_256: + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: if (!ieee80211_is_data_present(hdr->frame_control) && !ieee80211_use_mfp(hdr->frame_control, tx->sta, tx->skb)) @@ -636,6 +639,9 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) ieee80211_is_mgmt(hdr->frame_control); break; case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_CMAC_256: + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: if (!ieee80211_is_mgmt(hdr->frame_control)) tx->key = NULL; break; @@ -815,6 +821,8 @@ ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx) /* for pure STA mode without beacons, we can do it */ hdr->seq_ctrl = cpu_to_le16(tx->sdata->sequence_number); tx->sdata->sequence_number += 0x10; + if (tx->sta) + tx->sta->tx_msdu[IEEE80211_NUM_TIDS]++; return TX_CONTINUE; } @@ -831,6 +839,7 @@ ieee80211_tx_h_sequence(struct 
ieee80211_tx_data *tx) qc = ieee80211_get_qos_ctl(hdr); tid = *qc & IEEE80211_QOS_CTL_TID_MASK; seq = &tx->sta->tid_seq[tid]; + tx->sta->tx_msdu[tid]++; hdr->seq_ctrl = cpu_to_le16(*seq); @@ -1008,9 +1017,21 @@ ieee80211_tx_h_encrypt(struct ieee80211_tx_data *tx) case WLAN_CIPHER_SUITE_TKIP: return ieee80211_crypto_tkip_encrypt(tx); case WLAN_CIPHER_SUITE_CCMP: - return ieee80211_crypto_ccmp_encrypt(tx); + return ieee80211_crypto_ccmp_encrypt( + tx, IEEE80211_CCMP_MIC_LEN); + case WLAN_CIPHER_SUITE_CCMP_256: + return ieee80211_crypto_ccmp_encrypt( + tx, IEEE80211_CCMP_256_MIC_LEN); case WLAN_CIPHER_SUITE_AES_CMAC: return ieee80211_crypto_aes_cmac_encrypt(tx); + case WLAN_CIPHER_SUITE_BIP_CMAC_256: + return ieee80211_crypto_aes_cmac_256_encrypt(tx); + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + return ieee80211_crypto_aes_gmac_encrypt(tx); + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: + return ieee80211_crypto_gcmp_encrypt(tx); default: return ieee80211_crypto_hw_encrypt(tx); } @@ -3152,7 +3173,7 @@ int ieee80211_reserve_tid(struct ieee80211_sta *pubsta, u8 tid) } queues = BIT(sdata->vif.hw_queue[ieee802_1d_to_ac[tid]]); - __ieee80211_flush_queues(local, sdata, queues); + __ieee80211_flush_queues(local, sdata, queues, false); sta->reserved_tid = tid; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 974ebe70f5b0..8428f4a95479 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -578,7 +578,7 @@ ieee80211_get_vif_queues(struct ieee80211_local *local, void __ieee80211_flush_queues(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, - unsigned int queues) + unsigned int queues, bool drop) { if (!local->ops->flush) return; @@ -594,7 +594,7 @@ void __ieee80211_flush_queues(struct ieee80211_local *local, IEEE80211_QUEUE_STOP_REASON_FLUSH, false); - drv_flush(local, sdata, queues, false); + drv_flush(local, sdata, queues, drop); ieee80211_wake_queues_by_reason(&local->hw, queues, IEEE80211_QUEUE_STOP_REASON_FLUSH, @@ -602,9 +602,9 @@ void __ieee80211_flush_queues(struct ieee80211_local *local, } void ieee80211_flush_queues(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata) + struct ieee80211_sub_if_data *sdata, bool drop) { - __ieee80211_flush_queues(local, sdata, 0); + __ieee80211_flush_queues(local, sdata, 0, drop); } void ieee80211_stop_vif_queues(struct ieee80211_local *local, @@ -744,16 +744,19 @@ EXPORT_SYMBOL_GPL(wdev_to_ieee80211_vif); /* * Nothing should have been stuffed into the workqueue during - * the suspend->resume cycle. If this WARN is seen then there - * is a bug with either the driver suspend or something in - * mac80211 stuffing into the workqueue which we haven't yet - * cleared during mac80211's suspend cycle. + * the suspend->resume cycle. Since we can't check each caller + * of this function if we are already quiescing / suspended, + * check here and don't WARN since this can actually happen when + * the rx path (for example) is racing against __ieee80211_suspend + * and suspending / quiescing was set after the rx path checked + * them. 
*/ static bool ieee80211_can_queue_work(struct ieee80211_local *local) { - if (WARN(local->suspended && !local->resuming, - "queueing ieee80211 work while going to suspend\n")) + if (local->quiescing || (local->suspended && !local->resuming)) { + pr_warn("queueing ieee80211 work while going to suspend\n"); return false; + } return true; } @@ -1470,10 +1473,12 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_local *local, /* Check if any channel in this sband supports at least 80 MHz */ for (i = 0; i < sband->n_channels; i++) { - if (!(sband->channels[i].flags & IEEE80211_CHAN_NO_80MHZ)) { - have_80mhz = true; - break; - } + if (sband->channels[i].flags & (IEEE80211_CHAN_DISABLED | + IEEE80211_CHAN_NO_80MHZ)) + continue; + + have_80mhz = true; + break; } if (sband->vht_cap.vht_supported && have_80mhz) { @@ -1735,6 +1740,10 @@ int ieee80211_reconfig(struct ieee80211_local *local) struct cfg80211_sched_scan_request *sched_scan_req; bool sched_scan_stopped = false; + /* nothing to do if HW shouldn't run */ + if (!local->open_count) + goto wake_up; + #ifdef CONFIG_PM if (local->suspended) local->resuming = true; @@ -1756,9 +1765,6 @@ int ieee80211_reconfig(struct ieee80211_local *local) reconfig_due_to_wowlan = true; } #endif - /* everything else happens only if HW was up & running */ - if (!local->open_count) - goto wake_up; /* * Upon resume hardware can sometimes be goofy due to @@ -2042,7 +2048,7 @@ int ieee80211_reconfig(struct ieee80211_local *local) * If this is for hw restart things are still running. * We may want to change that later, however. */ - if (!local->suspended || reconfig_due_to_wowlan) + if (local->open_count && (!local->suspended || reconfig_due_to_wowlan)) drv_reconfig_complete(local, IEEE80211_RECONFIG_TYPE_RESTART); if (!local->suspended) @@ -2054,7 +2060,19 @@ int ieee80211_reconfig(struct ieee80211_local *local) mb(); local->resuming = false; - if (!reconfig_due_to_wowlan) + /* It's possible that we don't handle the scan completion in + * time during suspend, so if it's still marked as completed + * here, queue the work and flush it to clean things up. + * Instead of calling the worker function directly here, we + * really queue it to avoid potential races with other flows + * scheduling the same work. 
+ */ + if (test_bit(SCAN_COMPLETED, &local->scanning)) { + ieee80211_queue_delayed_work(&local->hw, &local->scan_work, 0); + flush_delayed_work(&local->scan_work); + } + + if (local->open_count && !reconfig_due_to_wowlan) drv_reconfig_complete(local, IEEE80211_RECONFIG_TYPE_SUSPEND); list_for_each_entry(sdata, &local->interfaces, list) { @@ -2538,7 +2556,9 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local, ri.mcs = status->rate_idx; ri.flags |= RATE_INFO_FLAGS_MCS; if (status->flag & RX_FLAG_40MHZ) - ri.flags |= RATE_INFO_FLAGS_40_MHZ_WIDTH; + ri.bw = RATE_INFO_BW_40; + else + ri.bw = RATE_INFO_BW_20; if (status->flag & RX_FLAG_SHORT_GI) ri.flags |= RATE_INFO_FLAGS_SHORT_GI; } else if (status->flag & RX_FLAG_VHT) { @@ -2546,13 +2566,13 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local, ri.mcs = status->rate_idx; ri.nss = status->vht_nss; if (status->flag & RX_FLAG_40MHZ) - ri.flags |= RATE_INFO_FLAGS_40_MHZ_WIDTH; - if (status->vht_flag & RX_VHT_FLAG_80MHZ) - ri.flags |= RATE_INFO_FLAGS_80_MHZ_WIDTH; - if (status->vht_flag & RX_VHT_FLAG_80P80MHZ) - ri.flags |= RATE_INFO_FLAGS_80P80_MHZ_WIDTH; - if (status->vht_flag & RX_VHT_FLAG_160MHZ) - ri.flags |= RATE_INFO_FLAGS_160_MHZ_WIDTH; + ri.bw = RATE_INFO_BW_40; + else if (status->vht_flag & RX_VHT_FLAG_80MHZ) + ri.bw = RATE_INFO_BW_80; + else if (status->vht_flag & RX_VHT_FLAG_160MHZ) + ri.bw = RATE_INFO_BW_160; + else + ri.bw = RATE_INFO_BW_20; if (status->flag & RX_FLAG_SHORT_GI) ri.flags |= RATE_INFO_FLAGS_SHORT_GI; } else { @@ -2560,10 +2580,15 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local, int shift = 0; int bitrate; - if (status->flag & RX_FLAG_10MHZ) + if (status->flag & RX_FLAG_10MHZ) { shift = 1; - if (status->flag & RX_FLAG_5MHZ) + ri.bw = RATE_INFO_BW_10; + } else if (status->flag & RX_FLAG_5MHZ) { shift = 2; + ri.bw = RATE_INFO_BW_5; + } else { + ri.bw = RATE_INFO_BW_20; + } sband = local->hw.wiphy->bands[status->band]; bitrate = sband->bitrates[status->rate_idx].bitrate; diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index bc9e8fc48785..85f9596da07b 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -269,51 +269,54 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, sta->sta.bandwidth = ieee80211_sta_cur_vht_bw(sta); } -enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct sta_info *sta) +enum ieee80211_sta_rx_bandwidth ieee80211_sta_cap_rx_bw(struct sta_info *sta) { - struct ieee80211_sub_if_data *sdata = sta->sdata; - u32 cap = sta->sta.vht_cap.cap; - enum ieee80211_sta_rx_bandwidth bw; + struct ieee80211_sta_vht_cap *vht_cap = &sta->sta.vht_cap; + u32 cap_width; - if (!sta->sta.vht_cap.vht_supported) { - bw = sta->sta.ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ? - IEEE80211_STA_RX_BW_40 : IEEE80211_STA_RX_BW_20; - goto check_max; - } + if (!vht_cap->vht_supported) + return sta->sta.ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ? 
+ IEEE80211_STA_RX_BW_40 : + IEEE80211_STA_RX_BW_20; - switch (sdata->vif.bss_conf.chandef.width) { - default: - WARN_ON_ONCE(1); - /* fall through */ + cap_width = vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK; + + if (cap_width == IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ || + cap_width == IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ) + return IEEE80211_STA_RX_BW_160; + + return IEEE80211_STA_RX_BW_80; +} + +static enum ieee80211_sta_rx_bandwidth +ieee80211_chan_width_to_rx_bw(enum nl80211_chan_width width) +{ + switch (width) { case NL80211_CHAN_WIDTH_20_NOHT: case NL80211_CHAN_WIDTH_20: - bw = IEEE80211_STA_RX_BW_20; - break; + return IEEE80211_STA_RX_BW_20; case NL80211_CHAN_WIDTH_40: - bw = sta->sta.ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ? - IEEE80211_STA_RX_BW_40 : IEEE80211_STA_RX_BW_20; - break; + return IEEE80211_STA_RX_BW_40; + case NL80211_CHAN_WIDTH_80: + return IEEE80211_STA_RX_BW_80; case NL80211_CHAN_WIDTH_160: - if ((cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) == - IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ) { - bw = IEEE80211_STA_RX_BW_160; - break; - } - /* fall through */ case NL80211_CHAN_WIDTH_80P80: - if ((cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) == - IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ) { - bw = IEEE80211_STA_RX_BW_160; - break; - } - /* fall through */ - case NL80211_CHAN_WIDTH_80: - bw = IEEE80211_STA_RX_BW_80; + return IEEE80211_STA_RX_BW_160; + default: + WARN_ON_ONCE(1); + return IEEE80211_STA_RX_BW_20; } +} + +enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct sta_info *sta) +{ + struct ieee80211_sub_if_data *sdata = sta->sdata; + enum ieee80211_sta_rx_bandwidth bw; + + bw = ieee80211_chan_width_to_rx_bw(sdata->vif.bss_conf.chandef.width); + bw = min(bw, ieee80211_sta_cap_rx_bw(sta)); + bw = min(bw, sta->cur_max_bandwidth); - check_max: - if (bw > sta->cur_max_bandwidth) - bw = sta->cur_max_bandwidth; return bw; } diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index 12398fde02e8..75de6fac40d1 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -22,6 +22,8 @@ #include "tkip.h" #include "aes_ccm.h" #include "aes_cmac.h" +#include "aes_gmac.h" +#include "aes_gcm.h" #include "wpa.h" ieee80211_tx_result @@ -393,7 +395,8 @@ static inline void ccmp_hdr2pn(u8 *pn, u8 *hdr) } -static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) +static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb, + unsigned int mic_len) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; struct ieee80211_key *key = tx->key; @@ -424,7 +427,7 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) if (info->control.hw_key) tail = 0; else - tail = IEEE80211_CCMP_MIC_LEN; + tail = mic_len; if (WARN_ON(skb_tailroom(skb) < tail || skb_headroom(skb) < IEEE80211_CCMP_HDR_LEN)) @@ -459,21 +462,22 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) pos += IEEE80211_CCMP_HDR_LEN; ccmp_special_blocks(skb, pn, b_0, aad); ieee80211_aes_ccm_encrypt(key->u.ccmp.tfm, b_0, aad, pos, len, - skb_put(skb, IEEE80211_CCMP_MIC_LEN)); + skb_put(skb, mic_len), mic_len); return 0; } ieee80211_tx_result -ieee80211_crypto_ccmp_encrypt(struct ieee80211_tx_data *tx) +ieee80211_crypto_ccmp_encrypt(struct ieee80211_tx_data *tx, + unsigned int mic_len) { struct sk_buff *skb; ieee80211_tx_set_protected(tx); skb_queue_walk(&tx->skbs, skb) { - if (ccmp_encrypt_skb(tx, skb) < 0) + if (ccmp_encrypt_skb(tx, skb, mic_len) < 0) return TX_DROP; } @@ -482,7 
+486,8 @@ ieee80211_crypto_ccmp_encrypt(struct ieee80211_tx_data *tx) ieee80211_rx_result -ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx) +ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx, + unsigned int mic_len) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; int hdrlen; @@ -499,8 +504,7 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx) !ieee80211_is_robust_mgmt_frame(skb)) return RX_CONTINUE; - data_len = skb->len - hdrlen - IEEE80211_CCMP_HDR_LEN - - IEEE80211_CCMP_MIC_LEN; + data_len = skb->len - hdrlen - IEEE80211_CCMP_HDR_LEN - mic_len; if (!rx->sta || data_len < 0) return RX_DROP_UNUSABLE; @@ -531,14 +535,14 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx) key->u.ccmp.tfm, b_0, aad, skb->data + hdrlen + IEEE80211_CCMP_HDR_LEN, data_len, - skb->data + skb->len - IEEE80211_CCMP_MIC_LEN)) + skb->data + skb->len - mic_len, mic_len)) return RX_DROP_UNUSABLE; } memcpy(key->u.ccmp.rx_pn[queue], pn, IEEE80211_CCMP_PN_LEN); /* Remove CCMP header and MIC */ - if (pskb_trim(skb, skb->len - IEEE80211_CCMP_MIC_LEN)) + if (pskb_trim(skb, skb->len - mic_len)) return RX_DROP_UNUSABLE; memmove(skb->data + IEEE80211_CCMP_HDR_LEN, skb->data, hdrlen); skb_pull(skb, IEEE80211_CCMP_HDR_LEN); @@ -546,6 +550,229 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx) return RX_CONTINUE; } +static void gcmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *j_0, u8 *aad) +{ + __le16 mask_fc; + u8 qos_tid; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; + + memcpy(j_0, hdr->addr2, ETH_ALEN); + memcpy(&j_0[ETH_ALEN], pn, IEEE80211_GCMP_PN_LEN); + j_0[13] = 0; + j_0[14] = 0; + j_0[AES_BLOCK_SIZE - 1] = 0x01; + + /* AAD (extra authenticate-only data) / masked 802.11 header + * FC | A1 | A2 | A3 | SC | [A4] | [QC] + */ + put_unaligned_be16(ieee80211_hdrlen(hdr->frame_control) - 2, &aad[0]); + /* Mask FC: zero subtype b4 b5 b6 (if not mgmt) + * Retry, PwrMgt, MoreData; set Protected + */ + mask_fc = hdr->frame_control; + mask_fc &= ~cpu_to_le16(IEEE80211_FCTL_RETRY | + IEEE80211_FCTL_PM | IEEE80211_FCTL_MOREDATA); + if (!ieee80211_is_mgmt(hdr->frame_control)) + mask_fc &= ~cpu_to_le16(0x0070); + mask_fc |= cpu_to_le16(IEEE80211_FCTL_PROTECTED); + + put_unaligned(mask_fc, (__le16 *)&aad[2]); + memcpy(&aad[4], &hdr->addr1, 3 * ETH_ALEN); + + /* Mask Seq#, leave Frag# */ + aad[22] = *((u8 *)&hdr->seq_ctrl) & 0x0f; + aad[23] = 0; + + if (ieee80211_is_data_qos(hdr->frame_control)) + qos_tid = *ieee80211_get_qos_ctl(hdr) & + IEEE80211_QOS_CTL_TID_MASK; + else + qos_tid = 0; + + if (ieee80211_has_a4(hdr->frame_control)) { + memcpy(&aad[24], hdr->addr4, ETH_ALEN); + aad[30] = qos_tid; + aad[31] = 0; + } else { + memset(&aad[24], 0, ETH_ALEN + IEEE80211_QOS_CTL_LEN); + aad[24] = qos_tid; + } +} + +static inline void gcmp_pn2hdr(u8 *hdr, const u8 *pn, int key_id) +{ + hdr[0] = pn[5]; + hdr[1] = pn[4]; + hdr[2] = 0; + hdr[3] = 0x20 | (key_id << 6); + hdr[4] = pn[3]; + hdr[5] = pn[2]; + hdr[6] = pn[1]; + hdr[7] = pn[0]; +} + +static inline void gcmp_hdr2pn(u8 *pn, const u8 *hdr) +{ + pn[0] = hdr[7]; + pn[1] = hdr[6]; + pn[2] = hdr[5]; + pn[3] = hdr[4]; + pn[4] = hdr[1]; + pn[5] = hdr[0]; +} + +static int gcmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) +{ + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; + struct ieee80211_key *key = tx->key; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + int hdrlen, len, tail; + u8 *pos; + u8 pn[6]; + u64 pn64; + u8 aad[2 * AES_BLOCK_SIZE]; + u8 
j_0[AES_BLOCK_SIZE]; + + if (info->control.hw_key && + !(info->control.hw_key->flags & IEEE80211_KEY_FLAG_GENERATE_IV) && + !(info->control.hw_key->flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE) && + !((info->control.hw_key->flags & + IEEE80211_KEY_FLAG_GENERATE_IV_MGMT) && + ieee80211_is_mgmt(hdr->frame_control))) { + /* hwaccel has no need for preallocated room for GCMP + * header or MIC fields + */ + return 0; + } + + hdrlen = ieee80211_hdrlen(hdr->frame_control); + len = skb->len - hdrlen; + + if (info->control.hw_key) + tail = 0; + else + tail = IEEE80211_GCMP_MIC_LEN; + + if (WARN_ON(skb_tailroom(skb) < tail || + skb_headroom(skb) < IEEE80211_GCMP_HDR_LEN)) + return -1; + + pos = skb_push(skb, IEEE80211_GCMP_HDR_LEN); + memmove(pos, pos + IEEE80211_GCMP_HDR_LEN, hdrlen); + skb_set_network_header(skb, skb_network_offset(skb) + + IEEE80211_GCMP_HDR_LEN); + + /* the HW only needs room for the IV, but not the actual IV */ + if (info->control.hw_key && + (info->control.hw_key->flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE)) + return 0; + + hdr = (struct ieee80211_hdr *)pos; + pos += hdrlen; + + pn64 = atomic64_inc_return(&key->u.gcmp.tx_pn); + + pn[5] = pn64; + pn[4] = pn64 >> 8; + pn[3] = pn64 >> 16; + pn[2] = pn64 >> 24; + pn[1] = pn64 >> 32; + pn[0] = pn64 >> 40; + + gcmp_pn2hdr(pos, pn, key->conf.keyidx); + + /* hwaccel - with software GCMP header */ + if (info->control.hw_key) + return 0; + + pos += IEEE80211_GCMP_HDR_LEN; + gcmp_special_blocks(skb, pn, j_0, aad); + ieee80211_aes_gcm_encrypt(key->u.gcmp.tfm, j_0, aad, pos, len, + skb_put(skb, IEEE80211_GCMP_MIC_LEN)); + + return 0; +} + +ieee80211_tx_result +ieee80211_crypto_gcmp_encrypt(struct ieee80211_tx_data *tx) +{ + struct sk_buff *skb; + + ieee80211_tx_set_protected(tx); + + skb_queue_walk(&tx->skbs, skb) { + if (gcmp_encrypt_skb(tx, skb) < 0) + return TX_DROP; + } + + return TX_CONTINUE; +} + +ieee80211_rx_result +ieee80211_crypto_gcmp_decrypt(struct ieee80211_rx_data *rx) +{ + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; + int hdrlen; + struct ieee80211_key *key = rx->key; + struct sk_buff *skb = rx->skb; + struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); + u8 pn[IEEE80211_GCMP_PN_LEN]; + int data_len; + int queue; + + hdrlen = ieee80211_hdrlen(hdr->frame_control); + + if (!ieee80211_is_data(hdr->frame_control) && + !ieee80211_is_robust_mgmt_frame(skb)) + return RX_CONTINUE; + + data_len = skb->len - hdrlen - IEEE80211_GCMP_HDR_LEN - + IEEE80211_GCMP_MIC_LEN; + if (!rx->sta || data_len < 0) + return RX_DROP_UNUSABLE; + + if (status->flag & RX_FLAG_DECRYPTED) { + if (!pskb_may_pull(rx->skb, hdrlen + IEEE80211_GCMP_HDR_LEN)) + return RX_DROP_UNUSABLE; + } else { + if (skb_linearize(rx->skb)) + return RX_DROP_UNUSABLE; + } + + gcmp_hdr2pn(pn, skb->data + hdrlen); + + queue = rx->security_idx; + + if (memcmp(pn, key->u.gcmp.rx_pn[queue], IEEE80211_GCMP_PN_LEN) <= 0) { + key->u.gcmp.replays++; + return RX_DROP_UNUSABLE; + } + + if (!(status->flag & RX_FLAG_DECRYPTED)) { + u8 aad[2 * AES_BLOCK_SIZE]; + u8 j_0[AES_BLOCK_SIZE]; + /* hardware didn't decrypt/verify MIC */ + gcmp_special_blocks(skb, pn, j_0, aad); + + if (ieee80211_aes_gcm_decrypt( + key->u.gcmp.tfm, j_0, aad, + skb->data + hdrlen + IEEE80211_GCMP_HDR_LEN, + data_len, + skb->data + skb->len - IEEE80211_GCMP_MIC_LEN)) + return RX_DROP_UNUSABLE; + } + + memcpy(key->u.gcmp.rx_pn[queue], pn, IEEE80211_GCMP_PN_LEN); + + /* Remove GCMP header and MIC */ + if (pskb_trim(skb, skb->len - IEEE80211_GCMP_MIC_LEN)) + return RX_DROP_UNUSABLE; + 
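Editorial aside, not part of the patch: the GCMP replay check added above compares packet numbers with a plain memcmp(). That only works because gcmp_hdr2pn() stores the 48-bit PN most-significant byte first, so byte-wise comparison matches numeric ordering. A minimal standalone illustration in userspace C, with invented PN values:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Store a 48-bit packet number big-endian, as gcmp_hdr2pn() does above. */
static void pn48_to_bytes(uint64_t pn, uint8_t out[6])
{
	int i;

	for (i = 0; i < 6; i++)
		out[i] = (uint8_t)(pn >> (8 * (5 - i)));
}

int main(void)
{
	uint8_t last[6], rx[6];

	pn48_to_bytes(0x000000010000ULL, last);	/* last accepted PN */
	pn48_to_bytes(0x00000000ffffULL, rx);	/* older (replayed) PN */

	/* Same test as the new decrypt path: a PN <= the stored one is a replay. */
	printf("replay detected: %s\n", memcmp(rx, last, 6) <= 0 ? "yes" : "no");
	return 0;
}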
memmove(skb->data + IEEE80211_GCMP_HDR_LEN, skb->data, hdrlen); + skb_pull(skb, IEEE80211_GCMP_HDR_LEN); + + return RX_CONTINUE; +} + static ieee80211_tx_result ieee80211_crypto_cs_encrypt(struct ieee80211_tx_data *tx, struct sk_buff *skb) @@ -729,6 +956,48 @@ ieee80211_crypto_aes_cmac_encrypt(struct ieee80211_tx_data *tx) return TX_CONTINUE; } +ieee80211_tx_result +ieee80211_crypto_aes_cmac_256_encrypt(struct ieee80211_tx_data *tx) +{ + struct sk_buff *skb; + struct ieee80211_tx_info *info; + struct ieee80211_key *key = tx->key; + struct ieee80211_mmie_16 *mmie; + u8 aad[20]; + u64 pn64; + + if (WARN_ON(skb_queue_len(&tx->skbs) != 1)) + return TX_DROP; + + skb = skb_peek(&tx->skbs); + + info = IEEE80211_SKB_CB(skb); + + if (info->control.hw_key) + return TX_CONTINUE; + + if (WARN_ON(skb_tailroom(skb) < sizeof(*mmie))) + return TX_DROP; + + mmie = (struct ieee80211_mmie_16 *)skb_put(skb, sizeof(*mmie)); + mmie->element_id = WLAN_EID_MMIE; + mmie->length = sizeof(*mmie) - 2; + mmie->key_id = cpu_to_le16(key->conf.keyidx); + + /* PN = PN + 1 */ + pn64 = atomic64_inc_return(&key->u.aes_cmac.tx_pn); + + bip_ipn_set64(mmie->sequence_number, pn64); + + bip_aad(skb, aad); + + /* MIC = AES-256-CMAC(IGTK, AAD || Management Frame Body || MMIE, 128) + */ + ieee80211_aes_cmac_256(key->u.aes_cmac.tfm, aad, + skb->data + 24, skb->len - 24, mmie->mic); + + return TX_CONTINUE; +} ieee80211_rx_result ieee80211_crypto_aes_cmac_decrypt(struct ieee80211_rx_data *rx) @@ -780,6 +1049,160 @@ ieee80211_crypto_aes_cmac_decrypt(struct ieee80211_rx_data *rx) return RX_CONTINUE; } +ieee80211_rx_result +ieee80211_crypto_aes_cmac_256_decrypt(struct ieee80211_rx_data *rx) +{ + struct sk_buff *skb = rx->skb; + struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); + struct ieee80211_key *key = rx->key; + struct ieee80211_mmie_16 *mmie; + u8 aad[20], mic[16], ipn[6]; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; + + if (!ieee80211_is_mgmt(hdr->frame_control)) + return RX_CONTINUE; + + /* management frames are already linear */ + + if (skb->len < 24 + sizeof(*mmie)) + return RX_DROP_UNUSABLE; + + mmie = (struct ieee80211_mmie_16 *) + (skb->data + skb->len - sizeof(*mmie)); + if (mmie->element_id != WLAN_EID_MMIE || + mmie->length != sizeof(*mmie) - 2) + return RX_DROP_UNUSABLE; /* Invalid MMIE */ + + bip_ipn_swap(ipn, mmie->sequence_number); + + if (memcmp(ipn, key->u.aes_cmac.rx_pn, 6) <= 0) { + key->u.aes_cmac.replays++; + return RX_DROP_UNUSABLE; + } + + if (!(status->flag & RX_FLAG_DECRYPTED)) { + /* hardware didn't decrypt/verify MIC */ + bip_aad(skb, aad); + ieee80211_aes_cmac_256(key->u.aes_cmac.tfm, aad, + skb->data + 24, skb->len - 24, mic); + if (memcmp(mic, mmie->mic, sizeof(mmie->mic)) != 0) { + key->u.aes_cmac.icverrors++; + return RX_DROP_UNUSABLE; + } + } + + memcpy(key->u.aes_cmac.rx_pn, ipn, 6); + + /* Remove MMIE */ + skb_trim(skb, skb->len - sizeof(*mmie)); + + return RX_CONTINUE; +} + +ieee80211_tx_result +ieee80211_crypto_aes_gmac_encrypt(struct ieee80211_tx_data *tx) +{ + struct sk_buff *skb; + struct ieee80211_tx_info *info; + struct ieee80211_key *key = tx->key; + struct ieee80211_mmie_16 *mmie; + struct ieee80211_hdr *hdr; + u8 aad[20]; + u64 pn64; + u8 nonce[12]; + + if (WARN_ON(skb_queue_len(&tx->skbs) != 1)) + return TX_DROP; + + skb = skb_peek(&tx->skbs); + + info = IEEE80211_SKB_CB(skb); + + if (info->control.hw_key) + return TX_CONTINUE; + + if (WARN_ON(skb_tailroom(skb) < sizeof(*mmie))) + return TX_DROP; + + mmie = (struct ieee80211_mmie_16 *)skb_put(skb, 
sizeof(*mmie)); + mmie->element_id = WLAN_EID_MMIE; + mmie->length = sizeof(*mmie) - 2; + mmie->key_id = cpu_to_le16(key->conf.keyidx); + + /* PN = PN + 1 */ + pn64 = atomic64_inc_return(&key->u.aes_gmac.tx_pn); + + bip_ipn_set64(mmie->sequence_number, pn64); + + bip_aad(skb, aad); + + hdr = (struct ieee80211_hdr *)skb->data; + memcpy(nonce, hdr->addr2, ETH_ALEN); + bip_ipn_swap(nonce + ETH_ALEN, mmie->sequence_number); + + /* MIC = AES-GMAC(IGTK, AAD || Management Frame Body || MMIE, 128) */ + if (ieee80211_aes_gmac(key->u.aes_gmac.tfm, aad, nonce, + skb->data + 24, skb->len - 24, mmie->mic) < 0) + return TX_DROP; + + return TX_CONTINUE; +} + +ieee80211_rx_result +ieee80211_crypto_aes_gmac_decrypt(struct ieee80211_rx_data *rx) +{ + struct sk_buff *skb = rx->skb; + struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); + struct ieee80211_key *key = rx->key; + struct ieee80211_mmie_16 *mmie; + u8 aad[20], mic[16], ipn[6], nonce[12]; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; + + if (!ieee80211_is_mgmt(hdr->frame_control)) + return RX_CONTINUE; + + /* management frames are already linear */ + + if (skb->len < 24 + sizeof(*mmie)) + return RX_DROP_UNUSABLE; + + mmie = (struct ieee80211_mmie_16 *) + (skb->data + skb->len - sizeof(*mmie)); + if (mmie->element_id != WLAN_EID_MMIE || + mmie->length != sizeof(*mmie) - 2) + return RX_DROP_UNUSABLE; /* Invalid MMIE */ + + bip_ipn_swap(ipn, mmie->sequence_number); + + if (memcmp(ipn, key->u.aes_gmac.rx_pn, 6) <= 0) { + key->u.aes_gmac.replays++; + return RX_DROP_UNUSABLE; + } + + if (!(status->flag & RX_FLAG_DECRYPTED)) { + /* hardware didn't decrypt/verify MIC */ + bip_aad(skb, aad); + + memcpy(nonce, hdr->addr2, ETH_ALEN); + memcpy(nonce + ETH_ALEN, ipn, 6); + + if (ieee80211_aes_gmac(key->u.aes_gmac.tfm, aad, nonce, + skb->data + 24, skb->len - 24, + mic) < 0 || + memcmp(mic, mmie->mic, sizeof(mmie->mic)) != 0) { + key->u.aes_gmac.icverrors++; + return RX_DROP_UNUSABLE; + } + } + + memcpy(key->u.aes_gmac.rx_pn, ipn, 6); + + /* Remove MMIE */ + skb_trim(skb, skb->len - sizeof(*mmie)); + + return RX_CONTINUE; +} + ieee80211_tx_result ieee80211_crypto_hw_encrypt(struct ieee80211_tx_data *tx) { diff --git a/net/mac80211/wpa.h b/net/mac80211/wpa.h index 62e5a12dfe0a..d98011ee8f55 100644 --- a/net/mac80211/wpa.h +++ b/net/mac80211/wpa.h @@ -24,17 +24,32 @@ ieee80211_rx_result ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx); ieee80211_tx_result -ieee80211_crypto_ccmp_encrypt(struct ieee80211_tx_data *tx); +ieee80211_crypto_ccmp_encrypt(struct ieee80211_tx_data *tx, + unsigned int mic_len); ieee80211_rx_result -ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx); +ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx, + unsigned int mic_len); ieee80211_tx_result ieee80211_crypto_aes_cmac_encrypt(struct ieee80211_tx_data *tx); +ieee80211_tx_result +ieee80211_crypto_aes_cmac_256_encrypt(struct ieee80211_tx_data *tx); ieee80211_rx_result ieee80211_crypto_aes_cmac_decrypt(struct ieee80211_rx_data *rx); +ieee80211_rx_result +ieee80211_crypto_aes_cmac_256_decrypt(struct ieee80211_rx_data *rx); +ieee80211_tx_result +ieee80211_crypto_aes_gmac_encrypt(struct ieee80211_tx_data *tx); +ieee80211_rx_result +ieee80211_crypto_aes_gmac_decrypt(struct ieee80211_rx_data *rx); ieee80211_tx_result ieee80211_crypto_hw_encrypt(struct ieee80211_tx_data *tx); ieee80211_rx_result ieee80211_crypto_hw_decrypt(struct ieee80211_rx_data *rx); +ieee80211_tx_result +ieee80211_crypto_gcmp_encrypt(struct ieee80211_tx_data *tx); 
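Editorial aside, not part of the patch: the new mic_len parameter in the CCMP prototypes above lets a single encrypt/decrypt path serve both CCMP-128 and CCMP-256, mirroring the dispatch already shown in the tx.c hunk. A hedged sketch of the per-suite MIC sizes involved; the constant names come from the hunks in this diff, while the byte values in the comments (8 versus 16) are the standard sizes and are stated here as an assumption, not taken from this diff:

/* Sketch only: map cipher suite to the MIC length passed into the shared
 * CCMP helpers; GCMP keeps a fixed-size MIC so it needs no such argument.
 */
static unsigned int example_mic_len(u32 cipher)
{
	switch (cipher) {
	case WLAN_CIPHER_SUITE_CCMP:		/* CCMP-128, assumed 8-byte MIC */
		return IEEE80211_CCMP_MIC_LEN;
	case WLAN_CIPHER_SUITE_CCMP_256:	/* assumed 16-byte MIC */
		return IEEE80211_CCMP_256_MIC_LEN;
	case WLAN_CIPHER_SUITE_GCMP:
	case WLAN_CIPHER_SUITE_GCMP_256:	/* assumed 16-byte MIC */
		return IEEE80211_GCMP_MIC_LEN;
	default:
		return 0;			/* other suites handled elsewhere */
	}
}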
+ieee80211_rx_result +ieee80211_crypto_gcmp_decrypt(struct ieee80211_rx_data *rx); + #endif /* WPA_H */ diff --git a/net/mac802154/cfg.c b/net/mac802154/cfg.c index c035708ada16..5d9f68c75e5f 100644 --- a/net/mac802154/cfg.c +++ b/net/mac802154/cfg.c @@ -51,10 +51,7 @@ ieee802154_add_iface(struct wpan_phy *phy, const char *name, struct net_device *err; err = ieee802154_if_add(local, name, type, extended_addr); - if (IS_ERR(err)) - return PTR_ERR(err); - - return 0; + return PTR_ERR_OR_ZERO(err); } static int @@ -87,6 +84,26 @@ ieee802154_set_channel(struct wpan_phy *wpan_phy, u8 page, u8 channel) } static int +ieee802154_set_cca_mode(struct wpan_phy *wpan_phy, + const struct wpan_phy_cca *cca) +{ + struct ieee802154_local *local = wpan_phy_priv(wpan_phy); + int ret; + + ASSERT_RTNL(); + + /* check if phy support this setting */ + if (!(local->hw.flags & IEEE802154_HW_CCA_MODE)) + return -EOPNOTSUPP; + + ret = drv_set_cca_mode(local, cca); + if (!ret) + wpan_phy->cca = *cca; + + return ret; +} + +static int ieee802154_set_pan_id(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, __le16 pan_id) { @@ -201,6 +218,7 @@ const struct cfg802154_ops mac802154_config_ops = { .add_virtual_intf = ieee802154_add_iface, .del_virtual_intf = ieee802154_del_iface, .set_channel = ieee802154_set_channel, + .set_cca_mode = ieee802154_set_cca_mode, .set_pan_id = ieee802154_set_pan_id, .set_short_addr = ieee802154_set_short_addr, .set_backoff_exponent = ieee802154_set_backoff_exponent, diff --git a/net/mac802154/driver-ops.h b/net/mac802154/driver-ops.h index f21e864613d0..98180a9fff4a 100644 --- a/net/mac802154/driver-ops.h +++ b/net/mac802154/driver-ops.h @@ -70,7 +70,8 @@ static inline int drv_set_tx_power(struct ieee802154_local *local, s8 dbm) return local->ops->set_txpower(&local->hw, dbm); } -static inline int drv_set_cca_mode(struct ieee802154_local *local, u8 cca_mode) +static inline int drv_set_cca_mode(struct ieee802154_local *local, + const struct wpan_phy_cca *cca) { might_sleep(); @@ -79,7 +80,7 @@ static inline int drv_set_cca_mode(struct ieee802154_local *local, u8 cca_mode) return -EOPNOTSUPP; } - return local->ops->set_cca_mode(&local->hw, cca_mode); + return local->ops->set_cca_mode(&local->hw, cca); } static inline int drv_set_lbt_mode(struct ieee802154_local *local, bool mode) diff --git a/net/mac802154/iface.c b/net/mac802154/iface.c index 9ae893057dd7..6fb6bdf9868c 100644 --- a/net/mac802154/iface.c +++ b/net/mac802154/iface.c @@ -137,25 +137,11 @@ static int mac802154_wpan_mac_addr(struct net_device *dev, void *p) static int mac802154_slave_open(struct net_device *dev) { struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev); - struct ieee802154_sub_if_data *subif; struct ieee802154_local *local = sdata->local; int res = 0; ASSERT_RTNL(); - if (sdata->vif.type == NL802154_IFTYPE_NODE) { - mutex_lock(&sdata->local->iflist_mtx); - list_for_each_entry(subif, &sdata->local->interfaces, list) { - if (subif != sdata && - subif->vif.type == sdata->vif.type && - ieee802154_sdata_running(subif)) { - mutex_unlock(&sdata->local->iflist_mtx); - return -EBUSY; - } - } - mutex_unlock(&sdata->local->iflist_mtx); - } - set_bit(SDATA_STATE_RUNNING, &sdata->state); if (!local->open_count) { @@ -175,6 +161,88 @@ err: return res; } +static int +ieee802154_check_mac_settings(struct ieee802154_local *local, + struct wpan_dev *wpan_dev, + struct wpan_dev *nwpan_dev) +{ + ASSERT_RTNL(); + + if (local->hw.flags & IEEE802154_HW_PROMISCUOUS) { + if (wpan_dev->promiscuous_mode != 
nwpan_dev->promiscuous_mode) + return -EBUSY; + } + + if (local->hw.flags & IEEE802154_HW_AFILT) { + if (wpan_dev->pan_id != nwpan_dev->pan_id) + return -EBUSY; + + if (wpan_dev->short_addr != nwpan_dev->short_addr) + return -EBUSY; + + if (wpan_dev->extended_addr != nwpan_dev->extended_addr) + return -EBUSY; + } + + if (local->hw.flags & IEEE802154_HW_CSMA_PARAMS) { + if (wpan_dev->min_be != nwpan_dev->min_be) + return -EBUSY; + + if (wpan_dev->max_be != nwpan_dev->max_be) + return -EBUSY; + + if (wpan_dev->csma_retries != nwpan_dev->csma_retries) + return -EBUSY; + } + + if (local->hw.flags & IEEE802154_HW_FRAME_RETRIES) { + if (wpan_dev->frame_retries != nwpan_dev->frame_retries) + return -EBUSY; + } + + if (local->hw.flags & IEEE802154_HW_LBT) { + if (wpan_dev->lbt != nwpan_dev->lbt) + return -EBUSY; + } + + return 0; +} + +static int +ieee802154_check_concurrent_iface(struct ieee802154_sub_if_data *sdata, + enum nl802154_iftype iftype) +{ + struct ieee802154_local *local = sdata->local; + struct wpan_dev *wpan_dev = &sdata->wpan_dev; + struct ieee802154_sub_if_data *nsdata; + + /* we hold the RTNL here so can safely walk the list */ + list_for_each_entry(nsdata, &local->interfaces, list) { + if (nsdata != sdata && ieee802154_sdata_running(nsdata)) { + int ret; + + /* TODO currently we don't support multiple node types + * we need to run skb_clone at rx path. Check if there + * exist really an use case if we need to support + * multiple node types at the same time. + */ + if (sdata->vif.type == NL802154_IFTYPE_NODE && + nsdata->vif.type == NL802154_IFTYPE_NODE) + return -EBUSY; + + /* check all phy mac sublayer settings are the same. + * We have only one phy, different values makes trouble. + */ + ret = ieee802154_check_mac_settings(local, wpan_dev, + &nsdata->wpan_dev); + if (ret < 0) + return ret; + } + } + + return 0; +} + static int mac802154_wpan_open(struct net_device *dev) { int rc; @@ -183,6 +251,10 @@ static int mac802154_wpan_open(struct net_device *dev) struct wpan_dev *wpan_dev = &sdata->wpan_dev; struct wpan_phy *phy = sdata->local->phy; + rc = ieee802154_check_concurrent_iface(sdata, sdata->vif.type); + if (rc < 0) + return rc; + rc = mac802154_slave_open(dev); if (rc < 0) return rc; diff --git a/net/mac802154/mac_cmd.c b/net/mac802154/mac_cmd.c index 6aacb1816889..bdccb4ecd30f 100644 --- a/net/mac802154/mac_cmd.c +++ b/net/mac802154/mac_cmd.c @@ -81,7 +81,7 @@ static int mac802154_set_mac_params(struct net_device *dev, /* PHY */ wpan_dev->wpan_phy->transmit_power = params->transmit_power; - wpan_dev->wpan_phy->cca_mode = params->cca_mode; + wpan_dev->wpan_phy->cca = params->cca; wpan_dev->wpan_phy->cca_ed_level = params->cca_ed_level; /* MAC */ @@ -98,7 +98,7 @@ static int mac802154_set_mac_params(struct net_device *dev, } if (local->hw.flags & IEEE802154_HW_CCA_MODE) { - ret = drv_set_cca_mode(local, params->cca_mode); + ret = drv_set_cca_mode(local, ¶ms->cca); if (ret < 0) return ret; } @@ -122,7 +122,7 @@ static void mac802154_get_mac_params(struct net_device *dev, /* PHY */ params->transmit_power = wpan_dev->wpan_phy->transmit_power; - params->cca_mode = wpan_dev->wpan_phy->cca_mode; + params->cca = wpan_dev->wpan_phy->cca; params->cca_ed_level = wpan_dev->wpan_phy->cca_ed_level; /* MAC */ diff --git a/net/mpls/mpls_gso.c b/net/mpls/mpls_gso.c index ca27837974fe..809df534a720 100644 --- a/net/mpls/mpls_gso.c +++ b/net/mpls/mpls_gso.c @@ -31,10 +31,7 @@ static struct sk_buff *mpls_gso_segment(struct sk_buff *skb, SKB_GSO_TCPV6 | SKB_GSO_UDP | SKB_GSO_DODGY | - 
SKB_GSO_TCP_ECN | - SKB_GSO_GRE | - SKB_GSO_GRE_CSUM | - SKB_GSO_IPIP))) + SKB_GSO_TCP_ECN))) goto out; /* Setup inner SKB. */ @@ -63,14 +60,14 @@ out: return segs; } -static struct packet_offload mpls_mc_offload = { +static struct packet_offload mpls_mc_offload __read_mostly = { .type = cpu_to_be16(ETH_P_MPLS_MC), .callbacks = { .gso_segment = mpls_gso_segment, }, }; -static struct packet_offload mpls_uc_offload = { +static struct packet_offload mpls_uc_offload __read_mostly = { .type = cpu_to_be16(ETH_P_MPLS_UC), .callbacks = { .gso_segment = mpls_gso_segment, diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 990decba1fe4..b87ca32efa0b 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -659,16 +659,24 @@ static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user) return err; } -static int ip_vs_route_me_harder(int af, struct sk_buff *skb) +static int ip_vs_route_me_harder(int af, struct sk_buff *skb, + unsigned int hooknum) { + if (!sysctl_snat_reroute(skb)) + return 0; + /* Reroute replies only to remote clients (FORWARD and LOCAL_OUT) */ + if (NF_INET_LOCAL_IN == hooknum) + return 0; #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) { - if (sysctl_snat_reroute(skb) && ip6_route_me_harder(skb) != 0) + struct dst_entry *dst = skb_dst(skb); + + if (dst->dev && !(dst->dev->flags & IFF_LOOPBACK) && + ip6_route_me_harder(skb) != 0) return 1; } else #endif - if ((sysctl_snat_reroute(skb) || - skb_rtable(skb)->rt_flags & RTCF_LOCAL) && + if (!(skb_rtable(skb)->rt_flags & RTCF_LOCAL) && ip_route_me_harder(skb, RTN_LOCAL) != 0) return 1; @@ -791,7 +799,8 @@ static int handle_response_icmp(int af, struct sk_buff *skb, union nf_inet_addr *snet, __u8 protocol, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, - unsigned int offset, unsigned int ihl) + unsigned int offset, unsigned int ihl, + unsigned int hooknum) { unsigned int verdict = NF_DROP; @@ -821,7 +830,7 @@ static int handle_response_icmp(int af, struct sk_buff *skb, #endif ip_vs_nat_icmp(skb, pp, cp, 1); - if (ip_vs_route_me_harder(af, skb)) + if (ip_vs_route_me_harder(af, skb, hooknum)) goto out; /* do the statistics and put it back */ @@ -916,7 +925,7 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related, snet.ip = iph->saddr; return handle_response_icmp(AF_INET, skb, &snet, cih->protocol, cp, - pp, ciph.len, ihl); + pp, ciph.len, ihl, hooknum); } #ifdef CONFIG_IP_VS_IPV6 @@ -981,7 +990,8 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related, snet.in6 = ciph.saddr.in6; writable = ciph.len; return handle_response_icmp(AF_INET6, skb, &snet, ciph.protocol, cp, - pp, writable, sizeof(struct ipv6hdr)); + pp, writable, sizeof(struct ipv6hdr), + hooknum); } #endif @@ -1040,7 +1050,8 @@ static inline bool is_new_conn(const struct sk_buff *skb, */ static unsigned int handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, - struct ip_vs_conn *cp, struct ip_vs_iphdr *iph) + struct ip_vs_conn *cp, struct ip_vs_iphdr *iph, + unsigned int hooknum) { struct ip_vs_protocol *pp = pd->pp; @@ -1078,7 +1089,7 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, * if it came from this machine itself. So re-compute * the routing information. 
*/ - if (ip_vs_route_me_harder(af, skb)) + if (ip_vs_route_me_harder(af, skb, hooknum)) goto drop; IP_VS_DBG_PKT(10, af, pp, skb, 0, "After SNAT"); @@ -1181,7 +1192,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) cp = pp->conn_out_get(af, skb, &iph, 0); if (likely(cp)) - return handle_response(af, skb, pd, cp, &iph); + return handle_response(af, skb, pd, cp, &iph, hooknum); if (sysctl_nat_icmp_send(net) && (pp->protocol == IPPROTO_TCP || pp->protocol == IPPROTO_UDP || diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index b8295a430a56..e55759056361 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2887,7 +2887,8 @@ static int ip_vs_genl_dump_service(struct sk_buff *skb, if (ip_vs_genl_fill_service(skb, svc) < 0) goto nla_put_failure; - return genlmsg_end(skb, hdr); + genlmsg_end(skb, hdr); + return 0; nla_put_failure: genlmsg_cancel(skb, hdr); @@ -3079,7 +3080,8 @@ static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest, if (ip_vs_genl_fill_dest(skb, dest) < 0) goto nla_put_failure; - return genlmsg_end(skb, hdr); + genlmsg_end(skb, hdr); + return 0; nla_put_failure: genlmsg_cancel(skb, hdr); @@ -3215,7 +3217,8 @@ static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __u32 state, if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid)) goto nla_put_failure; - return genlmsg_end(skb, hdr); + genlmsg_end(skb, hdr); + return 0; nla_put_failure: genlmsg_cancel(skb, hdr); diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index 1d5341f3761d..5d3daae98bf0 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -183,6 +183,8 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, struct nf_conn *ct; struct net *net; + *diff = 0; + #ifdef CONFIG_IP_VS_IPV6 /* This application helper doesn't work with IPv6 yet, * so turn this into a no-op for IPv6 packets @@ -191,8 +193,6 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, return 1; #endif - *diff = 0; - /* Only useful for established sessions */ if (cp->state != IP_VS_TCP_S_ESTABLISHED) return 1; @@ -322,6 +322,9 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, struct ip_vs_conn *n_cp; struct net *net; + /* no diff required for incoming packets */ + *diff = 0; + #ifdef CONFIG_IP_VS_IPV6 /* This application helper doesn't work with IPv6 yet, * so turn this into a no-op for IPv6 packets @@ -330,9 +333,6 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, return 1; #endif - /* no diff required for incoming packets */ - *diff = 0; - /* Only useful for established sessions */ if (cp->state != IP_VS_TCP_S_ESTABLISHED) return 1; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index a11674806707..13fad8668f83 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -611,16 +611,15 @@ __nf_conntrack_confirm(struct sk_buff *skb) */ NF_CT_ASSERT(!nf_ct_is_confirmed(ct)); pr_debug("Confirming conntrack %p\n", ct); - /* We have to check the DYING flag inside the lock to prevent - a race against nf_ct_get_next_corpse() possibly called from - user context, else we insert an already 'dead' hash, blocking - further use of that particular connection -JM */ + /* We have to check the DYING flag after unlink to prevent + * a race against nf_ct_get_next_corpse() possibly called from + * user context, else we insert an already 'dead' hash, blocking + * further use 
of that particular connection -JM. + */ + nf_ct_del_from_dying_or_unconfirmed_list(ct); - if (unlikely(nf_ct_is_dying(ct))) { - nf_conntrack_double_unlock(hash, reply_hash); - local_bh_enable(); - return NF_ACCEPT; - } + if (unlikely(nf_ct_is_dying(ct))) + goto out; /* See if there's one in the list already, including reverse: NAT could have grabbed it without realizing, since we're @@ -636,8 +635,6 @@ __nf_conntrack_confirm(struct sk_buff *skb) zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h))) goto out; - nf_ct_del_from_dying_or_unconfirmed_list(ct); - /* Timer relative to confirmation time, not original setting time, otherwise we'd get timer wrap in weird delay cases. */ @@ -673,6 +670,7 @@ __nf_conntrack_confirm(struct sk_buff *skb) return NF_ACCEPT; out: + nf_ct_add_to_dying_list(ct); nf_conntrack_double_unlock(hash, reply_hash); NF_CT_STAT_INC(net, insert_failed); local_bh_enable(); @@ -1426,12 +1424,6 @@ void nf_ct_free_hashtable(void *hash, unsigned int size) } EXPORT_SYMBOL_GPL(nf_ct_free_hashtable); -void nf_conntrack_flush_report(struct net *net, u32 portid, int report) -{ - nf_ct_iterate_cleanup(net, kill_all, NULL, portid, report); -} -EXPORT_SYMBOL_GPL(nf_conntrack_flush_report); - static int untrack_refs(void) { int cnt = 0, cpu; @@ -1624,13 +1616,18 @@ int nf_conntrack_init_start(void) for (i = 0; i < CONNTRACK_LOCKS; i++) spin_lock_init(&nf_conntrack_locks[i]); - /* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB - * machine has 512 buckets. >= 1GB machines have 16384 buckets. */ if (!nf_conntrack_htable_size) { + /* Idea from tcp.c: use 1/16384 of memory. + * On i386: 32MB machine has 512 buckets. + * >= 1GB machines have 16384 buckets. + * >= 4GB machines have 65536 buckets. + */ nf_conntrack_htable_size = (((totalram_pages << PAGE_SHIFT) / 16384) / sizeof(struct hlist_head)); - if (totalram_pages > (1024 * 1024 * 1024 / PAGE_SIZE)) + if (totalram_pages > (4 * (1024 * 1024 * 1024 / PAGE_SIZE))) + nf_conntrack_htable_size = 65536; + else if (totalram_pages > (1024 * 1024 * 1024 / PAGE_SIZE)) nf_conntrack_htable_size = 16384; if (nf_conntrack_htable_size < 32) nf_conntrack_htable_size = 32; diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 1bd9ed9e62f6..d1c23940a86a 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -749,13 +749,47 @@ static int ctnetlink_done(struct netlink_callback *cb) return 0; } -struct ctnetlink_dump_filter { +struct ctnetlink_filter { struct { u_int32_t val; u_int32_t mask; } mark; }; +static struct ctnetlink_filter * +ctnetlink_alloc_filter(const struct nlattr * const cda[]) +{ +#ifdef CONFIG_NF_CONNTRACK_MARK + struct ctnetlink_filter *filter; + + filter = kzalloc(sizeof(*filter), GFP_KERNEL); + if (filter == NULL) + return ERR_PTR(-ENOMEM); + + filter->mark.val = ntohl(nla_get_be32(cda[CTA_MARK])); + filter->mark.mask = ntohl(nla_get_be32(cda[CTA_MARK_MASK])); + + return filter; +#else + return ERR_PTR(-EOPNOTSUPP); +#endif +} + +static int ctnetlink_filter_match(struct nf_conn *ct, void *data) +{ + struct ctnetlink_filter *filter = data; + + if (filter == NULL) + return 1; + +#ifdef CONFIG_NF_CONNTRACK_MARK + if ((ct->mark & filter->mark.mask) == filter->mark.val) + return 1; +#endif + + return 0; +} + static int ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) { @@ -768,10 +802,6 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) int res; spinlock_t *lockp; -#ifdef CONFIG_NF_CONNTRACK_MARK - const struct 
ctnetlink_dump_filter *filter = cb->data; -#endif - last = (struct nf_conn *)cb->args[1]; local_bh_disable(); @@ -798,12 +828,9 @@ restart: continue; cb->args[1] = 0; } -#ifdef CONFIG_NF_CONNTRACK_MARK - if (filter && !((ct->mark & filter->mark.mask) == - filter->mark.val)) { + if (!ctnetlink_filter_match(ct, cb->data)) continue; - } -#endif + rcu_read_lock(); res = ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid, @@ -1001,6 +1028,25 @@ static const struct nla_policy ct_nla_policy[CTA_MAX+1] = { .len = NF_CT_LABELS_MAX_SIZE }, }; +static int ctnetlink_flush_conntrack(struct net *net, + const struct nlattr * const cda[], + u32 portid, int report) +{ + struct ctnetlink_filter *filter = NULL; + + if (cda[CTA_MARK] && cda[CTA_MARK_MASK]) { + filter = ctnetlink_alloc_filter(cda); + if (IS_ERR(filter)) + return PTR_ERR(filter); + } + + nf_ct_iterate_cleanup(net, ctnetlink_filter_match, filter, + portid, report); + kfree(filter); + + return 0; +} + static int ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, @@ -1024,11 +1070,9 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, else if (cda[CTA_TUPLE_REPLY]) err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY, u3); else { - /* Flush the whole table */ - nf_conntrack_flush_report(net, - NETLINK_CB(skb).portid, - nlmsg_report(nlh)); - return 0; + return ctnetlink_flush_conntrack(net, cda, + NETLINK_CB(skb).portid, + nlmsg_report(nlh)); } if (err < 0) @@ -1076,21 +1120,16 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, .dump = ctnetlink_dump_table, .done = ctnetlink_done, }; -#ifdef CONFIG_NF_CONNTRACK_MARK + if (cda[CTA_MARK] && cda[CTA_MARK_MASK]) { - struct ctnetlink_dump_filter *filter; + struct ctnetlink_filter *filter; - filter = kzalloc(sizeof(struct ctnetlink_dump_filter), - GFP_ATOMIC); - if (filter == NULL) - return -ENOMEM; + filter = ctnetlink_alloc_filter(cda); + if (IS_ERR(filter)) + return PTR_ERR(filter); - filter->mark.val = ntohl(nla_get_be32(cda[CTA_MARK])); - filter->mark.mask = - ntohl(nla_get_be32(cda[CTA_MARK_MASK])); c.data = filter; } -#endif return netlink_dump_start(ctnl, skb, nlh, &c); } diff --git a/net/netfilter/nf_conntrack_seqadj.c b/net/netfilter/nf_conntrack_seqadj.c index f6e2ae91a80b..ce3e840c8704 100644 --- a/net/netfilter/nf_conntrack_seqadj.c +++ b/net/netfilter/nf_conntrack_seqadj.c @@ -98,9 +98,9 @@ static void nf_ct_sack_block_adjust(struct sk_buff *skb, new_end_seq = htonl(ntohl(sack->end_seq) - seq->offset_before); - pr_debug("sack_adjust: start_seq: %d->%d, end_seq: %d->%d\n", - ntohl(sack->start_seq), new_start_seq, - ntohl(sack->end_seq), new_end_seq); + pr_debug("sack_adjust: start_seq: %u->%u, end_seq: %u->%u\n", + ntohl(sack->start_seq), ntohl(new_start_seq), + ntohl(sack->end_seq), ntohl(new_end_seq)); inet_proto_csum_replace4(&tcph->check, skb, sack->start_seq, new_start_seq, 0); diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 43c926cae9c0..0d8448f19dfe 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -425,8 +425,7 @@ static int netfilter_log_sysctl_init(struct net *net) nf_log_sysctl_table[i].procname = nf_log_sysctl_fnames[i]; nf_log_sysctl_table[i].data = NULL; - nf_log_sysctl_table[i].maxlen = - NFLOGGER_NAME_LEN * sizeof(char); + nf_log_sysctl_table[i].maxlen = NFLOGGER_NAME_LEN; nf_log_sysctl_table[i].mode = 0644; nf_log_sysctl_table[i].proc_handler = nf_log_proc_dostring; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 
129a8daa4abf..199fd0f27b0e 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -427,7 +427,8 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net, nla_put_be32(skb, NFTA_TABLE_USE, htonl(table->use))) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_trim(skb, nlh); @@ -713,16 +714,12 @@ static int nft_flush_table(struct nft_ctx *ctx) struct nft_chain *chain, *nc; struct nft_set *set, *ns; - list_for_each_entry_safe(chain, nc, &ctx->table->chains, list) { + list_for_each_entry(chain, &ctx->table->chains, list) { ctx->chain = chain; err = nft_delrule_by_chain(ctx); if (err < 0) goto out; - - err = nft_delchain(ctx); - if (err < 0) - goto out; } list_for_each_entry_safe(set, ns, &ctx->table->sets, list) { @@ -735,6 +732,14 @@ static int nft_flush_table(struct nft_ctx *ctx) goto out; } + list_for_each_entry_safe(chain, nc, &ctx->table->chains, list) { + ctx->chain = chain; + + err = nft_delchain(ctx); + if (err < 0) + goto out; + } + err = nft_deltable(ctx); out: return err; @@ -967,7 +972,8 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net, if (nla_put_be32(skb, NFTA_CHAIN_USE, htonl(chain->use))) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_trim(skb, nlh); @@ -1130,9 +1136,11 @@ static struct nft_stats __percpu *nft_stats_alloc(const struct nlattr *attr) /* Restore old counters on this cpu, no problem. Per-cpu statistics * are not exposed to userspace. */ + preempt_disable(); stats = this_cpu_ptr(newstats); stats->bytes = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES])); stats->pkts = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS])); + preempt_enable(); return newstats; } @@ -1258,8 +1266,10 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla); trans = nft_trans_alloc(&ctx, NFT_MSG_NEWCHAIN, sizeof(struct nft_trans_chain)); - if (trans == NULL) + if (trans == NULL) { + free_percpu(stats); return -ENOMEM; + } nft_trans_chain_stats(trans) = stats; nft_trans_chain_update(trans) = true; @@ -1315,8 +1325,10 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, hookfn = type->hooks[hooknum]; basechain = kzalloc(sizeof(*basechain), GFP_KERNEL); - if (basechain == NULL) + if (basechain == NULL) { + module_put(type->owner); return -ENOMEM; + } if (nla[NFTA_CHAIN_COUNTERS]) { stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]); @@ -1703,7 +1715,8 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net, nla_put(skb, NFTA_RULE_USERDATA, rule->ulen, nft_userdata(rule))) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_trim(skb, nlh); @@ -2357,7 +2370,8 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, goto nla_put_failure; nla_nest_end(skb, desc); - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_trim(skb, nlh); @@ -3031,7 +3045,8 @@ static int nf_tables_fill_setelem_info(struct sk_buff *skb, nla_nest_end(skb, nest); - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_trim(skb, nlh); @@ -3320,7 +3335,8 @@ static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net, if (nla_put_be32(skb, NFTA_GEN_ID, htonl(net->nft.base_seq))) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; 
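Editorial aside, not part of the patch: several hunks in this file (and the genlmsg_end() hunks in ip_vs_ctl.c above) stop returning the value of nlmsg_end() and return 0 explicitly, so the fill helpers report only success or a negative error. A minimal sketch of the resulting convention, using only standard netlink helpers; the function name and the attribute type are invented for illustration:

/* Sketch of the fill-function pattern these hunks converge on: build the
 * message, finalize it with nlmsg_end(), and return 0 on success instead
 * of propagating nlmsg_end()'s return value.
 */
static int example_fill_info(struct sk_buff *skb, u32 portid, u32 seq,
			     int type, u32 value)
{
	struct nlmsghdr *nlh;

	nlh = nlmsg_put(skb, portid, seq, type, 0, 0);
	if (!nlh)
		return -EMSGSIZE;

	if (nla_put_u32(skb, 1 /* invented attribute type */, value))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}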
nla_put_failure: nlmsg_trim(skb, nlh); @@ -3749,6 +3765,24 @@ int nft_chain_validate_dependency(const struct nft_chain *chain, } EXPORT_SYMBOL_GPL(nft_chain_validate_dependency); +int nft_chain_validate_hooks(const struct nft_chain *chain, + unsigned int hook_flags) +{ + struct nft_base_chain *basechain; + + if (chain->flags & NFT_BASE_CHAIN) { + basechain = nft_base_chain(chain); + + if ((1 << basechain->ops[0].hooknum) & hook_flags) + return 0; + + return -EOPNOTSUPP; + } + + return 0; +} +EXPORT_SYMBOL_GPL(nft_chain_validate_hooks); + /* * Loop detection - walk through the ruleset beginning at the destination chain * of a new jump until either the source chain is reached (loop) or all diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 13c2e17bbe27..8b117c90ecd7 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -272,7 +272,7 @@ static void nfnl_err_deliver(struct list_head *err_list, struct sk_buff *skb) static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh, u_int16_t subsys_id) { - struct sk_buff *nskb, *oskb = skb; + struct sk_buff *oskb = skb; struct net *net = sock_net(skb->sk); const struct nfnetlink_subsystem *ss; const struct nfnl_callback *nc; @@ -283,12 +283,11 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh, if (subsys_id >= NFNL_SUBSYS_COUNT) return netlink_ack(skb, nlh, -EINVAL); replay: - nskb = netlink_skb_clone(oskb, GFP_KERNEL); - if (!nskb) + skb = netlink_skb_clone(oskb, GFP_KERNEL); + if (!skb) return netlink_ack(oskb, nlh, -ENOMEM); - nskb->sk = oskb->sk; - skb = nskb; + skb->sk = oskb->sk; nfnl_lock(subsys_id); ss = rcu_dereference_protected(table[subsys_id].subsys, @@ -305,7 +304,7 @@ replay: { nfnl_unlock(subsys_id); netlink_ack(skb, nlh, -EOPNOTSUPP); - return kfree_skb(nskb); + return kfree_skb(skb); } } @@ -321,7 +320,8 @@ replay: nlh = nlmsg_hdr(skb); err = 0; - if (nlh->nlmsg_len < NLMSG_HDRLEN) { + if (nlmsg_len(nlh) < sizeof(struct nfgenmsg) || + skb->len < nlh->nlmsg_len) { err = -EINVAL; goto ack; } @@ -385,7 +385,7 @@ replay: nfnl_err_reset(&err_list); ss->abort(oskb); nfnl_unlock(subsys_id); - kfree_skb(nskb); + kfree_skb(skb); goto replay; } } @@ -426,7 +426,7 @@ done: nfnl_err_deliver(&err_list, oskb); nfnl_unlock(subsys_id); - kfree_skb(nskb); + kfree_skb(skb); } static void nfnetlink_rcv(struct sk_buff *skb) @@ -463,13 +463,13 @@ static void nfnetlink_rcv(struct sk_buff *skb) } #ifdef CONFIG_MODULES -static int nfnetlink_bind(int group) +static int nfnetlink_bind(struct net *net, int group) { const struct nfnetlink_subsystem *ss; int type; if (group <= NFNLGRP_NONE || group > NFNLGRP_MAX) - return -EINVAL; + return 0; type = nfnl_group2type[group]; diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index 9e287cb56a04..a5599fc51a6f 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -86,7 +86,7 @@ nfnl_cthelper_parse_tuple(struct nf_conntrack_tuple *tuple, static int nfnl_cthelper_from_nlattr(struct nlattr *attr, struct nf_conn *ct) { - const struct nf_conn_help *help = nfct_help(ct); + struct nf_conn_help *help = nfct_help(ct); if (attr == NULL) return -EINVAL; @@ -94,7 +94,7 @@ nfnl_cthelper_from_nlattr(struct nlattr *attr, struct nf_conn *ct) if (help->helper->data_len == 0) return -EINVAL; - memcpy(&help->data, nla_data(attr), help->helper->data_len); + memcpy(help->data, nla_data(attr), help->helper->data_len); return 0; } diff --git a/net/netfilter/nft_compat.c 
b/net/netfilter/nft_compat.c index 265e190f2218..c598f74063a1 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -19,6 +19,7 @@ #include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv4/ip_tables.h> #include <linux/netfilter_ipv6/ip6_tables.h> +#include <linux/netfilter_bridge/ebtables.h> #include <net/netfilter/nf_tables.h> static int nft_compat_chain_validate_dependency(const char *tablename, @@ -40,6 +41,7 @@ static int nft_compat_chain_validate_dependency(const char *tablename, union nft_entry { struct ipt_entry e4; struct ip6t_entry e6; + struct ebt_entry ebt; }; static inline void @@ -50,9 +52,9 @@ nft_compat_set_par(struct xt_action_param *par, void *xt, const void *xt_info) par->hotdrop = false; } -static void nft_target_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], - const struct nft_pktinfo *pkt) +static void nft_target_eval_xt(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) { void *info = nft_expr_priv(expr); struct xt_target *target = expr->ops->data; @@ -66,7 +68,7 @@ static void nft_target_eval(const struct nft_expr *expr, if (pkt->xt.hotdrop) ret = NF_DROP; - switch(ret) { + switch (ret) { case XT_CONTINUE: data[NFT_REG_VERDICT].verdict = NFT_CONTINUE; break; @@ -74,7 +76,41 @@ static void nft_target_eval(const struct nft_expr *expr, data[NFT_REG_VERDICT].verdict = ret; break; } - return; +} + +static void nft_target_eval_bridge(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + void *info = nft_expr_priv(expr); + struct xt_target *target = expr->ops->data; + struct sk_buff *skb = pkt->skb; + int ret; + + nft_compat_set_par((struct xt_action_param *)&pkt->xt, target, info); + + ret = target->target(skb, &pkt->xt); + + if (pkt->xt.hotdrop) + ret = NF_DROP; + + switch (ret) { + case EBT_ACCEPT: + data[NFT_REG_VERDICT].verdict = NF_ACCEPT; + break; + case EBT_DROP: + data[NFT_REG_VERDICT].verdict = NF_DROP; + break; + case EBT_CONTINUE: + data[NFT_REG_VERDICT].verdict = NFT_CONTINUE; + break; + case EBT_RETURN: + data[NFT_REG_VERDICT].verdict = NFT_RETURN; + break; + default: + data[NFT_REG_VERDICT].verdict = ret; + break; + } } static const struct nla_policy nft_target_policy[NFTA_TARGET_MAX + 1] = { @@ -100,6 +136,10 @@ nft_target_set_tgchk_param(struct xt_tgchk_param *par, entry->e6.ipv6.proto = proto; entry->e6.ipv6.invflags = inv ? IP6T_INV_PROTO : 0; break; + case NFPROTO_BRIDGE: + entry->ebt.ethproto = proto; + entry->ebt.invflags = inv ? EBT_IPROTO : 0; + break; } par->entryinfo = entry; par->target = target; @@ -307,6 +347,10 @@ nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx, entry->e6.ipv6.proto = proto; entry->e6.ipv6.invflags = inv ? IP6T_INV_PROTO : 0; break; + case NFPROTO_BRIDGE: + entry->ebt.ethproto = proto; + entry->ebt.invflags = inv ? 
EBT_IPROTO : 0; + break; } par->entryinfo = entry; par->match = match; @@ -490,6 +534,9 @@ nfnl_compat_get(struct sock *nfnl, struct sk_buff *skb, case AF_INET6: fmt = "ip6t_%s"; break; + case NFPROTO_BRIDGE: + fmt = "ebt_%s"; + break; default: pr_err("nft_compat: unsupported protocol %d\n", nfmsg->nfgen_family); @@ -663,13 +710,17 @@ nft_target_select_ops(const struct nft_ctx *ctx, nft_target->ops.type = &nft_target_type; nft_target->ops.size = NFT_EXPR_SIZE(XT_ALIGN(target->targetsize)); - nft_target->ops.eval = nft_target_eval; nft_target->ops.init = nft_target_init; nft_target->ops.destroy = nft_target_destroy; nft_target->ops.dump = nft_target_dump; nft_target->ops.validate = nft_target_validate; nft_target->ops.data = target; + if (family == NFPROTO_BRIDGE) + nft_target->ops.eval = nft_target_eval_bridge; + else + nft_target->ops.eval = nft_target_eval_xt; + list_add(&nft_target->head, &nft_target_list); return &nft_target->ops; diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index 1e316ce4cb5d..61e6c407476a 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -33,7 +33,7 @@ static bool nft_hash_lookup(const struct nft_set *set, const struct nft_data *key, struct nft_data *data) { - const struct rhashtable *priv = nft_set_priv(set); + struct rhashtable *priv = nft_set_priv(set); const struct nft_hash_elem *he; he = rhashtable_lookup(priv, key); @@ -83,69 +83,97 @@ static void nft_hash_remove(const struct nft_set *set, const struct nft_set_elem *elem) { struct rhashtable *priv = nft_set_priv(set); - struct rhash_head *he, __rcu **pprev; - pprev = elem->cookie; - he = rht_dereference((*pprev), priv); + rhashtable_remove(priv, elem->cookie); + synchronize_rcu(); + kfree(elem->cookie); +} - rhashtable_remove_pprev(priv, he, pprev); +struct nft_compare_arg { + const struct nft_set *set; + struct nft_set_elem *elem; +}; - synchronize_rcu(); - kfree(he); +static bool nft_hash_compare(void *ptr, void *arg) +{ + struct nft_hash_elem *he = ptr; + struct nft_compare_arg *x = arg; + + if (!nft_data_cmp(&he->key, &x->elem->key, x->set->klen)) { + x->elem->cookie = he; + x->elem->flags = 0; + if (x->set->flags & NFT_SET_MAP) + nft_data_copy(&x->elem->data, he->data); + + return true; + } + + return false; } static int nft_hash_get(const struct nft_set *set, struct nft_set_elem *elem) { - const struct rhashtable *priv = nft_set_priv(set); - const struct bucket_table *tbl = rht_dereference_rcu(priv->tbl, priv); - struct rhash_head __rcu * const *pprev; - struct nft_hash_elem *he; - u32 h; - - h = rhashtable_hashfn(priv, &elem->key, set->klen); - pprev = &tbl->buckets[h]; - rht_for_each_entry_rcu(he, tbl->buckets[h], node) { - if (nft_data_cmp(&he->key, &elem->key, set->klen)) { - pprev = &he->node.next; - continue; - } + struct rhashtable *priv = nft_set_priv(set); + struct nft_compare_arg arg = { + .set = set, + .elem = elem, + }; - elem->cookie = (void *)pprev; - elem->flags = 0; - if (set->flags & NFT_SET_MAP) - nft_data_copy(&elem->data, he->data); + if (rhashtable_lookup_compare(priv, &elem->key, + &nft_hash_compare, &arg)) return 0; - } + return -ENOENT; } static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set, struct nft_set_iter *iter) { - const struct rhashtable *priv = nft_set_priv(set); - const struct bucket_table *tbl; + struct rhashtable *priv = nft_set_priv(set); const struct nft_hash_elem *he; + struct rhashtable_iter hti; struct nft_set_elem elem; - unsigned int i; + int err; - tbl = rht_dereference_rcu(priv->tbl, priv); - 
for (i = 0; i < tbl->size; i++) { - rht_for_each_entry_rcu(he, tbl->buckets[i], node) { - if (iter->count < iter->skip) - goto cont; - - memcpy(&elem.key, &he->key, sizeof(elem.key)); - if (set->flags & NFT_SET_MAP) - memcpy(&elem.data, he->data, sizeof(elem.data)); - elem.flags = 0; - - iter->err = iter->fn(ctx, set, iter, &elem); - if (iter->err < 0) - return; -cont: - iter->count++; + err = rhashtable_walk_init(priv, &hti); + iter->err = err; + if (err) + return; + + err = rhashtable_walk_start(&hti); + if (err && err != -EAGAIN) { + iter->err = err; + goto out; + } + + while ((he = rhashtable_walk_next(&hti))) { + if (IS_ERR(he)) { + err = PTR_ERR(he); + if (err != -EAGAIN) { + iter->err = err; + goto out; + } } + + if (iter->count < iter->skip) + goto cont; + + memcpy(&elem.key, &he->key, sizeof(elem.key)); + if (set->flags & NFT_SET_MAP) + memcpy(&elem.data, he->data, sizeof(elem.data)); + elem.flags = 0; + + iter->err = iter->fn(ctx, set, iter, &elem); + if (iter->err < 0) + goto out; + +cont: + iter->count++; } + +out: + rhashtable_walk_stop(&hti); + rhashtable_walk_exit(&hti); } static unsigned int nft_hash_privsize(const struct nlattr * const nla[]) @@ -153,13 +181,6 @@ static unsigned int nft_hash_privsize(const struct nlattr * const nla[]) return sizeof(struct rhashtable); } -#ifdef CONFIG_PROVE_LOCKING -static int lockdep_nfnl_lock_is_held(void *parent) -{ - return lockdep_nfnl_is_held(NFNL_SUBSYS_NFTABLES); -} -#endif - static int nft_hash_init(const struct nft_set *set, const struct nft_set_desc *desc, const struct nlattr * const tb[]) @@ -173,9 +194,6 @@ static int nft_hash_init(const struct nft_set *set, .hashfn = jhash, .grow_decision = rht_grow_above_75, .shrink_decision = rht_shrink_below_30, -#ifdef CONFIG_PROVE_LOCKING - .mutex_is_held = lockdep_nfnl_lock_is_held, -#endif }; return rhashtable_init(priv, ¶ms); @@ -183,18 +201,23 @@ static int nft_hash_init(const struct nft_set *set, static void nft_hash_destroy(const struct nft_set *set) { - const struct rhashtable *priv = nft_set_priv(set); - const struct bucket_table *tbl = priv->tbl; - struct nft_hash_elem *he, *next; + struct rhashtable *priv = nft_set_priv(set); + const struct bucket_table *tbl; + struct nft_hash_elem *he; + struct rhash_head *pos, *next; unsigned int i; + /* Stop an eventual async resizing */ + priv->being_destroyed = true; + mutex_lock(&priv->mutex); + + tbl = rht_dereference(priv->tbl, priv); for (i = 0; i < tbl->size; i++) { - for (he = rht_entry(tbl->buckets[i], struct nft_hash_elem, node); - he != NULL; he = next) { - next = rht_entry(he->node.next, struct nft_hash_elem, node); + rht_for_each_entry_safe(he, pos, next, tbl, i, node) nft_hash_elem_destroy(set, he); - } } + mutex_unlock(&priv->mutex); + rhashtable_destroy(priv); } diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index 6404a726d17b..9615b8b9fb37 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -39,6 +39,7 @@ static void nft_lookup_eval(const struct nft_expr *expr, static const struct nla_policy nft_lookup_policy[NFTA_LOOKUP_MAX + 1] = { [NFTA_LOOKUP_SET] = { .type = NLA_STRING }, + [NFTA_LOOKUP_SET_ID] = { .type = NLA_U32 }, [NFTA_LOOKUP_SREG] = { .type = NLA_U32 }, [NFTA_LOOKUP_DREG] = { .type = NLA_U32 }, }; diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c index d1ffd5eb3a9b..9aea747b43ea 100644 --- a/net/netfilter/nft_masq.c +++ b/net/netfilter/nft_masq.c @@ -21,6 +21,21 @@ const struct nla_policy nft_masq_policy[NFTA_MASQ_MAX + 1] = { }; 
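The nft_hash rewrite above drops the open-coded bucket walking in favour of the rhashtable helpers: nft_hash_get() packs its matching state into an argument struct and lets rhashtable_lookup_compare() call back per candidate, while nft_hash_remove() just hands the cached cookie to rhashtable_remove() and frees it after synchronize_rcu(). A condensed, kernel-style sketch of the compare-lookup pattern (the demo_* names are illustrative, not part of the patch; the rhashtable calls are the ones the patch itself uses):

struct demo_elem {
        struct rhash_head node;         /* hash linkage, as in nft_hash_elem */
        u32               key;
        u32               flags;
};

struct demo_match_arg {
        u32 key;
        u32 required_flags;
        struct demo_elem *found;
};

/* Called for each candidate hashing to the key's bucket; returning true
 * stops the search and reports a match.
 */
static bool demo_compare(void *ptr, void *arg)
{
        struct demo_elem *e = ptr;
        struct demo_match_arg *m = arg;

        if (e->key != m->key)
                return false;
        if ((e->flags & m->required_flags) != m->required_flags)
                return false;

        m->found = e;
        return true;
}

static struct demo_elem *demo_lookup(struct rhashtable *ht, u32 key, u32 flags)
{
        struct demo_match_arg arg = {
                .key            = key,
                .required_flags = flags,
                .found          = NULL,
        };

        if (rhashtable_lookup_compare(ht, &key, &demo_compare, &arg))
                return arg.found;
        return NULL;
}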
EXPORT_SYMBOL_GPL(nft_masq_policy); +int nft_masq_validate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data) +{ + int err; + + err = nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT); + if (err < 0) + return err; + + return nft_chain_validate_hooks(ctx->chain, + (1 << NF_INET_POST_ROUTING)); +} +EXPORT_SYMBOL_GPL(nft_masq_validate); + int nft_masq_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) @@ -28,8 +43,8 @@ int nft_masq_init(const struct nft_ctx *ctx, struct nft_masq *priv = nft_expr_priv(expr); int err; - err = nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT); - if (err < 0) + err = nft_masq_validate(ctx, expr, NULL); + if (err) return err; if (tb[NFTA_MASQ_FLAGS] == NULL) @@ -60,12 +75,5 @@ nla_put_failure: } EXPORT_SYMBOL_GPL(nft_masq_dump); -int nft_masq_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, - const struct nft_data **data) -{ - return nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT); -} -EXPORT_SYMBOL_GPL(nft_masq_validate); - MODULE_LICENSE("GPL"); MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>"); diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c index afe2b0b45ec4..a0837c6c9283 100644 --- a/net/netfilter/nft_nat.c +++ b/net/netfilter/nft_nat.c @@ -65,10 +65,10 @@ static void nft_nat_eval(const struct nft_expr *expr, } if (priv->sreg_proto_min) { - range.min_proto.all = (__force __be16) - data[priv->sreg_proto_min].data[0]; - range.max_proto.all = (__force __be16) - data[priv->sreg_proto_max].data[0]; + range.min_proto.all = + *(__be16 *)&data[priv->sreg_proto_min].data[0]; + range.max_proto.all = + *(__be16 *)&data[priv->sreg_proto_max].data[0]; range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; } @@ -88,17 +88,40 @@ static const struct nla_policy nft_nat_policy[NFTA_NAT_MAX + 1] = { [NFTA_NAT_FLAGS] = { .type = NLA_U32 }, }; -static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, - const struct nlattr * const tb[]) +static int nft_nat_validate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data) { struct nft_nat *priv = nft_expr_priv(expr); - u32 family; int err; err = nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT); if (err < 0) return err; + switch (priv->type) { + case NFT_NAT_SNAT: + err = nft_chain_validate_hooks(ctx->chain, + (1 << NF_INET_POST_ROUTING) | + (1 << NF_INET_LOCAL_IN)); + break; + case NFT_NAT_DNAT: + err = nft_chain_validate_hooks(ctx->chain, + (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_OUT)); + break; + } + + return err; +} + +static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_nat *priv = nft_expr_priv(expr); + u32 family; + int err; + if (tb[NFTA_NAT_TYPE] == NULL || (tb[NFTA_NAT_REG_ADDR_MIN] == NULL && tb[NFTA_NAT_REG_PROTO_MIN] == NULL)) @@ -115,6 +138,10 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, return -EINVAL; } + err = nft_nat_validate(ctx, expr, NULL); + if (err < 0) + return err; + if (tb[NFTA_NAT_FAMILY] == NULL) return -EINVAL; @@ -219,13 +246,6 @@ nla_put_failure: return -1; } -static int nft_nat_validate(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nft_data **data) -{ - return nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT); -} - static struct nft_expr_type nft_nat_type; static const struct nft_expr_ops nft_nat_ops = { .type = 
&nft_nat_type, diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c index 9e8093f28311..d7e9e93a4e90 100644 --- a/net/netfilter/nft_redir.c +++ b/net/netfilter/nft_redir.c @@ -23,6 +23,22 @@ const struct nla_policy nft_redir_policy[NFTA_REDIR_MAX + 1] = { }; EXPORT_SYMBOL_GPL(nft_redir_policy); +int nft_redir_validate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data) +{ + int err; + + err = nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT); + if (err < 0) + return err; + + return nft_chain_validate_hooks(ctx->chain, + (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_OUT)); +} +EXPORT_SYMBOL_GPL(nft_redir_validate); + int nft_redir_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) @@ -30,7 +46,7 @@ int nft_redir_init(const struct nft_ctx *ctx, struct nft_redir *priv = nft_expr_priv(expr); int err; - err = nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT); + err = nft_redir_validate(ctx, expr, NULL); if (err < 0) return err; @@ -88,12 +104,5 @@ nla_put_failure: } EXPORT_SYMBOL_GPL(nft_redir_dump); -int nft_redir_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, - const struct nft_data **data) -{ - return nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT); -} -EXPORT_SYMBOL_GPL(nft_redir_validate); - MODULE_LICENSE("GPL"); MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>"); diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c index c529161cdbf8..0778855ea5e7 100644 --- a/net/netfilter/xt_osf.c +++ b/net/netfilter/xt_osf.c @@ -225,6 +225,8 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p) rcu_read_lock(); list_for_each_entry_rcu(kf, &xt_osf_fingers[df], finger_entry) { + int foptsize, optnum; + f = &kf->finger; if (!(info->flags & XT_OSF_LOG) && strcmp(info->genre, f->genre)) @@ -233,110 +235,109 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p) optp = _optp; fmatch = FMATCH_WRONG; - if (totlen == f->ss && xt_osf_ttl(skb, info, f->ttl)) { - int foptsize, optnum; + if (totlen != f->ss || !xt_osf_ttl(skb, info, f->ttl)) + continue; - /* - * Should not happen if userspace parser was written correctly. - */ - if (f->wss.wc >= OSF_WSS_MAX) - continue; + /* + * Should not happen if userspace parser was written correctly. 
+ */ + if (f->wss.wc >= OSF_WSS_MAX) + continue; - /* Check options */ + /* Check options */ - foptsize = 0; - for (optnum = 0; optnum < f->opt_num; ++optnum) - foptsize += f->opt[optnum].length; + foptsize = 0; + for (optnum = 0; optnum < f->opt_num; ++optnum) + foptsize += f->opt[optnum].length; - if (foptsize > MAX_IPOPTLEN || - optsize > MAX_IPOPTLEN || - optsize != foptsize) - continue; + if (foptsize > MAX_IPOPTLEN || + optsize > MAX_IPOPTLEN || + optsize != foptsize) + continue; - check_WSS = f->wss.wc; + check_WSS = f->wss.wc; - for (optnum = 0; optnum < f->opt_num; ++optnum) { - if (f->opt[optnum].kind == (*optp)) { - __u32 len = f->opt[optnum].length; - const __u8 *optend = optp + len; - int loop_cont = 0; + for (optnum = 0; optnum < f->opt_num; ++optnum) { + if (f->opt[optnum].kind == (*optp)) { + __u32 len = f->opt[optnum].length; + const __u8 *optend = optp + len; + int loop_cont = 0; - fmatch = FMATCH_OK; + fmatch = FMATCH_OK; - switch (*optp) { - case OSFOPT_MSS: - mss = optp[3]; - mss <<= 8; - mss |= optp[2]; + switch (*optp) { + case OSFOPT_MSS: + mss = optp[3]; + mss <<= 8; + mss |= optp[2]; - mss = ntohs((__force __be16)mss); - break; - case OSFOPT_TS: - loop_cont = 1; - break; - } + mss = ntohs((__force __be16)mss); + break; + case OSFOPT_TS: + loop_cont = 1; + break; + } - optp = optend; - } else - fmatch = FMATCH_OPT_WRONG; + optp = optend; + } else + fmatch = FMATCH_OPT_WRONG; - if (fmatch != FMATCH_OK) - break; - } + if (fmatch != FMATCH_OK) + break; + } - if (fmatch != FMATCH_OPT_WRONG) { - fmatch = FMATCH_WRONG; + if (fmatch != FMATCH_OPT_WRONG) { + fmatch = FMATCH_WRONG; - switch (check_WSS) { - case OSF_WSS_PLAIN: - if (f->wss.val == 0 || window == f->wss.val) - fmatch = FMATCH_OK; - break; - case OSF_WSS_MSS: - /* - * Some smart modems decrease mangle MSS to - * SMART_MSS_2, so we check standard, decreased - * and the one provided in the fingerprint MSS - * values. - */ + switch (check_WSS) { + case OSF_WSS_PLAIN: + if (f->wss.val == 0 || window == f->wss.val) + fmatch = FMATCH_OK; + break; + case OSF_WSS_MSS: + /* + * Some smart modems decrease mangle MSS to + * SMART_MSS_2, so we check standard, decreased + * and the one provided in the fingerprint MSS + * values. 
+ */ #define SMART_MSS_1 1460 #define SMART_MSS_2 1448 - if (window == f->wss.val * mss || - window == f->wss.val * SMART_MSS_1 || - window == f->wss.val * SMART_MSS_2) - fmatch = FMATCH_OK; - break; - case OSF_WSS_MTU: - if (window == f->wss.val * (mss + 40) || - window == f->wss.val * (SMART_MSS_1 + 40) || - window == f->wss.val * (SMART_MSS_2 + 40)) - fmatch = FMATCH_OK; - break; - case OSF_WSS_MODULO: - if ((window % f->wss.val) == 0) - fmatch = FMATCH_OK; - break; - } + if (window == f->wss.val * mss || + window == f->wss.val * SMART_MSS_1 || + window == f->wss.val * SMART_MSS_2) + fmatch = FMATCH_OK; + break; + case OSF_WSS_MTU: + if (window == f->wss.val * (mss + 40) || + window == f->wss.val * (SMART_MSS_1 + 40) || + window == f->wss.val * (SMART_MSS_2 + 40)) + fmatch = FMATCH_OK; + break; + case OSF_WSS_MODULO: + if ((window % f->wss.val) == 0) + fmatch = FMATCH_OK; + break; } + } - if (fmatch != FMATCH_OK) - continue; + if (fmatch != FMATCH_OK) + continue; - fcount++; + fcount++; - if (info->flags & XT_OSF_LOG) - nf_log_packet(net, p->family, p->hooknum, skb, - p->in, p->out, NULL, - "%s [%s:%s] : %pI4:%d -> %pI4:%d hops=%d\n", - f->genre, f->version, f->subtype, - &ip->saddr, ntohs(tcp->source), - &ip->daddr, ntohs(tcp->dest), - f->ttl - ip->ttl); + if (info->flags & XT_OSF_LOG) + nf_log_packet(net, p->family, p->hooknum, skb, + p->in, p->out, NULL, + "%s [%s:%s] : %pI4:%d -> %pI4:%d hops=%d\n", + f->genre, f->version, f->subtype, + &ip->saddr, ntohs(tcp->source), + &ip->daddr, ntohs(tcp->dest), + f->ttl - ip->ttl); - if ((info->flags & XT_OSF_LOG) && - info->loglevel == XT_OSF_LOGLEVEL_FIRST) - break; - } + if ((info->flags & XT_OSF_LOG) && + info->loglevel == XT_OSF_LOGLEVEL_FIRST) + break; } rcu_read_unlock(); diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index c2f2a53a4879..7fd1104ba900 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c @@ -324,8 +324,7 @@ static int netlbl_cipsov4_add_std(struct genl_info *info, return 0; add_std_failure: - if (doi_def) - cipso_v4_doi_free(doi_def); + cipso_v4_doi_free(doi_def); return ret_val; } @@ -641,7 +640,8 @@ static int netlbl_cipsov4_listall_cb(struct cipso_v4_doi *doi_def, void *arg) if (ret_val != 0) goto listall_cb_failure; - return genlmsg_end(cb_arg->skb, data); + genlmsg_end(cb_arg->skb, data); + return 0; listall_cb_failure: genlmsg_cancel(cb_arg->skb, data); diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index a845cd4cf21e..28cddc85b700 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -1065,10 +1065,12 @@ int netlbl_skbuff_getattr(const struct sk_buff *skb, u16 family, struct netlbl_lsm_secattr *secattr) { + unsigned char *ptr; + switch (family) { case AF_INET: - if (CIPSO_V4_OPTEXIST(skb) && - cipso_v4_skbuff_getattr(skb, secattr) == 0) + ptr = cipso_v4_optptr(skb); + if (ptr && cipso_v4_getattr(ptr, secattr) == 0) return 0; break; #if IS_ENABLED(CONFIG_IPV6) @@ -1094,7 +1096,7 @@ int netlbl_skbuff_getattr(const struct sk_buff *skb, */ void netlbl_skbuff_err(struct sk_buff *skb, int error, int gateway) { - if (CIPSO_V4_OPTEXIST(skb)) + if (cipso_v4_optptr(skb)) cipso_v4_error(skb, error, gateway); } @@ -1126,11 +1128,14 @@ void netlbl_cache_invalidate(void) int netlbl_cache_add(const struct sk_buff *skb, const struct netlbl_lsm_secattr *secattr) { + unsigned char *ptr; + if ((secattr->flags & NETLBL_SECATTR_CACHE) == 0) return -ENOMSG; - if (CIPSO_V4_OPTEXIST(skb)) - return cipso_v4_cache_add(skb, 
secattr); + ptr = cipso_v4_optptr(skb); + if (ptr) + return cipso_v4_cache_add(ptr, secattr); return -ENOMSG; } diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index e66e977ef2fa..70440748fe5c 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -93,23 +93,20 @@ static int netlbl_mgmt_add_common(struct genl_info *info, struct netlbl_audit *audit_info) { int ret_val = -EINVAL; - struct netlbl_dom_map *entry = NULL; struct netlbl_domaddr_map *addrmap = NULL; struct cipso_v4_doi *cipsov4 = NULL; u32 tmp_val; + struct netlbl_dom_map *entry = kzalloc(sizeof(*entry), GFP_KERNEL); - entry = kzalloc(sizeof(*entry), GFP_KERNEL); - if (entry == NULL) { - ret_val = -ENOMEM; - goto add_failure; - } + if (!entry) + return -ENOMEM; entry->def.type = nla_get_u32(info->attrs[NLBL_MGMT_A_PROTOCOL]); if (info->attrs[NLBL_MGMT_A_DOMAIN]) { size_t tmp_size = nla_len(info->attrs[NLBL_MGMT_A_DOMAIN]); entry->domain = kmalloc(tmp_size, GFP_KERNEL); if (entry->domain == NULL) { ret_val = -ENOMEM; - goto add_failure; + goto add_free_entry; } nla_strlcpy(entry->domain, info->attrs[NLBL_MGMT_A_DOMAIN], tmp_size); @@ -125,16 +122,16 @@ static int netlbl_mgmt_add_common(struct genl_info *info, break; case NETLBL_NLTYPE_CIPSOV4: if (!info->attrs[NLBL_MGMT_A_CV4DOI]) - goto add_failure; + goto add_free_domain; tmp_val = nla_get_u32(info->attrs[NLBL_MGMT_A_CV4DOI]); cipsov4 = cipso_v4_doi_getdef(tmp_val); if (cipsov4 == NULL) - goto add_failure; + goto add_free_domain; entry->def.cipso = cipsov4; break; default: - goto add_failure; + goto add_free_domain; } if (info->attrs[NLBL_MGMT_A_IPV4ADDR]) { @@ -145,7 +142,7 @@ static int netlbl_mgmt_add_common(struct genl_info *info, addrmap = kzalloc(sizeof(*addrmap), GFP_KERNEL); if (addrmap == NULL) { ret_val = -ENOMEM; - goto add_failure; + goto add_doi_put_def; } INIT_LIST_HEAD(&addrmap->list4); INIT_LIST_HEAD(&addrmap->list6); @@ -153,12 +150,12 @@ static int netlbl_mgmt_add_common(struct genl_info *info, if (nla_len(info->attrs[NLBL_MGMT_A_IPV4ADDR]) != sizeof(struct in_addr)) { ret_val = -EINVAL; - goto add_failure; + goto add_free_addrmap; } if (nla_len(info->attrs[NLBL_MGMT_A_IPV4MASK]) != sizeof(struct in_addr)) { ret_val = -EINVAL; - goto add_failure; + goto add_free_addrmap; } addr = nla_data(info->attrs[NLBL_MGMT_A_IPV4ADDR]); mask = nla_data(info->attrs[NLBL_MGMT_A_IPV4MASK]); @@ -166,7 +163,7 @@ static int netlbl_mgmt_add_common(struct genl_info *info, map = kzalloc(sizeof(*map), GFP_KERNEL); if (map == NULL) { ret_val = -ENOMEM; - goto add_failure; + goto add_free_addrmap; } map->list.addr = addr->s_addr & mask->s_addr; map->list.mask = mask->s_addr; @@ -178,7 +175,7 @@ static int netlbl_mgmt_add_common(struct genl_info *info, ret_val = netlbl_af4list_add(&map->list, &addrmap->list4); if (ret_val != 0) { kfree(map); - goto add_failure; + goto add_free_addrmap; } entry->def.type = NETLBL_NLTYPE_ADDRSELECT; @@ -192,7 +189,7 @@ static int netlbl_mgmt_add_common(struct genl_info *info, addrmap = kzalloc(sizeof(*addrmap), GFP_KERNEL); if (addrmap == NULL) { ret_val = -ENOMEM; - goto add_failure; + goto add_doi_put_def; } INIT_LIST_HEAD(&addrmap->list4); INIT_LIST_HEAD(&addrmap->list6); @@ -200,12 +197,12 @@ static int netlbl_mgmt_add_common(struct genl_info *info, if (nla_len(info->attrs[NLBL_MGMT_A_IPV6ADDR]) != sizeof(struct in6_addr)) { ret_val = -EINVAL; - goto add_failure; + goto add_free_addrmap; } if (nla_len(info->attrs[NLBL_MGMT_A_IPV6MASK]) != sizeof(struct in6_addr)) { ret_val = -EINVAL; - goto add_failure; 
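Further up, the netlabel_kapi.c hunks replace the CIPSO_V4_OPTEXIST() macro with cipso_v4_optptr(): the IPv4 CIPSO option is located once and the resulting pointer is fed straight to cipso_v4_getattr() and cipso_v4_cache_add(). A minimal sketch of that locate-once shape (demo_peek_cipso() is an illustrative name; only the two cipso_v4_* calls from the patch are assumed):

static int demo_peek_cipso(const struct sk_buff *skb,
                           struct netlbl_lsm_secattr *secattr)
{
        unsigned char *ptr;

        ptr = cipso_v4_optptr(skb);     /* NULL when no CIPSO option is present */
        if (!ptr)
                return -ENOMSG;

        /* The parser works on the option bytes directly instead of
         * re-scanning the IP header inside every helper.
         */
        return cipso_v4_getattr(ptr, secattr);
}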
+ goto add_free_addrmap; } addr = nla_data(info->attrs[NLBL_MGMT_A_IPV6ADDR]); mask = nla_data(info->attrs[NLBL_MGMT_A_IPV6MASK]); @@ -213,7 +210,7 @@ static int netlbl_mgmt_add_common(struct genl_info *info, map = kzalloc(sizeof(*map), GFP_KERNEL); if (map == NULL) { ret_val = -ENOMEM; - goto add_failure; + goto add_free_addrmap; } map->list.addr = *addr; map->list.addr.s6_addr32[0] &= mask->s6_addr32[0]; @@ -227,7 +224,7 @@ static int netlbl_mgmt_add_common(struct genl_info *info, ret_val = netlbl_af6list_add(&map->list, &addrmap->list6); if (ret_val != 0) { kfree(map); - goto add_failure; + goto add_free_addrmap; } entry->def.type = NETLBL_NLTYPE_ADDRSELECT; @@ -237,16 +234,17 @@ static int netlbl_mgmt_add_common(struct genl_info *info, ret_val = netlbl_domhsh_add(entry, audit_info); if (ret_val != 0) - goto add_failure; + goto add_free_addrmap; return 0; -add_failure: - if (cipsov4) - cipso_v4_doi_putdef(cipsov4); - if (entry) - kfree(entry->domain); +add_free_addrmap: kfree(addrmap); +add_doi_put_def: + cipso_v4_doi_putdef(cipsov4); +add_free_domain: + kfree(entry->domain); +add_free_entry: kfree(entry); return ret_val; } @@ -456,7 +454,8 @@ static int netlbl_mgmt_listall_cb(struct netlbl_dom_map *entry, void *arg) goto listall_cb_failure; cb_arg->seq++; - return genlmsg_end(cb_arg->skb, data); + genlmsg_end(cb_arg->skb, data); + return 0; listall_cb_failure: genlmsg_cancel(cb_arg->skb, data); @@ -620,7 +619,8 @@ static int netlbl_mgmt_protocols_cb(struct sk_buff *skb, if (ret_val != 0) goto protocols_cb_failure; - return genlmsg_end(skb, data); + genlmsg_end(skb, data); + return 0; protocols_cb_failure: genlmsg_cancel(skb, data); diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index 78a63c18779e..aec7994f78cf 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -1163,7 +1163,8 @@ static int netlbl_unlabel_staticlist_gen(u32 cmd, goto list_cb_failure; cb_arg->seq++; - return genlmsg_end(cb_arg->skb, data); + genlmsg_end(cb_arg->skb, data); + return 0; list_cb_failure: genlmsg_cancel(cb_arg->skb, data); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 074cf3e91c6f..2702673f0f23 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -61,6 +61,7 @@ #include <linux/rhashtable.h> #include <asm/cacheflush.h> #include <linux/hash.h> +#include <linux/genetlink.h> #include <net/net_namespace.h> #include <net/sock.h> @@ -97,12 +98,12 @@ static int netlink_dump(struct sock *sk); static void netlink_skb_destructor(struct sk_buff *skb); /* nl_table locking explained: - * Lookup and traversal are protected with nl_sk_hash_lock or nl_table_lock - * combined with an RCU read-side lock. Insertion and removal are protected - * with nl_sk_hash_lock while using RCU list modification primitives and may - * run in parallel to nl_table_lock protected lookups. Destruction of the - * Netlink socket may only occur *after* nl_table_lock has been acquired - * either during or after the socket has been removed from the list. + * Lookup and traversal are protected with an RCU read-side lock. Insertion + * and removal are protected with per bucket lock while using RCU list + * modification primitives and may run in parallel to RCU protected lookups. + * Destruction of the Netlink socket may only occur *after* nl_table_lock has + * been acquired * either during or after the socket has been removed from + * the list and after an RCU grace period. 
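The netlbl_mgmt_add_common() conversion above swaps the single catch-all add_failure label for one unwind label per resource, so each error site jumps to exactly the cleanup it needs. A small kernel-style sketch of the idiom, independent of netlabel (demo_* names are illustrative; kzalloc/kstrdup/kfree are the usual slab helpers):

struct demo_entry {
        char *domain;
        char *addrmap;
};

static struct demo_entry *demo_add(const char *domain, const char *addr)
{
        struct demo_entry *entry = kzalloc(sizeof(*entry), GFP_KERNEL);

        if (!entry)
                return NULL;

        entry->domain = kstrdup(domain, GFP_KERNEL);
        if (!entry->domain)
                goto free_entry;

        entry->addrmap = kstrdup(addr, GFP_KERNEL);
        if (!entry->addrmap)
                goto free_domain;

        return entry;                   /* success: caller owns everything */

        /* Labels are ordered newest resource first, so every error site
         * jumps to the label matching exactly what has been allocated.
         */
free_domain:
        kfree(entry->domain);
free_entry:
        kfree(entry);
        return NULL;
}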
*/ DEFINE_RWLOCK(nl_table_lock); EXPORT_SYMBOL_GPL(nl_table_lock); @@ -110,19 +111,6 @@ static atomic_t nl_table_users = ATOMIC_INIT(0); #define nl_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&nl_table_lock)); -/* Protects netlink socket hash table mutations */ -DEFINE_MUTEX(nl_sk_hash_lock); -EXPORT_SYMBOL_GPL(nl_sk_hash_lock); - -#ifdef CONFIG_PROVE_LOCKING -static int lockdep_nl_sk_hash_is_held(void *parent) -{ - if (debug_locks) - return lockdep_is_held(&nl_sk_hash_lock) || lockdep_is_held(&nl_table_lock); - return 1; -} -#endif - static ATOMIC_NOTIFIER_HEAD(netlink_chain); static DEFINE_SPINLOCK(netlink_tap_lock); @@ -707,7 +695,7 @@ static void netlink_ring_setup_skb(struct sk_buff *skb, struct sock *sk, static int netlink_mmap_sendmsg(struct sock *sk, struct msghdr *msg, u32 dst_portid, u32 dst_group, - struct sock_iocb *siocb) + struct scm_cookie *scm) { struct netlink_sock *nlk = nlk_sk(sk); struct netlink_ring *ring; @@ -753,7 +741,7 @@ static int netlink_mmap_sendmsg(struct sock *sk, struct msghdr *msg, NETLINK_CB(skb).portid = nlk->portid; NETLINK_CB(skb).dst_group = dst_group; - NETLINK_CB(skb).creds = siocb->scm->creds; + NETLINK_CB(skb).creds = scm->creds; err = security_netlink_send(sk, skb); if (err) { @@ -832,7 +820,7 @@ static void netlink_ring_set_copied(struct sock *sk, struct sk_buff *skb) #define netlink_tx_is_mmaped(sk) false #define netlink_mmap sock_no_mmap #define netlink_poll datagram_poll -#define netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, siocb) 0 +#define netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, scm) 0 #endif /* CONFIG_NETLINK_MMAP */ static void netlink_skb_destructor(struct sk_buff *skb) @@ -1002,26 +990,33 @@ static struct sock *__netlink_lookup(struct netlink_table *table, u32 portid, .net = net, .portid = portid, }; - u32 hash; - - hash = rhashtable_hashfn(&table->hash, &portid, sizeof(portid)); - return rhashtable_lookup_compare(&table->hash, hash, + return rhashtable_lookup_compare(&table->hash, &portid, &netlink_compare, &arg); } +static bool __netlink_insert(struct netlink_table *table, struct sock *sk) +{ + struct netlink_compare_arg arg = { + .net = sock_net(sk), + .portid = nlk_sk(sk)->portid, + }; + + return rhashtable_lookup_compare_insert(&table->hash, + &nlk_sk(sk)->node, + &netlink_compare, &arg); +} + static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid) { struct netlink_table *table = &nl_table[protocol]; struct sock *sk; - read_lock(&nl_table_lock); rcu_read_lock(); sk = __netlink_lookup(table, portid, net); if (sk) sock_hold(sk); rcu_read_unlock(); - read_unlock(&nl_table_lock); return sk; } @@ -1052,29 +1047,33 @@ netlink_update_listeners(struct sock *sk) * makes sure updates are visible before bind or setsockopt return. 
*/ } -static int netlink_insert(struct sock *sk, struct net *net, u32 portid) +static int netlink_insert(struct sock *sk, u32 portid) { struct netlink_table *table = &nl_table[sk->sk_protocol]; - int err = -EADDRINUSE; + int err; - mutex_lock(&nl_sk_hash_lock); - if (__netlink_lookup(table, portid, net)) - goto err; + lock_sock(sk); err = -EBUSY; if (nlk_sk(sk)->portid) goto err; err = -ENOMEM; - if (BITS_PER_LONG > 32 && unlikely(table->hash.nelems >= UINT_MAX)) + if (BITS_PER_LONG > 32 && + unlikely(atomic_read(&table->hash.nelems) >= UINT_MAX)) goto err; nlk_sk(sk)->portid = portid; sock_hold(sk); - rhashtable_insert(&table->hash, &nlk_sk(sk)->node); + err = 0; + if (!__netlink_insert(table, sk)) { + err = -EADDRINUSE; + sock_put(sk); + } + err: - mutex_unlock(&nl_sk_hash_lock); + release_sock(sk); return err; } @@ -1082,17 +1081,19 @@ static void netlink_remove(struct sock *sk) { struct netlink_table *table; - mutex_lock(&nl_sk_hash_lock); table = &nl_table[sk->sk_protocol]; if (rhashtable_remove(&table->hash, &nlk_sk(sk)->node)) { WARN_ON(atomic_read(&sk->sk_refcnt) == 1); __sock_put(sk); } - mutex_unlock(&nl_sk_hash_lock); netlink_table_grab(); - if (nlk_sk(sk)->subscriptions) + if (nlk_sk(sk)->subscriptions) { __sk_del_bind_node(sk); + netlink_update_listeners(sk); + } + if (sk->sk_protocol == NETLINK_GENERIC) + atomic_inc(&genl_sk_destructing_cnt); netlink_table_ungrab(); } @@ -1139,8 +1140,8 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol, struct module *module = NULL; struct mutex *cb_mutex; struct netlink_sock *nlk; - int (*bind)(int group); - void (*unbind)(int group); + int (*bind)(struct net *net, int group); + void (*unbind)(struct net *net, int group); int err = 0; sock->state = SS_UNCONNECTED; @@ -1192,6 +1193,13 @@ out_module: goto out; } +static void deferred_put_nlk_sk(struct rcu_head *head) +{ + struct netlink_sock *nlk = container_of(head, struct netlink_sock, rcu); + + sock_put(&nlk->sk); +} + static int netlink_release(struct socket *sock) { struct sock *sk = sock->sk; @@ -1209,6 +1217,20 @@ static int netlink_release(struct socket *sock) * will be purged. 
*/ + /* must not acquire netlink_table_lock in any way again before unbind + * and notifying genetlink is done as otherwise it might deadlock + */ + if (nlk->netlink_unbind) { + int i; + + for (i = 0; i < nlk->ngroups; i++) + if (test_bit(i, nlk->groups)) + nlk->netlink_unbind(sock_net(sk), i + 1); + } + if (sk->sk_protocol == NETLINK_GENERIC && + atomic_dec_return(&genl_sk_destructing_cnt) == 0) + wake_up(&genl_sk_destructing_waitq); + sock->sk = NULL; wake_up_interruptible_all(&nlk->wait); @@ -1226,8 +1248,8 @@ static int netlink_release(struct socket *sock) module_put(nlk->module); - netlink_table_grab(); if (netlink_is_kernel(sk)) { + netlink_table_grab(); BUG_ON(nl_table[sk->sk_protocol].registered == 0); if (--nl_table[sk->sk_protocol].registered == 0) { struct listeners *old; @@ -1241,10 +1263,8 @@ static int netlink_release(struct socket *sock) nl_table[sk->sk_protocol].flags = 0; nl_table[sk->sk_protocol].registered = 0; } - } else if (nlk->subscriptions) { - netlink_update_listeners(sk); + netlink_table_ungrab(); } - netlink_table_ungrab(); kfree(nlk->groups); nlk->groups = NULL; @@ -1252,7 +1272,7 @@ static int netlink_release(struct socket *sock) local_bh_disable(); sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1); local_bh_enable(); - sock_put(sk); + call_rcu(&nlk->rcu, deferred_put_nlk_sk); return 0; } @@ -1267,7 +1287,6 @@ static int netlink_autobind(struct socket *sock) retry: cond_resched(); - netlink_table_grab(); rcu_read_lock(); if (__netlink_lookup(table, portid, net)) { /* Bind collision, search negative portid values. */ @@ -1275,13 +1294,11 @@ retry: if (rover > -4097) rover = -4097; rcu_read_unlock(); - netlink_table_ungrab(); goto retry; } rcu_read_unlock(); - netlink_table_ungrab(); - err = netlink_insert(sk, net, portid); + err = netlink_insert(sk, portid); if (err == -EADDRINUSE) goto retry; @@ -1410,9 +1427,10 @@ static int netlink_realloc_groups(struct sock *sk) return err; } -static void netlink_unbind(int group, long unsigned int groups, - struct netlink_sock *nlk) +static void netlink_undo_bind(int group, long unsigned int groups, + struct sock *sk) { + struct netlink_sock *nlk = nlk_sk(sk); int undo; if (!nlk->netlink_unbind) @@ -1420,7 +1438,7 @@ static void netlink_unbind(int group, long unsigned int groups, for (undo = 0; undo < group; undo++) if (test_bit(undo, &groups)) - nlk->netlink_unbind(undo); + nlk->netlink_unbind(sock_net(sk), undo + 1); } static int netlink_bind(struct socket *sock, struct sockaddr *addr, @@ -1458,20 +1476,20 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, for (group = 0; group < nlk->ngroups; group++) { if (!test_bit(group, &groups)) continue; - err = nlk->netlink_bind(group); + err = nlk->netlink_bind(net, group + 1); if (!err) continue; - netlink_unbind(group, groups, nlk); + netlink_undo_bind(group, groups, sk); return err; } } if (!nlk->portid) { err = nladdr->nl_pid ? 
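netlink_release() above no longer drops the last socket reference inline: lookups now run under nothing but rcu_read_lock(), so the final sock_put() is deferred through call_rcu() via the rcu_head added to struct netlink_sock. A minimal sketch of that deferred-put idiom (demo_* names are illustrative; atomic_t, call_rcu() and container_of() are standard kernel primitives):

struct demo_obj {
        atomic_t        ref;
        struct rcu_head rcu;            /* embedded, like the new netlink_sock->rcu */
};

static void demo_obj_rcu_free(struct rcu_head *head)
{
        struct demo_obj *obj = container_of(head, struct demo_obj, rcu);

        kfree(obj);
}

static void demo_obj_put(struct demo_obj *obj)
{
        if (!atomic_dec_and_test(&obj->ref))
                return;

        /* RCU readers that looked the object up before it was unlinked
         * may still hold a pointer; free only after their grace period.
         */
        call_rcu(&obj->rcu, demo_obj_rcu_free);
}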
- netlink_insert(sk, net, nladdr->nl_pid) : + netlink_insert(sk, nladdr->nl_pid) : netlink_autobind(sock); if (err) { - netlink_unbind(nlk->ngroups, groups, nlk); + netlink_undo_bind(nlk->ngroups, groups, sk); return err; } } @@ -2122,7 +2140,7 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, if (!val || val - 1 >= nlk->ngroups) return -EINVAL; if (optname == NETLINK_ADD_MEMBERSHIP && nlk->netlink_bind) { - err = nlk->netlink_bind(val); + err = nlk->netlink_bind(sock_net(sk), val); if (err) return err; } @@ -2131,7 +2149,7 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, optname == NETLINK_ADD_MEMBERSHIP); netlink_table_ungrab(); if (optname == NETLINK_DROP_MEMBERSHIP && nlk->netlink_unbind) - nlk->netlink_unbind(val); + nlk->netlink_unbind(sock_net(sk), val); err = 0; break; @@ -2241,7 +2259,6 @@ static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb) static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, struct msghdr *msg, size_t len) { - struct sock_iocb *siocb = kiocb_to_siocb(kiocb); struct sock *sk = sock->sk; struct netlink_sock *nlk = nlk_sk(sk); DECLARE_SOCKADDR(struct sockaddr_nl *, addr, msg->msg_name); @@ -2255,10 +2272,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, if (msg->msg_flags&MSG_OOB) return -EOPNOTSUPP; - if (NULL == siocb->scm) - siocb->scm = &scm; - - err = scm_send(sock, msg, siocb->scm, true); + err = scm_send(sock, msg, &scm, true); if (err < 0) return err; @@ -2284,10 +2298,15 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, goto out; } + /* It's a really convoluted way for userland to ask for mmaped + * sendmsg(), but that's what we've got... + */ if (netlink_tx_is_mmaped(sk) && + msg->msg_iter.type == ITER_IOVEC && + msg->msg_iter.nr_segs == 1 && msg->msg_iter.iov->iov_base == NULL) { err = netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, - siocb); + &scm); goto out; } @@ -2301,7 +2320,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, NETLINK_CB(skb).portid = nlk->portid; NETLINK_CB(skb).dst_group = dst_group; - NETLINK_CB(skb).creds = siocb->scm->creds; + NETLINK_CB(skb).creds = scm.creds; NETLINK_CB(skb).flags = netlink_skb_flags; err = -EFAULT; @@ -2323,7 +2342,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, err = netlink_unicast(sk, skb, dst_portid, msg->msg_flags&MSG_DONTWAIT); out: - scm_destroy(siocb->scm); + scm_destroy(&scm); return err; } @@ -2331,7 +2350,6 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, struct msghdr *msg, size_t len, int flags) { - struct sock_iocb *siocb = kiocb_to_siocb(kiocb); struct scm_cookie scm; struct sock *sk = sock->sk; struct netlink_sock *nlk = nlk_sk(sk); @@ -2394,11 +2412,8 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, if (nlk->flags & NETLINK_RECV_PKTINFO) netlink_cmsg_recv_pktinfo(msg, skb); - if (NULL == siocb->scm) { - memset(&scm, 0, sizeof(scm)); - siocb->scm = &scm; - } - siocb->scm->creds = *NETLINK_CREDS(skb); + memset(&scm, 0, sizeof(scm)); + scm.creds = *NETLINK_CREDS(skb); if (flags & MSG_TRUNC) copied = data_skb->len; @@ -2413,7 +2428,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, } } - scm_recv(sock, msg, siocb->scm, flags); + scm_recv(sock, msg, &scm, flags); out: netlink_rcv_wake(sk); return err ? 
: copied; @@ -2474,7 +2489,7 @@ __netlink_kernel_create(struct net *net, int unit, struct module *module, if (cfg && cfg->input) nlk_sk(sk)->netlink_rcv = cfg->input; - if (netlink_insert(sk, net, 0)) + if (netlink_insert(sk, 0)) goto out_sock_release; nlk = nlk_sk(sk); @@ -2876,97 +2891,97 @@ EXPORT_SYMBOL(nlmsg_notify); #ifdef CONFIG_PROC_FS struct nl_seq_iter { struct seq_net_private p; + struct rhashtable_iter hti; int link; - int hash_idx; }; -static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos) +static int netlink_walk_start(struct nl_seq_iter *iter) { - struct nl_seq_iter *iter = seq->private; - int i, j; - struct netlink_sock *nlk; - struct sock *s; - loff_t off = 0; - - for (i = 0; i < MAX_LINKS; i++) { - struct rhashtable *ht = &nl_table[i].hash; - const struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); - - for (j = 0; j < tbl->size; j++) { - rht_for_each_entry_rcu(nlk, tbl->buckets[j], node) { - s = (struct sock *)nlk; + int err; - if (sock_net(s) != seq_file_net(seq)) - continue; - if (off == pos) { - iter->link = i; - iter->hash_idx = j; - return s; - } - ++off; - } - } + err = rhashtable_walk_init(&nl_table[iter->link].hash, &iter->hti); + if (err) { + iter->link = MAX_LINKS; + return err; } - return NULL; + + err = rhashtable_walk_start(&iter->hti); + return err == -EAGAIN ? 0 : err; } -static void *netlink_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(nl_table_lock) __acquires(RCU) +static void netlink_walk_stop(struct nl_seq_iter *iter) { - read_lock(&nl_table_lock); - rcu_read_lock(); - return *pos ? netlink_seq_socket_idx(seq, *pos - 1) : SEQ_START_TOKEN; + rhashtable_walk_stop(&iter->hti); + rhashtable_walk_exit(&iter->hti); } -static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) +static void *__netlink_seq_next(struct seq_file *seq) { - struct rhashtable *ht; + struct nl_seq_iter *iter = seq->private; struct netlink_sock *nlk; - struct nl_seq_iter *iter; - struct net *net; - int i, j; - ++*pos; + do { + for (;;) { + int err; - if (v == SEQ_START_TOKEN) - return netlink_seq_socket_idx(seq, 0); + nlk = rhashtable_walk_next(&iter->hti); - net = seq_file_net(seq); - iter = seq->private; - nlk = v; + if (IS_ERR(nlk)) { + if (PTR_ERR(nlk) == -EAGAIN) + continue; - i = iter->link; - ht = &nl_table[i].hash; - rht_for_each_entry(nlk, nlk->node.next, ht, node) - if (net_eq(sock_net((struct sock *)nlk), net)) - return nlk; + return nlk; + } - j = iter->hash_idx + 1; + if (nlk) + break; - do { - const struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); - - for (; j < tbl->size; j++) { - rht_for_each_entry(nlk, tbl->buckets[j], ht, node) { - if (net_eq(sock_net((struct sock *)nlk), net)) { - iter->link = i; - iter->hash_idx = j; - return nlk; - } - } + netlink_walk_stop(iter); + if (++iter->link >= MAX_LINKS) + return NULL; + + err = netlink_walk_start(iter); + if (err) + return ERR_PTR(err); } + } while (sock_net(&nlk->sk) != seq_file_net(seq)); - j = 0; - } while (++i < MAX_LINKS); + return nlk; +} - return NULL; +static void *netlink_seq_start(struct seq_file *seq, loff_t *posp) +{ + struct nl_seq_iter *iter = seq->private; + void *obj = SEQ_START_TOKEN; + loff_t pos; + int err; + + iter->link = 0; + + err = netlink_walk_start(iter); + if (err) + return ERR_PTR(err); + + for (pos = *posp; pos && obj && !IS_ERR(obj); pos--) + obj = __netlink_seq_next(seq); + + return obj; +} + +static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + ++*pos; + return __netlink_seq_next(seq); } 
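The /proc/net/netlink code above (like nft_hash_walk() earlier) switches to the rhashtable walker, whose -EAGAIN return is informational: a resize interleaved with the walk, so entries may be seen twice or skipped, but iteration can continue. A condensed sketch of the protocol (demo_walk_count() is an illustrative name; the rhashtable_walk_* calls are the ones used in the patch):

static int demo_walk_count(struct rhashtable *ht, unsigned int *count)
{
        struct rhashtable_iter hti;
        struct netlink_sock *nlk;
        int err;

        *count = 0;

        err = rhashtable_walk_init(ht, &hti);
        if (err)
                return err;

        err = rhashtable_walk_start(&hti);
        if (err == -EAGAIN)             /* a resize ran; the walk is still usable */
                err = 0;
        if (err)
                goto out;

        while ((nlk = rhashtable_walk_next(&hti)) != NULL) {
                if (IS_ERR(nlk)) {
                        if (PTR_ERR(nlk) == -EAGAIN)
                                continue;       /* resize: entries may repeat or be skipped */
                        err = PTR_ERR(nlk);
                        break;
                }
                (*count)++;             /* stand-in for per-entry work */
        }
out:
        rhashtable_walk_stop(&hti);
        rhashtable_walk_exit(&hti);
        return err;
}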
static void netlink_seq_stop(struct seq_file *seq, void *v) - __releases(RCU) __releases(nl_table_lock) { - rcu_read_unlock(); - read_unlock(&nl_table_lock); + struct nl_seq_iter *iter = seq->private; + + if (iter->link >= MAX_LINKS) + return; + + netlink_walk_stop(iter); } @@ -3113,9 +3128,6 @@ static int __init netlink_proto_init(void) .max_shift = 16, /* 64K */ .grow_decision = rht_grow_above_75, .shrink_decision = rht_shrink_below_30, -#ifdef CONFIG_PROVE_LOCKING - .mutex_is_held = lockdep_nl_sk_hash_is_held, -#endif }; if (err != 0) diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h index b20a1731759b..89008405d6b4 100644 --- a/net/netlink/af_netlink.h +++ b/net/netlink/af_netlink.h @@ -2,6 +2,7 @@ #define _AF_NETLINK_H #include <linux/rhashtable.h> +#include <linux/atomic.h> #include <net/sock.h> #define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8) @@ -39,8 +40,8 @@ struct netlink_sock { struct mutex *cb_mutex; struct mutex cb_def_mutex; void (*netlink_rcv)(struct sk_buff *skb); - int (*netlink_bind)(int group); - void (*netlink_unbind)(int group); + int (*netlink_bind)(struct net *net, int group); + void (*netlink_unbind)(struct net *net, int group); struct module *module; #ifdef CONFIG_NETLINK_MMAP struct mutex pg_vec_lock; @@ -50,6 +51,7 @@ struct netlink_sock { #endif /* CONFIG_NETLINK_MMAP */ struct rhash_head node; + struct rcu_head rcu; }; static inline struct netlink_sock *nlk_sk(struct sock *sk) @@ -65,14 +67,13 @@ struct netlink_table { unsigned int groups; struct mutex *cb_mutex; struct module *module; - int (*bind)(int group); - void (*unbind)(int group); + int (*bind)(struct net *net, int group); + void (*unbind)(struct net *net, int group); bool (*compare)(struct net *net, struct sock *sock); int registered; }; extern struct netlink_table *nl_table; extern rwlock_t nl_table_lock; -extern struct mutex nl_sk_hash_lock; #endif diff --git a/net/netlink/diag.c b/net/netlink/diag.c index de8c74a3c061..3ee63a3cff30 100644 --- a/net/netlink/diag.c +++ b/net/netlink/diag.c @@ -91,7 +91,8 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, sk_diag_put_rings_cfg(sk, skb)) goto out_nlmsg_trim; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; out_nlmsg_trim: nlmsg_cancel(skb, nlh); @@ -103,7 +104,7 @@ static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, { struct netlink_table *tbl = &nl_table[protocol]; struct rhashtable *ht = &tbl->hash; - const struct bucket_table *htbl = rht_dereference(ht->tbl, ht); + const struct bucket_table *htbl = rht_dereference_rcu(ht->tbl, ht); struct net *net = sock_net(skb->sk); struct netlink_diag_req *req; struct netlink_sock *nlsk; @@ -113,7 +114,9 @@ static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, req = nlmsg_data(cb->nlh); for (i = 0; i < htbl->size; i++) { - rht_for_each_entry(nlsk, htbl->buckets[i], ht, node) { + struct rhash_head *pos; + + rht_for_each_entry_rcu(nlsk, pos, htbl, i, node) { sk = (struct sock *)nlsk; if (!net_eq(sock_net(sk), net)) @@ -170,7 +173,7 @@ static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) req = nlmsg_data(cb->nlh); - mutex_lock(&nl_sk_hash_lock); + rcu_read_lock(); read_lock(&nl_table_lock); if (req->sdiag_protocol == NDIAG_PROTO_ALL) { @@ -184,7 +187,7 @@ static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) } else { if (req->sdiag_protocol >= MAX_LINKS) { read_unlock(&nl_table_lock); - mutex_unlock(&nl_sk_hash_lock); + rcu_read_unlock(); return -ENOENT; 
} @@ -192,7 +195,7 @@ static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) } read_unlock(&nl_table_lock); - mutex_unlock(&nl_sk_hash_lock); + rcu_read_unlock(); return skb->len; } diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 76393f2f4b22..2ed5f964772e 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -23,6 +23,9 @@ static DEFINE_MUTEX(genl_mutex); /* serialization of message processing */ static DECLARE_RWSEM(cb_lock); +atomic_t genl_sk_destructing_cnt = ATOMIC_INIT(0); +DECLARE_WAIT_QUEUE_HEAD(genl_sk_destructing_waitq); + void genl_lock(void) { mutex_lock(&genl_mutex); @@ -435,15 +438,18 @@ int genl_unregister_family(struct genl_family *family) genl_lock_all(); - genl_unregister_mc_groups(family); - list_for_each_entry(rc, genl_family_chain(family->id), family_list) { if (family->id != rc->id || strcmp(rc->name, family->name)) continue; + genl_unregister_mc_groups(family); + list_del(&rc->family_list); family->n_ops = 0; - genl_unlock_all(); + up_write(&cb_lock); + wait_event(genl_sk_destructing_waitq, + atomic_read(&genl_sk_destructing_cnt) == 0); + genl_unlock(); kfree(family->attrbuf); genl_ctrl_event(CTRL_CMD_DELFAMILY, family, NULL, 0); @@ -756,7 +762,8 @@ static int ctrl_fill_info(struct genl_family *family, u32 portid, u32 seq, nla_nest_end(skb, nla_grps); } - return genlmsg_end(skb, hdr); + genlmsg_end(skb, hdr); + return 0; nla_put_failure: genlmsg_cancel(skb, hdr); @@ -796,7 +803,8 @@ static int ctrl_fill_mcgrp_info(struct genl_family *family, nla_nest_end(skb, nest); nla_nest_end(skb, nla_grps); - return genlmsg_end(skb, hdr); + genlmsg_end(skb, hdr); + return 0; nla_put_failure: genlmsg_cancel(skb, hdr); @@ -983,11 +991,63 @@ static struct genl_multicast_group genl_ctrl_groups[] = { { .name = "notify", }, }; +static int genl_bind(struct net *net, int group) +{ + int i, err = -ENOENT; + + down_read(&cb_lock); + for (i = 0; i < GENL_FAM_TAB_SIZE; i++) { + struct genl_family *f; + + list_for_each_entry(f, genl_family_chain(i), family_list) { + if (group >= f->mcgrp_offset && + group < f->mcgrp_offset + f->n_mcgrps) { + int fam_grp = group - f->mcgrp_offset; + + if (!f->netnsok && net != &init_net) + err = -ENOENT; + else if (f->mcast_bind) + err = f->mcast_bind(net, fam_grp); + else + err = 0; + break; + } + } + } + up_read(&cb_lock); + + return err; +} + +static void genl_unbind(struct net *net, int group) +{ + int i; + + down_read(&cb_lock); + for (i = 0; i < GENL_FAM_TAB_SIZE; i++) { + struct genl_family *f; + + list_for_each_entry(f, genl_family_chain(i), family_list) { + if (group >= f->mcgrp_offset && + group < f->mcgrp_offset + f->n_mcgrps) { + int fam_grp = group - f->mcgrp_offset; + + if (f->mcast_unbind) + f->mcast_unbind(net, fam_grp); + break; + } + } + } + up_read(&cb_lock); +} + static int __net_init genl_pernet_init(struct net *net) { struct netlink_kernel_cfg cfg = { .input = genl_rcv, .flags = NL_CFG_F_NONROOT_RECV, + .bind = genl_bind, + .unbind = genl_unbind, }; /* we'll bump the group number right afterwards */ diff --git a/net/nfc/core.c b/net/nfc/core.c index 819b87702b70..cff3f1614ad4 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -555,7 +555,6 @@ EXPORT_SYMBOL(nfc_find_se); int nfc_enable_se(struct nfc_dev *dev, u32 se_idx) { - struct nfc_se *se; int rc; @@ -605,7 +604,6 @@ error: int nfc_disable_se(struct nfc_dev *dev, u32 se_idx) { - struct nfc_se *se; int rc; @@ -934,6 +932,27 @@ int nfc_remove_se(struct nfc_dev *dev, u32 se_idx) } EXPORT_SYMBOL(nfc_remove_se); +int 
nfc_se_transaction(struct nfc_dev *dev, u8 se_idx, + struct nfc_evt_transaction *evt_transaction) +{ + int rc; + + pr_debug("transaction: %x\n", se_idx); + + device_lock(&dev->dev); + + if (!evt_transaction) { + rc = -EPROTO; + goto out; + } + + rc = nfc_genl_se_transaction(dev, se_idx, evt_transaction); +out: + device_unlock(&dev->dev); + return rc; +} +EXPORT_SYMBOL(nfc_se_transaction); + static void nfc_release(struct device *d) { struct nfc_dev *dev = to_nfc_dev(d); diff --git a/net/nfc/hci/command.c b/net/nfc/hci/command.c index 91df487aa0a9..844673cb7c18 100644 --- a/net/nfc/hci/command.c +++ b/net/nfc/hci/command.c @@ -116,23 +116,6 @@ int nfc_hci_send_event(struct nfc_hci_dev *hdev, u8 gate, u8 event, } EXPORT_SYMBOL(nfc_hci_send_event); -int nfc_hci_send_response(struct nfc_hci_dev *hdev, u8 gate, u8 response, - const u8 *param, size_t param_len) -{ - u8 pipe; - - pr_debug("\n"); - - pipe = hdev->gate2pipe[gate]; - if (pipe == NFC_HCI_INVALID_PIPE) - return -EADDRNOTAVAIL; - - return nfc_hci_hcp_message_tx(hdev, pipe, NFC_HCI_HCP_RESPONSE, - response, param, param_len, NULL, NULL, - 0); -} -EXPORT_SYMBOL(nfc_hci_send_response); - /* * Execute an hci command sent to gate. * skb will contain response data if success. skb can be NULL if you are not @@ -331,7 +314,7 @@ int nfc_hci_disconnect_all_gates(struct nfc_hci_dev *hdev) if (r < 0) return r; - memset(hdev->gate2pipe, NFC_HCI_INVALID_PIPE, sizeof(hdev->gate2pipe)); + nfc_hci_reset_pipes(hdev); return 0; } @@ -345,7 +328,7 @@ int nfc_hci_connect_gate(struct nfc_hci_dev *hdev, u8 dest_host, u8 dest_gate, pr_debug("\n"); - if (hdev->gate2pipe[dest_gate] == NFC_HCI_DO_NOT_CREATE_PIPE) + if (pipe == NFC_HCI_DO_NOT_CREATE_PIPE) return 0; if (hdev->gate2pipe[dest_gate] != NFC_HCI_INVALID_PIPE) @@ -380,6 +363,8 @@ open_pipe: return r; } + hdev->pipes[pipe].gate = dest_gate; + hdev->pipes[pipe].dest_host = dest_host; hdev->gate2pipe[dest_gate] = pipe; return 0; diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index ef50e7716c4a..6e061da2258a 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -46,6 +46,32 @@ int nfc_hci_result_to_errno(u8 result) } EXPORT_SYMBOL(nfc_hci_result_to_errno); +void nfc_hci_reset_pipes(struct nfc_hci_dev *hdev) +{ + int i = 0; + + for (i = 0; i < NFC_HCI_MAX_PIPES; i++) { + hdev->pipes[i].gate = NFC_HCI_INVALID_GATE; + hdev->pipes[i].dest_host = NFC_HCI_INVALID_HOST; + } + memset(hdev->gate2pipe, NFC_HCI_INVALID_PIPE, sizeof(hdev->gate2pipe)); +} +EXPORT_SYMBOL(nfc_hci_reset_pipes); + +void nfc_hci_reset_pipes_per_host(struct nfc_hci_dev *hdev, u8 host) +{ + int i = 0; + + for (i = 0; i < NFC_HCI_MAX_PIPES; i++) { + if (hdev->pipes[i].dest_host != host) + continue; + + hdev->pipes[i].gate = NFC_HCI_INVALID_GATE; + hdev->pipes[i].dest_host = NFC_HCI_INVALID_HOST; + } +} +EXPORT_SYMBOL(nfc_hci_reset_pipes_per_host); + static void nfc_hci_msg_tx_work(struct work_struct *work) { struct nfc_hci_dev *hdev = container_of(work, struct nfc_hci_dev, @@ -167,48 +193,69 @@ exit: void nfc_hci_cmd_received(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd, struct sk_buff *skb) { - int r = 0; - u8 gate = nfc_hci_pipe2gate(hdev, pipe); - u8 local_gate, new_pipe; - u8 gate_opened = 0x00; + u8 gate = hdev->pipes[pipe].gate; + u8 status = NFC_HCI_ANY_OK; + struct hci_create_pipe_resp *create_info; + struct hci_delete_pipe_noti *delete_info; + struct hci_all_pipe_cleared_noti *cleared_info; pr_debug("from gate %x pipe %x cmd %x\n", gate, pipe, cmd); switch (cmd) { case NFC_HCI_ADM_NOTIFY_PIPE_CREATED: if (skb->len != 5) { - r 
= -EPROTO; - break; + status = NFC_HCI_ANY_E_NOK; + goto exit; } + create_info = (struct hci_create_pipe_resp *)skb->data; - local_gate = skb->data[3]; - new_pipe = skb->data[4]; - nfc_hci_send_response(hdev, gate, NFC_HCI_ANY_OK, NULL, 0); - - /* save the new created pipe and bind with local gate, + /* Save the new created pipe and bind with local gate, * the description for skb->data[3] is destination gate id * but since we received this cmd from host controller, we * are the destination and it is our local gate */ - hdev->gate2pipe[local_gate] = new_pipe; + hdev->gate2pipe[create_info->dest_gate] = create_info->pipe; + hdev->pipes[create_info->pipe].gate = create_info->dest_gate; + hdev->pipes[create_info->pipe].dest_host = + create_info->src_host; break; case NFC_HCI_ANY_OPEN_PIPE: - /* if the pipe is already created, we allow remote host to - * open it - */ - if (gate != 0xff) - nfc_hci_send_response(hdev, gate, NFC_HCI_ANY_OK, - &gate_opened, 1); + if (gate == NFC_HCI_INVALID_GATE) { + status = NFC_HCI_ANY_E_NOK; + goto exit; + } + break; + case NFC_HCI_ADM_NOTIFY_PIPE_DELETED: + if (skb->len != 1) { + status = NFC_HCI_ANY_E_NOK; + goto exit; + } + delete_info = (struct hci_delete_pipe_noti *)skb->data; + + hdev->pipes[delete_info->pipe].gate = NFC_HCI_INVALID_GATE; + hdev->pipes[delete_info->pipe].dest_host = NFC_HCI_INVALID_HOST; break; case NFC_HCI_ADM_NOTIFY_ALL_PIPE_CLEARED: - nfc_hci_send_response(hdev, gate, NFC_HCI_ANY_OK, NULL, 0); + if (skb->len != 1) { + status = NFC_HCI_ANY_E_NOK; + goto exit; + } + cleared_info = (struct hci_all_pipe_cleared_noti *)skb->data; + + nfc_hci_reset_pipes_per_host(hdev, cleared_info->host); break; default: pr_info("Discarded unknown cmd %x to gate %x\n", cmd, gate); - r = -EINVAL; break; } + if (hdev->ops->cmd_received) + hdev->ops->cmd_received(hdev, pipe, cmd, skb); + +exit: + nfc_hci_hcp_message_tx(hdev, pipe, NFC_HCI_HCP_RESPONSE, + status, NULL, 0, NULL, NULL, 0); + kfree_skb(skb); } @@ -330,15 +377,15 @@ void nfc_hci_event_received(struct nfc_hci_dev *hdev, u8 pipe, u8 event, struct sk_buff *skb) { int r = 0; - u8 gate = nfc_hci_pipe2gate(hdev, pipe); + u8 gate = hdev->pipes[pipe].gate; - if (gate == 0xff) { + if (gate == NFC_HCI_INVALID_GATE) { pr_err("Discarded event %x to unopened pipe %x\n", event, pipe); goto exit; } if (hdev->ops->event_received) { - r = hdev->ops->event_received(hdev, gate, event, skb); + r = hdev->ops->event_received(hdev, pipe, event, skb); if (r <= 0) goto exit_noskb; } @@ -573,7 +620,7 @@ static int hci_dev_down(struct nfc_dev *nfc_dev) if (hdev->ops->close) hdev->ops->close(hdev); - memset(hdev->gate2pipe, NFC_HCI_INVALID_PIPE, sizeof(hdev->gate2pipe)); + nfc_hci_reset_pipes(hdev); return 0; } @@ -932,7 +979,7 @@ struct nfc_hci_dev *nfc_hci_allocate_device(struct nfc_hci_ops *ops, nfc_set_drvdata(hdev->ndev, hdev); - memset(hdev->gate2pipe, NFC_HCI_INVALID_PIPE, sizeof(hdev->gate2pipe)); + nfc_hci_reset_pipes(hdev); hdev->quirks = quirks; diff --git a/net/nfc/hci/hci.h b/net/nfc/hci/hci.h index c3d2e2c1394c..ab4c8e80b1ad 100644 --- a/net/nfc/hci/hci.h +++ b/net/nfc/hci/hci.h @@ -65,6 +65,14 @@ struct hci_create_pipe_resp { u8 pipe; } __packed; +struct hci_delete_pipe_noti { + u8 pipe; +} __packed; + +struct hci_all_pipe_cleared_noti { + u8 host; +} __packed; + #define NFC_HCI_FRAGMENT 0x7f #define HCP_HEADER(type, instr) ((((type) & 0x03) << 6) | ((instr) & 0x3f)) @@ -77,8 +85,6 @@ int nfc_hci_hcp_message_tx(struct nfc_hci_dev *hdev, u8 pipe, data_exchange_cb_t cb, void *cb_context, unsigned long 
completion_delay); -u8 nfc_hci_pipe2gate(struct nfc_hci_dev *hdev, u8 pipe); - void nfc_hci_hcp_message_rx(struct nfc_hci_dev *hdev, u8 pipe, u8 type, u8 instruction, struct sk_buff *skb); diff --git a/net/nfc/hci/hcp.c b/net/nfc/hci/hcp.c index e9de1514656e..1fe725d66085 100644 --- a/net/nfc/hci/hcp.c +++ b/net/nfc/hci/hcp.c @@ -124,17 +124,6 @@ out_skb_err: return err; } -u8 nfc_hci_pipe2gate(struct nfc_hci_dev *hdev, u8 pipe) -{ - int gate; - - for (gate = 0; gate < NFC_HCI_MAX_GATES; gate++) - if (hdev->gate2pipe[gate] == pipe) - return gate; - - return 0xff; -} - /* * Receive hcp message for pipe, with type and cmd. * skb contains optional message data only. diff --git a/net/nfc/nci/Makefile b/net/nfc/nci/Makefile index 7aeedc43187d..7ed8949266cc 100644 --- a/net/nfc/nci/Makefile +++ b/net/nfc/nci/Makefile @@ -4,6 +4,6 @@ obj-$(CONFIG_NFC_NCI) += nci.o -nci-objs := core.o data.o lib.o ntf.o rsp.o +nci-objs := core.o data.o lib.o ntf.o rsp.o hci.o nci-$(CONFIG_NFC_NCI_SPI) += spi.o diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 51feb5e63008..9575a1892607 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -41,10 +41,28 @@ #include <net/nfc/nci_core.h> #include <linux/nfc.h> +struct core_conn_create_data { + int length; + struct nci_core_conn_create_cmd *cmd; +}; + static void nci_cmd_work(struct work_struct *work); static void nci_rx_work(struct work_struct *work); static void nci_tx_work(struct work_struct *work); +struct nci_conn_info *nci_get_conn_info_by_conn_id(struct nci_dev *ndev, + int conn_id) +{ + struct nci_conn_info *conn_info; + + list_for_each_entry(conn_info, &ndev->conn_info_list, list) { + if (conn_info->conn_id == conn_id) + return conn_info; + } + + return NULL; +} + /* ---- NCI requests ---- */ void nci_req_complete(struct nci_dev *ndev, int result) @@ -109,10 +127,10 @@ static int __nci_request(struct nci_dev *ndev, return rc; } -static inline int nci_request(struct nci_dev *ndev, - void (*req)(struct nci_dev *ndev, - unsigned long opt), - unsigned long opt, __u32 timeout) +inline int nci_request(struct nci_dev *ndev, + void (*req)(struct nci_dev *ndev, + unsigned long opt), + unsigned long opt, __u32 timeout) { int rc; @@ -456,6 +474,95 @@ int nci_set_config(struct nci_dev *ndev, __u8 id, size_t len, __u8 *val) } EXPORT_SYMBOL(nci_set_config); +static void nci_nfcee_discover_req(struct nci_dev *ndev, unsigned long opt) +{ + struct nci_nfcee_discover_cmd cmd; + __u8 action = opt; + + cmd.discovery_action = action; + + nci_send_cmd(ndev, NCI_OP_NFCEE_DISCOVER_CMD, 1, &cmd); +} + +int nci_nfcee_discover(struct nci_dev *ndev, u8 action) +{ + return nci_request(ndev, nci_nfcee_discover_req, action, + msecs_to_jiffies(NCI_CMD_TIMEOUT)); +} +EXPORT_SYMBOL(nci_nfcee_discover); + +static void nci_nfcee_mode_set_req(struct nci_dev *ndev, unsigned long opt) +{ + struct nci_nfcee_mode_set_cmd *cmd = + (struct nci_nfcee_mode_set_cmd *)opt; + + nci_send_cmd(ndev, NCI_OP_NFCEE_MODE_SET_CMD, + sizeof(struct nci_nfcee_mode_set_cmd), cmd); +} + +int nci_nfcee_mode_set(struct nci_dev *ndev, u8 nfcee_id, u8 nfcee_mode) +{ + struct nci_nfcee_mode_set_cmd cmd; + + cmd.nfcee_id = nfcee_id; + cmd.nfcee_mode = nfcee_mode; + + return nci_request(ndev, nci_nfcee_mode_set_req, (unsigned long)&cmd, + msecs_to_jiffies(NCI_CMD_TIMEOUT)); +} +EXPORT_SYMBOL(nci_nfcee_mode_set); + +static void nci_core_conn_create_req(struct nci_dev *ndev, unsigned long opt) +{ + struct core_conn_create_data *data = + (struct core_conn_create_data *)opt; + + nci_send_cmd(ndev, 
NCI_OP_CORE_CONN_CREATE_CMD, data->length, data->cmd); +} + +int nci_core_conn_create(struct nci_dev *ndev, u8 destination_type, + u8 number_destination_params, + size_t params_len, + struct core_conn_create_dest_spec_params *params) +{ + int r; + struct nci_core_conn_create_cmd *cmd; + struct core_conn_create_data data; + + data.length = params_len + sizeof(struct nci_core_conn_create_cmd); + cmd = kzalloc(data.length, GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + cmd->destination_type = destination_type; + cmd->number_destination_params = number_destination_params; + memcpy(cmd->params, params, params_len); + + data.cmd = cmd; + ndev->cur_id = params->value[DEST_SPEC_PARAMS_ID_INDEX]; + + r = __nci_request(ndev, nci_core_conn_create_req, + (unsigned long)&data, + msecs_to_jiffies(NCI_CMD_TIMEOUT)); + kfree(cmd); + return r; +} +EXPORT_SYMBOL(nci_core_conn_create); + +static void nci_core_conn_close_req(struct nci_dev *ndev, unsigned long opt) +{ + __u8 conn_id = opt; + + nci_send_cmd(ndev, NCI_OP_CORE_CONN_CLOSE_CMD, 1, &conn_id); +} + +int nci_core_conn_close(struct nci_dev *ndev, u8 conn_id) +{ + return nci_request(ndev, nci_core_conn_close_req, conn_id, + msecs_to_jiffies(NCI_CMD_TIMEOUT)); +} +EXPORT_SYMBOL(nci_core_conn_close); + static int nci_set_local_general_bytes(struct nfc_dev *nfc_dev) { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); @@ -712,6 +819,11 @@ static int nci_transceive(struct nfc_dev *nfc_dev, struct nfc_target *target, { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); int rc; + struct nci_conn_info *conn_info; + + conn_info = ndev->rf_conn_info; + if (!conn_info) + return -EPROTO; pr_debug("target_idx %d, len %d\n", target->idx, skb->len); @@ -724,8 +836,8 @@ static int nci_transceive(struct nfc_dev *nfc_dev, struct nfc_target *target, return -EBUSY; /* store cb and context to be used on receiving data */ - ndev->data_exchange_cb = cb; - ndev->data_exchange_cb_context = cb_context; + conn_info->data_exchange_cb = cb; + conn_info->data_exchange_cb_context = cb_context; rc = nci_send_data(ndev, NCI_STATIC_RF_CONN_ID, skb); if (rc) @@ -768,10 +880,16 @@ static int nci_disable_se(struct nfc_dev *nfc_dev, u32 se_idx) static int nci_discover_se(struct nfc_dev *nfc_dev) { + int r; struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); - if (ndev->ops->discover_se) + if (ndev->ops->discover_se) { + r = nci_nfcee_discover(ndev, NCI_NFCEE_DISCOVERY_ACTION_ENABLE); + if (r != NCI_STATUS_OK) + return -EPROTO; + return ndev->ops->discover_se(ndev); + } return 0; } @@ -807,7 +925,6 @@ static struct nfc_ops nci_nfc_ops = { }; /* ---- Interface to NCI drivers ---- */ - /** * nci_allocate_device - allocate a new nci device * @@ -842,13 +959,20 @@ struct nci_dev *nci_allocate_device(struct nci_ops *ops, tx_headroom + NCI_DATA_HDR_SIZE, tx_tailroom); if (!ndev->nfc_dev) - goto free_exit; + goto free_nci; + + ndev->hci_dev = nci_hci_allocate(ndev); + if (!ndev->hci_dev) + goto free_nfc; nfc_set_drvdata(ndev->nfc_dev, ndev); return ndev; -free_exit: +free_nfc: + kfree(ndev->nfc_dev); + +free_nci: kfree(ndev); return NULL; } @@ -913,6 +1037,7 @@ int nci_register_device(struct nci_dev *ndev) (unsigned long) ndev); mutex_init(&ndev->req_lock); + INIT_LIST_HEAD(&ndev->conn_info_list); rc = nfc_register_device(ndev->nfc_dev); if (rc) @@ -938,12 +1063,19 @@ EXPORT_SYMBOL(nci_register_device); */ void nci_unregister_device(struct nci_dev *ndev) { + struct nci_conn_info *conn_info, *n; + nci_close_device(ndev); destroy_workqueue(ndev->cmd_wq); destroy_workqueue(ndev->rx_wq); 
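Each new NCI primitive above follows the same two-piece shape: a small *_req() callback that builds the frame and calls nci_send_cmd(), plus a wrapper that funnels it through nci_request() with the standard command timeout, passing larger parameter blocks through the unsigned long opt as a pointer (as nci_nfcee_mode_set() does). A hedged sketch of that shape (DEMO_OP_CMD and struct demo_cmd are placeholders, not real NCI operations; nci_request(), nci_send_cmd() and NCI_CMD_TIMEOUT are the symbols from the patch):

#define DEMO_OP_CMD     0x0000          /* placeholder opcode for illustration only */

struct demo_cmd {
        __u8 param;
} __packed;

static void demo_req(struct nci_dev *ndev, unsigned long opt)
{
        /* Parameter blocks travel through "opt" as a pointer. */
        struct demo_cmd *cmd = (struct demo_cmd *)opt;

        nci_send_cmd(ndev, DEMO_OP_CMD, sizeof(*cmd), cmd);
}

static int demo_send(struct nci_dev *ndev, __u8 param)
{
        struct demo_cmd cmd = { .param = param };

        return nci_request(ndev, demo_req, (unsigned long)&cmd,
                           msecs_to_jiffies(NCI_CMD_TIMEOUT));
}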
destroy_workqueue(ndev->tx_wq); + list_for_each_entry_safe(conn_info, n, &ndev->conn_info_list, list) { + list_del(&conn_info->list); + /* conn_info is allocated with devm_kzalloc */ + } + nfc_unregister_device(ndev->nfc_dev); } EXPORT_SYMBOL(nci_unregister_device); @@ -1027,20 +1159,25 @@ int nci_send_cmd(struct nci_dev *ndev, __u16 opcode, __u8 plen, void *payload) static void nci_tx_work(struct work_struct *work) { struct nci_dev *ndev = container_of(work, struct nci_dev, tx_work); + struct nci_conn_info *conn_info; struct sk_buff *skb; - pr_debug("credits_cnt %d\n", atomic_read(&ndev->credits_cnt)); + conn_info = nci_get_conn_info_by_conn_id(ndev, ndev->cur_conn_id); + if (!conn_info) + return; + + pr_debug("credits_cnt %d\n", atomic_read(&conn_info->credits_cnt)); /* Send queued tx data */ - while (atomic_read(&ndev->credits_cnt)) { + while (atomic_read(&conn_info->credits_cnt)) { skb = skb_dequeue(&ndev->tx_q); if (!skb) return; /* Check if data flow control is used */ - if (atomic_read(&ndev->credits_cnt) != + if (atomic_read(&conn_info->credits_cnt) != NCI_DATA_FLOW_CONTROL_NOT_USED) - atomic_dec(&ndev->credits_cnt); + atomic_dec(&conn_info->credits_cnt); pr_debug("NCI TX: MT=data, PBF=%d, conn_id=%d, plen=%d\n", nci_pbf(skb->data), @@ -1092,7 +1229,9 @@ static void nci_rx_work(struct work_struct *work) if (test_bit(NCI_DATA_EXCHANGE_TO, &ndev->flags)) { /* complete the data exchange transaction, if exists */ if (test_bit(NCI_DATA_EXCHANGE, &ndev->flags)) - nci_data_exchange_complete(ndev, NULL, -ETIMEDOUT); + nci_data_exchange_complete(ndev, NULL, + ndev->cur_conn_id, + -ETIMEDOUT); clear_bit(NCI_DATA_EXCHANGE_TO, &ndev->flags); } diff --git a/net/nfc/nci/data.c b/net/nfc/nci/data.c index a2de2a8cb00e..566466d90048 100644 --- a/net/nfc/nci/data.c +++ b/net/nfc/nci/data.c @@ -36,10 +36,20 @@ /* Complete data exchange transaction and forward skb to nfc core */ void nci_data_exchange_complete(struct nci_dev *ndev, struct sk_buff *skb, - int err) + __u8 conn_id, int err) { - data_exchange_cb_t cb = ndev->data_exchange_cb; - void *cb_context = ndev->data_exchange_cb_context; + struct nci_conn_info *conn_info; + data_exchange_cb_t cb; + void *cb_context; + + conn_info = nci_get_conn_info_by_conn_id(ndev, conn_id); + if (!conn_info) { + kfree_skb(skb); + goto exit; + } + + cb = conn_info->data_exchange_cb; + cb_context = conn_info->data_exchange_cb_context; pr_debug("len %d, err %d\n", skb ? 
skb->len : 0, err); @@ -48,9 +58,6 @@ void nci_data_exchange_complete(struct nci_dev *ndev, struct sk_buff *skb, clear_bit(NCI_DATA_EXCHANGE_TO, &ndev->flags); if (cb) { - ndev->data_exchange_cb = NULL; - ndev->data_exchange_cb_context = NULL; - /* forward skb to nfc core */ cb(cb_context, skb, err); } else if (skb) { @@ -60,6 +67,7 @@ void nci_data_exchange_complete(struct nci_dev *ndev, struct sk_buff *skb, kfree_skb(skb); } +exit: clear_bit(NCI_DATA_EXCHANGE, &ndev->flags); } @@ -85,6 +93,7 @@ static inline void nci_push_data_hdr(struct nci_dev *ndev, static int nci_queue_tx_data_frags(struct nci_dev *ndev, __u8 conn_id, struct sk_buff *skb) { + struct nci_conn_info *conn_info; int total_len = skb->len; unsigned char *data = skb->data; unsigned long flags; @@ -95,11 +104,17 @@ static int nci_queue_tx_data_frags(struct nci_dev *ndev, pr_debug("conn_id 0x%x, total_len %d\n", conn_id, total_len); + conn_info = nci_get_conn_info_by_conn_id(ndev, conn_id); + if (!conn_info) { + rc = -EPROTO; + goto free_exit; + } + __skb_queue_head_init(&frags_q); while (total_len) { frag_len = - min_t(int, total_len, ndev->max_data_pkt_payload_size); + min_t(int, total_len, conn_info->max_pkt_payload_len); skb_frag = nci_skb_alloc(ndev, (NCI_DATA_HDR_SIZE + frag_len), @@ -151,12 +166,19 @@ exit: /* Send NCI data */ int nci_send_data(struct nci_dev *ndev, __u8 conn_id, struct sk_buff *skb) { + struct nci_conn_info *conn_info; int rc = 0; pr_debug("conn_id 0x%x, plen %d\n", conn_id, skb->len); + conn_info = nci_get_conn_info_by_conn_id(ndev, conn_id); + if (!conn_info) { + rc = -EPROTO; + goto free_exit; + } + /* check if the packet need to be fragmented */ - if (skb->len <= ndev->max_data_pkt_payload_size) { + if (skb->len <= conn_info->max_pkt_payload_len) { /* no need to fragment packet */ nci_push_data_hdr(ndev, conn_id, skb, NCI_PBF_LAST); @@ -170,6 +192,7 @@ int nci_send_data(struct nci_dev *ndev, __u8 conn_id, struct sk_buff *skb) } } + ndev->cur_conn_id = conn_id; queue_work(ndev->tx_wq, &ndev->tx_work); goto exit; @@ -185,7 +208,7 @@ exit: static void nci_add_rx_data_frag(struct nci_dev *ndev, struct sk_buff *skb, - __u8 pbf, __u8 status) + __u8 pbf, __u8 conn_id, __u8 status) { int reassembly_len; int err = 0; @@ -229,16 +252,13 @@ static void nci_add_rx_data_frag(struct nci_dev *ndev, } exit: - if (ndev->nfc_dev->rf_mode == NFC_RF_INITIATOR) { - nci_data_exchange_complete(ndev, skb, err); - } else if (ndev->nfc_dev->rf_mode == NFC_RF_TARGET) { + if (ndev->nfc_dev->rf_mode == NFC_RF_TARGET) { /* Data received in Target mode, forward to nfc core */ err = nfc_tm_data_received(ndev->nfc_dev, skb); if (err) pr_err("unable to handle received data\n"); } else { - pr_err("rf mode unknown\n"); - kfree_skb(skb); + nci_data_exchange_complete(ndev, skb, conn_id, err); } } @@ -247,6 +267,8 @@ void nci_rx_data_packet(struct nci_dev *ndev, struct sk_buff *skb) { __u8 pbf = nci_pbf(skb->data); __u8 status = 0; + __u8 conn_id = nci_conn_id(skb->data); + struct nci_conn_info *conn_info; pr_debug("len %d\n", skb->len); @@ -255,6 +277,10 @@ void nci_rx_data_packet(struct nci_dev *ndev, struct sk_buff *skb) nci_conn_id(skb->data), nci_plen(skb->data)); + conn_info = nci_get_conn_info_by_conn_id(ndev, nci_conn_id(skb->data)); + if (!conn_info) + return; + /* strip the nci data header */ skb_pull(skb, NCI_DATA_HDR_SIZE); @@ -268,5 +294,5 @@ void nci_rx_data_packet(struct nci_dev *ndev, struct sk_buff *skb) skb_trim(skb, (skb->len - 1)); } - nci_add_rx_data_frag(ndev, skb, pbf, nci_to_errno(status)); + 
nci_add_rx_data_frag(ndev, skb, pbf, conn_id, nci_to_errno(status)); } diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c new file mode 100644 index 000000000000..ed54ec533836 --- /dev/null +++ b/net/nfc/nci/hci.c @@ -0,0 +1,694 @@ +/* + * The NFC Controller Interface is the communication protocol between an + * NFC Controller (NFCC) and a Device Host (DH). + * This is the HCI over NCI implementation, as specified in the 10.2 + * section of the NCI 1.1 specification. + * + * Copyright (C) 2014 STMicroelectronics SAS. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + * + */ + +#include <linux/skbuff.h> + +#include "../nfc.h" +#include <net/nfc/nci.h> +#include <net/nfc/nci_core.h> +#include <linux/nfc.h> + +struct nci_data { + u8 conn_id; + u8 pipe; + u8 cmd; + const u8 *data; + u32 data_len; +} __packed; + +struct nci_hci_create_pipe_params { + u8 src_gate; + u8 dest_host; + u8 dest_gate; +} __packed; + +struct nci_hci_create_pipe_resp { + u8 src_host; + u8 src_gate; + u8 dest_host; + u8 dest_gate; + u8 pipe; +} __packed; + +struct nci_hci_delete_pipe_noti { + u8 pipe; +} __packed; + +struct nci_hci_all_pipe_cleared_noti { + u8 host; +} __packed; + +struct nci_hcp_message { + u8 header; /* type -cmd,evt,rsp- + instruction */ + u8 data[]; +} __packed; + +struct nci_hcp_packet { + u8 header; /* cbit+pipe */ + struct nci_hcp_message message; +} __packed; + +#define NCI_HCI_ANY_SET_PARAMETER 0x01 +#define NCI_HCI_ANY_GET_PARAMETER 0x02 +#define NCI_HCI_ANY_CLOSE_PIPE 0x04 + +#define NCI_HFP_NO_CHAINING 0x80 + +#define NCI_NFCEE_ID_HCI 0x80 + +#define NCI_EVT_HOT_PLUG 0x03 + +#define NCI_HCI_ADMIN_PARAM_SESSION_IDENTITY 0x01 + +/* HCP headers */ +#define NCI_HCI_HCP_PACKET_HEADER_LEN 1 +#define NCI_HCI_HCP_MESSAGE_HEADER_LEN 1 +#define NCI_HCI_HCP_HEADER_LEN 2 + +/* HCP types */ +#define NCI_HCI_HCP_COMMAND 0x00 +#define NCI_HCI_HCP_EVENT 0x01 +#define NCI_HCI_HCP_RESPONSE 0x02 + +#define NCI_HCI_ADM_NOTIFY_PIPE_CREATED 0x12 +#define NCI_HCI_ADM_NOTIFY_PIPE_DELETED 0x13 +#define NCI_HCI_ADM_NOTIFY_ALL_PIPE_CLEARED 0x15 + +#define NCI_HCI_FRAGMENT 0x7f +#define NCI_HCP_HEADER(type, instr) ((((type) & 0x03) << 6) |\ + ((instr) & 0x3f)) + +#define NCI_HCP_MSG_GET_TYPE(header) ((header & 0xc0) >> 6) +#define NCI_HCP_MSG_GET_CMD(header) (header & 0x3f) +#define NCI_HCP_MSG_GET_PIPE(header) (header & 0x7f) + +/* HCI core */ +static void nci_hci_reset_pipes(struct nci_hci_dev *hdev) +{ + int i; + + for (i = 0; i < NCI_HCI_MAX_PIPES; i++) { + hdev->pipes[i].gate = NCI_HCI_INVALID_GATE; + hdev->pipes[i].host = NCI_HCI_INVALID_HOST; + } + memset(hdev->gate2pipe, NCI_HCI_INVALID_PIPE, sizeof(hdev->gate2pipe)); +} + +static void nci_hci_reset_pipes_per_host(struct nci_dev *ndev, u8 host) +{ + int i; + + for (i = 0; i < NCI_HCI_MAX_PIPES; i++) { + if (ndev->hci_dev->pipes[i].host == host) { + ndev->hci_dev->pipes[i].gate = NCI_HCI_INVALID_GATE; + ndev->hci_dev->pipes[i].host = NCI_HCI_INVALID_HOST; + } + } +} + +/* Fragment HCI data over NCI packet. 
+ * NFC Forum NCI 10.2.2 Data Exchange: + * The payload of the Data Packets sent on the Logical Connection SHALL be + * valid HCP packets, as defined within [ETSI_102622]. Each Data Packet SHALL + * contain a single HCP packet. NCI Segmentation and Reassembly SHALL NOT be + * applied to Data Messages in either direction. The HCI fragmentation mechanism + * is used if required. + */ +static int nci_hci_send_data(struct nci_dev *ndev, u8 pipe, + const u8 data_type, const u8 *data, + size_t data_len) +{ + struct nci_conn_info *conn_info; + struct sk_buff *skb; + int len, i, r; + u8 cb = pipe; + + conn_info = ndev->hci_dev->conn_info; + if (!conn_info) + return -EPROTO; + + skb = nci_skb_alloc(ndev, 2 + conn_info->max_pkt_payload_len + + NCI_DATA_HDR_SIZE, GFP_KERNEL); + if (!skb) + return -ENOMEM; + + skb_reserve(skb, 2 + NCI_DATA_HDR_SIZE); + *skb_push(skb, 1) = data_type; + + i = 0; + len = conn_info->max_pkt_payload_len; + + do { + /* If last packet add NCI_HFP_NO_CHAINING */ + if (i + conn_info->max_pkt_payload_len - + (skb->len + 1) >= data_len) { + cb |= NCI_HFP_NO_CHAINING; + len = data_len - i; + } else { + len = conn_info->max_pkt_payload_len - skb->len - 1; + } + + *skb_push(skb, 1) = cb; + + if (len > 0) + memcpy(skb_put(skb, len), data + i, len); + + r = nci_send_data(ndev, conn_info->conn_id, skb); + if (r < 0) + return r; + + i += len; + if (i < data_len) { + skb_trim(skb, 0); + skb_pull(skb, len); + } + } while (i < data_len); + + return i; +} + +static void nci_hci_send_data_req(struct nci_dev *ndev, unsigned long opt) +{ + struct nci_data *data = (struct nci_data *)opt; + + nci_hci_send_data(ndev, data->pipe, data->cmd, + data->data, data->data_len); +} + +int nci_hci_send_event(struct nci_dev *ndev, u8 gate, u8 event, + const u8 *param, size_t param_len) +{ + u8 pipe = ndev->hci_dev->gate2pipe[gate]; + + if (pipe == NCI_HCI_INVALID_PIPE) + return -EADDRNOTAVAIL; + + return nci_hci_send_data(ndev, pipe, + NCI_HCP_HEADER(NCI_HCI_HCP_EVENT, event), + param, param_len); +} +EXPORT_SYMBOL(nci_hci_send_event); + +int nci_hci_send_cmd(struct nci_dev *ndev, u8 gate, u8 cmd, + const u8 *param, size_t param_len, + struct sk_buff **skb) +{ + struct nci_conn_info *conn_info; + struct nci_data data; + int r; + u8 pipe = ndev->hci_dev->gate2pipe[gate]; + + if (pipe == NCI_HCI_INVALID_PIPE) + return -EADDRNOTAVAIL; + + conn_info = ndev->hci_dev->conn_info; + if (!conn_info) + return -EPROTO; + + data.conn_id = conn_info->conn_id; + data.pipe = pipe; + data.cmd = NCI_HCP_HEADER(NCI_HCI_HCP_COMMAND, cmd); + data.data = param; + data.data_len = param_len; + + r = nci_request(ndev, nci_hci_send_data_req, (unsigned long)&data, + msecs_to_jiffies(NCI_DATA_TIMEOUT)); + + if (r == NCI_STATUS_OK) + *skb = conn_info->rx_skb; + + return r; +} +EXPORT_SYMBOL(nci_hci_send_cmd); + +static void nci_hci_event_received(struct nci_dev *ndev, u8 pipe, + u8 event, struct sk_buff *skb) +{ + if (ndev->ops->hci_event_received) + ndev->ops->hci_event_received(ndev, pipe, event, skb); +} + +static void nci_hci_cmd_received(struct nci_dev *ndev, u8 pipe, + u8 cmd, struct sk_buff *skb) +{ + u8 gate = ndev->hci_dev->pipes[pipe].gate; + u8 status = NCI_HCI_ANY_OK | ~NCI_HCI_FRAGMENT; + u8 dest_gate, new_pipe; + struct nci_hci_create_pipe_resp *create_info; + struct nci_hci_delete_pipe_noti *delete_info; + struct nci_hci_all_pipe_cleared_noti *cleared_info; + + pr_debug("from gate %x pipe %x cmd %x\n", gate, pipe, cmd); + + switch (cmd) { + case NCI_HCI_ADM_NOTIFY_PIPE_CREATED: + if (skb->len != 5) { + status = 
NCI_HCI_ANY_E_NOK; + goto exit; + } + create_info = (struct nci_hci_create_pipe_resp *)skb->data; + dest_gate = create_info->dest_gate; + new_pipe = create_info->pipe; + + /* Save the new created pipe and bind with local gate, + * the description for skb->data[3] is destination gate id + * but since we received this cmd from host controller, we + * are the destination and it is our local gate + */ + ndev->hci_dev->gate2pipe[dest_gate] = new_pipe; + ndev->hci_dev->pipes[new_pipe].gate = dest_gate; + ndev->hci_dev->pipes[new_pipe].host = + create_info->src_host; + break; + case NCI_HCI_ANY_OPEN_PIPE: + /* If the pipe is not created report an error */ + if (gate == NCI_HCI_INVALID_GATE) { + status = NCI_HCI_ANY_E_NOK; + goto exit; + } + break; + case NCI_HCI_ADM_NOTIFY_PIPE_DELETED: + if (skb->len != 1) { + status = NCI_HCI_ANY_E_NOK; + goto exit; + } + delete_info = (struct nci_hci_delete_pipe_noti *)skb->data; + + ndev->hci_dev->pipes[delete_info->pipe].gate = + NCI_HCI_INVALID_GATE; + ndev->hci_dev->pipes[delete_info->pipe].host = + NCI_HCI_INVALID_HOST; + break; + case NCI_HCI_ADM_NOTIFY_ALL_PIPE_CLEARED: + if (skb->len != 1) { + status = NCI_HCI_ANY_E_NOK; + goto exit; + } + + cleared_info = + (struct nci_hci_all_pipe_cleared_noti *)skb->data; + nci_hci_reset_pipes_per_host(ndev, cleared_info->host); + break; + default: + pr_debug("Discarded unknown cmd %x to gate %x\n", cmd, gate); + break; + } + + if (ndev->ops->hci_cmd_received) + ndev->ops->hci_cmd_received(ndev, pipe, cmd, skb); + +exit: + nci_hci_send_data(ndev, pipe, status, NULL, 0); + + kfree_skb(skb); +} + +static void nci_hci_resp_received(struct nci_dev *ndev, u8 pipe, + u8 result, struct sk_buff *skb) +{ + struct nci_conn_info *conn_info; + u8 status = result; + + if (result != NCI_HCI_ANY_OK) + goto exit; + + conn_info = ndev->hci_dev->conn_info; + if (!conn_info) { + status = NCI_STATUS_REJECTED; + goto exit; + } + + conn_info->rx_skb = skb; + +exit: + nci_req_complete(ndev, status); +} + +/* Receive hcp message for pipe, with type and cmd. + * skb contains optional message data only. 
+ */ +static void nci_hci_hcp_message_rx(struct nci_dev *ndev, u8 pipe, + u8 type, u8 instruction, struct sk_buff *skb) +{ + switch (type) { + case NCI_HCI_HCP_RESPONSE: + nci_hci_resp_received(ndev, pipe, instruction, skb); + break; + case NCI_HCI_HCP_COMMAND: + nci_hci_cmd_received(ndev, pipe, instruction, skb); + break; + case NCI_HCI_HCP_EVENT: + nci_hci_event_received(ndev, pipe, instruction, skb); + break; + default: + pr_err("UNKNOWN MSG Type %d, instruction=%d\n", + type, instruction); + kfree_skb(skb); + break; + } + + nci_req_complete(ndev, 0); +} + +static void nci_hci_msg_rx_work(struct work_struct *work) +{ + struct nci_hci_dev *hdev = + container_of(work, struct nci_hci_dev, msg_rx_work); + struct sk_buff *skb; + struct nci_hcp_message *message; + u8 pipe, type, instruction; + + while ((skb = skb_dequeue(&hdev->msg_rx_queue)) != NULL) { + pipe = skb->data[0]; + skb_pull(skb, NCI_HCI_HCP_PACKET_HEADER_LEN); + message = (struct nci_hcp_message *)skb->data; + type = NCI_HCP_MSG_GET_TYPE(message->header); + instruction = NCI_HCP_MSG_GET_CMD(message->header); + skb_pull(skb, NCI_HCI_HCP_MESSAGE_HEADER_LEN); + + nci_hci_hcp_message_rx(hdev->ndev, pipe, + type, instruction, skb); + } +} + +void nci_hci_data_received_cb(void *context, + struct sk_buff *skb, int err) +{ + struct nci_dev *ndev = (struct nci_dev *)context; + struct nci_hcp_packet *packet; + u8 pipe, type, instruction; + struct sk_buff *hcp_skb; + struct sk_buff *frag_skb; + int msg_len; + + pr_debug("\n"); + + if (err) { + nci_req_complete(ndev, err); + return; + } + + packet = (struct nci_hcp_packet *)skb->data; + if ((packet->header & ~NCI_HCI_FRAGMENT) == 0) { + skb_queue_tail(&ndev->hci_dev->rx_hcp_frags, skb); + return; + } + + /* it's the last fragment. Does it need re-aggregation? */ + if (skb_queue_len(&ndev->hci_dev->rx_hcp_frags)) { + pipe = packet->header & NCI_HCI_FRAGMENT; + skb_queue_tail(&ndev->hci_dev->rx_hcp_frags, skb); + + msg_len = 0; + skb_queue_walk(&ndev->hci_dev->rx_hcp_frags, frag_skb) { + msg_len += (frag_skb->len - + NCI_HCI_HCP_PACKET_HEADER_LEN); + } + + hcp_skb = nfc_alloc_recv_skb(NCI_HCI_HCP_PACKET_HEADER_LEN + + msg_len, GFP_KERNEL); + if (!hcp_skb) { + nci_req_complete(ndev, -ENOMEM); + return; + } + + *skb_put(hcp_skb, NCI_HCI_HCP_PACKET_HEADER_LEN) = pipe; + + skb_queue_walk(&ndev->hci_dev->rx_hcp_frags, frag_skb) { + msg_len = frag_skb->len - NCI_HCI_HCP_PACKET_HEADER_LEN; + memcpy(skb_put(hcp_skb, msg_len), frag_skb->data + + NCI_HCI_HCP_PACKET_HEADER_LEN, msg_len); + } + + skb_queue_purge(&ndev->hci_dev->rx_hcp_frags); + } else { + packet->header &= NCI_HCI_FRAGMENT; + hcp_skb = skb; + } + + /* if this is a response, dispatch immediately to + * unblock waiting cmd context. Otherwise, enqueue to dispatch + * in separate context where handler can also execute command. 
+ */ + packet = (struct nci_hcp_packet *)hcp_skb->data; + type = NCI_HCP_MSG_GET_TYPE(packet->message.header); + if (type == NCI_HCI_HCP_RESPONSE) { + pipe = packet->header; + instruction = NCI_HCP_MSG_GET_CMD(packet->message.header); + skb_pull(hcp_skb, NCI_HCI_HCP_PACKET_HEADER_LEN + + NCI_HCI_HCP_MESSAGE_HEADER_LEN); + nci_hci_hcp_message_rx(ndev, pipe, type, instruction, hcp_skb); + } else { + skb_queue_tail(&ndev->hci_dev->msg_rx_queue, hcp_skb); + schedule_work(&ndev->hci_dev->msg_rx_work); + } +} + +int nci_hci_open_pipe(struct nci_dev *ndev, u8 pipe) +{ + struct nci_data data; + struct nci_conn_info *conn_info; + + conn_info = ndev->hci_dev->conn_info; + if (!conn_info) + return -EPROTO; + + data.conn_id = conn_info->conn_id; + data.pipe = pipe; + data.cmd = NCI_HCP_HEADER(NCI_HCI_HCP_COMMAND, + NCI_HCI_ANY_OPEN_PIPE); + data.data = NULL; + data.data_len = 0; + + return nci_request(ndev, nci_hci_send_data_req, + (unsigned long)&data, + msecs_to_jiffies(NCI_DATA_TIMEOUT)); +} +EXPORT_SYMBOL(nci_hci_open_pipe); + +int nci_hci_set_param(struct nci_dev *ndev, u8 gate, u8 idx, + const u8 *param, size_t param_len) +{ + struct nci_conn_info *conn_info; + struct nci_data data; + int r; + u8 *tmp; + u8 pipe = ndev->hci_dev->gate2pipe[gate]; + + pr_debug("idx=%d to gate %d\n", idx, gate); + + if (pipe == NCI_HCI_INVALID_PIPE) + return -EADDRNOTAVAIL; + + conn_info = ndev->hci_dev->conn_info; + if (!conn_info) + return -EPROTO; + + tmp = kmalloc(1 + param_len, GFP_KERNEL); + if (!tmp) + return -ENOMEM; + + *tmp = idx; + memcpy(tmp + 1, param, param_len); + + data.conn_id = conn_info->conn_id; + data.pipe = pipe; + data.cmd = NCI_HCP_HEADER(NCI_HCI_HCP_COMMAND, + NCI_HCI_ANY_SET_PARAMETER); + data.data = tmp; + data.data_len = param_len + 1; + + r = nci_request(ndev, nci_hci_send_data_req, + (unsigned long)&data, + msecs_to_jiffies(NCI_DATA_TIMEOUT)); + + kfree(tmp); + return r; +} +EXPORT_SYMBOL(nci_hci_set_param); + +int nci_hci_get_param(struct nci_dev *ndev, u8 gate, u8 idx, + struct sk_buff **skb) +{ + struct nci_conn_info *conn_info; + struct nci_data data; + int r; + u8 pipe = ndev->hci_dev->gate2pipe[gate]; + + pr_debug("idx=%d to gate %d\n", idx, gate); + + if (pipe == NCI_HCI_INVALID_PIPE) + return -EADDRNOTAVAIL; + + conn_info = ndev->hci_dev->conn_info; + if (!conn_info) + return -EPROTO; + + data.conn_id = conn_info->conn_id; + data.pipe = pipe; + data.cmd = NCI_HCP_HEADER(NCI_HCI_HCP_COMMAND, + NCI_HCI_ANY_GET_PARAMETER); + data.data = &idx; + data.data_len = 1; + + r = nci_request(ndev, nci_hci_send_data_req, (unsigned long)&data, + msecs_to_jiffies(NCI_DATA_TIMEOUT)); + + if (r == NCI_STATUS_OK) + *skb = conn_info->rx_skb; + + return r; +} +EXPORT_SYMBOL(nci_hci_get_param); + +int nci_hci_connect_gate(struct nci_dev *ndev, + u8 dest_host, u8 dest_gate, u8 pipe) +{ + int r; + + if (pipe == NCI_HCI_DO_NOT_OPEN_PIPE) + return 0; + + if (ndev->hci_dev->gate2pipe[dest_gate] != NCI_HCI_INVALID_PIPE) + return -EADDRINUSE; + + if (pipe != NCI_HCI_INVALID_PIPE) + goto open_pipe; + + switch (dest_gate) { + case NCI_HCI_LINK_MGMT_GATE: + pipe = NCI_HCI_LINK_MGMT_PIPE; + break; + case NCI_HCI_ADMIN_GATE: + pipe = NCI_HCI_ADMIN_PIPE; + break; + } + +open_pipe: + r = nci_hci_open_pipe(ndev, pipe); + if (r < 0) + return r; + + ndev->hci_dev->pipes[pipe].gate = dest_gate; + ndev->hci_dev->pipes[pipe].host = dest_host; + ndev->hci_dev->gate2pipe[dest_gate] = pipe; + + return 0; +} +EXPORT_SYMBOL(nci_hci_connect_gate); + +static int nci_hci_dev_connect_gates(struct nci_dev *ndev, + u8 
gate_count, + struct nci_hci_gate *gates) +{ + int r; + + while (gate_count--) { + r = nci_hci_connect_gate(ndev, gates->dest_host, + gates->gate, gates->pipe); + if (r < 0) + return r; + gates++; + } + + return 0; +} + +int nci_hci_dev_session_init(struct nci_dev *ndev) +{ + struct nci_conn_info *conn_info; + struct sk_buff *skb; + int r; + + ndev->hci_dev->count_pipes = 0; + ndev->hci_dev->expected_pipes = 0; + + conn_info = ndev->hci_dev->conn_info; + if (!conn_info) + return -EPROTO; + + conn_info->data_exchange_cb = nci_hci_data_received_cb; + conn_info->data_exchange_cb_context = ndev; + + nci_hci_reset_pipes(ndev->hci_dev); + + if (ndev->hci_dev->init_data.gates[0].gate != NCI_HCI_ADMIN_GATE) + return -EPROTO; + + r = nci_hci_connect_gate(ndev, + ndev->hci_dev->init_data.gates[0].dest_host, + ndev->hci_dev->init_data.gates[0].gate, + ndev->hci_dev->init_data.gates[0].pipe); + if (r < 0) + goto exit; + + r = nci_hci_get_param(ndev, NCI_HCI_ADMIN_GATE, + NCI_HCI_ADMIN_PARAM_SESSION_IDENTITY, &skb); + if (r < 0) + goto exit; + + if (skb->len && + skb->len == strlen(ndev->hci_dev->init_data.session_id) && + memcmp(ndev->hci_dev->init_data.session_id, + skb->data, skb->len) == 0 && + ndev->ops->hci_load_session) { + /* Restore gate<->pipe table from some proprietary location. */ + r = ndev->ops->hci_load_session(ndev); + if (r < 0) + goto exit; + } else { + r = nci_hci_dev_connect_gates(ndev, + ndev->hci_dev->init_data.gate_count, + ndev->hci_dev->init_data.gates); + if (r < 0) + goto exit; + + r = nci_hci_set_param(ndev, NCI_HCI_ADMIN_GATE, + NCI_HCI_ADMIN_PARAM_SESSION_IDENTITY, + ndev->hci_dev->init_data.session_id, + strlen(ndev->hci_dev->init_data.session_id)); + } + if (r == 0) + goto exit; + +exit: + kfree_skb(skb); + + return r; +} +EXPORT_SYMBOL(nci_hci_dev_session_init); + +struct nci_hci_dev *nci_hci_allocate(struct nci_dev *ndev) +{ + struct nci_hci_dev *hdev; + + hdev = kzalloc(sizeof(*hdev), GFP_KERNEL); + if (!hdev) + return NULL; + + skb_queue_head_init(&hdev->rx_hcp_frags); + INIT_WORK(&hdev->msg_rx_work, nci_hci_msg_rx_work); + skb_queue_head_init(&hdev->msg_rx_queue); + hdev->ndev = ndev; + + return hdev; +} diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c index 22e453cb787d..3218071072ac 100644 --- a/net/nfc/nci/ntf.c +++ b/net/nfc/nci/ntf.c @@ -43,6 +43,7 @@ static void nci_core_conn_credits_ntf_packet(struct nci_dev *ndev, struct sk_buff *skb) { struct nci_core_conn_credit_ntf *ntf = (void *) skb->data; + struct nci_conn_info *conn_info; int i; pr_debug("num_entries %d\n", ntf->num_entries); @@ -59,11 +60,13 @@ static void nci_core_conn_credits_ntf_packet(struct nci_dev *ndev, i, ntf->conn_entries[i].conn_id, ntf->conn_entries[i].credits); - if (ntf->conn_entries[i].conn_id == NCI_STATIC_RF_CONN_ID) { - /* found static rf connection */ - atomic_add(ntf->conn_entries[i].credits, - &ndev->credits_cnt); - } + conn_info = nci_get_conn_info_by_conn_id(ndev, + ntf->conn_entries[i].conn_id); + if (!conn_info) + return; + + atomic_add(ntf->conn_entries[i].credits, + &conn_info->credits_cnt); } /* trigger the next tx */ @@ -96,7 +99,7 @@ static void nci_core_conn_intf_error_ntf_packet(struct nci_dev *ndev, /* complete the data exchange transaction, if exists */ if (test_bit(NCI_DATA_EXCHANGE, &ndev->flags)) - nci_data_exchange_complete(ndev, NULL, -EIO); + nci_data_exchange_complete(ndev, NULL, ntf->conn_id, -EIO); } static __u8 *nci_extract_rf_params_nfca_passive_poll(struct nci_dev *ndev, @@ -513,6 +516,7 @@ static int nci_store_general_bytes_nfc_dep(struct nci_dev 
*ndev, static void nci_rf_intf_activated_ntf_packet(struct nci_dev *ndev, struct sk_buff *skb) { + struct nci_conn_info *conn_info; struct nci_rf_intf_activated_ntf ntf; __u8 *data = skb->data; int err = NCI_STATUS_OK; @@ -537,6 +541,13 @@ static void nci_rf_intf_activated_ntf_packet(struct nci_dev *ndev, pr_debug("rf_tech_specific_params_len %d\n", ntf.rf_tech_specific_params_len); + /* If this contains a value of 0x00 (NFCEE Direct RF + * Interface) then all following parameters SHALL contain a + * value of 0 and SHALL be ignored. + */ + if (ntf.rf_interface == NCI_RF_INTERFACE_NFCEE_DIRECT) + goto listen; + if (ntf.rf_tech_specific_params_len > 0) { switch (ntf.activation_rf_tech_and_mode) { case NCI_NFC_A_PASSIVE_POLL_MODE: @@ -614,11 +625,16 @@ static void nci_rf_intf_activated_ntf_packet(struct nci_dev *ndev, exit: if (err == NCI_STATUS_OK) { - ndev->max_data_pkt_payload_size = ntf.max_data_pkt_payload_size; - ndev->initial_num_credits = ntf.initial_num_credits; + conn_info = ndev->rf_conn_info; + if (!conn_info) + return; + + conn_info->max_pkt_payload_len = ntf.max_data_pkt_payload_size; + conn_info->initial_num_credits = ntf.initial_num_credits; /* set the available credits to initial value */ - atomic_set(&ndev->credits_cnt, ndev->initial_num_credits); + atomic_set(&conn_info->credits_cnt, + conn_info->initial_num_credits); /* store general bytes to be reported later in dep_link_up */ if (ntf.rf_interface == NCI_RF_INTERFACE_NFC_DEP) { @@ -643,6 +659,7 @@ exit: nci_req_complete(ndev, err); } } else { +listen: /* Listen mode */ atomic_set(&ndev->state, NCI_LISTEN_ACTIVE); if (err == NCI_STATUS_OK && @@ -661,10 +678,15 @@ exit: static void nci_rf_deactivate_ntf_packet(struct nci_dev *ndev, struct sk_buff *skb) { + struct nci_conn_info *conn_info; struct nci_rf_deactivate_ntf *ntf = (void *) skb->data; pr_debug("entry, type 0x%x, reason 0x%x\n", ntf->type, ntf->reason); + conn_info = ndev->rf_conn_info; + if (!conn_info) + return; + /* drop tx data queue */ skb_queue_purge(&ndev->tx_q); @@ -676,7 +698,8 @@ static void nci_rf_deactivate_ntf_packet(struct nci_dev *ndev, /* complete the data exchange transaction, if exists */ if (test_bit(NCI_DATA_EXCHANGE, &ndev->flags)) - nci_data_exchange_complete(ndev, NULL, -EIO); + nci_data_exchange_complete(ndev, NULL, NCI_STATIC_RF_CONN_ID, + -EIO); switch (ntf->type) { case NCI_DEACTIVATE_TYPE_IDLE_MODE: @@ -696,6 +719,32 @@ static void nci_rf_deactivate_ntf_packet(struct nci_dev *ndev, nci_req_complete(ndev, NCI_STATUS_OK); } +static void nci_nfcee_discover_ntf_packet(struct nci_dev *ndev, + struct sk_buff *skb) +{ + u8 status = NCI_STATUS_OK; + struct nci_nfcee_discover_ntf *nfcee_ntf = + (struct nci_nfcee_discover_ntf *)skb->data; + + pr_debug("\n"); + + /* NFCForum NCI 9.2.1 HCI Network Specific Handling + * If the NFCC supports the HCI Network, it SHALL return one, + * and only one, NFCEE_DISCOVER_NTF with a Protocol type of + * “HCI Access”, even if the HCI Network contains multiple NFCEEs. 
+ */ + ndev->hci_dev->nfcee_id = nfcee_ntf->nfcee_id; + ndev->cur_id = nfcee_ntf->nfcee_id; + + nci_req_complete(ndev, status); +} + +static void nci_nfcee_action_ntf_packet(struct nci_dev *ndev, + struct sk_buff *skb) +{ + pr_debug("\n"); +} + void nci_ntf_packet(struct nci_dev *ndev, struct sk_buff *skb) { __u16 ntf_opcode = nci_opcode(skb->data); @@ -734,6 +783,14 @@ void nci_ntf_packet(struct nci_dev *ndev, struct sk_buff *skb) nci_rf_deactivate_ntf_packet(ndev, skb); break; + case NCI_OP_NFCEE_DISCOVER_NTF: + nci_nfcee_discover_ntf_packet(ndev, skb); + break; + + case NCI_OP_RF_NFCEE_ACTION_NTF: + nci_nfcee_action_ntf_packet(ndev, skb); + break; + default: pr_err("unknown ntf opcode 0x%x\n", ntf_opcode); break; diff --git a/net/nfc/nci/rsp.c b/net/nfc/nci/rsp.c index 041de51ccdbe..02486bc2ceea 100644 --- a/net/nfc/nci/rsp.c +++ b/net/nfc/nci/rsp.c @@ -140,13 +140,31 @@ static void nci_rf_disc_map_rsp_packet(struct nci_dev *ndev, static void nci_rf_disc_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb) { + struct nci_conn_info *conn_info; __u8 status = skb->data[0]; pr_debug("status 0x%x\n", status); - if (status == NCI_STATUS_OK) + if (status == NCI_STATUS_OK) { atomic_set(&ndev->state, NCI_DISCOVERY); + conn_info = ndev->rf_conn_info; + if (!conn_info) { + conn_info = devm_kzalloc(&ndev->nfc_dev->dev, + sizeof(struct nci_conn_info), + GFP_KERNEL); + if (!conn_info) { + status = NCI_STATUS_REJECTED; + goto exit; + } + conn_info->conn_id = NCI_STATIC_RF_CONN_ID; + INIT_LIST_HEAD(&conn_info->list); + list_add(&conn_info->list, &ndev->conn_info_list); + ndev->rf_conn_info = conn_info; + } + } + +exit: nci_req_complete(ndev, status); } @@ -178,6 +196,90 @@ static void nci_rf_deactivate_rsp_packet(struct nci_dev *ndev, } } +static void nci_nfcee_discover_rsp_packet(struct nci_dev *ndev, + struct sk_buff *skb) +{ + struct nci_nfcee_discover_rsp *discover_rsp; + + if (skb->len != 2) { + nci_req_complete(ndev, NCI_STATUS_NFCEE_PROTOCOL_ERROR); + return; + } + + discover_rsp = (struct nci_nfcee_discover_rsp *)skb->data; + + if (discover_rsp->status != NCI_STATUS_OK || + discover_rsp->num_nfcee == 0) + nci_req_complete(ndev, discover_rsp->status); +} + +static void nci_nfcee_mode_set_rsp_packet(struct nci_dev *ndev, + struct sk_buff *skb) +{ + __u8 status = skb->data[0]; + + pr_debug("status 0x%x\n", status); + nci_req_complete(ndev, status); +} + +static void nci_core_conn_create_rsp_packet(struct nci_dev *ndev, + struct sk_buff *skb) +{ + __u8 status = skb->data[0]; + struct nci_conn_info *conn_info; + struct nci_core_conn_create_rsp *rsp; + + pr_debug("status 0x%x\n", status); + + if (status == NCI_STATUS_OK) { + rsp = (struct nci_core_conn_create_rsp *)skb->data; + + conn_info = devm_kzalloc(&ndev->nfc_dev->dev, + sizeof(*conn_info), GFP_KERNEL); + if (!conn_info) { + status = NCI_STATUS_REJECTED; + goto exit; + } + + conn_info->id = ndev->cur_id; + conn_info->conn_id = rsp->conn_id; + + /* Note: data_exchange_cb and data_exchange_cb_context need to + * be specify out of nci_core_conn_create_rsp_packet + */ + + INIT_LIST_HEAD(&conn_info->list); + list_add(&conn_info->list, &ndev->conn_info_list); + + if (ndev->cur_id == ndev->hci_dev->nfcee_id) + ndev->hci_dev->conn_info = conn_info; + + conn_info->conn_id = rsp->conn_id; + conn_info->max_pkt_payload_len = rsp->max_ctrl_pkt_payload_len; + atomic_set(&conn_info->credits_cnt, rsp->credits_cnt); + } + +exit: + nci_req_complete(ndev, status); +} + +static void nci_core_conn_close_rsp_packet(struct nci_dev *ndev, + struct sk_buff *skb) +{ + 
struct nci_conn_info *conn_info; + __u8 status = skb->data[0]; + + pr_debug("status 0x%x\n", status); + if (status == NCI_STATUS_OK) { + conn_info = nci_get_conn_info_by_conn_id(ndev, ndev->cur_id); + if (conn_info) { + list_del(&conn_info->list); + devm_kfree(&ndev->nfc_dev->dev, conn_info); + } + } + nci_req_complete(ndev, status); +} + void nci_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb) { __u16 rsp_opcode = nci_opcode(skb->data); @@ -207,6 +309,14 @@ void nci_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb) nci_core_set_config_rsp_packet(ndev, skb); break; + case NCI_OP_CORE_CONN_CREATE_RSP: + nci_core_conn_create_rsp_packet(ndev, skb); + break; + + case NCI_OP_CORE_CONN_CLOSE_RSP: + nci_core_conn_close_rsp_packet(ndev, skb); + break; + case NCI_OP_RF_DISCOVER_MAP_RSP: nci_rf_disc_map_rsp_packet(ndev, skb); break; @@ -223,6 +333,14 @@ void nci_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb) nci_rf_deactivate_rsp_packet(ndev, skb); break; + case NCI_OP_NFCEE_DISCOVER_RSP: + nci_nfcee_discover_rsp_packet(ndev, skb); + break; + + case NCI_OP_NFCEE_MODE_SET_RSP: + nci_nfcee_mode_set_rsp_packet(ndev, skb); + break; + default: pr_err("unknown rsp opcode 0x%x\n", rsp_opcode); break; diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 44989fc8cddf..14a2d11581da 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -102,7 +102,8 @@ static int nfc_genl_send_target(struct sk_buff *msg, struct nfc_target *target, goto nla_put_failure; } - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: genlmsg_cancel(msg, hdr); @@ -496,6 +497,53 @@ free_msg: return -EMSGSIZE; } +int nfc_genl_se_transaction(struct nfc_dev *dev, u8 se_idx, + struct nfc_evt_transaction *evt_transaction) +{ + struct nfc_se *se; + struct sk_buff *msg; + void *hdr; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0, + NFC_EVENT_SE_TRANSACTION); + if (!hdr) + goto free_msg; + + se = nfc_find_se(dev, se_idx); + if (!se) + goto free_msg; + + if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx) || + nla_put_u32(msg, NFC_ATTR_SE_INDEX, se_idx) || + nla_put_u8(msg, NFC_ATTR_SE_TYPE, se->type) || + nla_put(msg, NFC_ATTR_SE_AID, evt_transaction->aid_len, + evt_transaction->aid) || + nla_put(msg, NFC_ATTR_SE_PARAMS, evt_transaction->params_len, + evt_transaction->params)) + goto nla_put_failure; + + /* evt_transaction is no more used */ + devm_kfree(&dev->dev, evt_transaction); + + genlmsg_end(msg, hdr); + + genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_KERNEL); + + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); +free_msg: + /* evt_transaction is no more used */ + devm_kfree(&dev->dev, evt_transaction); + nlmsg_free(msg); + return -EMSGSIZE; +} + static int nfc_genl_send_device(struct sk_buff *msg, struct nfc_dev *dev, u32 portid, u32 seq, struct netlink_callback *cb, @@ -518,7 +566,8 @@ static int nfc_genl_send_device(struct sk_buff *msg, struct nfc_dev *dev, nla_put_u8(msg, NFC_ATTR_RF_MODE, dev->rf_mode)) goto nla_put_failure; - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: genlmsg_cancel(msg, hdr); @@ -908,7 +957,8 @@ static int nfc_genl_send_params(struct sk_buff *msg, nla_put_u16(msg, NFC_ATTR_LLC_PARAM_MIUX, be16_to_cpu(local->miux))) goto nla_put_failure; - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: @@ -1247,8 +1297,7 @@ static int nfc_genl_send_se(struct sk_buff *msg, struct nfc_dev *dev, 
nla_put_u8(msg, NFC_ATTR_SE_TYPE, se->type)) goto nla_put_failure; - if (genlmsg_end(msg, hdr) < 0) - goto nla_put_failure; + genlmsg_end(msg, hdr); } return 0; diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h index 88d60064890e..a8ce80b47720 100644 --- a/net/nfc/nfc.h +++ b/net/nfc/nfc.h @@ -100,6 +100,8 @@ int nfc_genl_llc_send_sdres(struct nfc_dev *dev, struct hlist_head *sdres_list); int nfc_genl_se_added(struct nfc_dev *dev, u32 se_idx, u16 type); int nfc_genl_se_removed(struct nfc_dev *dev, u32 se_idx); +int nfc_genl_se_transaction(struct nfc_dev *dev, u8 se_idx, + struct nfc_evt_transaction *evt_transaction); struct nfc_dev *nfc_get_device(unsigned int idx); diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 764fdc39c63b..b491c1c296fe 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -147,7 +147,8 @@ static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key, hdr = eth_hdr(skb); hdr->h_proto = mpls->mpls_ethertype; - skb_set_inner_protocol(skb, skb->protocol); + if (!skb->inner_protocol) + skb_set_inner_protocol(skb, skb->protocol); skb->protocol = mpls->mpls_ethertype; invalidate_flow_key(key); @@ -184,10 +185,15 @@ static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key, return 0; } -static int set_mpls(struct sk_buff *skb, struct sw_flow_key *key, - const __be32 *mpls_lse) +/* 'KEY' must not have any bits set outside of the 'MASK' */ +#define MASKED(OLD, KEY, MASK) ((KEY) | ((OLD) & ~(MASK))) +#define SET_MASKED(OLD, KEY, MASK) ((OLD) = MASKED(OLD, KEY, MASK)) + +static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key, + const __be32 *mpls_lse, const __be32 *mask) { __be32 *stack; + __be32 lse; int err; err = skb_ensure_writable(skb, skb->mac_len + MPLS_HLEN); @@ -195,14 +201,16 @@ static int set_mpls(struct sk_buff *skb, struct sw_flow_key *key, return err; stack = (__be32 *)skb_mpls_header(skb); + lse = MASKED(*stack, *mpls_lse, *mask); if (skb->ip_summed == CHECKSUM_COMPLETE) { - __be32 diff[] = { ~(*stack), *mpls_lse }; + __be32 diff[] = { ~(*stack), lse }; + skb->csum = ~csum_partial((char *)diff, sizeof(diff), ~skb->csum); } - *stack = *mpls_lse; - key->mpls.top_lse = *mpls_lse; + *stack = lse; + flow_key->mpls.top_lse = lse; return 0; } @@ -211,7 +219,7 @@ static int pop_vlan(struct sk_buff *skb, struct sw_flow_key *key) int err; err = skb_vlan_pop(skb); - if (vlan_tx_tag_present(skb)) + if (skb_vlan_tag_present(skb)) invalidate_flow_key(key); else key->eth.tci = 0; @@ -221,7 +229,7 @@ static int pop_vlan(struct sk_buff *skb, struct sw_flow_key *key) static int push_vlan(struct sk_buff *skb, struct sw_flow_key *key, const struct ovs_action_push_vlan *vlan) { - if (vlan_tx_tag_present(skb)) + if (skb_vlan_tag_present(skb)) invalidate_flow_key(key); else key->eth.tci = vlan->vlan_tci; @@ -229,23 +237,39 @@ static int push_vlan(struct sk_buff *skb, struct sw_flow_key *key, ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT); } -static int set_eth_addr(struct sk_buff *skb, struct sw_flow_key *key, - const struct ovs_key_ethernet *eth_key) +/* 'src' is already properly masked. 
*/ +static void ether_addr_copy_masked(u8 *dst_, const u8 *src_, const u8 *mask_) +{ + u16 *dst = (u16 *)dst_; + const u16 *src = (const u16 *)src_; + const u16 *mask = (const u16 *)mask_; + + SET_MASKED(dst[0], src[0], mask[0]); + SET_MASKED(dst[1], src[1], mask[1]); + SET_MASKED(dst[2], src[2], mask[2]); +} + +static int set_eth_addr(struct sk_buff *skb, struct sw_flow_key *flow_key, + const struct ovs_key_ethernet *key, + const struct ovs_key_ethernet *mask) { int err; + err = skb_ensure_writable(skb, ETH_HLEN); if (unlikely(err)) return err; skb_postpull_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2); - ether_addr_copy(eth_hdr(skb)->h_source, eth_key->eth_src); - ether_addr_copy(eth_hdr(skb)->h_dest, eth_key->eth_dst); + ether_addr_copy_masked(eth_hdr(skb)->h_source, key->eth_src, + mask->eth_src); + ether_addr_copy_masked(eth_hdr(skb)->h_dest, key->eth_dst, + mask->eth_dst); ovs_skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2); - ether_addr_copy(key->eth.src, eth_key->eth_src); - ether_addr_copy(key->eth.dst, eth_key->eth_dst); + ether_addr_copy(flow_key->eth.src, eth_hdr(skb)->h_source); + ether_addr_copy(flow_key->eth.dst, eth_hdr(skb)->h_dest); return 0; } @@ -303,6 +327,15 @@ static void update_ipv6_checksum(struct sk_buff *skb, u8 l4_proto, } } +static void mask_ipv6_addr(const __be32 old[4], const __be32 addr[4], + const __be32 mask[4], __be32 masked[4]) +{ + masked[0] = MASKED(old[0], addr[0], mask[0]); + masked[1] = MASKED(old[1], addr[1], mask[1]); + masked[2] = MASKED(old[2], addr[2], mask[2]); + masked[3] = MASKED(old[3], addr[3], mask[3]); +} + static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto, __be32 addr[4], const __be32 new_addr[4], bool recalculate_csum) @@ -314,29 +347,29 @@ static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto, memcpy(addr, new_addr, sizeof(__be32[4])); } -static void set_ipv6_tc(struct ipv6hdr *nh, u8 tc) +static void set_ipv6_fl(struct ipv6hdr *nh, u32 fl, u32 mask) { - nh->priority = tc >> 4; - nh->flow_lbl[0] = (nh->flow_lbl[0] & 0x0F) | ((tc & 0x0F) << 4); + /* Bits 21-24 are always unmasked, so this retains their values. 
*/ + SET_MASKED(nh->flow_lbl[0], (u8)(fl >> 16), (u8)(mask >> 16)); + SET_MASKED(nh->flow_lbl[1], (u8)(fl >> 8), (u8)(mask >> 8)); + SET_MASKED(nh->flow_lbl[2], (u8)fl, (u8)mask); } -static void set_ipv6_fl(struct ipv6hdr *nh, u32 fl) +static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl, + u8 mask) { - nh->flow_lbl[0] = (nh->flow_lbl[0] & 0xF0) | (fl & 0x000F0000) >> 16; - nh->flow_lbl[1] = (fl & 0x0000FF00) >> 8; - nh->flow_lbl[2] = fl & 0x000000FF; -} + new_ttl = MASKED(nh->ttl, new_ttl, mask); -static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl) -{ csum_replace2(&nh->check, htons(nh->ttl << 8), htons(new_ttl << 8)); nh->ttl = new_ttl; } -static int set_ipv4(struct sk_buff *skb, struct sw_flow_key *key, - const struct ovs_key_ipv4 *ipv4_key) +static int set_ipv4(struct sk_buff *skb, struct sw_flow_key *flow_key, + const struct ovs_key_ipv4 *key, + const struct ovs_key_ipv4 *mask) { struct iphdr *nh; + __be32 new_addr; int err; err = skb_ensure_writable(skb, skb_network_offset(skb) + @@ -346,36 +379,49 @@ static int set_ipv4(struct sk_buff *skb, struct sw_flow_key *key, nh = ip_hdr(skb); - if (ipv4_key->ipv4_src != nh->saddr) { - set_ip_addr(skb, nh, &nh->saddr, ipv4_key->ipv4_src); - key->ipv4.addr.src = ipv4_key->ipv4_src; - } + /* Setting an IP addresses is typically only a side effect of + * matching on them in the current userspace implementation, so it + * makes sense to check if the value actually changed. + */ + if (mask->ipv4_src) { + new_addr = MASKED(nh->saddr, key->ipv4_src, mask->ipv4_src); - if (ipv4_key->ipv4_dst != nh->daddr) { - set_ip_addr(skb, nh, &nh->daddr, ipv4_key->ipv4_dst); - key->ipv4.addr.dst = ipv4_key->ipv4_dst; + if (unlikely(new_addr != nh->saddr)) { + set_ip_addr(skb, nh, &nh->saddr, new_addr); + flow_key->ipv4.addr.src = new_addr; + } } + if (mask->ipv4_dst) { + new_addr = MASKED(nh->daddr, key->ipv4_dst, mask->ipv4_dst); - if (ipv4_key->ipv4_tos != nh->tos) { - ipv4_change_dsfield(nh, 0, ipv4_key->ipv4_tos); - key->ip.tos = nh->tos; + if (unlikely(new_addr != nh->daddr)) { + set_ip_addr(skb, nh, &nh->daddr, new_addr); + flow_key->ipv4.addr.dst = new_addr; + } } - - if (ipv4_key->ipv4_ttl != nh->ttl) { - set_ip_ttl(skb, nh, ipv4_key->ipv4_ttl); - key->ip.ttl = ipv4_key->ipv4_ttl; + if (mask->ipv4_tos) { + ipv4_change_dsfield(nh, ~mask->ipv4_tos, key->ipv4_tos); + flow_key->ip.tos = nh->tos; + } + if (mask->ipv4_ttl) { + set_ip_ttl(skb, nh, key->ipv4_ttl, mask->ipv4_ttl); + flow_key->ip.ttl = nh->ttl; } return 0; } -static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *key, - const struct ovs_key_ipv6 *ipv6_key) +static bool is_ipv6_mask_nonzero(const __be32 addr[4]) +{ + return !!(addr[0] | addr[1] | addr[2] | addr[3]); +} + +static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key, + const struct ovs_key_ipv6 *key, + const struct ovs_key_ipv6 *mask) { struct ipv6hdr *nh; int err; - __be32 *saddr; - __be32 *daddr; err = skb_ensure_writable(skb, skb_network_offset(skb) + sizeof(struct ipv6hdr)); @@ -383,71 +429,77 @@ static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *key, return err; nh = ipv6_hdr(skb); - saddr = (__be32 *)&nh->saddr; - daddr = (__be32 *)&nh->daddr; - - if (memcmp(ipv6_key->ipv6_src, saddr, sizeof(ipv6_key->ipv6_src))) { - set_ipv6_addr(skb, ipv6_key->ipv6_proto, saddr, - ipv6_key->ipv6_src, true); - memcpy(&key->ipv6.addr.src, ipv6_key->ipv6_src, - sizeof(ipv6_key->ipv6_src)); - } - if (memcmp(ipv6_key->ipv6_dst, daddr, sizeof(ipv6_key->ipv6_dst))) { + /* Setting an 
IP addresses is typically only a side effect of + * matching on them in the current userspace implementation, so it + * makes sense to check if the value actually changed. + */ + if (is_ipv6_mask_nonzero(mask->ipv6_src)) { + __be32 *saddr = (__be32 *)&nh->saddr; + __be32 masked[4]; + + mask_ipv6_addr(saddr, key->ipv6_src, mask->ipv6_src, masked); + + if (unlikely(memcmp(saddr, masked, sizeof(masked)))) { + set_ipv6_addr(skb, key->ipv6_proto, saddr, masked, + true); + memcpy(&flow_key->ipv6.addr.src, masked, + sizeof(flow_key->ipv6.addr.src)); + } + } + if (is_ipv6_mask_nonzero(mask->ipv6_dst)) { unsigned int offset = 0; int flags = IP6_FH_F_SKIP_RH; bool recalc_csum = true; - - if (ipv6_ext_hdr(nh->nexthdr)) - recalc_csum = ipv6_find_hdr(skb, &offset, - NEXTHDR_ROUTING, NULL, - &flags) != NEXTHDR_ROUTING; - - set_ipv6_addr(skb, ipv6_key->ipv6_proto, daddr, - ipv6_key->ipv6_dst, recalc_csum); - memcpy(&key->ipv6.addr.dst, ipv6_key->ipv6_dst, - sizeof(ipv6_key->ipv6_dst)); + __be32 *daddr = (__be32 *)&nh->daddr; + __be32 masked[4]; + + mask_ipv6_addr(daddr, key->ipv6_dst, mask->ipv6_dst, masked); + + if (unlikely(memcmp(daddr, masked, sizeof(masked)))) { + if (ipv6_ext_hdr(nh->nexthdr)) + recalc_csum = (ipv6_find_hdr(skb, &offset, + NEXTHDR_ROUTING, + NULL, &flags) + != NEXTHDR_ROUTING); + + set_ipv6_addr(skb, key->ipv6_proto, daddr, masked, + recalc_csum); + memcpy(&flow_key->ipv6.addr.dst, masked, + sizeof(flow_key->ipv6.addr.dst)); + } + } + if (mask->ipv6_tclass) { + ipv6_change_dsfield(nh, ~mask->ipv6_tclass, key->ipv6_tclass); + flow_key->ip.tos = ipv6_get_dsfield(nh); + } + if (mask->ipv6_label) { + set_ipv6_fl(nh, ntohl(key->ipv6_label), + ntohl(mask->ipv6_label)); + flow_key->ipv6.label = + *(__be32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL); + } + if (mask->ipv6_hlimit) { + SET_MASKED(nh->hop_limit, key->ipv6_hlimit, mask->ipv6_hlimit); + flow_key->ip.ttl = nh->hop_limit; } - - set_ipv6_tc(nh, ipv6_key->ipv6_tclass); - key->ip.tos = ipv6_get_dsfield(nh); - - set_ipv6_fl(nh, ntohl(ipv6_key->ipv6_label)); - key->ipv6.label = *(__be32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL); - - nh->hop_limit = ipv6_key->ipv6_hlimit; - key->ip.ttl = ipv6_key->ipv6_hlimit; return 0; } /* Must follow skb_ensure_writable() since that can move the skb data. */ static void set_tp_port(struct sk_buff *skb, __be16 *port, - __be16 new_port, __sum16 *check) + __be16 new_port, __sum16 *check) { inet_proto_csum_replace2(check, skb, *port, new_port, 0); *port = new_port; - skb_clear_hash(skb); -} - -static void set_udp_port(struct sk_buff *skb, __be16 *port, __be16 new_port) -{ - struct udphdr *uh = udp_hdr(skb); - - if (uh->check && skb->ip_summed != CHECKSUM_PARTIAL) { - set_tp_port(skb, port, new_port, &uh->check); - - if (!uh->check) - uh->check = CSUM_MANGLED_0; - } else { - *port = new_port; - skb_clear_hash(skb); - } } -static int set_udp(struct sk_buff *skb, struct sw_flow_key *key, - const struct ovs_key_udp *udp_port_key) +static int set_udp(struct sk_buff *skb, struct sw_flow_key *flow_key, + const struct ovs_key_udp *key, + const struct ovs_key_udp *mask) { struct udphdr *uh; + __be16 src, dst; int err; err = skb_ensure_writable(skb, skb_transport_offset(skb) + @@ -456,23 +508,40 @@ static int set_udp(struct sk_buff *skb, struct sw_flow_key *key, return err; uh = udp_hdr(skb); - if (udp_port_key->udp_src != uh->source) { - set_udp_port(skb, &uh->source, udp_port_key->udp_src); - key->tp.src = udp_port_key->udp_src; - } + /* Either of the masks is non-zero, so do not bother checking them. 
*/ + src = MASKED(uh->source, key->udp_src, mask->udp_src); + dst = MASKED(uh->dest, key->udp_dst, mask->udp_dst); - if (udp_port_key->udp_dst != uh->dest) { - set_udp_port(skb, &uh->dest, udp_port_key->udp_dst); - key->tp.dst = udp_port_key->udp_dst; + if (uh->check && skb->ip_summed != CHECKSUM_PARTIAL) { + if (likely(src != uh->source)) { + set_tp_port(skb, &uh->source, src, &uh->check); + flow_key->tp.src = src; + } + if (likely(dst != uh->dest)) { + set_tp_port(skb, &uh->dest, dst, &uh->check); + flow_key->tp.dst = dst; + } + + if (unlikely(!uh->check)) + uh->check = CSUM_MANGLED_0; + } else { + uh->source = src; + uh->dest = dst; + flow_key->tp.src = src; + flow_key->tp.dst = dst; } + skb_clear_hash(skb); + return 0; } -static int set_tcp(struct sk_buff *skb, struct sw_flow_key *key, - const struct ovs_key_tcp *tcp_port_key) +static int set_tcp(struct sk_buff *skb, struct sw_flow_key *flow_key, + const struct ovs_key_tcp *key, + const struct ovs_key_tcp *mask) { struct tcphdr *th; + __be16 src, dst; int err; err = skb_ensure_writable(skb, skb_transport_offset(skb) + @@ -481,50 +550,49 @@ static int set_tcp(struct sk_buff *skb, struct sw_flow_key *key, return err; th = tcp_hdr(skb); - if (tcp_port_key->tcp_src != th->source) { - set_tp_port(skb, &th->source, tcp_port_key->tcp_src, &th->check); - key->tp.src = tcp_port_key->tcp_src; + src = MASKED(th->source, key->tcp_src, mask->tcp_src); + if (likely(src != th->source)) { + set_tp_port(skb, &th->source, src, &th->check); + flow_key->tp.src = src; } - - if (tcp_port_key->tcp_dst != th->dest) { - set_tp_port(skb, &th->dest, tcp_port_key->tcp_dst, &th->check); - key->tp.dst = tcp_port_key->tcp_dst; + dst = MASKED(th->dest, key->tcp_dst, mask->tcp_dst); + if (likely(dst != th->dest)) { + set_tp_port(skb, &th->dest, dst, &th->check); + flow_key->tp.dst = dst; } + skb_clear_hash(skb); return 0; } -static int set_sctp(struct sk_buff *skb, struct sw_flow_key *key, - const struct ovs_key_sctp *sctp_port_key) +static int set_sctp(struct sk_buff *skb, struct sw_flow_key *flow_key, + const struct ovs_key_sctp *key, + const struct ovs_key_sctp *mask) { + unsigned int sctphoff = skb_transport_offset(skb); struct sctphdr *sh; + __le32 old_correct_csum, new_csum, old_csum; int err; - unsigned int sctphoff = skb_transport_offset(skb); err = skb_ensure_writable(skb, sctphoff + sizeof(struct sctphdr)); if (unlikely(err)) return err; sh = sctp_hdr(skb); - if (sctp_port_key->sctp_src != sh->source || - sctp_port_key->sctp_dst != sh->dest) { - __le32 old_correct_csum, new_csum, old_csum; + old_csum = sh->checksum; + old_correct_csum = sctp_compute_cksum(skb, sctphoff); - old_csum = sh->checksum; - old_correct_csum = sctp_compute_cksum(skb, sctphoff); + sh->source = MASKED(sh->source, key->sctp_src, mask->sctp_src); + sh->dest = MASKED(sh->dest, key->sctp_dst, mask->sctp_dst); - sh->source = sctp_port_key->sctp_src; - sh->dest = sctp_port_key->sctp_dst; + new_csum = sctp_compute_cksum(skb, sctphoff); - new_csum = sctp_compute_cksum(skb, sctphoff); + /* Carry any checksum errors through. */ + sh->checksum = old_csum ^ old_correct_csum ^ new_csum; - /* Carry any checksum errors through. 
*/ - sh->checksum = old_csum ^ old_correct_csum ^ new_csum; - - skb_clear_hash(skb); - key->tp.src = sctp_port_key->sctp_src; - key->tp.dst = sctp_port_key->sctp_dst; - } + skb_clear_hash(skb); + flow_key->tp.src = sh->source; + flow_key->tp.dst = sh->dest; return 0; } @@ -652,52 +720,77 @@ static void execute_hash(struct sk_buff *skb, struct sw_flow_key *key, key->ovs_flow_hash = hash; } -static int execute_set_action(struct sk_buff *skb, struct sw_flow_key *key, - const struct nlattr *nested_attr) +static int execute_set_action(struct sk_buff *skb, + struct sw_flow_key *flow_key, + const struct nlattr *a) +{ + /* Only tunnel set execution is supported without a mask. */ + if (nla_type(a) == OVS_KEY_ATTR_TUNNEL_INFO) { + OVS_CB(skb)->egress_tun_info = nla_data(a); + return 0; + } + + return -EINVAL; +} + +/* Mask is at the midpoint of the data. */ +#define get_mask(a, type) ((const type)nla_data(a) + 1) + +static int execute_masked_set_action(struct sk_buff *skb, + struct sw_flow_key *flow_key, + const struct nlattr *a) { int err = 0; - switch (nla_type(nested_attr)) { + switch (nla_type(a)) { case OVS_KEY_ATTR_PRIORITY: - skb->priority = nla_get_u32(nested_attr); - key->phy.priority = skb->priority; + SET_MASKED(skb->priority, nla_get_u32(a), *get_mask(a, u32 *)); + flow_key->phy.priority = skb->priority; break; case OVS_KEY_ATTR_SKB_MARK: - skb->mark = nla_get_u32(nested_attr); - key->phy.skb_mark = skb->mark; + SET_MASKED(skb->mark, nla_get_u32(a), *get_mask(a, u32 *)); + flow_key->phy.skb_mark = skb->mark; break; case OVS_KEY_ATTR_TUNNEL_INFO: - OVS_CB(skb)->egress_tun_info = nla_data(nested_attr); + /* Masked data not supported for tunnel. */ + err = -EINVAL; break; case OVS_KEY_ATTR_ETHERNET: - err = set_eth_addr(skb, key, nla_data(nested_attr)); + err = set_eth_addr(skb, flow_key, nla_data(a), + get_mask(a, struct ovs_key_ethernet *)); break; case OVS_KEY_ATTR_IPV4: - err = set_ipv4(skb, key, nla_data(nested_attr)); + err = set_ipv4(skb, flow_key, nla_data(a), + get_mask(a, struct ovs_key_ipv4 *)); break; case OVS_KEY_ATTR_IPV6: - err = set_ipv6(skb, key, nla_data(nested_attr)); + err = set_ipv6(skb, flow_key, nla_data(a), + get_mask(a, struct ovs_key_ipv6 *)); break; case OVS_KEY_ATTR_TCP: - err = set_tcp(skb, key, nla_data(nested_attr)); + err = set_tcp(skb, flow_key, nla_data(a), + get_mask(a, struct ovs_key_tcp *)); break; case OVS_KEY_ATTR_UDP: - err = set_udp(skb, key, nla_data(nested_attr)); + err = set_udp(skb, flow_key, nla_data(a), + get_mask(a, struct ovs_key_udp *)); break; case OVS_KEY_ATTR_SCTP: - err = set_sctp(skb, key, nla_data(nested_attr)); + err = set_sctp(skb, flow_key, nla_data(a), + get_mask(a, struct ovs_key_sctp *)); break; case OVS_KEY_ATTR_MPLS: - err = set_mpls(skb, key, nla_data(nested_attr)); + err = set_mpls(skb, flow_key, nla_data(a), get_mask(a, + __be32 *)); break; } @@ -817,6 +910,11 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, err = execute_set_action(skb, key, nla_data(a)); break; + case OVS_ACTION_ATTR_SET_MASKED: + case OVS_ACTION_ATTR_SET_TO_MASKED: + err = execute_masked_set_action(skb, key, nla_data(a)); + break; + case OVS_ACTION_ATTR_SAMPLE: err = sample(dp, skb, key, a); break; diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 332b5a031739..ae5e77cdc0ca 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -65,6 +65,8 @@ static struct genl_family dp_packet_genl_family; static struct genl_family dp_flow_genl_family; static struct genl_family 
dp_datapath_genl_family; +static const struct nla_policy flow_policy[]; + static const struct genl_multicast_group ovs_dp_flow_multicast_group = { .name = OVS_FLOW_MCGROUP, }; @@ -83,8 +85,7 @@ static bool ovs_must_notify(struct genl_family *family, struct genl_info *info, unsigned int group) { return info->nlhdr->nlmsg_flags & NLM_F_ECHO || - genl_has_listeners(family, genl_info_net(info)->genl_sock, - group); + genl_has_listeners(family, genl_info_net(info), group); } static void ovs_notify(struct genl_family *family, @@ -420,7 +421,7 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, if (!dp_ifindex) return -ENODEV; - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { nskb = skb_clone(skb, GFP_ATOMIC); if (!nskb) return -ENOMEM; @@ -462,10 +463,8 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, 0, upcall_info->cmd); upcall->dp_ifindex = dp_ifindex; - nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY); - err = ovs_nla_put_flow(key, key, user_skb); + err = ovs_nla_put_key(key, key, OVS_PACKET_ATTR_KEY, false, user_skb); BUG_ON(err); - nla_nest_end(user_skb, nla); if (upcall_info->userdata) __nla_put(user_skb, OVS_PACKET_ATTR_USERDATA, @@ -525,7 +524,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) struct vport *input_vport; int len; int err; - bool log = !a[OVS_FLOW_ATTR_PROBE]; + bool log = !a[OVS_PACKET_ATTR_PROBE]; err = -EINVAL; if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] || @@ -611,6 +610,7 @@ static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = { [OVS_PACKET_ATTR_PACKET] = { .len = ETH_HLEN }, [OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED }, [OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED }, + [OVS_PACKET_ATTR_PROBE] = { .type = NLA_FLAG }, }; static const struct genl_ops dp_packet_genl_ops[] = { @@ -664,46 +664,48 @@ static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats, } } -static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts) +static bool should_fill_key(const struct sw_flow_id *sfid, uint32_t ufid_flags) { - return NLMSG_ALIGN(sizeof(struct ovs_header)) - + nla_total_size(ovs_key_attr_size()) /* OVS_FLOW_ATTR_KEY */ - + nla_total_size(ovs_key_attr_size()) /* OVS_FLOW_ATTR_MASK */ - + nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */ - + nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */ - + nla_total_size(8) /* OVS_FLOW_ATTR_USED */ - + nla_total_size(acts->actions_len); /* OVS_FLOW_ATTR_ACTIONS */ + return ovs_identifier_is_ufid(sfid) && + !(ufid_flags & OVS_UFID_F_OMIT_KEY); } -/* Called with ovs_mutex or RCU read lock. */ -static int ovs_flow_cmd_fill_match(const struct sw_flow *flow, - struct sk_buff *skb) +static bool should_fill_mask(uint32_t ufid_flags) { - struct nlattr *nla; - int err; + return !(ufid_flags & OVS_UFID_F_OMIT_MASK); +} - /* Fill flow key. 
*/ - nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY); - if (!nla) - return -EMSGSIZE; +static bool should_fill_actions(uint32_t ufid_flags) +{ + return !(ufid_flags & OVS_UFID_F_OMIT_ACTIONS); +} - err = ovs_nla_put_flow(&flow->unmasked_key, &flow->unmasked_key, skb); - if (err) - return err; +static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts, + const struct sw_flow_id *sfid, + uint32_t ufid_flags) +{ + size_t len = NLMSG_ALIGN(sizeof(struct ovs_header)); - nla_nest_end(skb, nla); + /* OVS_FLOW_ATTR_UFID */ + if (sfid && ovs_identifier_is_ufid(sfid)) + len += nla_total_size(sfid->ufid_len); - /* Fill flow mask. */ - nla = nla_nest_start(skb, OVS_FLOW_ATTR_MASK); - if (!nla) - return -EMSGSIZE; + /* OVS_FLOW_ATTR_KEY */ + if (!sfid || should_fill_key(sfid, ufid_flags)) + len += nla_total_size(ovs_key_attr_size()); - err = ovs_nla_put_flow(&flow->key, &flow->mask->key, skb); - if (err) - return err; + /* OVS_FLOW_ATTR_MASK */ + if (should_fill_mask(ufid_flags)) + len += nla_total_size(ovs_key_attr_size()); - nla_nest_end(skb, nla); - return 0; + /* OVS_FLOW_ATTR_ACTIONS */ + if (should_fill_actions(ufid_flags)) + len += nla_total_size(acts->actions_len); + + return len + + nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */ + + nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */ + + nla_total_size(8); /* OVS_FLOW_ATTR_USED */ } /* Called with ovs_mutex or RCU read lock. */ @@ -774,7 +776,7 @@ static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow, /* Called with ovs_mutex or RCU read lock. */ static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex, struct sk_buff *skb, u32 portid, - u32 seq, u32 flags, u8 cmd) + u32 seq, u32 flags, u8 cmd, u32 ufid_flags) { const int skb_orig_len = skb->len; struct ovs_header *ovs_header; @@ -787,19 +789,34 @@ static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex, ovs_header->dp_ifindex = dp_ifindex; - err = ovs_flow_cmd_fill_match(flow, skb); + err = ovs_nla_put_identifier(flow, skb); if (err) goto error; + if (should_fill_key(&flow->id, ufid_flags)) { + err = ovs_nla_put_masked_key(flow, skb); + if (err) + goto error; + } + + if (should_fill_mask(ufid_flags)) { + err = ovs_nla_put_mask(flow, skb); + if (err) + goto error; + } + err = ovs_flow_cmd_fill_stats(flow, skb); if (err) goto error; - err = ovs_flow_cmd_fill_actions(flow, skb, skb_orig_len); - if (err) - goto error; + if (should_fill_actions(ufid_flags)) { + err = ovs_flow_cmd_fill_actions(flow, skb, skb_orig_len); + if (err) + goto error; + } - return genlmsg_end(skb, ovs_header); + genlmsg_end(skb, ovs_header); + return 0; error: genlmsg_cancel(skb, ovs_header); @@ -808,15 +825,19 @@ error: /* May not be called with RCU read lock. 
*/ static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *acts, + const struct sw_flow_id *sfid, struct genl_info *info, - bool always) + bool always, + uint32_t ufid_flags) { struct sk_buff *skb; + size_t len; if (!always && !ovs_must_notify(&dp_flow_genl_family, info, 0)) return NULL; - skb = genlmsg_new_unicast(ovs_flow_cmd_msg_size(acts), info, GFP_KERNEL); + len = ovs_flow_cmd_msg_size(acts, sfid, ufid_flags); + skb = genlmsg_new_unicast(len, info, GFP_KERNEL); if (!skb) return ERR_PTR(-ENOMEM); @@ -827,19 +848,19 @@ static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *act static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow, int dp_ifindex, struct genl_info *info, u8 cmd, - bool always) + bool always, u32 ufid_flags) { struct sk_buff *skb; int retval; - skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts), info, - always); + skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts), + &flow->id, info, always, ufid_flags); if (IS_ERR_OR_NULL(skb)) return skb; retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb, info->snd_portid, info->snd_seq, 0, - cmd); + cmd, ufid_flags); BUG_ON(retval < 0); return skb; } @@ -848,12 +869,14 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) { struct nlattr **a = info->attrs; struct ovs_header *ovs_header = info->userhdr; - struct sw_flow *flow, *new_flow; + struct sw_flow *flow = NULL, *new_flow; struct sw_flow_mask mask; struct sk_buff *reply; struct datapath *dp; + struct sw_flow_key key; struct sw_flow_actions *acts; struct sw_flow_match match; + u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]); int error; bool log = !a[OVS_FLOW_ATTR_PROBE]; @@ -878,13 +901,19 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) } /* Extract key. */ - ovs_match_init(&match, &new_flow->unmasked_key, &mask); + ovs_match_init(&match, &key, &mask); error = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK], log); if (error) goto err_kfree_flow; - ovs_flow_mask_key(&new_flow->key, &new_flow->unmasked_key, &mask); + ovs_flow_mask_key(&new_flow->key, &key, &mask); + + /* Extract flow identifier. */ + error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID], + &key, log); + if (error) + goto err_kfree_flow; /* Validate actions. 
*/ error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key, @@ -894,7 +923,8 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) goto err_kfree_flow; } - reply = ovs_flow_cmd_alloc_info(acts, info, false); + reply = ovs_flow_cmd_alloc_info(acts, &new_flow->id, info, false, + ufid_flags); if (IS_ERR(reply)) { error = PTR_ERR(reply); goto err_kfree_acts; @@ -906,8 +936,12 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) error = -ENODEV; goto err_unlock_ovs; } + /* Check if this is a duplicate flow */ - flow = ovs_flow_tbl_lookup(&dp->table, &new_flow->unmasked_key); + if (ovs_identifier_is_ufid(&new_flow->id)) + flow = ovs_flow_tbl_lookup_ufid(&dp->table, &new_flow->id); + if (!flow) + flow = ovs_flow_tbl_lookup(&dp->table, &key); if (likely(!flow)) { rcu_assign_pointer(new_flow->sf_acts, acts); @@ -923,7 +957,8 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) ovs_header->dp_ifindex, reply, info->snd_portid, info->snd_seq, 0, - OVS_FLOW_CMD_NEW); + OVS_FLOW_CMD_NEW, + ufid_flags); BUG_ON(error < 0); } ovs_unlock(); @@ -941,10 +976,15 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) error = -EEXIST; goto err_unlock_ovs; } - /* The unmasked key has to be the same for flow updates. */ - if (unlikely(!ovs_flow_cmp_unmasked_key(flow, &match))) { - /* Look for any overlapping flow. */ - flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); + /* The flow identifier has to be the same for flow updates. + * Look for any overlapping flow. + */ + if (unlikely(!ovs_flow_cmp(flow, &match))) { + if (ovs_identifier_is_key(&flow->id)) + flow = ovs_flow_tbl_lookup_exact(&dp->table, + &match); + else /* UFID matches but key is different */ + flow = NULL; if (!flow) { error = -ENOENT; goto err_unlock_ovs; @@ -959,7 +999,8 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) ovs_header->dp_ifindex, reply, info->snd_portid, info->snd_seq, 0, - OVS_FLOW_CMD_NEW); + OVS_FLOW_CMD_NEW, + ufid_flags); BUG_ON(error < 0); } ovs_unlock(); @@ -1015,8 +1056,11 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) struct datapath *dp; struct sw_flow_actions *old_acts = NULL, *acts = NULL; struct sw_flow_match match; + struct sw_flow_id sfid; + u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]); int error; bool log = !a[OVS_FLOW_ATTR_PROBE]; + bool ufid_present; /* Extract key. */ error = -EINVAL; @@ -1025,6 +1069,7 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) goto error; } + ufid_present = ovs_nla_get_ufid(&sfid, a[OVS_FLOW_ATTR_UFID], log); ovs_match_init(&match, &key, &mask); error = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK], log); @@ -1041,7 +1086,8 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) } /* Can allocate before locking if have acts. */ - reply = ovs_flow_cmd_alloc_info(acts, info, false); + reply = ovs_flow_cmd_alloc_info(acts, &sfid, info, false, + ufid_flags); if (IS_ERR(reply)) { error = PTR_ERR(reply); goto err_kfree_acts; @@ -1055,7 +1101,10 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) goto err_unlock_ovs; } /* Check that the flow exists. 
*/ - flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); + if (ufid_present) + flow = ovs_flow_tbl_lookup_ufid(&dp->table, &sfid); + else + flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); if (unlikely(!flow)) { error = -ENOENT; goto err_unlock_ovs; @@ -1071,13 +1120,16 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) ovs_header->dp_ifindex, reply, info->snd_portid, info->snd_seq, 0, - OVS_FLOW_CMD_NEW); + OVS_FLOW_CMD_NEW, + ufid_flags); BUG_ON(error < 0); } } else { /* Could not alloc without acts before locking. */ reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, - info, OVS_FLOW_CMD_NEW, false); + info, OVS_FLOW_CMD_NEW, false, + ufid_flags); + if (unlikely(IS_ERR(reply))) { error = PTR_ERR(reply); goto err_unlock_ovs; @@ -1114,17 +1166,22 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) struct sw_flow *flow; struct datapath *dp; struct sw_flow_match match; - int err; + struct sw_flow_id ufid; + u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]); + int err = 0; bool log = !a[OVS_FLOW_ATTR_PROBE]; + bool ufid_present; - if (!a[OVS_FLOW_ATTR_KEY]) { + ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log); + if (a[OVS_FLOW_ATTR_KEY]) { + ovs_match_init(&match, &key, NULL); + err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL, + log); + } else if (!ufid_present) { OVS_NLERR(log, "Flow get message rejected, Key attribute missing."); - return -EINVAL; + err = -EINVAL; } - - ovs_match_init(&match, &key, NULL); - err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL, log); if (err) return err; @@ -1135,14 +1192,17 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) goto unlock; } - flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); + if (ufid_present) + flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid); + else + flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); if (!flow) { err = -ENOENT; goto unlock; } reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, info, - OVS_FLOW_CMD_NEW, true); + OVS_FLOW_CMD_NEW, true, ufid_flags); if (IS_ERR(reply)) { err = PTR_ERR(reply); goto unlock; @@ -1161,13 +1221,17 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) struct ovs_header *ovs_header = info->userhdr; struct sw_flow_key key; struct sk_buff *reply; - struct sw_flow *flow; + struct sw_flow *flow = NULL; struct datapath *dp; struct sw_flow_match match; + struct sw_flow_id ufid; + u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]); int err; bool log = !a[OVS_FLOW_ATTR_PROBE]; + bool ufid_present; - if (likely(a[OVS_FLOW_ATTR_KEY])) { + ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log); + if (a[OVS_FLOW_ATTR_KEY]) { ovs_match_init(&match, &key, NULL); err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL, log); @@ -1182,12 +1246,15 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) goto unlock; } - if (unlikely(!a[OVS_FLOW_ATTR_KEY])) { + if (unlikely(!a[OVS_FLOW_ATTR_KEY] && !ufid_present)) { err = ovs_flow_tbl_flush(&dp->table); goto unlock; } - flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); + if (ufid_present) + flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid); + else + flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); if (unlikely(!flow)) { err = -ENOENT; goto unlock; @@ -1197,14 +1264,15 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) ovs_unlock(); reply = ovs_flow_cmd_alloc_info((const struct sw_flow_actions 
__force *) flow->sf_acts, - info, false); + &flow->id, info, false, ufid_flags); if (likely(reply)) { if (likely(!IS_ERR(reply))) { rcu_read_lock(); /*To keep RCU checker happy. */ err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, reply, info->snd_portid, info->snd_seq, 0, - OVS_FLOW_CMD_DEL); + OVS_FLOW_CMD_DEL, + ufid_flags); rcu_read_unlock(); BUG_ON(err < 0); @@ -1223,9 +1291,18 @@ unlock: static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) { + struct nlattr *a[__OVS_FLOW_ATTR_MAX]; struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh)); struct table_instance *ti; struct datapath *dp; + u32 ufid_flags; + int err; + + err = genlmsg_parse(cb->nlh, &dp_flow_genl_family, a, + OVS_FLOW_ATTR_MAX, flow_policy); + if (err) + return err; + ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]); rcu_read_lock(); dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex); @@ -1248,7 +1325,7 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - OVS_FLOW_CMD_NEW) < 0) + OVS_FLOW_CMD_NEW, ufid_flags) < 0) break; cb->args[0] = bucket; @@ -1264,6 +1341,8 @@ static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = { [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED }, [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG }, [OVS_FLOW_ATTR_PROBE] = { .type = NLA_FLAG }, + [OVS_FLOW_ATTR_UFID] = { .type = NLA_UNSPEC, .len = 1 }, + [OVS_FLOW_ATTR_UFID_FLAGS] = { .type = NLA_U32 }, }; static const struct genl_ops dp_flow_genl_ops[] = { @@ -1349,7 +1428,8 @@ static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb, if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features)) goto nla_put_failure; - return genlmsg_end(skb, ovs_header); + genlmsg_end(skb, ovs_header); + return 0; nla_put_failure: genlmsg_cancel(skb, ovs_header); @@ -1723,7 +1803,8 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb, if (err == -EMSGSIZE) goto error; - return genlmsg_end(skb, ovs_header); + genlmsg_end(skb, ovs_header); + return 0; nla_put_failure: err = -EMSGSIZE; diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 70bef2ab7f2b..50ec42f170a0 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -70,6 +70,7 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags, { struct flow_stats *stats; int node = numa_node_id(); + int len = skb->len + (skb_vlan_tag_present(skb) ? 
VLAN_HLEN : 0); stats = rcu_dereference(flow->stats[node]); @@ -105,7 +106,7 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags, if (likely(new_stats)) { new_stats->used = jiffies; new_stats->packet_count = 1; - new_stats->byte_count = skb->len; + new_stats->byte_count = len; new_stats->tcp_flags = tcp_flags; spin_lock_init(&new_stats->lock); @@ -120,7 +121,7 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags, stats->used = jiffies; stats->packet_count++; - stats->byte_count += skb->len; + stats->byte_count += len; stats->tcp_flags |= tcp_flags; unlock: spin_unlock(&stats->lock); @@ -471,7 +472,7 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) */ key->eth.tci = 0; - if (vlan_tx_tag_present(skb)) + if (skb_vlan_tag_present(skb)) key->eth.tci = htons(skb->vlan_tci); else if (eth->h_proto == htons(ETH_P_8021Q)) if (unlikely(parse_vlan(skb, key))) @@ -690,7 +691,7 @@ int ovs_flow_key_extract(const struct ovs_tunnel_info *tun_info, BUILD_BUG_ON((1 << (sizeof(tun_info->options_len) * 8)) - 1 > sizeof(key->tun_opts)); - memcpy(GENEVE_OPTS(key, tun_info->options_len), + memcpy(TUN_METADATA_OPTS(key, tun_info->options_len), tun_info->options, tun_info->options_len); key->tun_opts_len = tun_info->options_len; } else { @@ -716,6 +717,8 @@ int ovs_flow_key_extract_userspace(const struct nlattr *attr, { int err; + memset(key, 0, OVS_SW_FLOW_KEY_METADATA_SIZE); + /* Extract metadata from netlink attributes. */ err = ovs_nla_get_flow_metadata(attr, key, log); if (err) diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index a8b30f334388..a076e445ccc2 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -53,7 +53,7 @@ struct ovs_key_ipv4_tunnel { struct ovs_tunnel_info { struct ovs_key_ipv4_tunnel tunnel; - const struct geneve_opt *options; + const void *options; u8 options_len; }; @@ -61,10 +61,10 @@ struct ovs_tunnel_info { * maximum size. This allows us to get the benefits of variable length * matching for small options. 
*/ -#define GENEVE_OPTS(flow_key, opt_len) \ - ((struct geneve_opt *)((flow_key)->tun_opts + \ - FIELD_SIZEOF(struct sw_flow_key, tun_opts) - \ - opt_len)) +#define TUN_METADATA_OFFSET(opt_len) \ + (FIELD_SIZEOF(struct sw_flow_key, tun_opts) - opt_len) +#define TUN_METADATA_OPTS(flow_key, opt_len) \ + ((void *)((flow_key)->tun_opts + TUN_METADATA_OFFSET(opt_len))) static inline void __ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info, __be32 saddr, __be32 daddr, @@ -73,7 +73,7 @@ static inline void __ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info, __be16 tp_dst, __be64 tun_id, __be16 tun_flags, - const struct geneve_opt *opts, + const void *opts, u8 opts_len) { tun_info->tunnel.tun_id = tun_id; @@ -105,7 +105,7 @@ static inline void ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info, __be16 tp_dst, __be64 tun_id, __be16 tun_flags, - const struct geneve_opt *opts, + const void *opts, u8 opts_len) { __ovs_flow_tun_info_init(tun_info, iph->saddr, iph->daddr, @@ -197,6 +197,16 @@ struct sw_flow_match { struct sw_flow_mask *mask; }; +#define MAX_UFID_LENGTH 16 /* 128 bits */ + +struct sw_flow_id { + u32 ufid_len; + union { + u32 ufid[MAX_UFID_LENGTH / 4]; + struct sw_flow_key *unmasked_key; + }; +}; + struct sw_flow_actions { struct rcu_head rcu; u32 actions_len; @@ -213,13 +223,15 @@ struct flow_stats { struct sw_flow { struct rcu_head rcu; - struct hlist_node hash_node[2]; - u32 hash; + struct { + struct hlist_node node[2]; + u32 hash; + } flow_table, ufid_table; int stats_last_writer; /* NUMA-node id of the last writer on * 'stats[0]'. */ struct sw_flow_key key; - struct sw_flow_key unmasked_key; + struct sw_flow_id id; struct sw_flow_mask *mask; struct sw_flow_actions __rcu *sf_acts; struct flow_stats __rcu *stats[]; /* One for each NUMA node. First one @@ -243,6 +255,16 @@ struct arp_eth_header { unsigned char ar_tip[4]; /* target IP address */ } __packed; +static inline bool ovs_identifier_is_ufid(const struct sw_flow_id *sfid) +{ + return sfid->ufid_len; +} + +static inline bool ovs_identifier_is_key(const struct sw_flow_id *sfid) +{ + return !ovs_identifier_is_ufid(sfid); +} + void ovs_flow_stats_update(struct sw_flow *, __be16 tcp_flags, const struct sk_buff *); void ovs_flow_stats_get(const struct sw_flow *, struct ovs_flow_stats *, diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 9645a21d9eaa..216f20b90aa5 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -49,6 +49,14 @@ #include <net/mpls.h> #include "flow_netlink.h" +#include "vport-vxlan.h" + +struct ovs_len_tbl { + int len; + const struct ovs_len_tbl *next; +}; + +#define OVS_ATTR_NESTED -1 static void update_range(struct sw_flow_match *match, size_t offset, size_t size, bool is_mask) @@ -261,6 +269,9 @@ size_t ovs_tun_key_attr_size(void) + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */ + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_OAM */ + nla_total_size(256) /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */ + /* OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS is mutually exclusive with + * OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS and covered by it. 
+ */ + nla_total_size(2) /* OVS_TUNNEL_KEY_ATTR_TP_SRC */ + nla_total_size(2); /* OVS_TUNNEL_KEY_ATTR_TP_DST */ } @@ -289,29 +300,45 @@ size_t ovs_key_attr_size(void) + nla_total_size(28); /* OVS_KEY_ATTR_ND */ } +static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = { + [OVS_TUNNEL_KEY_ATTR_ID] = { .len = sizeof(u64) }, + [OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = { .len = sizeof(u32) }, + [OVS_TUNNEL_KEY_ATTR_IPV4_DST] = { .len = sizeof(u32) }, + [OVS_TUNNEL_KEY_ATTR_TOS] = { .len = 1 }, + [OVS_TUNNEL_KEY_ATTR_TTL] = { .len = 1 }, + [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = { .len = 0 }, + [OVS_TUNNEL_KEY_ATTR_CSUM] = { .len = 0 }, + [OVS_TUNNEL_KEY_ATTR_TP_SRC] = { .len = sizeof(u16) }, + [OVS_TUNNEL_KEY_ATTR_TP_DST] = { .len = sizeof(u16) }, + [OVS_TUNNEL_KEY_ATTR_OAM] = { .len = 0 }, + [OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = { .len = OVS_ATTR_NESTED }, + [OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS] = { .len = OVS_ATTR_NESTED }, +}; + /* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */ -static const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { - [OVS_KEY_ATTR_ENCAP] = -1, - [OVS_KEY_ATTR_PRIORITY] = sizeof(u32), - [OVS_KEY_ATTR_IN_PORT] = sizeof(u32), - [OVS_KEY_ATTR_SKB_MARK] = sizeof(u32), - [OVS_KEY_ATTR_ETHERNET] = sizeof(struct ovs_key_ethernet), - [OVS_KEY_ATTR_VLAN] = sizeof(__be16), - [OVS_KEY_ATTR_ETHERTYPE] = sizeof(__be16), - [OVS_KEY_ATTR_IPV4] = sizeof(struct ovs_key_ipv4), - [OVS_KEY_ATTR_IPV6] = sizeof(struct ovs_key_ipv6), - [OVS_KEY_ATTR_TCP] = sizeof(struct ovs_key_tcp), - [OVS_KEY_ATTR_TCP_FLAGS] = sizeof(__be16), - [OVS_KEY_ATTR_UDP] = sizeof(struct ovs_key_udp), - [OVS_KEY_ATTR_SCTP] = sizeof(struct ovs_key_sctp), - [OVS_KEY_ATTR_ICMP] = sizeof(struct ovs_key_icmp), - [OVS_KEY_ATTR_ICMPV6] = sizeof(struct ovs_key_icmpv6), - [OVS_KEY_ATTR_ARP] = sizeof(struct ovs_key_arp), - [OVS_KEY_ATTR_ND] = sizeof(struct ovs_key_nd), - [OVS_KEY_ATTR_RECIRC_ID] = sizeof(u32), - [OVS_KEY_ATTR_DP_HASH] = sizeof(u32), - [OVS_KEY_ATTR_TUNNEL] = -1, - [OVS_KEY_ATTR_MPLS] = sizeof(struct ovs_key_mpls), +static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { + [OVS_KEY_ATTR_ENCAP] = { .len = OVS_ATTR_NESTED }, + [OVS_KEY_ATTR_PRIORITY] = { .len = sizeof(u32) }, + [OVS_KEY_ATTR_IN_PORT] = { .len = sizeof(u32) }, + [OVS_KEY_ATTR_SKB_MARK] = { .len = sizeof(u32) }, + [OVS_KEY_ATTR_ETHERNET] = { .len = sizeof(struct ovs_key_ethernet) }, + [OVS_KEY_ATTR_VLAN] = { .len = sizeof(__be16) }, + [OVS_KEY_ATTR_ETHERTYPE] = { .len = sizeof(__be16) }, + [OVS_KEY_ATTR_IPV4] = { .len = sizeof(struct ovs_key_ipv4) }, + [OVS_KEY_ATTR_IPV6] = { .len = sizeof(struct ovs_key_ipv6) }, + [OVS_KEY_ATTR_TCP] = { .len = sizeof(struct ovs_key_tcp) }, + [OVS_KEY_ATTR_TCP_FLAGS] = { .len = sizeof(__be16) }, + [OVS_KEY_ATTR_UDP] = { .len = sizeof(struct ovs_key_udp) }, + [OVS_KEY_ATTR_SCTP] = { .len = sizeof(struct ovs_key_sctp) }, + [OVS_KEY_ATTR_ICMP] = { .len = sizeof(struct ovs_key_icmp) }, + [OVS_KEY_ATTR_ICMPV6] = { .len = sizeof(struct ovs_key_icmpv6) }, + [OVS_KEY_ATTR_ARP] = { .len = sizeof(struct ovs_key_arp) }, + [OVS_KEY_ATTR_ND] = { .len = sizeof(struct ovs_key_nd) }, + [OVS_KEY_ATTR_RECIRC_ID] = { .len = sizeof(u32) }, + [OVS_KEY_ATTR_DP_HASH] = { .len = sizeof(u32) }, + [OVS_KEY_ATTR_TUNNEL] = { .len = OVS_ATTR_NESTED, + .next = ovs_tunnel_key_lens, }, + [OVS_KEY_ATTR_MPLS] = { .len = sizeof(struct ovs_key_mpls) }, }; static bool is_all_zero(const u8 *fp, size_t size) @@ -352,8 +379,8 @@ static int __parse_flow_nlattrs(const struct nlattr *attr, return -EINVAL; } - 
expected_len = ovs_key_lens[type]; - if (nla_len(nla) != expected_len && expected_len != -1) { + expected_len = ovs_key_lens[type].len; + if (nla_len(nla) != expected_len && expected_len != OVS_ATTR_NESTED) { OVS_NLERR(log, "Key %d has unexpected len %d expected %d", type, nla_len(nla), expected_len); return -EINVAL; @@ -432,13 +459,47 @@ static int genev_tun_opt_from_nlattr(const struct nlattr *a, SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true); } - opt_key_offset = (unsigned long)GENEVE_OPTS((struct sw_flow_key *)0, - nla_len(a)); + opt_key_offset = TUN_METADATA_OFFSET(nla_len(a)); SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, nla_data(a), nla_len(a), is_mask); return 0; } +static const struct nla_policy vxlan_opt_policy[OVS_VXLAN_EXT_MAX + 1] = { + [OVS_VXLAN_EXT_GBP] = { .type = NLA_U32 }, +}; + +static int vxlan_tun_opt_from_nlattr(const struct nlattr *a, + struct sw_flow_match *match, bool is_mask, + bool log) +{ + struct nlattr *tb[OVS_VXLAN_EXT_MAX+1]; + unsigned long opt_key_offset; + struct ovs_vxlan_opts opts; + int err; + + BUILD_BUG_ON(sizeof(opts) > sizeof(match->key->tun_opts)); + + err = nla_parse_nested(tb, OVS_VXLAN_EXT_MAX, a, vxlan_opt_policy); + if (err < 0) + return err; + + memset(&opts, 0, sizeof(opts)); + + if (tb[OVS_VXLAN_EXT_GBP]) + opts.gbp = nla_get_u32(tb[OVS_VXLAN_EXT_GBP]); + + if (!is_mask) + SW_FLOW_KEY_PUT(match, tun_opts_len, sizeof(opts), false); + else + SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true); + + opt_key_offset = TUN_METADATA_OFFSET(sizeof(opts)); + SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, &opts, sizeof(opts), + is_mask); + return 0; +} + static int ipv4_tun_from_nlattr(const struct nlattr *attr, struct sw_flow_match *match, bool is_mask, bool log) @@ -447,35 +508,22 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, int rem; bool ttl = false; __be16 tun_flags = 0; + int opts_type = 0; nla_for_each_nested(a, attr, rem) { int type = nla_type(a); int err; - static const u32 ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = { - [OVS_TUNNEL_KEY_ATTR_ID] = sizeof(u64), - [OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = sizeof(u32), - [OVS_TUNNEL_KEY_ATTR_IPV4_DST] = sizeof(u32), - [OVS_TUNNEL_KEY_ATTR_TOS] = 1, - [OVS_TUNNEL_KEY_ATTR_TTL] = 1, - [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0, - [OVS_TUNNEL_KEY_ATTR_CSUM] = 0, - [OVS_TUNNEL_KEY_ATTR_TP_SRC] = sizeof(u16), - [OVS_TUNNEL_KEY_ATTR_TP_DST] = sizeof(u16), - [OVS_TUNNEL_KEY_ATTR_OAM] = 0, - [OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = -1, - }; - if (type > OVS_TUNNEL_KEY_ATTR_MAX) { OVS_NLERR(log, "Tunnel attr %d out of range max %d", type, OVS_TUNNEL_KEY_ATTR_MAX); return -EINVAL; } - if (ovs_tunnel_key_lens[type] != nla_len(a) && - ovs_tunnel_key_lens[type] != -1) { + if (ovs_tunnel_key_lens[type].len != nla_len(a) && + ovs_tunnel_key_lens[type].len != OVS_ATTR_NESTED) { OVS_NLERR(log, "Tunnel attr %d has unexpected len %d expected %d", - type, nla_len(a), ovs_tunnel_key_lens[type]); + type, nla_len(a), ovs_tunnel_key_lens[type].len); return -EINVAL; } @@ -520,11 +568,30 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, tun_flags |= TUNNEL_OAM; break; case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS: + if (opts_type) { + OVS_NLERR(log, "Multiple metadata blocks provided"); + return -EINVAL; + } + err = genev_tun_opt_from_nlattr(a, match, is_mask, log); if (err) return err; - tun_flags |= TUNNEL_OPTIONS_PRESENT; + tun_flags |= TUNNEL_GENEVE_OPT; + opts_type = type; + break; + case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS: + if (opts_type) { + OVS_NLERR(log, "Multiple metadata blocks provided"); + 
return -EINVAL; + } + + err = vxlan_tun_opt_from_nlattr(a, match, is_mask, log); + if (err) + return err; + + tun_flags |= TUNNEL_VXLAN_OPT; + opts_type = type; break; default: OVS_NLERR(log, "Unknown IPv4 tunnel attribute %d", @@ -553,13 +620,29 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, } } + return opts_type; +} + +static int vxlan_opt_to_nlattr(struct sk_buff *skb, + const void *tun_opts, int swkey_tun_opts_len) +{ + const struct ovs_vxlan_opts *opts = tun_opts; + struct nlattr *nla; + + nla = nla_nest_start(skb, OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS); + if (!nla) + return -EMSGSIZE; + + if (nla_put_u32(skb, OVS_VXLAN_EXT_GBP, opts->gbp) < 0) + return -EMSGSIZE; + + nla_nest_end(skb, nla); return 0; } static int __ipv4_tun_to_nlattr(struct sk_buff *skb, const struct ovs_key_ipv4_tunnel *output, - const struct geneve_opt *tun_opts, - int swkey_tun_opts_len) + const void *tun_opts, int swkey_tun_opts_len) { if (output->tun_flags & TUNNEL_KEY && nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id)) @@ -590,18 +673,22 @@ static int __ipv4_tun_to_nlattr(struct sk_buff *skb, if ((output->tun_flags & TUNNEL_OAM) && nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM)) return -EMSGSIZE; - if (tun_opts && - nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS, - swkey_tun_opts_len, tun_opts)) - return -EMSGSIZE; + if (tun_opts) { + if (output->tun_flags & TUNNEL_GENEVE_OPT && + nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS, + swkey_tun_opts_len, tun_opts)) + return -EMSGSIZE; + else if (output->tun_flags & TUNNEL_VXLAN_OPT && + vxlan_opt_to_nlattr(skb, tun_opts, swkey_tun_opts_len)) + return -EMSGSIZE; + } return 0; } static int ipv4_tun_to_nlattr(struct sk_buff *skb, const struct ovs_key_ipv4_tunnel *output, - const struct geneve_opt *tun_opts, - int swkey_tun_opts_len) + const void *tun_opts, int swkey_tun_opts_len) { struct nlattr *nla; int err; @@ -675,7 +762,7 @@ static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs, } if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) { if (ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match, - is_mask, log)) + is_mask, log) < 0) return -EINVAL; *attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL); } @@ -915,18 +1002,16 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, return 0; } -static void nlattr_set(struct nlattr *attr, u8 val, bool is_attr_mask_key) +static void nlattr_set(struct nlattr *attr, u8 val, + const struct ovs_len_tbl *tbl) { struct nlattr *nla; int rem; /* The nlattr stream should already have been validated */ nla_for_each_nested(nla, attr, rem) { - /* We assume that ovs_key_lens[type] == -1 means that type is a - * nested attribute - */ - if (is_attr_mask_key && ovs_key_lens[nla_type(nla)] == -1) - nlattr_set(nla, val, false); + if (tbl && tbl[nla_type(nla)].len == OVS_ATTR_NESTED) + nlattr_set(nla, val, tbl[nla_type(nla)].next); else memset(nla_data(nla), val, nla_len(nla)); } @@ -934,7 +1019,7 @@ static void nlattr_set(struct nlattr *attr, u8 val, bool is_attr_mask_key) static void mask_set_nlattr(struct nlattr *attr, u8 val) { - nlattr_set(attr, val, true); + nlattr_set(attr, val, ovs_key_lens); } /** @@ -1095,6 +1180,59 @@ free_newmask: return err; } +static size_t get_ufid_len(const struct nlattr *attr, bool log) +{ + size_t len; + + if (!attr) + return 0; + + len = nla_len(attr); + if (len < 1 || len > MAX_UFID_LENGTH) { + OVS_NLERR(log, "ufid size %u bytes exceeds the range (1, %d)", + nla_len(attr), MAX_UFID_LENGTH); + return 0; + } + + return len; +} + +/* Initializes 'flow->ufid', returning true if 'attr' 
contains a valid UFID, + * or false otherwise. + */ +bool ovs_nla_get_ufid(struct sw_flow_id *sfid, const struct nlattr *attr, + bool log) +{ + sfid->ufid_len = get_ufid_len(attr, log); + if (sfid->ufid_len) + memcpy(sfid->ufid, nla_data(attr), sfid->ufid_len); + + return sfid->ufid_len; +} + +int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid, + const struct sw_flow_key *key, bool log) +{ + struct sw_flow_key *new_key; + + if (ovs_nla_get_ufid(sfid, ufid, log)) + return 0; + + /* If UFID was not provided, use unmasked key. */ + new_key = kmalloc(sizeof(*new_key), GFP_KERNEL); + if (!new_key) + return -ENOMEM; + memcpy(new_key, key, sizeof(*key)); + sfid->unmasked_key = new_key; + + return 0; +} + +u32 ovs_nla_get_ufid_flags(const struct nlattr *attr) +{ + return attr ? nla_get_u32(attr) : 0; +} + /** * ovs_nla_get_flow_metadata - parses Netlink attributes into a flow key. * @key: Receives extracted in_port, priority, tun_key and skb_mark. @@ -1131,12 +1269,12 @@ int ovs_nla_get_flow_metadata(const struct nlattr *attr, return metadata_from_nlattrs(&match, &attrs, a, false, log); } -int ovs_nla_put_flow(const struct sw_flow_key *swkey, - const struct sw_flow_key *output, struct sk_buff *skb) +static int __ovs_nla_put_key(const struct sw_flow_key *swkey, + const struct sw_flow_key *output, bool is_mask, + struct sk_buff *skb) { struct ovs_key_ethernet *eth_key; struct nlattr *nla, *encap; - bool is_mask = (swkey != output); if (nla_put_u32(skb, OVS_KEY_ATTR_RECIRC_ID, output->recirc_id)) goto nla_put_failure; @@ -1148,10 +1286,10 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey, goto nla_put_failure; if ((swkey->tun_key.ipv4_dst || is_mask)) { - const struct geneve_opt *opts = NULL; + const void *opts = NULL; if (output->tun_key.tun_flags & TUNNEL_OPTIONS_PRESENT) - opts = GENEVE_OPTS(output, swkey->tun_opts_len); + opts = TUN_METADATA_OPTS(output, swkey->tun_opts_len); if (ipv4_tun_to_nlattr(skb, &output->tun_key, opts, swkey->tun_opts_len)) @@ -1346,6 +1484,49 @@ nla_put_failure: return -EMSGSIZE; } +int ovs_nla_put_key(const struct sw_flow_key *swkey, + const struct sw_flow_key *output, int attr, bool is_mask, + struct sk_buff *skb) +{ + int err; + struct nlattr *nla; + + nla = nla_nest_start(skb, attr); + if (!nla) + return -EMSGSIZE; + err = __ovs_nla_put_key(swkey, output, is_mask, skb); + if (err) + return err; + nla_nest_end(skb, nla); + + return 0; +} + +/* Called with ovs_mutex or RCU read lock. */ +int ovs_nla_put_identifier(const struct sw_flow *flow, struct sk_buff *skb) +{ + if (ovs_identifier_is_ufid(&flow->id)) + return nla_put(skb, OVS_FLOW_ATTR_UFID, flow->id.ufid_len, + flow->id.ufid); + + return ovs_nla_put_key(flow->id.unmasked_key, flow->id.unmasked_key, + OVS_FLOW_ATTR_KEY, false, skb); +} + +/* Called with ovs_mutex or RCU read lock. */ +int ovs_nla_put_masked_key(const struct sw_flow *flow, struct sk_buff *skb) +{ + return ovs_nla_put_key(&flow->key, &flow->key, + OVS_FLOW_ATTR_KEY, false, skb); +} + +/* Called with ovs_mutex or RCU read lock. 
*/ +int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb) +{ + return ovs_nla_put_key(&flow->key, &flow->mask->key, + OVS_FLOW_ATTR_MASK, true, skb); +} + #define MAX_ACTIONS_BUFSIZE (32 * 1024) static struct sw_flow_actions *nla_alloc_flow_actions(int size, bool log) @@ -1514,16 +1695,6 @@ static int validate_and_copy_sample(const struct nlattr *attr, return 0; } -static int validate_tp_port(const struct sw_flow_key *flow_key, - __be16 eth_type) -{ - if ((eth_type == htons(ETH_P_IP) || eth_type == htons(ETH_P_IPV6)) && - (flow_key->tp.src || flow_key->tp.dst)) - return 0; - - return -EINVAL; -} - void ovs_match_init(struct sw_flow_match *match, struct sw_flow_key *key, struct sw_flow_mask *mask) @@ -1540,6 +1711,34 @@ void ovs_match_init(struct sw_flow_match *match, } } +static int validate_geneve_opts(struct sw_flow_key *key) +{ + struct geneve_opt *option; + int opts_len = key->tun_opts_len; + bool crit_opt = false; + + option = (struct geneve_opt *)TUN_METADATA_OPTS(key, key->tun_opts_len); + while (opts_len > 0) { + int len; + + if (opts_len < sizeof(*option)) + return -EINVAL; + + len = sizeof(*option) + option->length * 4; + if (len > opts_len) + return -EINVAL; + + crit_opt |= !!(option->type & GENEVE_CRIT_OPT_TYPE); + + option = (struct geneve_opt *)((u8 *)option + len); + opts_len -= len; + }; + + key->tun_key.tun_flags |= crit_opt ? TUNNEL_CRIT_OPT : 0; + + return 0; +} + static int validate_and_copy_set_tun(const struct nlattr *attr, struct sw_flow_actions **sfa, bool log) { @@ -1547,36 +1746,23 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, struct sw_flow_key key; struct ovs_tunnel_info *tun_info; struct nlattr *a; - int err, start; + int err = 0, start, opts_type; ovs_match_init(&match, &key, NULL); - err = ipv4_tun_from_nlattr(nla_data(attr), &match, false, log); - if (err) - return err; + opts_type = ipv4_tun_from_nlattr(nla_data(attr), &match, false, log); + if (opts_type < 0) + return opts_type; if (key.tun_opts_len) { - struct geneve_opt *option = GENEVE_OPTS(&key, - key.tun_opts_len); - int opts_len = key.tun_opts_len; - bool crit_opt = false; - - while (opts_len > 0) { - int len; - - if (opts_len < sizeof(*option)) - return -EINVAL; - - len = sizeof(*option) + option->length * 4; - if (len > opts_len) - return -EINVAL; - - crit_opt |= !!(option->type & GENEVE_CRIT_OPT_TYPE); - - option = (struct geneve_opt *)((u8 *)option + len); - opts_len -= len; - }; - - key.tun_key.tun_flags |= crit_opt ? TUNNEL_CRIT_OPT : 0; + switch (opts_type) { + case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS: + err = validate_geneve_opts(&key); + if (err < 0) + return err; + break; + case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS: + break; + } }; start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET, log); @@ -1597,9 +1783,9 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, * everything else will go away after flow setup. We can append * it to tun_info and then point there. */ - memcpy((tun_info + 1), GENEVE_OPTS(&key, key.tun_opts_len), - key.tun_opts_len); - tun_info->options = (struct geneve_opt *)(tun_info + 1); + memcpy((tun_info + 1), + TUN_METADATA_OPTS(&key, key.tun_opts_len), key.tun_opts_len); + tun_info->options = (tun_info + 1); } else { tun_info->options = NULL; } @@ -1609,21 +1795,43 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, return err; } +/* Return false if there are any non-masked bits set. + * Mask follows data immediately, before any netlink padding. 
+ */ +static bool validate_masked(u8 *data, int len) +{ + u8 *mask = data + len; + + while (len--) + if (*data++ & ~*mask++) + return false; + + return true; +} + static int validate_set(const struct nlattr *a, const struct sw_flow_key *flow_key, struct sw_flow_actions **sfa, - bool *set_tun, __be16 eth_type, bool log) + bool *skip_copy, __be16 eth_type, bool masked, bool log) { const struct nlattr *ovs_key = nla_data(a); int key_type = nla_type(ovs_key); + size_t key_len; /* There can be only one key in a action */ if (nla_total_size(nla_len(ovs_key)) != nla_len(a)) return -EINVAL; + key_len = nla_len(ovs_key); + if (masked) + key_len /= 2; + if (key_type > OVS_KEY_ATTR_MAX || - (ovs_key_lens[key_type] != nla_len(ovs_key) && - ovs_key_lens[key_type] != -1)) + (ovs_key_lens[key_type].len != key_len && + ovs_key_lens[key_type].len != OVS_ATTR_NESTED)) + return -EINVAL; + + if (masked && !validate_masked(nla_data(ovs_key), key_len)) return -EINVAL; switch (key_type) { @@ -1640,7 +1848,10 @@ static int validate_set(const struct nlattr *a, if (eth_p_mpls(eth_type)) return -EINVAL; - *set_tun = true; + if (masked) + return -EINVAL; /* Masked tunnel set not supported. */ + + *skip_copy = true; err = validate_and_copy_set_tun(a, sfa, log); if (err) return err; @@ -1650,48 +1861,66 @@ static int validate_set(const struct nlattr *a, if (eth_type != htons(ETH_P_IP)) return -EINVAL; - if (!flow_key->ip.proto) - return -EINVAL; - ipv4_key = nla_data(ovs_key); - if (ipv4_key->ipv4_proto != flow_key->ip.proto) - return -EINVAL; - if (ipv4_key->ipv4_frag != flow_key->ip.frag) - return -EINVAL; + if (masked) { + const struct ovs_key_ipv4 *mask = ipv4_key + 1; + /* Non-writeable fields. */ + if (mask->ipv4_proto || mask->ipv4_frag) + return -EINVAL; + } else { + if (ipv4_key->ipv4_proto != flow_key->ip.proto) + return -EINVAL; + + if (ipv4_key->ipv4_frag != flow_key->ip.frag) + return -EINVAL; + } break; case OVS_KEY_ATTR_IPV6: if (eth_type != htons(ETH_P_IPV6)) return -EINVAL; - if (!flow_key->ip.proto) - return -EINVAL; - ipv6_key = nla_data(ovs_key); - if (ipv6_key->ipv6_proto != flow_key->ip.proto) - return -EINVAL; - if (ipv6_key->ipv6_frag != flow_key->ip.frag) - return -EINVAL; + if (masked) { + const struct ovs_key_ipv6 *mask = ipv6_key + 1; + + /* Non-writeable fields. */ + if (mask->ipv6_proto || mask->ipv6_frag) + return -EINVAL; + /* Invalid bits in the flow label mask? 
*/ + if (ntohl(mask->ipv6_label) & 0xFFF00000) + return -EINVAL; + } else { + if (ipv6_key->ipv6_proto != flow_key->ip.proto) + return -EINVAL; + + if (ipv6_key->ipv6_frag != flow_key->ip.frag) + return -EINVAL; + } if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000) return -EINVAL; break; case OVS_KEY_ATTR_TCP: - if (flow_key->ip.proto != IPPROTO_TCP) + if ((eth_type != htons(ETH_P_IP) && + eth_type != htons(ETH_P_IPV6)) || + flow_key->ip.proto != IPPROTO_TCP) return -EINVAL; - return validate_tp_port(flow_key, eth_type); + break; case OVS_KEY_ATTR_UDP: - if (flow_key->ip.proto != IPPROTO_UDP) + if ((eth_type != htons(ETH_P_IP) && + eth_type != htons(ETH_P_IPV6)) || + flow_key->ip.proto != IPPROTO_UDP) return -EINVAL; - return validate_tp_port(flow_key, eth_type); + break; case OVS_KEY_ATTR_MPLS: if (!eth_p_mpls(eth_type)) @@ -1699,15 +1928,45 @@ static int validate_set(const struct nlattr *a, break; case OVS_KEY_ATTR_SCTP: - if (flow_key->ip.proto != IPPROTO_SCTP) + if ((eth_type != htons(ETH_P_IP) && + eth_type != htons(ETH_P_IPV6)) || + flow_key->ip.proto != IPPROTO_SCTP) return -EINVAL; - return validate_tp_port(flow_key, eth_type); + break; default: return -EINVAL; } + /* Convert non-masked non-tunnel set actions to masked set actions. */ + if (!masked && key_type != OVS_KEY_ATTR_TUNNEL) { + int start, len = key_len * 2; + struct nlattr *at; + + *skip_copy = true; + + start = add_nested_action_start(sfa, + OVS_ACTION_ATTR_SET_TO_MASKED, + log); + if (start < 0) + return start; + + at = __add_action(sfa, key_type, NULL, len, log); + if (IS_ERR(at)) + return PTR_ERR(at); + + memcpy(nla_data(at), nla_data(ovs_key), key_len); /* Key. */ + memset(nla_data(at) + key_len, 0xff, key_len); /* Mask. */ + /* Clear non-writeable bits from otherwise writeable fields. */ + if (key_type == OVS_KEY_ATTR_IPV6) { + struct ovs_key_ipv6 *mask = nla_data(at) + key_len; + + mask->ipv6_label &= htonl(0x000FFFFF); + } + add_nested_action_end(*sfa, start); + } + return 0; } @@ -1753,7 +2012,6 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr, __be16 eth_type, __be16 vlan_tci, bool log) { const struct nlattr *a; - bool out_tnl_port = false; int rem, err; if (depth >= SAMPLE_ACTION_DEPTH) @@ -1770,6 +2028,7 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr, [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan), [OVS_ACTION_ATTR_POP_VLAN] = 0, [OVS_ACTION_ATTR_SET] = (u32)-1, + [OVS_ACTION_ATTR_SET_MASKED] = (u32)-1, [OVS_ACTION_ATTR_SAMPLE] = (u32)-1, [OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash) }; @@ -1796,8 +2055,6 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr, case OVS_ACTION_ATTR_OUTPUT: if (nla_get_u32(a) >= DP_MAX_PORTS) return -EINVAL; - out_tnl_port = false; - break; case OVS_ACTION_ATTR_HASH: { @@ -1832,12 +2089,6 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr, case OVS_ACTION_ATTR_PUSH_MPLS: { const struct ovs_action_push_mpls *mpls = nla_data(a); - /* Networking stack do not allow simultaneous Tunnel - * and MPLS GSO. 
- */ - if (out_tnl_port) - return -EINVAL; - if (!eth_p_mpls(mpls->mpls_ethertype)) return -EINVAL; /* Prohibit push MPLS other than to a white list @@ -1873,11 +2124,16 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr, case OVS_ACTION_ATTR_SET: err = validate_set(a, key, sfa, - &out_tnl_port, eth_type, log); + &skip_copy, eth_type, false, log); if (err) return err; + break; - skip_copy = out_tnl_port; + case OVS_ACTION_ATTR_SET_MASKED: + err = validate_set(a, key, sfa, + &skip_copy, eth_type, true, log); + if (err) + return err; break; case OVS_ACTION_ATTR_SAMPLE: @@ -1905,6 +2161,7 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr, return 0; } +/* 'key' must be the masked key. */ int ovs_nla_copy_actions(const struct nlattr *attr, const struct sw_flow_key *key, struct sw_flow_actions **sfa, bool log) @@ -1992,6 +2249,21 @@ static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb) return 0; } +static int masked_set_action_to_set_action_attr(const struct nlattr *a, + struct sk_buff *skb) +{ + const struct nlattr *ovs_key = nla_data(a); + size_t key_len = nla_len(ovs_key) / 2; + + /* Revert the conversion we did from a non-masked set action to + * masked set action. + */ + if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a) - key_len, ovs_key)) + return -EMSGSIZE; + + return 0; +} + int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb) { const struct nlattr *a; @@ -2007,6 +2279,12 @@ int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb) return err; break; + case OVS_ACTION_ATTR_SET_TO_MASKED: + err = masked_set_action_to_set_action_attr(a, skb); + if (err) + return err; + break; + case OVS_ACTION_ATTR_SAMPLE: err = sample_action_to_attr(a, skb); if (err) diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h index 577f12be3459..5c3d75bff310 100644 --- a/net/openvswitch/flow_netlink.h +++ b/net/openvswitch/flow_netlink.h @@ -43,16 +43,25 @@ size_t ovs_key_attr_size(void); void ovs_match_init(struct sw_flow_match *match, struct sw_flow_key *key, struct sw_flow_mask *mask); -int ovs_nla_put_flow(const struct sw_flow_key *, - const struct sw_flow_key *, struct sk_buff *); +int ovs_nla_put_key(const struct sw_flow_key *, const struct sw_flow_key *, + int attr, bool is_mask, struct sk_buff *); int ovs_nla_get_flow_metadata(const struct nlattr *, struct sw_flow_key *, bool log); +int ovs_nla_put_identifier(const struct sw_flow *flow, struct sk_buff *skb); +int ovs_nla_put_masked_key(const struct sw_flow *flow, struct sk_buff *skb); +int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb); + int ovs_nla_get_match(struct sw_flow_match *, const struct nlattr *key, const struct nlattr *mask, bool log); int ovs_nla_put_egress_tunnel_key(struct sk_buff *, const struct ovs_tunnel_info *); +bool ovs_nla_get_ufid(struct sw_flow_id *, const struct nlattr *, bool log); +int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid, + const struct sw_flow_key *key, bool log); +u32 ovs_nla_get_ufid_flags(const struct nlattr *attr); + int ovs_nla_copy_actions(const struct nlattr *attr, const struct sw_flow_key *key, struct sw_flow_actions **sfa, bool log); diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index 5899bf161c61..4613df8c8290 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -85,6 +85,8 @@ struct sw_flow *ovs_flow_alloc(void) flow->sf_acts = NULL; flow->mask = NULL; + flow->id.unmasked_key = NULL; + 
flow->id.ufid_len = 0; flow->stats_last_writer = NUMA_NO_NODE; /* Initialize the default stat node. */ @@ -139,6 +141,8 @@ static void flow_free(struct sw_flow *flow) { int node; + if (ovs_identifier_is_key(&flow->id)) + kfree(flow->id.unmasked_key); kfree((struct sw_flow_actions __force *)flow->sf_acts); for_each_node(node) if (flow->stats[node]) @@ -200,18 +204,28 @@ static struct table_instance *table_instance_alloc(int new_size) int ovs_flow_tbl_init(struct flow_table *table) { - struct table_instance *ti; + struct table_instance *ti, *ufid_ti; ti = table_instance_alloc(TBL_MIN_BUCKETS); if (!ti) return -ENOMEM; + ufid_ti = table_instance_alloc(TBL_MIN_BUCKETS); + if (!ufid_ti) + goto free_ti; + rcu_assign_pointer(table->ti, ti); + rcu_assign_pointer(table->ufid_ti, ufid_ti); INIT_LIST_HEAD(&table->mask_list); table->last_rehash = jiffies; table->count = 0; + table->ufid_count = 0; return 0; + +free_ti: + __table_instance_destroy(ti); + return -ENOMEM; } static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu) @@ -221,13 +235,16 @@ static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu) __table_instance_destroy(ti); } -static void table_instance_destroy(struct table_instance *ti, bool deferred) +static void table_instance_destroy(struct table_instance *ti, + struct table_instance *ufid_ti, + bool deferred) { int i; if (!ti) return; + BUG_ON(!ufid_ti); if (ti->keep_flows) goto skip_flows; @@ -236,18 +253,24 @@ static void table_instance_destroy(struct table_instance *ti, bool deferred) struct hlist_head *head = flex_array_get(ti->buckets, i); struct hlist_node *n; int ver = ti->node_ver; + int ufid_ver = ufid_ti->node_ver; - hlist_for_each_entry_safe(flow, n, head, hash_node[ver]) { - hlist_del_rcu(&flow->hash_node[ver]); + hlist_for_each_entry_safe(flow, n, head, flow_table.node[ver]) { + hlist_del_rcu(&flow->flow_table.node[ver]); + if (ovs_identifier_is_ufid(&flow->id)) + hlist_del_rcu(&flow->ufid_table.node[ufid_ver]); ovs_flow_free(flow, deferred); } } skip_flows: - if (deferred) + if (deferred) { call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb); - else + call_rcu(&ufid_ti->rcu, flow_tbl_destroy_rcu_cb); + } else { __table_instance_destroy(ti); + __table_instance_destroy(ufid_ti); + } } /* No need for locking this function is called from RCU callback or @@ -256,8 +279,9 @@ skip_flows: void ovs_flow_tbl_destroy(struct flow_table *table) { struct table_instance *ti = rcu_dereference_raw(table->ti); + struct table_instance *ufid_ti = rcu_dereference_raw(table->ufid_ti); - table_instance_destroy(ti, false); + table_instance_destroy(ti, ufid_ti, false); } struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti, @@ -272,7 +296,7 @@ struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti, while (*bucket < ti->n_buckets) { i = 0; head = flex_array_get(ti->buckets, *bucket); - hlist_for_each_entry_rcu(flow, head, hash_node[ver]) { + hlist_for_each_entry_rcu(flow, head, flow_table.node[ver]) { if (i < *last) { i++; continue; @@ -294,16 +318,26 @@ static struct hlist_head *find_bucket(struct table_instance *ti, u32 hash) (hash & (ti->n_buckets - 1))); } -static void table_instance_insert(struct table_instance *ti, struct sw_flow *flow) +static void table_instance_insert(struct table_instance *ti, + struct sw_flow *flow) +{ + struct hlist_head *head; + + head = find_bucket(ti, flow->flow_table.hash); + hlist_add_head_rcu(&flow->flow_table.node[ti->node_ver], head); +} + +static void ufid_table_instance_insert(struct table_instance *ti, + struct sw_flow *flow) { struct 
hlist_head *head; - head = find_bucket(ti, flow->hash); - hlist_add_head_rcu(&flow->hash_node[ti->node_ver], head); + head = find_bucket(ti, flow->ufid_table.hash); + hlist_add_head_rcu(&flow->ufid_table.node[ti->node_ver], head); } static void flow_table_copy_flows(struct table_instance *old, - struct table_instance *new) + struct table_instance *new, bool ufid) { int old_ver; int i; @@ -318,15 +352,21 @@ static void flow_table_copy_flows(struct table_instance *old, head = flex_array_get(old->buckets, i); - hlist_for_each_entry(flow, head, hash_node[old_ver]) - table_instance_insert(new, flow); + if (ufid) + hlist_for_each_entry(flow, head, + ufid_table.node[old_ver]) + ufid_table_instance_insert(new, flow); + else + hlist_for_each_entry(flow, head, + flow_table.node[old_ver]) + table_instance_insert(new, flow); } old->keep_flows = true; } static struct table_instance *table_instance_rehash(struct table_instance *ti, - int n_buckets) + int n_buckets, bool ufid) { struct table_instance *new_ti; @@ -334,32 +374,45 @@ static struct table_instance *table_instance_rehash(struct table_instance *ti, if (!new_ti) return NULL; - flow_table_copy_flows(ti, new_ti); + flow_table_copy_flows(ti, new_ti, ufid); return new_ti; } int ovs_flow_tbl_flush(struct flow_table *flow_table) { - struct table_instance *old_ti; - struct table_instance *new_ti; + struct table_instance *old_ti, *new_ti; + struct table_instance *old_ufid_ti, *new_ufid_ti; - old_ti = ovsl_dereference(flow_table->ti); new_ti = table_instance_alloc(TBL_MIN_BUCKETS); if (!new_ti) return -ENOMEM; + new_ufid_ti = table_instance_alloc(TBL_MIN_BUCKETS); + if (!new_ufid_ti) + goto err_free_ti; + + old_ti = ovsl_dereference(flow_table->ti); + old_ufid_ti = ovsl_dereference(flow_table->ufid_ti); rcu_assign_pointer(flow_table->ti, new_ti); + rcu_assign_pointer(flow_table->ufid_ti, new_ufid_ti); flow_table->last_rehash = jiffies; flow_table->count = 0; + flow_table->ufid_count = 0; - table_instance_destroy(old_ti, true); + table_instance_destroy(old_ti, old_ufid_ti, true); return 0; + +err_free_ti: + __table_instance_destroy(new_ti); + return -ENOMEM; } -static u32 flow_hash(const struct sw_flow_key *key, int key_start, - int key_end) +static u32 flow_hash(const struct sw_flow_key *key, + const struct sw_flow_key_range *range) { + int key_start = range->start; + int key_end = range->end; const u32 *hash_key = (const u32 *)((const u8 *)key + key_start); int hash_u32s = (key_end - key_start) >> 2; @@ -395,19 +448,20 @@ static bool cmp_key(const struct sw_flow_key *key1, static bool flow_cmp_masked_key(const struct sw_flow *flow, const struct sw_flow_key *key, - int key_start, int key_end) + const struct sw_flow_key_range *range) { - return cmp_key(&flow->key, key, key_start, key_end); + return cmp_key(&flow->key, key, range->start, range->end); } -bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow, - const struct sw_flow_match *match) +static bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow, + const struct sw_flow_match *match) { struct sw_flow_key *key = match->key; int key_start = flow_key_start(key); int key_end = match->range.end; - return cmp_key(&flow->unmasked_key, key, key_start, key_end); + BUG_ON(ovs_identifier_is_ufid(&flow->id)); + return cmp_key(flow->id.unmasked_key, key, key_start, key_end); } static struct sw_flow *masked_flow_lookup(struct table_instance *ti, @@ -416,18 +470,15 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti, { struct sw_flow *flow; struct hlist_head *head; - int key_start = 
mask->range.start; - int key_end = mask->range.end; u32 hash; struct sw_flow_key masked_key; ovs_flow_mask_key(&masked_key, unmasked, mask); - hash = flow_hash(&masked_key, key_start, key_end); + hash = flow_hash(&masked_key, &mask->range); head = find_bucket(ti, hash); - hlist_for_each_entry_rcu(flow, head, hash_node[ti->node_ver]) { - if (flow->mask == mask && flow->hash == hash && - flow_cmp_masked_key(flow, &masked_key, - key_start, key_end)) + hlist_for_each_entry_rcu(flow, head, flow_table.node[ti->node_ver]) { + if (flow->mask == mask && flow->flow_table.hash == hash && + flow_cmp_masked_key(flow, &masked_key, &mask->range)) return flow; } return NULL; @@ -469,7 +520,48 @@ struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl, /* Always called under ovs-mutex. */ list_for_each_entry(mask, &tbl->mask_list, list) { flow = masked_flow_lookup(ti, match->key, mask); - if (flow && ovs_flow_cmp_unmasked_key(flow, match)) /* Found */ + if (flow && ovs_identifier_is_key(&flow->id) && + ovs_flow_cmp_unmasked_key(flow, match)) + return flow; + } + return NULL; +} + +static u32 ufid_hash(const struct sw_flow_id *sfid) +{ + return jhash(sfid->ufid, sfid->ufid_len, 0); +} + +static bool ovs_flow_cmp_ufid(const struct sw_flow *flow, + const struct sw_flow_id *sfid) +{ + if (flow->id.ufid_len != sfid->ufid_len) + return false; + + return !memcmp(flow->id.ufid, sfid->ufid, sfid->ufid_len); +} + +bool ovs_flow_cmp(const struct sw_flow *flow, const struct sw_flow_match *match) +{ + if (ovs_identifier_is_ufid(&flow->id)) + return flow_cmp_masked_key(flow, match->key, &match->range); + + return ovs_flow_cmp_unmasked_key(flow, match); +} + +struct sw_flow *ovs_flow_tbl_lookup_ufid(struct flow_table *tbl, + const struct sw_flow_id *ufid) +{ + struct table_instance *ti = rcu_dereference_ovsl(tbl->ufid_ti); + struct sw_flow *flow; + struct hlist_head *head; + u32 hash; + + hash = ufid_hash(ufid); + head = find_bucket(ti, hash); + hlist_for_each_entry_rcu(flow, head, ufid_table.node[ti->node_ver]) { + if (flow->ufid_table.hash == hash && + ovs_flow_cmp_ufid(flow, ufid)) return flow; } return NULL; @@ -486,9 +578,10 @@ int ovs_flow_tbl_num_masks(const struct flow_table *table) return num; } -static struct table_instance *table_instance_expand(struct table_instance *ti) +static struct table_instance *table_instance_expand(struct table_instance *ti, + bool ufid) { - return table_instance_rehash(ti, ti->n_buckets * 2); + return table_instance_rehash(ti, ti->n_buckets * 2, ufid); } /* Remove 'mask' from the mask list, if it is not needed any more. */ @@ -513,10 +606,15 @@ static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask) void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow) { struct table_instance *ti = ovsl_dereference(table->ti); + struct table_instance *ufid_ti = ovsl_dereference(table->ufid_ti); BUG_ON(table->count == 0); - hlist_del_rcu(&flow->hash_node[ti->node_ver]); + hlist_del_rcu(&flow->flow_table.node[ti->node_ver]); table->count--; + if (ovs_identifier_is_ufid(&flow->id)) { + hlist_del_rcu(&flow->ufid_table.node[ufid_ti->node_ver]); + table->ufid_count--; + } /* RCU delete the mask. 'flow->mask' is not NULLed, as it should be * accessible as long as the RCU read lock is held. @@ -585,34 +683,64 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow, } /* Must be called with OVS mutex held. 
*/ -int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow, - const struct sw_flow_mask *mask) +static void flow_key_insert(struct flow_table *table, struct sw_flow *flow) { struct table_instance *new_ti = NULL; struct table_instance *ti; - int err; - - err = flow_mask_insert(table, flow, mask); - if (err) - return err; - flow->hash = flow_hash(&flow->key, flow->mask->range.start, - flow->mask->range.end); + flow->flow_table.hash = flow_hash(&flow->key, &flow->mask->range); ti = ovsl_dereference(table->ti); table_instance_insert(ti, flow); table->count++; /* Expand table, if necessary, to make room. */ if (table->count > ti->n_buckets) - new_ti = table_instance_expand(ti); + new_ti = table_instance_expand(ti, false); else if (time_after(jiffies, table->last_rehash + REHASH_INTERVAL)) - new_ti = table_instance_rehash(ti, ti->n_buckets); + new_ti = table_instance_rehash(ti, ti->n_buckets, false); if (new_ti) { rcu_assign_pointer(table->ti, new_ti); - table_instance_destroy(ti, true); + call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb); table->last_rehash = jiffies; } +} + +/* Must be called with OVS mutex held. */ +static void flow_ufid_insert(struct flow_table *table, struct sw_flow *flow) +{ + struct table_instance *ti; + + flow->ufid_table.hash = ufid_hash(&flow->id); + ti = ovsl_dereference(table->ufid_ti); + ufid_table_instance_insert(ti, flow); + table->ufid_count++; + + /* Expand table, if necessary, to make room. */ + if (table->ufid_count > ti->n_buckets) { + struct table_instance *new_ti; + + new_ti = table_instance_expand(ti, true); + if (new_ti) { + rcu_assign_pointer(table->ufid_ti, new_ti); + call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb); + } + } +} + +/* Must be called with OVS mutex held. */ +int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow, + const struct sw_flow_mask *mask) +{ + int err; + + err = flow_mask_insert(table, flow, mask); + if (err) + return err; + flow_key_insert(table, flow); + if (ovs_identifier_is_ufid(&flow->id)) + flow_ufid_insert(table, flow); + return 0; } diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h index 309fa6415689..616eda10d955 100644 --- a/net/openvswitch/flow_table.h +++ b/net/openvswitch/flow_table.h @@ -47,9 +47,11 @@ struct table_instance { struct flow_table { struct table_instance __rcu *ti; + struct table_instance __rcu *ufid_ti; struct list_head mask_list; unsigned long last_rehash; unsigned int count; + unsigned int ufid_count; }; extern struct kmem_cache *flow_stats_cache; @@ -78,8 +80,10 @@ struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *, const struct sw_flow_key *); struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl, const struct sw_flow_match *match); -bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow, - const struct sw_flow_match *match); +struct sw_flow *ovs_flow_tbl_lookup_ufid(struct flow_table *, + const struct sw_flow_id *); + +bool ovs_flow_cmp(const struct sw_flow *, const struct sw_flow_match *); void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src, const struct sw_flow_mask *mask); diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c index 347fa2325b22..bf02fd5808c9 100644 --- a/net/openvswitch/vport-geneve.c +++ b/net/openvswitch/vport-geneve.c @@ -9,8 +9,6 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include <linux/version.h> - #include <linux/in.h> #include <linux/ip.h> #include <linux/net.h> @@ -90,7 +88,7 @@ static void geneve_rcv(struct geneve_sock *gs, struct sk_buff *skb) opts_len 
= geneveh->opt_len * 4; - flags = TUNNEL_KEY | TUNNEL_OPTIONS_PRESENT | + flags = TUNNEL_KEY | TUNNEL_GENEVE_OPT | (udp_hdr(skb)->check != 0 ? TUNNEL_CSUM : 0) | (geneveh->oam ? TUNNEL_OAM : 0) | (geneveh->critical ? TUNNEL_CRIT_OPT : 0); @@ -172,7 +170,7 @@ error: static int geneve_tnl_send(struct vport *vport, struct sk_buff *skb) { - struct ovs_key_ipv4_tunnel *tun_key; + const struct ovs_key_ipv4_tunnel *tun_key; struct ovs_tunnel_info *tun_info; struct net *net = ovs_dp_get_net(vport->dp); struct geneve_port *geneve_port = geneve_vport(vport); @@ -180,7 +178,7 @@ static int geneve_tnl_send(struct vport *vport, struct sk_buff *skb) __be16 sport; struct rtable *rt; struct flowi4 fl; - u8 vni[3]; + u8 vni[3], opts_len, *opts; __be16 df; int err; @@ -191,16 +189,7 @@ static int geneve_tnl_send(struct vport *vport, struct sk_buff *skb) } tun_key = &tun_info->tunnel; - - /* Route lookup */ - memset(&fl, 0, sizeof(fl)); - fl.daddr = tun_key->ipv4_dst; - fl.saddr = tun_key->ipv4_src; - fl.flowi4_tos = RT_TOS(tun_key->ipv4_tos); - fl.flowi4_mark = skb->mark; - fl.flowi4_proto = IPPROTO_UDP; - - rt = ip_route_output_key(net, &fl); + rt = ovs_tunnel_route_lookup(net, tun_key, skb->mark, &fl, IPPROTO_UDP); if (IS_ERR(rt)) { err = PTR_ERR(rt); goto error; @@ -211,15 +200,25 @@ static int geneve_tnl_send(struct vport *vport, struct sk_buff *skb) tunnel_id_to_vni(tun_key->tun_id, vni); skb->ignore_df = 1; + if (tun_key->tun_flags & TUNNEL_GENEVE_OPT) { + opts = (u8 *)tun_info->options; + opts_len = tun_info->options_len; + } else { + opts = NULL; + opts_len = 0; + } + err = geneve_xmit_skb(geneve_port->gs, rt, skb, fl.saddr, tun_key->ipv4_dst, tun_key->ipv4_tos, tun_key->ipv4_ttl, df, sport, dport, - tun_key->tun_flags, vni, - tun_info->options_len, (u8 *)tun_info->options, - false); + tun_key->tun_flags, vni, opts_len, opts, + !!(tun_key->tun_flags & TUNNEL_CSUM), false); if (err < 0) ip_rt_put(rt); + return err; + error: + kfree_skb(skb); return err; } diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c index 6b69df545b1d..f17ac9642f4e 100644 --- a/net/openvswitch/vport-gre.c +++ b/net/openvswitch/vport-gre.c @@ -73,7 +73,7 @@ static struct sk_buff *__build_header(struct sk_buff *skb, skb = gre_handle_offloads(skb, !!(tun_key->tun_flags & TUNNEL_CSUM)); if (IS_ERR(skb)) - return NULL; + return skb; tpi.flags = filter_tnl_flags(tun_key->tun_flags); tpi.proto = htons(ETH_P_TEB); @@ -134,7 +134,7 @@ static int gre_err(struct sk_buff *skb, u32 info, static int gre_tnl_send(struct vport *vport, struct sk_buff *skb) { struct net *net = ovs_dp_get_net(vport->dp); - struct ovs_key_ipv4_tunnel *tun_key; + const struct ovs_key_ipv4_tunnel *tun_key; struct flowi4 fl; struct rtable *rt; int min_headroom; @@ -144,27 +144,21 @@ static int gre_tnl_send(struct vport *vport, struct sk_buff *skb) if (unlikely(!OVS_CB(skb)->egress_tun_info)) { err = -EINVAL; - goto error; + goto err_free_skb; } tun_key = &OVS_CB(skb)->egress_tun_info->tunnel; - /* Route lookup */ - memset(&fl, 0, sizeof(fl)); - fl.daddr = tun_key->ipv4_dst; - fl.saddr = tun_key->ipv4_src; - fl.flowi4_tos = RT_TOS(tun_key->ipv4_tos); - fl.flowi4_mark = skb->mark; - fl.flowi4_proto = IPPROTO_GRE; - - rt = ip_route_output_key(net, &fl); - if (IS_ERR(rt)) - return PTR_ERR(rt); + rt = ovs_tunnel_route_lookup(net, tun_key, skb->mark, &fl, IPPROTO_GRE); + if (IS_ERR(rt)) { + err = PTR_ERR(rt); + goto err_free_skb; + } tunnel_hlen = ip_gre_calc_hlen(tun_key->tun_flags); min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len + 
tunnel_hlen + sizeof(struct iphdr) - + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0); + + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0); if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) { int head_delta = SKB_DATA_ALIGN(min_headroom - skb_headroom(skb) + @@ -183,8 +177,9 @@ static int gre_tnl_send(struct vport *vport, struct sk_buff *skb) /* Push Tunnel header. */ skb = __build_header(skb, tunnel_hlen); - if (unlikely(!skb)) { - err = 0; + if (IS_ERR(skb)) { + err = PTR_ERR(skb); + skb = NULL; goto err_free_rt; } @@ -198,7 +193,8 @@ static int gre_tnl_send(struct vport *vport, struct sk_buff *skb) tun_key->ipv4_tos, tun_key->ipv4_ttl, df, false); err_free_rt: ip_rt_put(rt); -error: +err_free_skb: + kfree_skb(skb); return err; } diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index 38f95a52241b..3277a7520e31 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c @@ -40,6 +40,7 @@ #include "datapath.h" #include "vport.h" +#include "vport-vxlan.h" /** * struct vxlan_port - Keeps track of open UDP ports @@ -49,6 +50,7 @@ struct vxlan_port { struct vxlan_sock *vs; char name[IFNAMSIZ]; + u32 exts; /* VXLAN_F_* in <net/vxlan.h> */ }; static struct vport_ops ovs_vxlan_vport_ops; @@ -59,19 +61,30 @@ static inline struct vxlan_port *vxlan_vport(const struct vport *vport) } /* Called with rcu_read_lock and BH disabled. */ -static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni) +static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, + struct vxlan_metadata *md) { struct ovs_tunnel_info tun_info; + struct vxlan_port *vxlan_port; struct vport *vport = vs->data; struct iphdr *iph; + struct ovs_vxlan_opts opts = { + .gbp = md->gbp, + }; __be64 key; + __be16 flags; + + flags = TUNNEL_KEY | (udp_hdr(skb)->check != 0 ? 
TUNNEL_CSUM : 0); + vxlan_port = vxlan_vport(vport); + if (vxlan_port->exts & VXLAN_F_GBP && md->gbp) + flags |= TUNNEL_VXLAN_OPT; /* Save outer tunnel values */ iph = ip_hdr(skb); - key = cpu_to_be64(ntohl(vx_vni) >> 8); + key = cpu_to_be64(ntohl(md->vni) >> 8); ovs_flow_tun_info_init(&tun_info, iph, udp_hdr(skb)->source, udp_hdr(skb)->dest, - key, TUNNEL_KEY, NULL, 0); + key, flags, &opts, sizeof(opts)); ovs_vport_receive(vport, skb, &tun_info); } @@ -83,6 +96,21 @@ static int vxlan_get_options(const struct vport *vport, struct sk_buff *skb) if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, ntohs(dst_port))) return -EMSGSIZE; + + if (vxlan_port->exts) { + struct nlattr *exts; + + exts = nla_nest_start(skb, OVS_TUNNEL_ATTR_EXTENSION); + if (!exts) + return -EMSGSIZE; + + if (vxlan_port->exts & VXLAN_F_GBP && + nla_put_flag(skb, OVS_VXLAN_EXT_GBP)) + return -EMSGSIZE; + + nla_nest_end(skb, exts); + } + return 0; } @@ -95,6 +123,31 @@ static void vxlan_tnl_destroy(struct vport *vport) ovs_vport_deferred_free(vport); } +static const struct nla_policy exts_policy[OVS_VXLAN_EXT_MAX+1] = { + [OVS_VXLAN_EXT_GBP] = { .type = NLA_FLAG, }, +}; + +static int vxlan_configure_exts(struct vport *vport, struct nlattr *attr) +{ + struct nlattr *exts[OVS_VXLAN_EXT_MAX+1]; + struct vxlan_port *vxlan_port; + int err; + + if (nla_len(attr) < sizeof(struct nlattr)) + return -EINVAL; + + err = nla_parse_nested(exts, OVS_VXLAN_EXT_MAX, attr, exts_policy); + if (err < 0) + return err; + + vxlan_port = vxlan_vport(vport); + + if (exts[OVS_VXLAN_EXT_GBP]) + vxlan_port->exts |= VXLAN_F_GBP; + + return 0; +} + static struct vport *vxlan_tnl_create(const struct vport_parms *parms) { struct net *net = ovs_dp_get_net(parms->dp); @@ -127,7 +180,17 @@ static struct vport *vxlan_tnl_create(const struct vport_parms *parms) vxlan_port = vxlan_vport(vport); strncpy(vxlan_port->name, parms->name, IFNAMSIZ); - vs = vxlan_sock_add(net, htons(dst_port), vxlan_rcv, vport, true, 0); + a = nla_find_nested(options, OVS_TUNNEL_ATTR_EXTENSION); + if (a) { + err = vxlan_configure_exts(vport, a); + if (err) { + ovs_vport_free(vport); + goto error; + } + } + + vs = vxlan_sock_add(net, htons(dst_port), vxlan_rcv, vport, true, + vxlan_port->exts); if (IS_ERR(vs)) { ovs_vport_free(vport); return (void *)vs; @@ -140,17 +203,34 @@ error: return ERR_PTR(err); } +static int vxlan_ext_gbp(struct sk_buff *skb) +{ + const struct ovs_tunnel_info *tun_info; + const struct ovs_vxlan_opts *opts; + + tun_info = OVS_CB(skb)->egress_tun_info; + opts = tun_info->options; + + if (tun_info->tunnel.tun_flags & TUNNEL_VXLAN_OPT && + tun_info->options_len >= sizeof(*opts)) + return opts->gbp; + else + return 0; +} + static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb) { struct net *net = ovs_dp_get_net(vport->dp); struct vxlan_port *vxlan_port = vxlan_vport(vport); __be16 dst_port = inet_sk(vxlan_port->vs->sock->sk)->inet_sport; - struct ovs_key_ipv4_tunnel *tun_key; + const struct ovs_key_ipv4_tunnel *tun_key; + struct vxlan_metadata md = {0}; struct rtable *rt; struct flowi4 fl; __be16 src_port; __be16 df; int err; + u32 vxflags; if (unlikely(!OVS_CB(skb)->egress_tun_info)) { err = -EINVAL; @@ -158,15 +238,7 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb) } tun_key = &OVS_CB(skb)->egress_tun_info->tunnel; - /* Route lookup */ - memset(&fl, 0, sizeof(fl)); - fl.daddr = tun_key->ipv4_dst; - fl.saddr = tun_key->ipv4_src; - fl.flowi4_tos = RT_TOS(tun_key->ipv4_tos); - fl.flowi4_mark = skb->mark; - fl.flowi4_proto = IPPROTO_UDP; 
- - rt = ip_route_output_key(net, &fl); + rt = ovs_tunnel_route_lookup(net, tun_key, skb->mark, &fl, IPPROTO_UDP); if (IS_ERR(rt)) { err = PTR_ERR(rt); goto error; @@ -178,16 +250,20 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb) skb->ignore_df = 1; src_port = udp_flow_src_port(net, skb, 0, 0, true); + md.vni = htonl(be64_to_cpu(tun_key->tun_id) << 8); + md.gbp = vxlan_ext_gbp(skb); + vxflags = vxlan_port->exts | + (tun_key->tun_flags & TUNNEL_CSUM ? VXLAN_F_UDP_CSUM : 0); - err = vxlan_xmit_skb(vxlan_port->vs, rt, skb, - fl.saddr, tun_key->ipv4_dst, + err = vxlan_xmit_skb(rt, skb, fl.saddr, tun_key->ipv4_dst, tun_key->ipv4_tos, tun_key->ipv4_ttl, df, src_port, dst_port, - htonl(be64_to_cpu(tun_key->tun_id) << 8), - false); + &md, false, vxflags); if (err < 0) ip_rt_put(rt); + return err; error: + kfree_skb(skb); return err; } diff --git a/net/openvswitch/vport-vxlan.h b/net/openvswitch/vport-vxlan.h new file mode 100644 index 000000000000..4b08233e73d5 --- /dev/null +++ b/net/openvswitch/vport-vxlan.h @@ -0,0 +1,11 @@ +#ifndef VPORT_VXLAN_H +#define VPORT_VXLAN_H 1 + +#include <linux/kernel.h> +#include <linux/types.h> + +struct ovs_vxlan_opts { + __u32 gbp; +}; + +#endif diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 9584526c0778..ec2954ffc690 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -480,7 +480,8 @@ void ovs_vport_receive(struct vport *vport, struct sk_buff *skb, stats = this_cpu_ptr(vport->percpu_stats); u64_stats_update_begin(&stats->syncp); stats->rx_packets++; - stats->rx_bytes += skb->len; + stats->rx_bytes += skb->len + + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0); u64_stats_update_end(&stats->syncp); OVS_CB(skb)->input_vport = vport; @@ -519,10 +520,9 @@ int ovs_vport_send(struct vport *vport, struct sk_buff *skb) u64_stats_update_end(&stats->syncp); } else if (sent < 0) { ovs_vport_record_error(vport, VPORT_E_TX_ERROR); - kfree_skb(skb); - } else + } else { ovs_vport_record_error(vport, VPORT_E_TX_DROPPED); - + } return sent; } @@ -595,14 +595,7 @@ int ovs_tunnel_get_egress_info(struct ovs_tunnel_info *egress_tun_info, * The process may need to be changed if the corresponding process * in vports ops changed. 
*/ - memset(&fl, 0, sizeof(fl)); - fl.daddr = tun_key->ipv4_dst; - fl.saddr = tun_key->ipv4_src; - fl.flowi4_tos = RT_TOS(tun_key->ipv4_tos); - fl.flowi4_mark = skb_mark; - fl.flowi4_proto = ipproto; - - rt = ip_route_output_key(net, &fl); + rt = ovs_tunnel_route_lookup(net, tun_key, skb_mark, &fl, ipproto); if (IS_ERR(rt)) return PTR_ERR(rt); diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index 99c8e71d9e6c..f8ae295fb001 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -236,4 +236,22 @@ static inline void ovs_skb_postpush_rcsum(struct sk_buff *skb, int ovs_vport_ops_register(struct vport_ops *ops); void ovs_vport_ops_unregister(struct vport_ops *ops); +static inline struct rtable *ovs_tunnel_route_lookup(struct net *net, + const struct ovs_key_ipv4_tunnel *key, + u32 mark, + struct flowi4 *fl, + u8 protocol) +{ + struct rtable *rt; + + memset(fl, 0, sizeof(*fl)); + fl->daddr = key->ipv4_dst; + fl->saddr = key->ipv4_src; + fl->flowi4_tos = RT_TOS(key->ipv4_tos); + fl->flowi4_mark = mark; + fl->flowi4_proto = protocol; + + rt = ip_route_output_key(net, fl); + return rt; +} #endif /* vport.h */ diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index e52a44785681..9c28cec1a083 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -785,6 +785,7 @@ static void prb_close_block(struct tpacket_kbdq_core *pkc1, struct tpacket3_hdr *last_pkt; struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1; + struct sock *sk = &po->sk; if (po->stats.stats3.tp_drops) status |= TP_STATUS_LOSING; @@ -809,6 +810,8 @@ static void prb_close_block(struct tpacket_kbdq_core *pkc1, /* Flush the block */ prb_flush_block(pkc1, pbd1, status); + sk->sk_data_ready(sk); + pkc1->kactive_blk_num = GET_NEXT_PRB_BLK_NUM(pkc1); } @@ -983,8 +986,8 @@ static void prb_clear_rxhash(struct tpacket_kbdq_core *pkc, static void prb_fill_vlan_info(struct tpacket_kbdq_core *pkc, struct tpacket3_hdr *ppd) { - if (vlan_tx_tag_present(pkc->skb)) { - ppd->hv1.tp_vlan_tci = vlan_tx_tag_get(pkc->skb); + if (skb_vlan_tag_present(pkc->skb)) { + ppd->hv1.tp_vlan_tci = skb_vlan_tag_get(pkc->skb); ppd->hv1.tp_vlan_tpid = ntohs(pkc->skb->vlan_proto); ppd->tp_status = TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID; } else { @@ -1997,8 +2000,8 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, h.h2->tp_net = netoff; h.h2->tp_sec = ts.tv_sec; h.h2->tp_nsec = ts.tv_nsec; - if (vlan_tx_tag_present(skb)) { - h.h2->tp_vlan_tci = vlan_tx_tag_get(skb); + if (skb_vlan_tag_present(skb)) { + h.h2->tp_vlan_tci = skb_vlan_tag_get(skb); h.h2->tp_vlan_tpid = ntohs(skb->vlan_proto); status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID; } else { @@ -2052,12 +2055,12 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, smp_wmb(); #endif - if (po->tp_version <= TPACKET_V2) + if (po->tp_version <= TPACKET_V2) { __packet_set_status(po, h.raw, status); - else + sk->sk_data_ready(sk); + } else { prb_clear_blk_fill_status(&po->rx_ring); - - sk->sk_data_ready(sk); + } drop_n_restore: if (skb_head != skb->data && skb_shared(skb)) { @@ -2099,7 +2102,7 @@ static bool ll_header_truncated(const struct net_device *dev, int len) { /* net device doesn't like empty head */ if (unlikely(len <= dev->hard_header_len)) { - net_warn_ratelimited("%s: packet size is too short (%d < %d)\n", + net_warn_ratelimited("%s: packet size is too short (%d <= %d)\n", current->comm, len, dev->hard_header_len); return true; } @@ -2514,7 +2517,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, 
size_t len) err = -EINVAL; if (sock->type == SOCK_DGRAM) { offset = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len); - if (unlikely(offset) < 0) + if (unlikely(offset < 0)) goto out_free; } else { if (ll_header_truncated(dev, len)) @@ -3007,8 +3010,8 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, aux.tp_snaplen = skb->len; aux.tp_mac = 0; aux.tp_net = skb_network_offset(skb); - if (vlan_tx_tag_present(skb)) { - aux.tp_vlan_tci = vlan_tx_tag_get(skb); + if (skb_vlan_tag_present(skb)) { + aux.tp_vlan_tci = skb_vlan_tag_get(skb); aux.tp_vlan_tpid = ntohs(skb->vlan_proto); aux.tp_status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID; } else { diff --git a/net/packet/diag.c b/net/packet/diag.c index 92f2c7107eec..0ed68f0238bf 100644 --- a/net/packet/diag.c +++ b/net/packet/diag.c @@ -177,7 +177,8 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, PACKET_DIAG_FILTER)) goto out_nlmsg_trim; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; out_nlmsg_trim: nlmsg_cancel(skb, nlh); diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c index b64151ade6b3..bc5ee5fbe6ae 100644 --- a/net/phonet/pn_netlink.c +++ b/net/phonet/pn_netlink.c @@ -121,7 +121,8 @@ static int fill_addr(struct sk_buff *skb, struct net_device *dev, u8 addr, ifm->ifa_index = dev->ifindex; if (nla_put_u8(skb, IFA_LOCAL, addr)) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); @@ -190,7 +191,8 @@ static int fill_route(struct sk_buff *skb, struct net_device *dev, u8 dst, if (nla_put_u8(skb, RTA_DST, dst) || nla_put_u32(skb, RTA_OIF, dev->ifindex)) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); @@ -270,27 +272,23 @@ static int route_doit(struct sk_buff *skb, struct nlmsghdr *nlh) static int route_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); - u8 addr, addr_idx = 0, addr_start_idx = cb->args[0]; + u8 addr; rcu_read_lock(); - for (addr = 0; addr < 64; addr++) { - struct net_device *dev; + for (addr = cb->args[0]; addr < 64; addr++) { + struct net_device *dev = phonet_route_get_rcu(net, addr << 2); - dev = phonet_route_get_rcu(net, addr << 2); if (!dev) continue; - if (addr_idx++ < addr_start_idx) - continue; if (fill_route(skb, dev, addr << 2, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, RTM_NEWROUTE)) + cb->nlh->nlmsg_seq, RTM_NEWROUTE) < 0) goto out; } out: rcu_read_unlock(); - cb->args[0] = addr_idx; - cb->args[1] = 0; + cb->args[0] = addr; return skb->len; } diff --git a/net/rds/cong.c b/net/rds/cong.c index e5b65acd650b..e6144b8246fd 100644 --- a/net/rds/cong.c +++ b/net/rds/cong.c @@ -221,7 +221,21 @@ void rds_cong_queue_updates(struct rds_cong_map *map) list_for_each_entry(conn, &map->m_conn_list, c_map_item) { if (!test_and_set_bit(0, &conn->c_map_queued)) { rds_stats_inc(s_cong_update_queued); - rds_send_xmit(conn); + /* We cannot inline the call to rds_send_xmit() here + * for two reasons (both pertaining to a TCP transport): + * 1. When we get here from the receive path, we + * are already holding the sock_lock (held by + * tcp_v4_rcv()). So inlining calls to + * tcp_setsockopt and/or tcp_sendmsg will deadlock + * when it tries to get the sock_lock()) + * 2. Interrupts are masked so that we can mark the + * the port congested from both send and recv paths. + * (See comment around declaration of rdc_cong_lock). 
+ * An attempt to get the sock_lock() here will + * therefore trigger warnings. + * Defer the xmit to rds_send_worker() instead. + */ + queue_delayed_work(rds_wq, &conn->c_send_w, 0); } } diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index 1dde91e3dc70..bd3825d38abc 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -409,7 +409,7 @@ try_again: posted = IB_GET_POST_CREDITS(oldval); avail = IB_GET_SEND_CREDITS(oldval); - rdsdebug("rds_ib_send_grab_credits(%u): credits=%u posted=%u\n", + rdsdebug("wanted=%u credits=%u posted=%u\n", wanted, avail, posted); /* The last credit must be used to send a credit update. */ @@ -453,7 +453,7 @@ void rds_ib_send_add_credits(struct rds_connection *conn, unsigned int credits) if (credits == 0) return; - rdsdebug("rds_ib_send_add_credits(%u): current=%u%s\n", + rdsdebug("credits=%u current=%u%s\n", credits, IB_GET_SEND_CREDITS(atomic_read(&ic->i_credits)), test_bit(RDS_LL_SEND_FULL, &conn->c_flags) ? ", ll_send_full" : ""); diff --git a/net/rds/iw_cm.c b/net/rds/iw_cm.c index a91e1db62ee6..a6c2bea9f8f9 100644 --- a/net/rds/iw_cm.c +++ b/net/rds/iw_cm.c @@ -590,8 +590,8 @@ void rds_iw_conn_shutdown(struct rds_connection *conn) /* Actually this may happen quite frequently, when * an outgoing connect raced with an incoming connect. */ - rdsdebug("rds_iw_conn_shutdown: failed to disconnect," - " cm: %p err %d\n", ic->i_cm_id, err); + rdsdebug("failed to disconnect, cm: %p err %d\n", + ic->i_cm_id, err); } if (ic->i_cm_id->qp) { diff --git a/net/rds/iw_send.c b/net/rds/iw_send.c index 9105ea03aec5..13834780a308 100644 --- a/net/rds/iw_send.c +++ b/net/rds/iw_send.c @@ -361,7 +361,7 @@ try_again: posted = IB_GET_POST_CREDITS(oldval); avail = IB_GET_SEND_CREDITS(oldval); - rdsdebug("rds_iw_send_grab_credits(%u): credits=%u posted=%u\n", + rdsdebug("wanted=%u credits=%u posted=%u\n", wanted, avail, posted); /* The last credit must be used to send a credit update. */ @@ -405,7 +405,7 @@ void rds_iw_send_add_credits(struct rds_connection *conn, unsigned int credits) if (credits == 0) return; - rdsdebug("rds_iw_send_add_credits(%u): current=%u%s\n", + rdsdebug("credits=%u current=%u%s\n", credits, IB_GET_SEND_CREDITS(atomic_read(&ic->i_credits)), test_bit(RDS_LL_SEND_FULL, &conn->c_flags) ? 
", ll_send_full" : ""); diff --git a/net/rds/message.c b/net/rds/message.c index 5a21e6f5986f..756c73729126 100644 --- a/net/rds/message.c +++ b/net/rds/message.c @@ -266,7 +266,7 @@ struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned in int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from) { - unsigned long to_copy; + unsigned long to_copy, nbytes; unsigned long sg_off; struct scatterlist *sg; int ret = 0; @@ -293,9 +293,9 @@ int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from) sg->length - sg_off); rds_stats_add(s_copy_from_user, to_copy); - ret = copy_page_from_iter(sg_page(sg), sg->offset + sg_off, - to_copy, from); - if (ret != to_copy) + nbytes = copy_page_from_iter(sg_page(sg), sg->offset + sg_off, + to_copy, from); + if (nbytes != to_copy) return -EFAULT; sg_off += to_copy; diff --git a/net/rds/sysctl.c b/net/rds/sysctl.c index c3b0cd43eb56..c173f69e1479 100644 --- a/net/rds/sysctl.c +++ b/net/rds/sysctl.c @@ -71,14 +71,14 @@ static struct ctl_table rds_sysctl_rds_table[] = { { .procname = "max_unacked_packets", .data = &rds_sysctl_max_unacked_packets, - .maxlen = sizeof(unsigned long), + .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "max_unacked_bytes", .data = &rds_sysctl_max_unacked_bytes, - .maxlen = sizeof(unsigned long), + .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, diff --git a/net/rfkill/rfkill-gpio.c b/net/rfkill/rfkill-gpio.c index 3f4a0bbeed3d..d978f2f46ff3 100644 --- a/net/rfkill/rfkill-gpio.c +++ b/net/rfkill/rfkill-gpio.c @@ -170,6 +170,7 @@ static const struct acpi_device_id rfkill_acpi_match[] = { { "BCM2E1A", RFKILL_TYPE_BLUETOOTH }, { "BCM2E39", RFKILL_TYPE_BLUETOOTH }, { "BCM2E3D", RFKILL_TYPE_BLUETOOTH }, + { "BCM2E40", RFKILL_TYPE_BLUETOOTH }, { "BCM2E64", RFKILL_TYPE_BLUETOOTH }, { "BCM4752", RFKILL_TYPE_GPS }, { "LNV4752", RFKILL_TYPE_GPS }, diff --git a/net/rxrpc/ar-error.c b/net/rxrpc/ar-error.c index 74c0fcd36838..5394b6be46ec 100644 --- a/net/rxrpc/ar-error.c +++ b/net/rxrpc/ar-error.c @@ -42,6 +42,11 @@ void rxrpc_UDP_error_report(struct sock *sk) _leave("UDP socket errqueue empty"); return; } + if (!skb->len) { + _leave("UDP empty message"); + kfree_skb(skb); + return; + } rxrpc_new_skb(skb); diff --git a/net/rxrpc/ar-output.c b/net/rxrpc/ar-output.c index e1a9373e5979..8331c95e1522 100644 --- a/net/rxrpc/ar-output.c +++ b/net/rxrpc/ar-output.c @@ -232,10 +232,7 @@ int rxrpc_kernel_send_data(struct rxrpc_call *call, struct msghdr *msg, call->state != RXRPC_CALL_SERVER_SEND_REPLY) { ret = -EPROTO; /* request phase complete for this client call */ } else { - mm_segment_t oldfs = get_fs(); - set_fs(KERNEL_DS); ret = rxrpc_send_data(NULL, call->socket, call, msg, len); - set_fs(oldfs); } release_sock(&call->socket->sk); @@ -529,13 +526,11 @@ static int rxrpc_send_data(struct kiocb *iocb, struct msghdr *msg, size_t len) { struct rxrpc_skb_priv *sp; - unsigned char __user *from; struct sk_buff *skb; - const struct iovec *iov; struct sock *sk = &rx->sk; long timeo; bool more; - int ret, ioc, segment, copied; + int ret, copied; timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); @@ -545,25 +540,17 @@ static int rxrpc_send_data(struct kiocb *iocb, if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) return -EPIPE; - iov = msg->msg_iter.iov; - ioc = msg->msg_iter.nr_segs - 1; - from = iov->iov_base; - segment = iov->iov_len; - iov++; more = msg->msg_flags & MSG_MORE; skb = call->tx_pending; call->tx_pending = NULL; 
copied = 0; - do { + if (len > iov_iter_count(&msg->msg_iter)) + len = iov_iter_count(&msg->msg_iter); + while (len) { int copy; - if (segment > len) - segment = len; - - _debug("SEGMENT %d @%p", segment, from); - if (!skb) { size_t size, chunk, max, space; @@ -631,13 +618,13 @@ static int rxrpc_send_data(struct kiocb *iocb, /* append next segment of data to the current buffer */ copy = skb_tailroom(skb); ASSERTCMP(copy, >, 0); - if (copy > segment) - copy = segment; + if (copy > len) + copy = len; if (copy > sp->remain) copy = sp->remain; _debug("add"); - ret = skb_add_data(skb, from, copy); + ret = skb_add_data(skb, &msg->msg_iter, copy); _debug("added"); if (ret < 0) goto efault; @@ -646,18 +633,6 @@ static int rxrpc_send_data(struct kiocb *iocb, copied += copy; len -= copy; - segment -= copy; - from += copy; - while (segment == 0 && ioc > 0) { - from = iov->iov_base; - segment = iov->iov_len; - iov++; - ioc--; - } - if (len == 0) { - segment = 0; - ioc = 0; - } /* check for the far side aborting the call or a network error * occurring */ @@ -665,7 +640,7 @@ static int rxrpc_send_data(struct kiocb *iocb, goto call_aborted; /* add the packet to the send queue if it's now full */ - if (sp->remain <= 0 || (segment == 0 && !more)) { + if (sp->remain <= 0 || (!len && !more)) { struct rxrpc_connection *conn = call->conn; uint32_t seq; size_t pad; @@ -711,11 +686,10 @@ static int rxrpc_send_data(struct kiocb *iocb, memcpy(skb->head, &sp->hdr, sizeof(struct rxrpc_header)); - rxrpc_queue_packet(call, skb, segment == 0 && !more); + rxrpc_queue_packet(call, skb, !iov_iter_count(&msg->msg_iter) && !more); skb = NULL; } - - } while (segment > 0); + } success: ret = copied; diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 706af73c969f..2274e723a3df 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -698,6 +698,30 @@ config NET_ACT_VLAN To compile this code as a module, choose M here: the module will be called act_vlan. +config NET_ACT_BPF + tristate "BPF based action" + depends on NET_CLS_ACT + ---help--- + Say Y here to execute BPF code on packets. The BPF code will decide + if the packet should be dropped or not. + + If unsure, say N. + + To compile this code as a module, choose M here: the + module will be called act_bpf. + +config NET_ACT_CONNMARK + tristate "Netfilter Connection Mark Retriever" + depends on NET_CLS_ACT && NETFILTER && IP_NF_IPTABLES + depends on NF_CONNTRACK && NF_CONNTRACK_MARK + ---help--- + Say Y here to allow retrieving of conn mark + + If unsure, say N. + + To compile this code as a module, choose M here: the + module will be called act_connmark. 
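The two Kconfig entries above introduce the act_bpf and act_connmark actions added later in this series. As the act_bpf implementation further down shows, the action runs a classic BPF program over each packet and treats a return value of 0 as TC_ACT_SHOT (drop), while any non-zero return falls through to the action configured in TCA_ACT_BPF_PARMS. Below is a minimal sketch of such a program, assuming it would be handed to the kernel in the TCA_ACT_BPF_OPS attribute with TCA_ACT_BPF_OPS_LEN set to the instruction count; the array name and the 64-byte threshold are illustrative only and not part of the patch.

#include <linux/filter.h>

/* Illustrative classic BPF program for act_bpf: drop (return 0, which
 * tcf_bpf() maps to TC_ACT_SHOT) any packet shorter than 64 bytes and
 * let everything else fall through to the configured action.
 */
static const struct sock_filter drop_runts[] = {
	BPF_STMT(BPF_LD  | BPF_W   | BPF_LEN, 0),	/* A = skb->len          */
	BPF_JUMP(BPF_JMP | BPF_JGE | BPF_K, 64, 0, 1),	/* A >= 64 ? next : +1   */
	BPF_STMT(BPF_RET | BPF_K, 0xffff),		/* non-zero: keep packet */
	BPF_STMT(BPF_RET | BPF_K, 0),			/* zero: TC_ACT_SHOT     */
};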
+ config NET_CLS_IND bool "Incoming device classification" depends on NET_CLS_U32 || NET_CLS_FW diff --git a/net/sched/Makefile b/net/sched/Makefile index 679f24ae7f93..7ca7f4c1b8c2 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -17,6 +17,8 @@ obj-$(CONFIG_NET_ACT_SIMP) += act_simple.o obj-$(CONFIG_NET_ACT_SKBEDIT) += act_skbedit.o obj-$(CONFIG_NET_ACT_CSUM) += act_csum.o obj-$(CONFIG_NET_ACT_VLAN) += act_vlan.o +obj-$(CONFIG_NET_ACT_BPF) += act_bpf.o +obj-$(CONFIG_NET_ACT_CONNMARK) += act_connmark.o obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c new file mode 100644 index 000000000000..82c5d7fc1988 --- /dev/null +++ b/net/sched/act_bpf.c @@ -0,0 +1,208 @@ +/* + * Copyright (c) 2015 Jiri Pirko <jiri@resnulli.us> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/skbuff.h> +#include <linux/rtnetlink.h> +#include <linux/filter.h> +#include <net/netlink.h> +#include <net/pkt_sched.h> + +#include <linux/tc_act/tc_bpf.h> +#include <net/tc_act/tc_bpf.h> + +#define BPF_TAB_MASK 15 + +static int tcf_bpf(struct sk_buff *skb, const struct tc_action *a, + struct tcf_result *res) +{ + struct tcf_bpf *b = a->priv; + int action; + int filter_res; + + spin_lock(&b->tcf_lock); + b->tcf_tm.lastuse = jiffies; + bstats_update(&b->tcf_bstats, skb); + action = b->tcf_action; + + filter_res = BPF_PROG_RUN(b->filter, skb); + if (filter_res == 0) { + /* Return code 0 from the BPF program + * is being interpreted as a drop here. 
+ */ + action = TC_ACT_SHOT; + b->tcf_qstats.drops++; + } + + spin_unlock(&b->tcf_lock); + return action; +} + +static int tcf_bpf_dump(struct sk_buff *skb, struct tc_action *a, + int bind, int ref) +{ + unsigned char *tp = skb_tail_pointer(skb); + struct tcf_bpf *b = a->priv; + struct tc_act_bpf opt = { + .index = b->tcf_index, + .refcnt = b->tcf_refcnt - ref, + .bindcnt = b->tcf_bindcnt - bind, + .action = b->tcf_action, + }; + struct tcf_t t; + struct nlattr *nla; + + if (nla_put(skb, TCA_ACT_BPF_PARMS, sizeof(opt), &opt)) + goto nla_put_failure; + + if (nla_put_u16(skb, TCA_ACT_BPF_OPS_LEN, b->bpf_num_ops)) + goto nla_put_failure; + + nla = nla_reserve(skb, TCA_ACT_BPF_OPS, b->bpf_num_ops * + sizeof(struct sock_filter)); + if (!nla) + goto nla_put_failure; + + memcpy(nla_data(nla), b->bpf_ops, nla_len(nla)); + + t.install = jiffies_to_clock_t(jiffies - b->tcf_tm.install); + t.lastuse = jiffies_to_clock_t(jiffies - b->tcf_tm.lastuse); + t.expires = jiffies_to_clock_t(b->tcf_tm.expires); + if (nla_put(skb, TCA_ACT_BPF_TM, sizeof(t), &t)) + goto nla_put_failure; + return skb->len; + +nla_put_failure: + nlmsg_trim(skb, tp); + return -1; +} + +static const struct nla_policy act_bpf_policy[TCA_ACT_BPF_MAX + 1] = { + [TCA_ACT_BPF_PARMS] = { .len = sizeof(struct tc_act_bpf) }, + [TCA_ACT_BPF_OPS_LEN] = { .type = NLA_U16 }, + [TCA_ACT_BPF_OPS] = { .type = NLA_BINARY, + .len = sizeof(struct sock_filter) * BPF_MAXINSNS }, +}; + +static int tcf_bpf_init(struct net *net, struct nlattr *nla, + struct nlattr *est, struct tc_action *a, + int ovr, int bind) +{ + struct nlattr *tb[TCA_ACT_BPF_MAX + 1]; + struct tc_act_bpf *parm; + struct tcf_bpf *b; + u16 bpf_size, bpf_num_ops; + struct sock_filter *bpf_ops; + struct sock_fprog_kern tmp; + struct bpf_prog *fp; + int ret; + + if (!nla) + return -EINVAL; + + ret = nla_parse_nested(tb, TCA_ACT_BPF_MAX, nla, act_bpf_policy); + if (ret < 0) + return ret; + + if (!tb[TCA_ACT_BPF_PARMS] || + !tb[TCA_ACT_BPF_OPS_LEN] || !tb[TCA_ACT_BPF_OPS]) + return -EINVAL; + parm = nla_data(tb[TCA_ACT_BPF_PARMS]); + + bpf_num_ops = nla_get_u16(tb[TCA_ACT_BPF_OPS_LEN]); + if (bpf_num_ops > BPF_MAXINSNS || bpf_num_ops == 0) + return -EINVAL; + + bpf_size = bpf_num_ops * sizeof(*bpf_ops); + if (bpf_size != nla_len(tb[TCA_ACT_BPF_OPS])) + return -EINVAL; + + bpf_ops = kzalloc(bpf_size, GFP_KERNEL); + if (!bpf_ops) + return -ENOMEM; + + memcpy(bpf_ops, nla_data(tb[TCA_ACT_BPF_OPS]), bpf_size); + + tmp.len = bpf_num_ops; + tmp.filter = bpf_ops; + + ret = bpf_prog_create(&fp, &tmp); + if (ret) + goto free_bpf_ops; + + if (!tcf_hash_check(parm->index, a, bind)) { + ret = tcf_hash_create(parm->index, est, a, sizeof(*b), bind); + if (ret) + goto destroy_fp; + + ret = ACT_P_CREATED; + } else { + if (bind) + goto destroy_fp; + tcf_hash_release(a, bind); + if (!ovr) { + ret = -EEXIST; + goto destroy_fp; + } + } + + b = to_bpf(a); + spin_lock_bh(&b->tcf_lock); + b->tcf_action = parm->action; + b->bpf_num_ops = bpf_num_ops; + b->bpf_ops = bpf_ops; + b->filter = fp; + spin_unlock_bh(&b->tcf_lock); + + if (ret == ACT_P_CREATED) + tcf_hash_insert(a); + return ret; + +destroy_fp: + bpf_prog_destroy(fp); +free_bpf_ops: + kfree(bpf_ops); + return ret; +} + +static void tcf_bpf_cleanup(struct tc_action *a, int bind) +{ + struct tcf_bpf *b = a->priv; + + bpf_prog_destroy(b->filter); +} + +static struct tc_action_ops act_bpf_ops = { + .kind = "bpf", + .type = TCA_ACT_BPF, + .owner = THIS_MODULE, + .act = tcf_bpf, + .dump = tcf_bpf_dump, + .cleanup = tcf_bpf_cleanup, + .init = tcf_bpf_init, +}; + 
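Both act_bpf above and the cls_bpf changes further down (where bpf_len is renamed to bpf_num_ops) load their program the same way: the opcode array taken from the netlink attribute is wrapped in a struct sock_fprog_kern, validated and built into a bpf_prog by bpf_prog_create(), and later executed with BPF_PROG_RUN(). A minimal sketch of that conversion step follows; example_classic_to_prog() is a hypothetical helper name used here for illustration, not part of the patch.

#include <linux/err.h>
#include <linux/filter.h>

/* Hypothetical helper mirroring tcf_bpf_init()/cls_bpf_modify_existing():
 * wrap a classic BPF opcode array in sock_fprog_kern and let
 * bpf_prog_create() validate it and build the bpf_prog.
 */
static struct bpf_prog *example_classic_to_prog(struct sock_filter *ops,
						u16 num_ops)
{
	struct sock_fprog_kern fprog = {
		.len	= num_ops,	/* instruction count, not byte length */
		.filter	= ops,
	};
	struct bpf_prog *fp;
	int err;

	err = bpf_prog_create(&fp, &fprog);
	if (err)
		return ERR_PTR(err);

	return fp;	/* caller releases it with bpf_prog_destroy() */
}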
+static int __init bpf_init_module(void) +{ + return tcf_register_action(&act_bpf_ops, BPF_TAB_MASK); +} + +static void __exit bpf_cleanup_module(void) +{ + tcf_unregister_action(&act_bpf_ops); +} + +module_init(bpf_init_module); +module_exit(bpf_cleanup_module); + +MODULE_AUTHOR("Jiri Pirko <jiri@resnulli.us>"); +MODULE_DESCRIPTION("TC BPF based action"); +MODULE_LICENSE("GPL v2"); diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c new file mode 100644 index 000000000000..8e472518f9f6 --- /dev/null +++ b/net/sched/act_connmark.c @@ -0,0 +1,192 @@ +/* + * net/sched/act_connmark.c netfilter connmark retriever action + * skb mark is over-written + * + * Copyright (c) 2011 Felix Fietkau <nbd@openwrt.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. +*/ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/skbuff.h> +#include <linux/rtnetlink.h> +#include <linux/pkt_cls.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <net/netlink.h> +#include <net/pkt_sched.h> +#include <net/act_api.h> +#include <uapi/linux/tc_act/tc_connmark.h> +#include <net/tc_act/tc_connmark.h> + +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_core.h> +#include <net/netfilter/nf_conntrack_zones.h> + +#define CONNMARK_TAB_MASK 3 + +static int tcf_connmark(struct sk_buff *skb, const struct tc_action *a, + struct tcf_result *res) +{ + const struct nf_conntrack_tuple_hash *thash; + struct nf_conntrack_tuple tuple; + enum ip_conntrack_info ctinfo; + struct tcf_connmark_info *ca = a->priv; + struct nf_conn *c; + int proto; + + spin_lock(&ca->tcf_lock); + ca->tcf_tm.lastuse = jiffies; + bstats_update(&ca->tcf_bstats, skb); + + if (skb->protocol == htons(ETH_P_IP)) { + if (skb->len < sizeof(struct iphdr)) + goto out; + + proto = NFPROTO_IPV4; + } else if (skb->protocol == htons(ETH_P_IPV6)) { + if (skb->len < sizeof(struct ipv6hdr)) + goto out; + + proto = NFPROTO_IPV6; + } else { + goto out; + } + + c = nf_ct_get(skb, &ctinfo); + if (c) { + skb->mark = c->mark; + /* using overlimits stats to count how many packets marked */ + ca->tcf_qstats.overlimits++; + nf_ct_put(c); + goto out; + } + + if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), + proto, &tuple)) + goto out; + + thash = nf_conntrack_find_get(dev_net(skb->dev), ca->zone, &tuple); + if (!thash) + goto out; + + c = nf_ct_tuplehash_to_ctrack(thash); + /* using overlimits stats to count how many packets marked */ + ca->tcf_qstats.overlimits++; + skb->mark = c->mark; + nf_ct_put(c); + +out: + skb->nfct = NULL; + spin_unlock(&ca->tcf_lock); + return ca->tcf_action; +} + +static const struct nla_policy connmark_policy[TCA_CONNMARK_MAX + 1] = { + [TCA_CONNMARK_PARMS] = { .len = sizeof(struct tc_connmark) }, +}; + +static int tcf_connmark_init(struct net *net, struct nlattr *nla, + struct nlattr *est, struct tc_action *a, + int ovr, int bind) +{ + struct nlattr *tb[TCA_CONNMARK_MAX + 1]; + struct tcf_connmark_info *ci; + struct tc_connmark *parm; + int ret = 0; + + if (!nla) + return -EINVAL; + + ret = nla_parse_nested(tb, TCA_CONNMARK_MAX, nla, connmark_policy); + if (ret < 0) + return ret; + + parm = nla_data(tb[TCA_CONNMARK_PARMS]); + + if (!tcf_hash_check(parm->index, a, bind)) { + ret = tcf_hash_create(parm->index, est, a, sizeof(*ci), bind); + if (ret) + 
return ret; + + ci = to_connmark(a); + ci->tcf_action = parm->action; + ci->zone = parm->zone; + + tcf_hash_insert(a); + ret = ACT_P_CREATED; + } else { + ci = to_connmark(a); + if (bind) + return 0; + tcf_hash_release(a, bind); + if (!ovr) + return -EEXIST; + /* replacing action and zone */ + ci->tcf_action = parm->action; + ci->zone = parm->zone; + } + + return ret; +} + +static inline int tcf_connmark_dump(struct sk_buff *skb, struct tc_action *a, + int bind, int ref) +{ + unsigned char *b = skb_tail_pointer(skb); + struct tcf_connmark_info *ci = a->priv; + + struct tc_connmark opt = { + .index = ci->tcf_index, + .refcnt = ci->tcf_refcnt - ref, + .bindcnt = ci->tcf_bindcnt - bind, + .action = ci->tcf_action, + .zone = ci->zone, + }; + struct tcf_t t; + + if (nla_put(skb, TCA_CONNMARK_PARMS, sizeof(opt), &opt)) + goto nla_put_failure; + + t.install = jiffies_to_clock_t(jiffies - ci->tcf_tm.install); + t.lastuse = jiffies_to_clock_t(jiffies - ci->tcf_tm.lastuse); + t.expires = jiffies_to_clock_t(ci->tcf_tm.expires); + if (nla_put(skb, TCA_CONNMARK_TM, sizeof(t), &t)) + goto nla_put_failure; + + return skb->len; +nla_put_failure: + nlmsg_trim(skb, b); + return -1; +} + +static struct tc_action_ops act_connmark_ops = { + .kind = "connmark", + .type = TCA_ACT_CONNMARK, + .owner = THIS_MODULE, + .act = tcf_connmark, + .dump = tcf_connmark_dump, + .init = tcf_connmark_init, +}; + +static int __init connmark_init_module(void) +{ + return tcf_register_action(&act_connmark_ops, CONNMARK_TAB_MASK); +} + +static void __exit connmark_cleanup_module(void) +{ + tcf_unregister_action(&act_connmark_ops); +} + +module_init(connmark_init_module); +module_exit(connmark_cleanup_module); +MODULE_AUTHOR("Felix Fietkau <nbd@openwrt.org>"); +MODULE_DESCRIPTION("Connection tracking mark restoring"); +MODULE_LICENSE("GPL"); + diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index edbf40dac709..4cd5cf1aedf8 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -509,7 +509,7 @@ static int tcf_csum(struct sk_buff *skb, if (unlikely(action == TC_ACT_SHOT)) goto drop; - switch (skb->protocol) { + switch (tc_skb_protocol(skb)) { case cpu_to_be16(ETH_P_IP): if (!tcf_csum_ipv4(skb, update_flags)) goto drop; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index aad6a679fb13..baef987fe2c0 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -556,8 +556,9 @@ void tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst, } EXPORT_SYMBOL(tcf_exts_change); -#define tcf_exts_first_act(ext) \ - list_first_entry(&(exts)->actions, struct tc_action, list) +#define tcf_exts_first_act(ext) \ + list_first_entry_or_null(&(exts)->actions, \ + struct tc_action, list) int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts) { @@ -603,7 +604,7 @@ int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts) { #ifdef CONFIG_NET_CLS_ACT struct tc_action *a = tcf_exts_first_act(exts); - if (tcf_action_copy_stats(skb, a, 1) < 0) + if (a != NULL && tcf_action_copy_stats(skb, a, 1) < 0) return -1; #endif return 0; diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index 5aed341406c2..fc399db86f11 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -65,9 +65,12 @@ static unsigned long basic_get(struct tcf_proto *tp, u32 handle) if (head == NULL) return 0UL; - list_for_each_entry(f, &head->flist, link) - if (f->handle == handle) + list_for_each_entry(f, &head->flist, link) { + if (f->handle == handle) { l = (unsigned long) f; + break; + } + } return l; } diff --git 
a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 84c8219c3e1c..5f3ee9e4b5bf 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -37,7 +37,7 @@ struct cls_bpf_prog { struct tcf_result res; struct list_head link; u32 handle; - u16 bpf_len; + u16 bpf_num_ops; struct tcf_proto *tp; struct rcu_head rcu; }; @@ -160,7 +160,7 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, struct tcf_exts exts; struct sock_fprog_kern tmp; struct bpf_prog *fp; - u16 bpf_size, bpf_len; + u16 bpf_size, bpf_num_ops; u32 classid; int ret; @@ -173,13 +173,18 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, return ret; classid = nla_get_u32(tb[TCA_BPF_CLASSID]); - bpf_len = nla_get_u16(tb[TCA_BPF_OPS_LEN]); - if (bpf_len > BPF_MAXINSNS || bpf_len == 0) { + bpf_num_ops = nla_get_u16(tb[TCA_BPF_OPS_LEN]); + if (bpf_num_ops > BPF_MAXINSNS || bpf_num_ops == 0) { + ret = -EINVAL; + goto errout; + } + + bpf_size = bpf_num_ops * sizeof(*bpf_ops); + if (bpf_size != nla_len(tb[TCA_BPF_OPS])) { ret = -EINVAL; goto errout; } - bpf_size = bpf_len * sizeof(*bpf_ops); bpf_ops = kzalloc(bpf_size, GFP_KERNEL); if (bpf_ops == NULL) { ret = -ENOMEM; @@ -188,14 +193,14 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, memcpy(bpf_ops, nla_data(tb[TCA_BPF_OPS]), bpf_size); - tmp.len = bpf_len; + tmp.len = bpf_num_ops; tmp.filter = bpf_ops; ret = bpf_prog_create(&fp, &tmp); if (ret) goto errout_free; - prog->bpf_len = bpf_len; + prog->bpf_num_ops = bpf_num_ops; prog->bpf_ops = bpf_ops; prog->filter = fp; prog->res.classid = classid; @@ -215,15 +220,21 @@ static u32 cls_bpf_grab_new_handle(struct tcf_proto *tp, struct cls_bpf_head *head) { unsigned int i = 0x80000000; + u32 handle; do { if (++head->hgen == 0x7FFFFFFF) head->hgen = 1; } while (--i > 0 && cls_bpf_get(tp, head->hgen)); - if (i == 0) + + if (unlikely(i == 0)) { pr_err("Insufficient number of handles\n"); + handle = 0; + } else { + handle = head->hgen; + } - return i; + return handle; } static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, @@ -303,10 +314,10 @@ static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, if (nla_put_u32(skb, TCA_BPF_CLASSID, prog->res.classid)) goto nla_put_failure; - if (nla_put_u16(skb, TCA_BPF_OPS_LEN, prog->bpf_len)) + if (nla_put_u16(skb, TCA_BPF_OPS_LEN, prog->bpf_num_ops)) goto nla_put_failure; - nla = nla_reserve(skb, TCA_BPF_OPS, prog->bpf_len * + nla = nla_reserve(skb, TCA_BPF_OPS, prog->bpf_num_ops * sizeof(struct sock_filter)); if (nla == NULL) goto nla_put_failure; diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 15d68f24a521..461410394d08 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -77,7 +77,7 @@ static u32 flow_get_dst(const struct sk_buff *skb, const struct flow_keys *flow) { if (flow->dst) return ntohl(flow->dst); - return addr_fold(skb_dst(skb)) ^ (__force u16)skb->protocol; + return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb); } static u32 flow_get_proto(const struct sk_buff *skb, const struct flow_keys *flow) @@ -98,7 +98,7 @@ static u32 flow_get_proto_dst(const struct sk_buff *skb, const struct flow_keys if (flow->ports) return ntohs(flow->port16[1]); - return addr_fold(skb_dst(skb)) ^ (__force u16)skb->protocol; + return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb); } static u32 flow_get_iif(const struct sk_buff *skb) @@ -144,7 +144,7 @@ static u32 flow_get_nfct(const struct sk_buff *skb) static u32 flow_get_nfct_src(const struct 
sk_buff *skb, const struct flow_keys *flow) { - switch (skb->protocol) { + switch (tc_skb_protocol(skb)) { case htons(ETH_P_IP): return ntohl(CTTUPLE(skb, src.u3.ip)); case htons(ETH_P_IPV6): @@ -156,7 +156,7 @@ fallback: static u32 flow_get_nfct_dst(const struct sk_buff *skb, const struct flow_keys *flow) { - switch (skb->protocol) { + switch (tc_skb_protocol(skb)) { case htons(ETH_P_IP): return ntohl(CTTUPLE(skb, dst.u3.ip)); case htons(ETH_P_IPV6): diff --git a/net/sched/em_ipset.c b/net/sched/em_ipset.c index 5b4a4efe468c..a3d79c8bf3b8 100644 --- a/net/sched/em_ipset.c +++ b/net/sched/em_ipset.c @@ -59,7 +59,7 @@ static int em_ipset_match(struct sk_buff *skb, struct tcf_ematch *em, struct net_device *dev, *indev = NULL; int ret, network_offset; - switch (skb->protocol) { + switch (tc_skb_protocol(skb)) { case htons(ETH_P_IP): acpar.family = NFPROTO_IPV4; if (!pskb_network_may_pull(skb, sizeof(struct iphdr))) diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index c8f8c399b99a..b5294ce20cd4 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -176,7 +176,7 @@ META_COLLECTOR(int_vlan_tag) { unsigned short tag; - tag = vlan_tx_tag_get(skb); + tag = skb_vlan_tag_get(skb); if (!tag && __vlan_get_tag(skb, &tag)) *err = -1; else @@ -197,7 +197,7 @@ META_COLLECTOR(int_priority) META_COLLECTOR(int_protocol) { /* Let userspace take care of the byte ordering */ - dst->value = skb->protocol; + dst->value = tc_skb_protocol(skb); } META_COLLECTOR(int_pkttype) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 76f402e05bd6..243b7d169d61 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1807,7 +1807,7 @@ done: int tc_classify_compat(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) { - __be16 protocol = skb->protocol; + __be16 protocol = tc_skb_protocol(skb); int err; for (; tp; tp = rcu_dereference_bh(tp->next)) { diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 227114f27f94..66700a6116aa 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -203,7 +203,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch) pr_debug("%s(skb %p,sch %p,[qdisc %p])\n", __func__, skb, sch, p); if (p->set_tc_index) { - switch (skb->protocol) { + switch (tc_skb_protocol(skb)) { case htons(ETH_P_IP): if (skb_cow_head(skb, sizeof(struct iphdr))) goto drop; @@ -289,7 +289,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch) index = skb->tc_index & (p->indices - 1); pr_debug("index %d->%d\n", skb->tc_index, index); - switch (skb->protocol) { + switch (tc_skb_protocol(skb)) { case htons(ETH_P_IP): ipv4_change_dsfield(ip_hdr(skb), p->mask[index], p->value[index]); @@ -306,7 +306,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch) */ if (p->mask[index] != 0xff || p->value[index]) pr_warn("%s: unsupported protocol %d\n", - __func__, ntohs(skb->protocol)); + __func__, ntohs(tc_skb_protocol(skb))); break; } diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 9b05924cc386..dfcea20e3171 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -1,7 +1,7 @@ /* * net/sched/sch_fq.c Fair Queue Packet Scheduler (per flow pacing) * - * Copyright (C) 2013 Eric Dumazet <edumazet@google.com> + * Copyright (C) 2013-2015 Eric Dumazet <edumazet@google.com> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -52,6 +52,7 @@ #include <net/pkt_sched.h> #include <net/sock.h> #include <net/tcp_states.h> +#include <net/tcp.h> /* * Per flow 
structure, dynamically allocated @@ -92,6 +93,7 @@ struct fq_sched_data { u32 flow_refill_delay; u32 flow_max_rate; /* optional max rate per flow */ u32 flow_plimit; /* max packets per flow */ + u32 orphan_mask; /* mask for orphaned skb */ struct rb_root *fq_root; u8 rate_enable; u8 fq_trees_log; @@ -222,11 +224,20 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q) if (unlikely((skb->priority & TC_PRIO_MAX) == TC_PRIO_CONTROL)) return &q->internal; - if (unlikely(!sk)) { + /* SYNACK messages are attached to a listener socket. + * 1) They are not part of a 'flow' yet + * 2) We do not want to rate limit them (eg SYNFLOOD attack), + * especially if the listener set SO_MAX_PACING_RATE + * 3) We pretend they are orphaned + */ + if (!sk || sk->sk_state == TCP_LISTEN) { + unsigned long hash = skb_get_hash(skb) & q->orphan_mask; + /* By forcing low order bit to 1, we make sure to not * collide with a local flow (socket pointers are word aligned) */ - sk = (struct sock *)(skb_get_hash(skb) | 1L); + sk = (struct sock *)((hash << 1) | 1UL); + skb_orphan(skb); } root = &q->fq_root[hash_32((u32)(long)sk, q->fq_trees_log)]; @@ -445,7 +456,9 @@ begin: goto begin; } - if (unlikely(f->head && now < f->time_next_packet)) { + skb = f->head; + if (unlikely(skb && now < f->time_next_packet && + !skb_is_tcp_pure_ack(skb))) { head->first = f->next; fq_flow_set_throttled(q, f); goto begin; @@ -464,14 +477,17 @@ begin: goto begin; } prefetch(&skb->end); - f->time_next_packet = now; f->credit -= qdisc_pkt_len(skb); if (f->credit > 0 || !q->rate_enable) goto out; + /* Do not pace locally generated ack packets */ + if (skb_is_tcp_pure_ack(skb)) + goto out; + rate = q->flow_max_rate; - if (skb->sk && skb->sk->sk_state != TCP_TIME_WAIT) + if (skb->sk) rate = min(skb->sk->sk_pacing_rate, rate); if (rate != ~0U) { @@ -670,8 +686,14 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt) if (tb[TCA_FQ_FLOW_PLIMIT]) q->flow_plimit = nla_get_u32(tb[TCA_FQ_FLOW_PLIMIT]); - if (tb[TCA_FQ_QUANTUM]) - q->quantum = nla_get_u32(tb[TCA_FQ_QUANTUM]); + if (tb[TCA_FQ_QUANTUM]) { + u32 quantum = nla_get_u32(tb[TCA_FQ_QUANTUM]); + + if (quantum > 0) + q->quantum = quantum; + else + err = -EINVAL; + } if (tb[TCA_FQ_INITIAL_QUANTUM]) q->initial_quantum = nla_get_u32(tb[TCA_FQ_INITIAL_QUANTUM]); @@ -698,6 +720,9 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt) q->flow_refill_delay = usecs_to_jiffies(usecs_delay); } + if (tb[TCA_FQ_ORPHAN_MASK]) + q->orphan_mask = nla_get_u32(tb[TCA_FQ_ORPHAN_MASK]); + if (!err) { sch_tree_unlock(sch); err = fq_resize(sch, fq_log); @@ -743,6 +768,7 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt) q->delayed = RB_ROOT; q->fq_root = NULL; q->fq_trees_log = ilog2(1024); + q->orphan_mask = 1024 - 1; qdisc_watchdog_init(&q->watchdog, sch); if (opt) @@ -772,6 +798,7 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb) nla_put_u32(skb, TCA_FQ_FLOW_MAX_RATE, q->flow_max_rate) || nla_put_u32(skb, TCA_FQ_FLOW_REFILL_DELAY, jiffies_to_usecs(q->flow_refill_delay)) || + nla_put_u32(skb, TCA_FQ_ORPHAN_MASK, q->orphan_mask) || nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log)) goto nla_put_failure; diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 6ada42396a24..e02687185a59 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -122,13 +122,6 @@ teql_peek(struct Qdisc *sch) return NULL; } -static inline void -teql_neigh_release(struct neighbour *n) -{ - if (n) - neigh_release(n); -} - static void teql_reset(struct Qdisc 
*sch) { @@ -249,8 +242,8 @@ __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, char haddr[MAX_ADDR_LEN]; neigh_ha_snapshot(haddr, n, dev); - err = dev_hard_header(skb, dev, ntohs(skb->protocol), haddr, - NULL, skb->len); + err = dev_hard_header(skb, dev, ntohs(tc_skb_protocol(skb)), + haddr, NULL, skb->len); if (err < 0) err = -EINVAL; diff --git a/net/sctp/associola.c b/net/sctp/associola.c index f791edd64d6c..197c3f59ecbf 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -391,8 +391,7 @@ void sctp_association_free(struct sctp_association *asoc) sctp_asconf_queue_teardown(asoc); /* Free pending address space being deleted */ - if (asoc->asconf_addr_del_pending != NULL) - kfree(asoc->asconf_addr_del_pending); + kfree(asoc->asconf_addr_del_pending); /* AUTH - Free the endpoint shared keys */ sctp_auth_destroy_keys(&asoc->endpoint_shared_keys); @@ -1182,7 +1181,6 @@ void sctp_assoc_update(struct sctp_association *asoc, asoc->peer.peer_hmacs = new->peer.peer_hmacs; new->peer.peer_hmacs = NULL; - sctp_auth_key_put(asoc->asoc_shared_key); sctp_auth_asoc_init_active_key(asoc, GFP_ATOMIC); } diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index e49e231cef52..06320c8c1c86 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -2608,7 +2608,7 @@ do_addr_param: addr_param = param.v + sizeof(sctp_addip_param_t); - af = sctp_get_af_specific(param_type2af(param.p->type)); + af = sctp_get_af_specific(param_type2af(addr_param->p.type)); if (af == NULL) break; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 2625eccb77d5..aafe94bf292e 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1603,7 +1603,7 @@ static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, sctp_assoc_t associd = 0; sctp_cmsgs_t cmsgs = { NULL }; sctp_scope_t scope; - bool fill_sinfo_ttl = false; + bool fill_sinfo_ttl = false, wait_connect = false; struct sctp_datamsg *datamsg; int msg_flags = msg->msg_flags; __u16 sinfo_flags = 0; @@ -1943,6 +1943,7 @@ static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, if (err < 0) goto out_free; + wait_connect = true; pr_debug("%s: we associated primitively\n", __func__); } @@ -1980,6 +1981,11 @@ static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, sctp_datamsg_put(datamsg); err = msg_len; + if (unlikely(wait_connect)) { + timeo = sock_sndtimeo(sk, msg_flags & MSG_DONTWAIT); + sctp_wait_for_connect(asoc, &timeo); + } + /* If we are already past ASSOCIATE, the lower * layers are responsible for association cleanup. 
*/ diff --git a/net/socket.c b/net/socket.c index a2c33a4dc7ba..bbedbfcb42c2 100644 --- a/net/socket.c +++ b/net/socket.c @@ -113,10 +113,8 @@ unsigned int sysctl_net_busy_read __read_mostly; unsigned int sysctl_net_busy_poll __read_mostly; #endif -static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos); -static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos); +static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to); +static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from); static int sock_mmap(struct file *file, struct vm_area_struct *vma); static int sock_close(struct inode *inode, struct file *file); @@ -142,8 +140,10 @@ static ssize_t sock_splice_read(struct file *file, loff_t *ppos, static const struct file_operations socket_file_ops = { .owner = THIS_MODULE, .llseek = no_llseek, - .aio_read = sock_aio_read, - .aio_write = sock_aio_write, + .read = new_sync_read, + .write = new_sync_write, + .read_iter = sock_read_iter, + .write_iter = sock_write_iter, .poll = sock_poll, .unlocked_ioctl = sock_ioctl, #ifdef CONFIG_COMPAT @@ -613,13 +613,6 @@ EXPORT_SYMBOL(__sock_tx_timestamp); static inline int __sock_sendmsg_nosec(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t size) { - struct sock_iocb *si = kiocb_to_siocb(iocb); - - si->sock = sock; - si->scm = NULL; - si->msg = msg; - si->size = size; - return sock->ops->sendmsg(iocb, sock, msg, size); } @@ -635,11 +628,9 @@ static int do_sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size, bool nosec) { struct kiocb iocb; - struct sock_iocb siocb; int ret; init_sync_kiocb(&iocb, NULL); - iocb.private = &siocb; ret = nosec ? __sock_sendmsg_nosec(&iocb, sock, msg, size) : __sock_sendmsg(&iocb, sock, msg, size); if (-EIOCBQUEUED == ret) @@ -756,14 +747,6 @@ EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops); static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t size, int flags) { - struct sock_iocb *si = kiocb_to_siocb(iocb); - - si->sock = sock; - si->scm = NULL; - si->msg = msg; - si->size = size; - si->flags = flags; - return sock->ops->recvmsg(iocb, sock, msg, size, flags); } @@ -779,11 +762,9 @@ int sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags) { struct kiocb iocb; - struct sock_iocb siocb; int ret; init_sync_kiocb(&iocb, NULL); - iocb.private = &siocb; ret = __sock_recvmsg(&iocb, sock, msg, size, flags); if (-EIOCBQUEUED == ret) ret = wait_on_sync_kiocb(&iocb); @@ -795,11 +776,9 @@ static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg, size_t size, int flags) { struct kiocb iocb; - struct sock_iocb siocb; int ret; init_sync_kiocb(&iocb, NULL); - iocb.private = &siocb; ret = __sock_recvmsg_nosec(&iocb, sock, msg, size, flags); if (-EIOCBQUEUED == ret) ret = wait_on_sync_kiocb(&iocb); @@ -866,92 +845,47 @@ static ssize_t sock_splice_read(struct file *file, loff_t *ppos, return sock->ops->splice_read(sock, ppos, pipe, len, flags); } -static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb, - struct sock_iocb *siocb) -{ - if (!is_sync_kiocb(iocb)) - BUG(); - - siocb->kiocb = iocb; - iocb->private = siocb; - return siocb; -} - -static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb, - struct file *file, const struct iovec *iov, - unsigned long nr_segs) +static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to) { + struct file *file = iocb->ki_filp; 
struct socket *sock = file->private_data; - size_t size = 0; - int i; - - for (i = 0; i < nr_segs; i++) - size += iov[i].iov_len; + struct msghdr msg = {.msg_iter = *to}; + ssize_t res; - msg->msg_name = NULL; - msg->msg_namelen = 0; - msg->msg_control = NULL; - msg->msg_controllen = 0; - iov_iter_init(&msg->msg_iter, READ, iov, nr_segs, size); - msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0; + if (file->f_flags & O_NONBLOCK) + msg.msg_flags = MSG_DONTWAIT; - return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags); -} - -static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) -{ - struct sock_iocb siocb, *x; - - if (pos != 0) + if (iocb->ki_pos != 0) return -ESPIPE; if (iocb->ki_nbytes == 0) /* Match SYS5 behaviour */ return 0; - - x = alloc_sock_iocb(iocb, &siocb); - if (!x) - return -ENOMEM; - return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs); + res = __sock_recvmsg(iocb, sock, &msg, + iocb->ki_nbytes, msg.msg_flags); + *to = msg.msg_iter; + return res; } -static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb, - struct file *file, const struct iovec *iov, - unsigned long nr_segs) +static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from) { + struct file *file = iocb->ki_filp; struct socket *sock = file->private_data; - size_t size = 0; - int i; - - for (i = 0; i < nr_segs; i++) - size += iov[i].iov_len; - - msg->msg_name = NULL; - msg->msg_namelen = 0; - msg->msg_control = NULL; - msg->msg_controllen = 0; - iov_iter_init(&msg->msg_iter, WRITE, iov, nr_segs, size); - msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0; - if (sock->type == SOCK_SEQPACKET) - msg->msg_flags |= MSG_EOR; - - return __sock_sendmsg(iocb, sock, msg, size); -} + struct msghdr msg = {.msg_iter = *from}; + ssize_t res; -static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) -{ - struct sock_iocb siocb, *x; - - if (pos != 0) + if (iocb->ki_pos != 0) return -ESPIPE; - x = alloc_sock_iocb(iocb, &siocb); - if (!x) - return -ENOMEM; + if (file->f_flags & O_NONBLOCK) + msg.msg_flags = MSG_DONTWAIT; + + if (sock->type == SOCK_SEQPACKET) + msg.msg_flags |= MSG_EOR; - return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs); + res = __sock_sendmsg(iocb, sock, &msg, iocb->ki_nbytes); + *from = msg.msg_iter; + return res; } /* diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 05da12a33945..612aa73bbc60 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -286,10 +286,8 @@ static struct rpc_xprt *rpc_clnt_set_transport(struct rpc_clnt *clnt, static void rpc_clnt_set_nodename(struct rpc_clnt *clnt, const char *nodename) { - clnt->cl_nodelen = strlen(nodename); - if (clnt->cl_nodelen > UNX_MAXNODENAME) - clnt->cl_nodelen = UNX_MAXNODENAME; - memcpy(clnt->cl_nodename, nodename, clnt->cl_nodelen); + clnt->cl_nodelen = strlcpy(clnt->cl_nodename, + nodename, sizeof(clnt->cl_nodename)); } static int rpc_client_register(struct rpc_clnt *clnt, @@ -365,6 +363,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, const struct rpc_version *version; struct rpc_clnt *clnt = NULL; const struct rpc_timeout *timeout; + const char *nodename = args->nodename; int err; /* sanity check the name before trying to print it */ @@ -420,8 +419,10 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, atomic_set(&clnt->cl_count, 1); + if (nodename == NULL) + nodename = 
utsname()->nodename; /* save the nodename */ - rpc_clnt_set_nodename(clnt, utsname()->nodename); + rpc_clnt_set_nodename(clnt, nodename); err = rpc_client_register(clnt, args->authflavor, args->client_name); if (err) @@ -576,6 +577,7 @@ static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args, if (xprt == NULL) goto out_err; args->servername = xprt->servername; + args->nodename = clnt->cl_nodename; new = rpc_new_client(args, xprt, clnt); if (IS_ERR(new)) { @@ -1824,6 +1826,7 @@ call_connect_status(struct rpc_task *task) case -ECONNABORTED: case -ENETUNREACH: case -EHOSTUNREACH: + case -EADDRINUSE: case -ENOBUFS: case -EPIPE: if (RPC_IS_SOFTCONN(task)) @@ -1932,6 +1935,7 @@ call_transmit_status(struct rpc_task *task) } case -ECONNRESET: case -ECONNABORTED: + case -EADDRINUSE: case -ENOTCONN: case -ENOBUFS: case -EPIPE: @@ -2051,6 +2055,7 @@ call_status(struct rpc_task *task) case -ECONNRESET: case -ECONNABORTED: rpc_force_rebind(clnt); + case -EADDRINUSE: case -ENOBUFS: rpc_delay(task, 3*HZ); case -EPIPE: diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 05202012bcfc..cf5770d8f49a 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -355,7 +355,8 @@ out: return result; } -static struct rpc_clnt *rpcb_create(struct net *net, const char *hostname, +static struct rpc_clnt *rpcb_create(struct net *net, const char *nodename, + const char *hostname, struct sockaddr *srvaddr, size_t salen, int proto, u32 version) { @@ -365,6 +366,7 @@ static struct rpc_clnt *rpcb_create(struct net *net, const char *hostname, .address = srvaddr, .addrsize = salen, .servername = hostname, + .nodename = nodename, .program = &rpcb_program, .version = version, .authflavor = RPC_AUTH_UNIX, @@ -740,7 +742,9 @@ void rpcb_getport_async(struct rpc_task *task) dprintk("RPC: %5u %s: trying rpcbind version %u\n", task->tk_pid, __func__, bind_version); - rpcb_clnt = rpcb_create(xprt->xprt_net, xprt->servername, sap, salen, + rpcb_clnt = rpcb_create(xprt->xprt_net, + clnt->cl_nodename, + xprt->servername, sap, salen, xprt->prot, bind_version); if (IS_ERR(rpcb_clnt)) { status = PTR_ERR(rpcb_clnt); diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index d20f2329eea3..b91fd9c597b4 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -844,10 +844,10 @@ static void rpc_async_schedule(struct work_struct *work) void *rpc_malloc(struct rpc_task *task, size_t size) { struct rpc_buffer *buf; - gfp_t gfp = GFP_NOWAIT | __GFP_NOWARN; + gfp_t gfp = GFP_NOIO | __GFP_NOWARN; if (RPC_IS_SWAPPER(task)) - gfp |= __GFP_MEMALLOC; + gfp = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN; size += sizeof(struct rpc_buffer); if (size <= RPC_BUFFER_MAXSIZE) @@ -1069,7 +1069,8 @@ static int rpciod_start(void) * Create the rpciod thread and wait for it to start. 
*/ dprintk("RPC: creating workqueue rpciod\n"); - wq = alloc_workqueue("rpciod", WQ_MEM_RECLAIM, 1); + /* Note: highpri because network receive is latency sensitive */ + wq = alloc_workqueue("rpciod", WQ_MEM_RECLAIM | WQ_HIGHPRI, 0); rpciod_workqueue = wq; return rpciod_workqueue != NULL; } diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c index 9711a155bc50..2ecb994314c1 100644 --- a/net/sunrpc/stats.c +++ b/net/sunrpc/stats.c @@ -140,22 +140,20 @@ void rpc_free_iostats(struct rpc_iostats *stats) EXPORT_SYMBOL_GPL(rpc_free_iostats); /** - * rpc_count_iostats - tally up per-task stats + * rpc_count_iostats_metrics - tally up per-task stats * @task: completed rpc_task - * @stats: array of stat structures + * @op_metrics: stat structure for OP that will accumulate stats from @task */ -void rpc_count_iostats(const struct rpc_task *task, struct rpc_iostats *stats) +void rpc_count_iostats_metrics(const struct rpc_task *task, + struct rpc_iostats *op_metrics) { struct rpc_rqst *req = task->tk_rqstp; - struct rpc_iostats *op_metrics; ktime_t delta, now; - if (!stats || !req) + if (!op_metrics || !req) return; now = ktime_get(); - op_metrics = &stats[task->tk_msg.rpc_proc->p_statidx]; - spin_lock(&op_metrics->om_lock); op_metrics->om_ops++; @@ -175,6 +173,20 @@ void rpc_count_iostats(const struct rpc_task *task, struct rpc_iostats *stats) spin_unlock(&op_metrics->om_lock); } +EXPORT_SYMBOL_GPL(rpc_count_iostats_metrics); + +/** + * rpc_count_iostats - tally up per-task stats + * @task: completed rpc_task + * @stats: array of stat structures + * + * Uses the statidx from @task + */ +void rpc_count_iostats(const struct rpc_task *task, struct rpc_iostats *stats) +{ + rpc_count_iostats_metrics(task, + &stats[task->tk_msg.rpc_proc->p_statidx]); +} EXPORT_SYMBOL_GPL(rpc_count_iostats); static void _print_name(struct seq_file *seq, unsigned int op, diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 91eaef1844c8..78974e4d9ad2 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -768,8 +768,8 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) EXPORT_SYMBOL_GPL(svc_set_num_threads); /* - * Called from a server thread as it's exiting. Caller must hold the BKL or - * the "service mutex", whichever is appropriate for the service. + * Called from a server thread as it's exiting. Caller must hold the "service + * mutex" for the service. */ void svc_exit_thread(struct svc_rqst *rqstp) diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index c69358b3cf7f..163ac45c3639 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -42,7 +42,7 @@ static LIST_HEAD(svc_xprt_class_list); * svc_pool->sp_lock protects most of the fields of that pool. * svc_serv->sv_lock protects sv_tempsocks, sv_permsocks, sv_tmpcnt. * when both need to be taken (rare), svc_serv->sv_lock is first. - * BKL protects svc_serv->sv_nrthread. + * The "service mutex" protects svc_serv->sv_nrthread. * svc_sock->sk_lock protects the svc_sock->sk_deferred list * and the ->sk_info_authunix cache. * @@ -67,7 +67,6 @@ static LIST_HEAD(svc_xprt_class_list); * that no other thread will be using the transport or will * try to set XPT_DEAD. 
*/ - int svc_reg_xprt_class(struct svc_xprt_class *xcl) { struct svc_xprt_class *cl; diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 1cb61242e55e..4439ac4c1b53 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -606,7 +606,7 @@ void xdr_truncate_encode(struct xdr_stream *xdr, size_t len) struct kvec *head = buf->head; struct kvec *tail = buf->tail; int fraglen; - int new, old; + int new; if (len > buf->len) { WARN_ON_ONCE(1); @@ -629,8 +629,8 @@ void xdr_truncate_encode(struct xdr_stream *xdr, size_t len) buf->len -= fraglen; new = buf->page_base + buf->page_len; - old = new + fraglen; - xdr->page_ptr -= (old >> PAGE_SHIFT) - (new >> PAGE_SHIFT); + + xdr->page_ptr = buf->pages + (new >> PAGE_SHIFT); if (buf->page_len) { xdr->p = page_address(*xdr->page_ptr); diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index ebbefad21a37..e3015aede0d9 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -683,13 +683,43 @@ xprt_init_autodisconnect(unsigned long data) if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) goto out_abort; spin_unlock(&xprt->transport_lock); - set_bit(XPRT_CONNECTION_CLOSE, &xprt->state); queue_work(rpciod_workqueue, &xprt->task_cleanup); return; out_abort: spin_unlock(&xprt->transport_lock); } +bool xprt_lock_connect(struct rpc_xprt *xprt, + struct rpc_task *task, + void *cookie) +{ + bool ret = false; + + spin_lock_bh(&xprt->transport_lock); + if (!test_bit(XPRT_LOCKED, &xprt->state)) + goto out; + if (xprt->snd_task != task) + goto out; + xprt->snd_task = cookie; + ret = true; +out: + spin_unlock_bh(&xprt->transport_lock); + return ret; +} + +void xprt_unlock_connect(struct rpc_xprt *xprt, void *cookie) +{ + spin_lock_bh(&xprt->transport_lock); + if (xprt->snd_task != cookie) + goto out; + if (!test_bit(XPRT_LOCKED, &xprt->state)) + goto out; + xprt->snd_task =NULL; + xprt->ops->release_xprt(xprt, NULL); +out: + spin_unlock_bh(&xprt->transport_lock); +} + /** * xprt_connect - schedule a transport connect operation * @task: RPC task that is requesting the connect @@ -712,9 +742,7 @@ void xprt_connect(struct rpc_task *task) if (test_and_clear_bit(XPRT_CLOSE_WAIT, &xprt->state)) xprt->ops->close(xprt); - if (xprt_connected(xprt)) - xprt_release_write(xprt, task); - else { + if (!xprt_connected(xprt)) { task->tk_rqstp->rq_bytes_sent = 0; task->tk_timeout = task->tk_rqstp->rq_timeout; rpc_sleep_on(&xprt->pending, task, xprt_connect_status); @@ -726,6 +754,7 @@ void xprt_connect(struct rpc_task *task) xprt->stat.connect_start = jiffies; xprt->ops->connect(xprt, task); } + xprt_release_write(xprt, task); } static void xprt_connect_status(struct rpc_task *task) @@ -758,7 +787,6 @@ static void xprt_connect_status(struct rpc_task *task) dprintk("RPC: %5u xprt_connect_status: error %d connecting to " "server %s\n", task->tk_pid, -task->tk_status, xprt->servername); - xprt_release_write(xprt, task); task->tk_status = -EIO; } } diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index df01d124936c..7e9acd9361c5 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -209,9 +209,11 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target, if (cur_rchunk) { /* read */ cur_rchunk->rc_discrim = xdr_one; /* all read chunks have the same "position" */ - cur_rchunk->rc_position = htonl(pos); - cur_rchunk->rc_target.rs_handle = htonl(seg->mr_rkey); - cur_rchunk->rc_target.rs_length = htonl(seg->mr_len); + cur_rchunk->rc_position = cpu_to_be32(pos); + cur_rchunk->rc_target.rs_handle = + cpu_to_be32(seg->mr_rkey); 
+ cur_rchunk->rc_target.rs_length = + cpu_to_be32(seg->mr_len); xdr_encode_hyper( (__be32 *)&cur_rchunk->rc_target.rs_offset, seg->mr_base); @@ -222,8 +224,10 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target, cur_rchunk++; r_xprt->rx_stats.read_chunk_count++; } else { /* write/reply */ - cur_wchunk->wc_target.rs_handle = htonl(seg->mr_rkey); - cur_wchunk->wc_target.rs_length = htonl(seg->mr_len); + cur_wchunk->wc_target.rs_handle = + cpu_to_be32(seg->mr_rkey); + cur_wchunk->wc_target.rs_length = + cpu_to_be32(seg->mr_len); xdr_encode_hyper( (__be32 *)&cur_wchunk->wc_target.rs_offset, seg->mr_base); @@ -257,7 +261,7 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target, *iptr++ = xdr_zero; /* encode a NULL reply chunk */ } else { warray->wc_discrim = xdr_one; - warray->wc_nchunks = htonl(nchunks); + warray->wc_nchunks = cpu_to_be32(nchunks); iptr = (__be32 *) cur_wchunk; if (type == rpcrdma_writech) { *iptr++ = xdr_zero; /* finish the write chunk list */ @@ -290,7 +294,7 @@ ssize_t rpcrdma_marshal_chunks(struct rpc_rqst *rqst, ssize_t result) { struct rpcrdma_req *req = rpcr_to_rdmar(rqst); - struct rpcrdma_msg *headerp = (struct rpcrdma_msg *)req->rl_base; + struct rpcrdma_msg *headerp = rdmab_to_msg(req->rl_rdmabuf); if (req->rl_rtype != rpcrdma_noch) result = rpcrdma_create_chunks(rqst, &rqst->rq_snd_buf, @@ -402,13 +406,12 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) base = rqst->rq_svec[0].iov_base; rpclen = rqst->rq_svec[0].iov_len; - /* build RDMA header in private area at front */ - headerp = (struct rpcrdma_msg *) req->rl_base; - /* don't htonl XID, it's already done in request */ + headerp = rdmab_to_msg(req->rl_rdmabuf); + /* don't byte-swap XID, it's already done in request */ headerp->rm_xid = rqst->rq_xid; - headerp->rm_vers = xdr_one; - headerp->rm_credit = htonl(r_xprt->rx_buf.rb_max_requests); - headerp->rm_type = htonl(RDMA_MSG); + headerp->rm_vers = rpcrdma_version; + headerp->rm_credit = cpu_to_be32(r_xprt->rx_buf.rb_max_requests); + headerp->rm_type = rdma_msg; /* * Chunks needed for results? @@ -468,7 +471,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) return -EIO; } - hdrlen = 28; /*sizeof *headerp;*/ + hdrlen = RPCRDMA_HDRLEN_MIN; padlen = 0; /* @@ -482,11 +485,11 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) RPCRDMA_INLINE_PAD_VALUE(rqst)); if (padlen) { - headerp->rm_type = htonl(RDMA_MSGP); + headerp->rm_type = rdma_msgp; headerp->rm_body.rm_padded.rm_align = - htonl(RPCRDMA_INLINE_PAD_VALUE(rqst)); + cpu_to_be32(RPCRDMA_INLINE_PAD_VALUE(rqst)); headerp->rm_body.rm_padded.rm_thresh = - htonl(RPCRDMA_INLINE_PAD_THRESH); + cpu_to_be32(RPCRDMA_INLINE_PAD_THRESH); headerp->rm_body.rm_padded.rm_pempty[0] = xdr_zero; headerp->rm_body.rm_padded.rm_pempty[1] = xdr_zero; headerp->rm_body.rm_padded.rm_pempty[2] = xdr_zero; @@ -524,7 +527,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) dprintk("RPC: %s: %s: hdrlen %zd rpclen %zd padlen %zd" " headerp 0x%p base 0x%p lkey 0x%x\n", __func__, transfertypes[req->rl_wtype], hdrlen, rpclen, padlen, - headerp, base, req->rl_iov.lkey); + headerp, base, rdmab_lkey(req->rl_rdmabuf)); /* * initialize send_iov's - normally only two: rdma chunk header and @@ -533,26 +536,26 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) * header and any write data. In all non-rdma cases, any following * data has been copied into the RPC header buffer. 
*/ - req->rl_send_iov[0].addr = req->rl_iov.addr; + req->rl_send_iov[0].addr = rdmab_addr(req->rl_rdmabuf); req->rl_send_iov[0].length = hdrlen; - req->rl_send_iov[0].lkey = req->rl_iov.lkey; + req->rl_send_iov[0].lkey = rdmab_lkey(req->rl_rdmabuf); - req->rl_send_iov[1].addr = req->rl_iov.addr + (base - req->rl_base); + req->rl_send_iov[1].addr = rdmab_addr(req->rl_sendbuf); req->rl_send_iov[1].length = rpclen; - req->rl_send_iov[1].lkey = req->rl_iov.lkey; + req->rl_send_iov[1].lkey = rdmab_lkey(req->rl_sendbuf); req->rl_niovs = 2; if (padlen) { struct rpcrdma_ep *ep = &r_xprt->rx_ep; - req->rl_send_iov[2].addr = ep->rep_pad.addr; + req->rl_send_iov[2].addr = rdmab_addr(ep->rep_padbuf); req->rl_send_iov[2].length = padlen; - req->rl_send_iov[2].lkey = ep->rep_pad.lkey; + req->rl_send_iov[2].lkey = rdmab_lkey(ep->rep_padbuf); req->rl_send_iov[3].addr = req->rl_send_iov[1].addr + rpclen; req->rl_send_iov[3].length = rqst->rq_slen - rpclen; - req->rl_send_iov[3].lkey = req->rl_iov.lkey; + req->rl_send_iov[3].lkey = rdmab_lkey(req->rl_sendbuf); req->rl_niovs = 4; } @@ -569,8 +572,9 @@ rpcrdma_count_chunks(struct rpcrdma_rep *rep, unsigned int max, int wrchunk, __b { unsigned int i, total_len; struct rpcrdma_write_chunk *cur_wchunk; + char *base = (char *)rdmab_to_msg(rep->rr_rdmabuf); - i = ntohl(**iptrp); /* get array count */ + i = be32_to_cpu(**iptrp); if (i > max) return -1; cur_wchunk = (struct rpcrdma_write_chunk *) (*iptrp + 1); @@ -582,11 +586,11 @@ rpcrdma_count_chunks(struct rpcrdma_rep *rep, unsigned int max, int wrchunk, __b xdr_decode_hyper((__be32 *)&seg->rs_offset, &off); dprintk("RPC: %s: chunk %d@0x%llx:0x%x\n", __func__, - ntohl(seg->rs_length), + be32_to_cpu(seg->rs_length), (unsigned long long)off, - ntohl(seg->rs_handle)); + be32_to_cpu(seg->rs_handle)); } - total_len += ntohl(seg->rs_length); + total_len += be32_to_cpu(seg->rs_length); ++cur_wchunk; } /* check and adjust for properly terminated write chunk */ @@ -596,7 +600,7 @@ rpcrdma_count_chunks(struct rpcrdma_rep *rep, unsigned int max, int wrchunk, __b return -1; cur_wchunk = (struct rpcrdma_write_chunk *) w; } - if ((char *) cur_wchunk > rep->rr_base + rep->rr_len) + if ((char *)cur_wchunk > base + rep->rr_len) return -1; *iptrp = (__be32 *) cur_wchunk; @@ -691,7 +695,9 @@ rpcrdma_connect_worker(struct work_struct *work) { struct rpcrdma_ep *ep = container_of(work, struct rpcrdma_ep, rep_connect_worker.work); - struct rpc_xprt *xprt = ep->rep_xprt; + struct rpcrdma_xprt *r_xprt = + container_of(ep, struct rpcrdma_xprt, rx_ep); + struct rpc_xprt *xprt = &r_xprt->rx_xprt; spin_lock_bh(&xprt->transport_lock); if (++xprt->connect_cookie == 0) /* maintain a reserved value */ @@ -732,7 +738,7 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep) struct rpc_xprt *xprt = rep->rr_xprt; struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); __be32 *iptr; - int rdmalen, status; + int credits, rdmalen, status; unsigned long cwnd; /* Check status. 
If bad, signal disconnect and return rep to pool */ @@ -744,14 +750,14 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep) } return; } - if (rep->rr_len < 28) { + if (rep->rr_len < RPCRDMA_HDRLEN_MIN) { dprintk("RPC: %s: short/invalid reply\n", __func__); goto repost; } - headerp = (struct rpcrdma_msg *) rep->rr_base; - if (headerp->rm_vers != xdr_one) { + headerp = rdmab_to_msg(rep->rr_rdmabuf); + if (headerp->rm_vers != rpcrdma_version) { dprintk("RPC: %s: invalid version %d\n", - __func__, ntohl(headerp->rm_vers)); + __func__, be32_to_cpu(headerp->rm_vers)); goto repost; } @@ -762,7 +768,8 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep) spin_unlock(&xprt->transport_lock); dprintk("RPC: %s: reply 0x%p failed " "to match any request xid 0x%08x len %d\n", - __func__, rep, headerp->rm_xid, rep->rr_len); + __func__, rep, be32_to_cpu(headerp->rm_xid), + rep->rr_len); repost: r_xprt->rx_stats.bad_reply_count++; rep->rr_func = rpcrdma_reply_handler; @@ -778,13 +785,14 @@ repost: spin_unlock(&xprt->transport_lock); dprintk("RPC: %s: duplicate reply 0x%p to RPC " "request 0x%p: xid 0x%08x\n", __func__, rep, req, - headerp->rm_xid); + be32_to_cpu(headerp->rm_xid)); goto repost; } dprintk("RPC: %s: reply 0x%p completes request 0x%p\n" " RPC request 0x%p xid 0x%08x\n", - __func__, rep, req, rqst, headerp->rm_xid); + __func__, rep, req, rqst, + be32_to_cpu(headerp->rm_xid)); /* from here on, the reply is no longer an orphan */ req->rl_reply = rep; @@ -793,7 +801,7 @@ repost: /* check for expected message types */ /* The order of some of these tests is important. */ switch (headerp->rm_type) { - case htonl(RDMA_MSG): + case rdma_msg: /* never expect read chunks */ /* never expect reply chunks (two ways to check) */ /* never expect write chunks without having offered RDMA */ @@ -824,22 +832,24 @@ repost: } else { /* else ordinary inline */ rdmalen = 0; - iptr = (__be32 *)((unsigned char *)headerp + 28); - rep->rr_len -= 28; /*sizeof *headerp;*/ + iptr = (__be32 *)((unsigned char *)headerp + + RPCRDMA_HDRLEN_MIN); + rep->rr_len -= RPCRDMA_HDRLEN_MIN; status = rep->rr_len; } /* Fix up the rpc results for upper layer */ rpcrdma_inline_fixup(rqst, (char *)iptr, rep->rr_len, rdmalen); break; - case htonl(RDMA_NOMSG): + case rdma_nomsg: /* never expect read or write chunks, always reply chunks */ if (headerp->rm_body.rm_chunks[0] != xdr_zero || headerp->rm_body.rm_chunks[1] != xdr_zero || headerp->rm_body.rm_chunks[2] != xdr_one || req->rl_nchunks == 0) goto badheader; - iptr = (__be32 *)((unsigned char *)headerp + 28); + iptr = (__be32 *)((unsigned char *)headerp + + RPCRDMA_HDRLEN_MIN); rdmalen = rpcrdma_count_chunks(rep, req->rl_nchunks, 0, &iptr); if (rdmalen < 0) goto badheader; @@ -853,7 +863,7 @@ badheader: dprintk("%s: invalid rpcrdma reply header (type %d):" " chunks[012] == %d %d %d" " expected chunks <= %d\n", - __func__, ntohl(headerp->rm_type), + __func__, be32_to_cpu(headerp->rm_type), headerp->rm_body.rm_chunks[0], headerp->rm_body.rm_chunks[1], headerp->rm_body.rm_chunks[2], @@ -863,8 +873,14 @@ badheader: break; } + credits = be32_to_cpu(headerp->rm_credit); + if (credits == 0) + credits = 1; /* don't deadlock */ + else if (credits > r_xprt->rx_buf.rb_max_requests) + credits = r_xprt->rx_buf.rb_max_requests; + cwnd = xprt->cwnd; - xprt->cwnd = atomic_read(&r_xprt->rx_buf.rb_credits) << RPC_CWNDSHIFT; + xprt->cwnd = credits << RPC_CWNDSHIFT; if (xprt->cwnd > cwnd) xprt_release_rqst_cong(rqst->rq_task); diff --git a/net/sunrpc/xprtrdma/svc_rdma_marshal.c 
b/net/sunrpc/xprtrdma/svc_rdma_marshal.c index 65b146297f5a..b681855cf970 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_marshal.c +++ b/net/sunrpc/xprtrdma/svc_rdma_marshal.c @@ -71,22 +71,6 @@ static u32 *decode_read_list(u32 *va, u32 *vaend) } /* - * Determine number of chunks and total bytes in chunk list. The chunk - * list has already been verified to fit within the RPCRDMA header. - */ -void svc_rdma_rcl_chunk_counts(struct rpcrdma_read_chunk *ch, - int *ch_count, int *byte_count) -{ - /* compute the number of bytes represented by read chunks */ - *byte_count = 0; - *ch_count = 0; - for (; ch->rc_discrim != 0; ch++) { - *byte_count = *byte_count + ntohl(ch->rc_target.rs_length); - *ch_count = *ch_count + 1; - } -} - -/* * Decodes a write chunk list. The expected format is as follows: * descrim : xdr_one * nchunks : <count> diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index e0110270d650..f9f13a32ddb8 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -43,7 +43,6 @@ #include <linux/sunrpc/debug.h> #include <linux/sunrpc/rpc_rdma.h> #include <linux/spinlock.h> -#include <linux/highmem.h> #include <asm/unaligned.h> #include <rdma/ib_verbs.h> #include <rdma/rdma_cm.h> @@ -60,6 +59,7 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp, struct svc_rdma_op_ctxt *ctxt, u32 byte_count) { + struct rpcrdma_msg *rmsgp; struct page *page; u32 bc; int sge_no; @@ -82,7 +82,14 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp, /* If data remains, store it in the pagelist */ rqstp->rq_arg.page_len = bc; rqstp->rq_arg.page_base = 0; - rqstp->rq_arg.pages = &rqstp->rq_pages[1]; + + /* RDMA_NOMSG: RDMA READ data should land just after RDMA RECV data */ + rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base; + if (be32_to_cpu(rmsgp->rm_type) == RDMA_NOMSG) + rqstp->rq_arg.pages = &rqstp->rq_pages[0]; + else + rqstp->rq_arg.pages = &rqstp->rq_pages[1]; + sge_no = 1; while (bc && sge_no < ctxt->count) { page = ctxt->pages[sge_no]; @@ -95,14 +102,6 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp, rqstp->rq_respages = &rqstp->rq_pages[sge_no]; rqstp->rq_next_page = rqstp->rq_respages + 1; - /* We should never run out of SGE because the limit is defined to - * support the max allowed RPC data length - */ - BUG_ON(bc && (sge_no == ctxt->count)); - BUG_ON((rqstp->rq_arg.head[0].iov_len + rqstp->rq_arg.page_len) - != byte_count); - BUG_ON(rqstp->rq_arg.len != byte_count); - /* If not all pages were used from the SGL, free the remaining ones */ bc = sge_no; while (sge_no < ctxt->count) { @@ -125,26 +124,16 @@ static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count) return min_t(int, sge_count, xprt->sc_max_sge); } -typedef int (*rdma_reader_fn)(struct svcxprt_rdma *xprt, - struct svc_rqst *rqstp, - struct svc_rdma_op_ctxt *head, - int *page_no, - u32 *page_offset, - u32 rs_handle, - u32 rs_length, - u64 rs_offset, - int last); - /* Issue an RDMA_READ using the local lkey to map the data sink */ -static int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt, - struct svc_rqst *rqstp, - struct svc_rdma_op_ctxt *head, - int *page_no, - u32 *page_offset, - u32 rs_handle, - u32 rs_length, - u64 rs_offset, - int last) +int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt, + struct svc_rqst *rqstp, + struct svc_rdma_op_ctxt *head, + int *page_no, + u32 *page_offset, + u32 rs_handle, + u32 rs_length, + u64 rs_offset, + bool last) { struct ib_send_wr read_wr; int pages_needed = 
PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT; @@ -229,15 +218,15 @@ static int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt, } /* Issue an RDMA_READ using an FRMR to map the data sink */ -static int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, - struct svc_rqst *rqstp, - struct svc_rdma_op_ctxt *head, - int *page_no, - u32 *page_offset, - u32 rs_handle, - u32 rs_length, - u64 rs_offset, - int last) +int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, + struct svc_rqst *rqstp, + struct svc_rdma_op_ctxt *head, + int *page_no, + u32 *page_offset, + u32 rs_handle, + u32 rs_length, + u64 rs_offset, + bool last) { struct ib_send_wr read_wr; struct ib_send_wr inv_wr; @@ -365,24 +354,84 @@ static int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, return ret; } +static unsigned int +rdma_rcl_chunk_count(struct rpcrdma_read_chunk *ch) +{ + unsigned int count; + + for (count = 0; ch->rc_discrim != xdr_zero; ch++) + count++; + return count; +} + +/* If there was additional inline content, append it to the end of arg.pages. + * Tail copy has to be done after the reader function has determined how many + * pages are needed for RDMA READ. + */ +static int +rdma_copy_tail(struct svc_rqst *rqstp, struct svc_rdma_op_ctxt *head, + u32 position, u32 byte_count, u32 page_offset, int page_no) +{ + char *srcp, *destp; + int ret; + + ret = 0; + srcp = head->arg.head[0].iov_base + position; + byte_count = head->arg.head[0].iov_len - position; + if (byte_count > PAGE_SIZE) { + dprintk("svcrdma: large tail unsupported\n"); + return 0; + } + + /* Fit as much of the tail on the current page as possible */ + if (page_offset != PAGE_SIZE) { + destp = page_address(rqstp->rq_arg.pages[page_no]); + destp += page_offset; + while (byte_count--) { + *destp++ = *srcp++; + page_offset++; + if (page_offset == PAGE_SIZE && byte_count) + goto more; + } + goto done; + } + +more: + /* Fit the rest on the next page */ + page_no++; + destp = page_address(rqstp->rq_arg.pages[page_no]); + while (byte_count--) + *destp++ = *srcp++; + + rqstp->rq_respages = &rqstp->rq_arg.pages[page_no+1]; + rqstp->rq_next_page = rqstp->rq_respages + 1; + +done: + byte_count = head->arg.head[0].iov_len - position; + head->arg.page_len += byte_count; + head->arg.len += byte_count; + head->arg.buflen += byte_count; + return 1; +} + static int rdma_read_chunks(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp, struct svc_rqst *rqstp, struct svc_rdma_op_ctxt *head) { - int page_no, ch_count, ret; + int page_no, ret; struct rpcrdma_read_chunk *ch; - u32 page_offset, byte_count; + u32 handle, page_offset, byte_count; + u32 position; u64 rs_offset; - rdma_reader_fn reader; + bool last; /* If no read list is present, return 0 */ ch = svc_rdma_get_read_chunk(rmsgp); if (!ch) return 0; - svc_rdma_rcl_chunk_counts(ch, &ch_count, &byte_count); - if (ch_count > RPCSVC_MAXPAGES) + if (rdma_rcl_chunk_count(ch) > RPCSVC_MAXPAGES) return -EINVAL; /* The request is completed when the RDMA_READs complete. 
The @@ -391,34 +440,41 @@ static int rdma_read_chunks(struct svcxprt_rdma *xprt, */ head->arg.head[0] = rqstp->rq_arg.head[0]; head->arg.tail[0] = rqstp->rq_arg.tail[0]; - head->arg.pages = &head->pages[head->count]; head->hdr_count = head->count; head->arg.page_base = 0; head->arg.page_len = 0; head->arg.len = rqstp->rq_arg.len; head->arg.buflen = rqstp->rq_arg.buflen; - /* Use FRMR if supported */ - if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG) - reader = rdma_read_chunk_frmr; - else - reader = rdma_read_chunk_lcl; + ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0]; + position = be32_to_cpu(ch->rc_position); + + /* RDMA_NOMSG: RDMA READ data should land just after RDMA RECV data */ + if (position == 0) { + head->arg.pages = &head->pages[0]; + page_offset = head->byte_len; + } else { + head->arg.pages = &head->pages[head->count]; + page_offset = 0; + } - page_no = 0; page_offset = 0; - for (ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0]; - ch->rc_discrim != 0; ch++) { + ret = 0; + page_no = 0; + for (; ch->rc_discrim != xdr_zero; ch++) { + if (be32_to_cpu(ch->rc_position) != position) + goto err; + handle = be32_to_cpu(ch->rc_target.rs_handle), + byte_count = be32_to_cpu(ch->rc_target.rs_length); xdr_decode_hyper((__be32 *)&ch->rc_target.rs_offset, &rs_offset); - byte_count = ntohl(ch->rc_target.rs_length); while (byte_count > 0) { - ret = reader(xprt, rqstp, head, - &page_no, &page_offset, - ntohl(ch->rc_target.rs_handle), - byte_count, rs_offset, - ((ch+1)->rc_discrim == 0) /* last */ - ); + last = (ch + 1)->rc_discrim == xdr_zero; + ret = xprt->sc_reader(xprt, rqstp, head, + &page_no, &page_offset, + handle, byte_count, + rs_offset, last); if (ret < 0) goto err; byte_count -= ret; @@ -426,7 +482,24 @@ static int rdma_read_chunks(struct svcxprt_rdma *xprt, head->arg.buflen += ret; } } + + /* Read list may need XDR round-up (see RFC 5666, s. 3.7) */ + if (page_offset & 3) { + u32 pad = 4 - (page_offset & 3); + + head->arg.page_len += pad; + head->arg.len += pad; + head->arg.buflen += pad; + page_offset += pad; + } + ret = 1; + if (position && position < head->arg.head[0].iov_len) + ret = rdma_copy_tail(rqstp, head, position, + byte_count, page_offset, page_no); + head->arg.head[0].iov_len = position; + head->position = position; + err: /* Detach arg pages. svc_recv will replenish them */ for (page_no = 0; @@ -436,47 +509,33 @@ static int rdma_read_chunks(struct svcxprt_rdma *xprt, return ret; } -/* - * To avoid a separate RDMA READ just for a handful of zero bytes, - * RFC 5666 section 3.7 allows the client to omit the XDR zero pad - * in chunk lists. 
- */ -static void -rdma_fix_xdr_pad(struct xdr_buf *buf) -{ - unsigned int page_len = buf->page_len; - unsigned int size = (XDR_QUADLEN(page_len) << 2) - page_len; - unsigned int offset, pg_no; - char *p; - - if (size == 0) - return; - - pg_no = page_len >> PAGE_SHIFT; - offset = page_len & ~PAGE_MASK; - p = page_address(buf->pages[pg_no]); - memset(p + offset, 0, size); - - buf->page_len += size; - buf->buflen += size; - buf->len += size; -} - static int rdma_read_complete(struct svc_rqst *rqstp, struct svc_rdma_op_ctxt *head) { int page_no; int ret; - BUG_ON(!head); - /* Copy RPC pages */ for (page_no = 0; page_no < head->count; page_no++) { put_page(rqstp->rq_pages[page_no]); rqstp->rq_pages[page_no] = head->pages[page_no]; } + + /* Adjustments made for RDMA_NOMSG type requests */ + if (head->position == 0) { + if (head->arg.len <= head->sge[0].length) { + head->arg.head[0].iov_len = head->arg.len - + head->byte_len; + head->arg.page_len = 0; + } else { + head->arg.head[0].iov_len = head->sge[0].length - + head->byte_len; + head->arg.page_len = head->arg.len - + head->sge[0].length; + } + } + /* Point rq_arg.pages past header */ - rdma_fix_xdr_pad(&head->arg); rqstp->rq_arg.pages = &rqstp->rq_pages[head->hdr_count]; rqstp->rq_arg.page_len = head->arg.page_len; rqstp->rq_arg.page_base = head->arg.page_base; @@ -501,8 +560,8 @@ static int rdma_read_complete(struct svc_rqst *rqstp, ret = rqstp->rq_arg.head[0].iov_len + rqstp->rq_arg.page_len + rqstp->rq_arg.tail[0].iov_len; - dprintk("svcrdma: deferred read ret=%d, rq_arg.len =%d, " - "rq_arg.head[0].iov_base=%p, rq_arg.head[0].iov_len = %zd\n", + dprintk("svcrdma: deferred read ret=%d, rq_arg.len=%u, " + "rq_arg.head[0].iov_base=%p, rq_arg.head[0].iov_len=%zu\n", ret, rqstp->rq_arg.len, rqstp->rq_arg.head[0].iov_base, rqstp->rq_arg.head[0].iov_len); @@ -558,7 +617,6 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) } dprintk("svcrdma: processing ctxt=%p on xprt=%p, rqstp=%p, status=%d\n", ctxt, rdma_xprt, rqstp, ctxt->wc_status); - BUG_ON(ctxt->wc_status != IB_WC_SUCCESS); atomic_inc(&rdma_stat_recv); /* Build up the XDR from the receive buffers. 
*/ @@ -591,8 +649,8 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) + rqstp->rq_arg.tail[0].iov_len; svc_rdma_put_context(ctxt, 0); out: - dprintk("svcrdma: ret = %d, rq_arg.len =%d, " - "rq_arg.head[0].iov_base=%p, rq_arg.head[0].iov_len = %zd\n", + dprintk("svcrdma: ret=%d, rq_arg.len=%u, " + "rq_arg.head[0].iov_base=%p, rq_arg.head[0].iov_len=%zd\n", ret, rqstp->rq_arg.len, rqstp->rq_arg.head[0].iov_base, rqstp->rq_arg.head[0].iov_len); diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 9f1b50689c0f..7de33d1af9b6 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -60,8 +60,11 @@ static int map_xdr(struct svcxprt_rdma *xprt, u32 page_off; int page_no; - BUG_ON(xdr->len != - (xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len)); + if (xdr->len != + (xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len)) { + pr_err("svcrdma: map_xdr: XDR buffer length error\n"); + return -EIO; + } /* Skip the first sge, this is for the RPCRDMA header */ sge_no = 1; @@ -150,7 +153,11 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, int bc; struct svc_rdma_op_ctxt *ctxt; - BUG_ON(vec->count > RPCSVC_MAXPAGES); + if (vec->count > RPCSVC_MAXPAGES) { + pr_err("svcrdma: Too many pages (%lu)\n", vec->count); + return -EIO; + } + dprintk("svcrdma: RDMA_WRITE rmr=%x, to=%llx, xdr_off=%d, " "write_len=%d, vec->sge=%p, vec->count=%lu\n", rmr, (unsigned long long)to, xdr_off, @@ -190,7 +197,10 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, sge_off = 0; sge_no++; xdr_sge_no++; - BUG_ON(xdr_sge_no > vec->count); + if (xdr_sge_no > vec->count) { + pr_err("svcrdma: Too many sges (%d)\n", xdr_sge_no); + goto err; + } bc -= sge_bytes; if (sge_no == xprt->sc_max_sge) break; @@ -421,7 +431,10 @@ static int send_reply(struct svcxprt_rdma *rdma, ctxt->sge[sge_no].lkey = rdma->sc_dma_lkey; ctxt->sge[sge_no].length = sge_bytes; } - BUG_ON(byte_count != 0); + if (byte_count != 0) { + pr_err("svcrdma: Could not map %d bytes\n", byte_count); + goto err; + } /* Save all respages in the ctxt and remove them from the * respages array. They are our pages until the I/O @@ -442,7 +455,10 @@ static int send_reply(struct svcxprt_rdma *rdma, } rqstp->rq_next_page = rqstp->rq_respages + 1; - BUG_ON(sge_no > rdma->sc_max_sge); + if (sge_no > rdma->sc_max_sge) { + pr_err("svcrdma: Too many sges (%d)\n", sge_no); + goto err; + } memset(&send_wr, 0, sizeof send_wr); ctxt->wr_op = IB_WR_SEND; send_wr.wr_id = (unsigned long)ctxt; @@ -467,18 +483,6 @@ void svc_rdma_prep_reply_hdr(struct svc_rqst *rqstp) { } -/* - * Return the start of an xdr buffer. - */ -static void *xdr_start(struct xdr_buf *xdr) -{ - return xdr->head[0].iov_base - - (xdr->len - - xdr->page_len - - xdr->tail[0].iov_len - - xdr->head[0].iov_len); -} - int svc_rdma_sendto(struct svc_rqst *rqstp) { struct svc_xprt *xprt = rqstp->rq_xprt; @@ -496,8 +500,10 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) dprintk("svcrdma: sending response for rqstp=%p\n", rqstp); - /* Get the RDMA request header. */ - rdma_argp = xdr_start(&rqstp->rq_arg); + /* Get the RDMA request header. The receive logic always + * places this at the start of page 0. 
+ */ + rdma_argp = page_address(rqstp->rq_pages[0]); /* Build an req vec for the XDR */ ctxt = svc_rdma_get_context(rdma); diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 4e618808bc98..f609c1c2d38d 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -139,7 +139,6 @@ void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages) struct svcxprt_rdma *xprt; int i; - BUG_ON(!ctxt); xprt = ctxt->xprt; if (free_pages) for (i = 0; i < ctxt->count; i++) @@ -339,12 +338,14 @@ static void process_context(struct svcxprt_rdma *xprt, switch (ctxt->wr_op) { case IB_WR_SEND: - BUG_ON(ctxt->frmr); + if (ctxt->frmr) + pr_err("svcrdma: SEND: ctxt->frmr != NULL\n"); svc_rdma_put_context(ctxt, 1); break; case IB_WR_RDMA_WRITE: - BUG_ON(ctxt->frmr); + if (ctxt->frmr) + pr_err("svcrdma: WRITE: ctxt->frmr != NULL\n"); svc_rdma_put_context(ctxt, 0); break; @@ -353,19 +354,21 @@ static void process_context(struct svcxprt_rdma *xprt, svc_rdma_put_frmr(xprt, ctxt->frmr); if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) { struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr; - BUG_ON(!read_hdr); - spin_lock_bh(&xprt->sc_rq_dto_lock); - set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); - list_add_tail(&read_hdr->dto_q, - &xprt->sc_read_complete_q); - spin_unlock_bh(&xprt->sc_rq_dto_lock); + if (read_hdr) { + spin_lock_bh(&xprt->sc_rq_dto_lock); + set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); + list_add_tail(&read_hdr->dto_q, + &xprt->sc_read_complete_q); + spin_unlock_bh(&xprt->sc_rq_dto_lock); + } else { + pr_err("svcrdma: ctxt->read_hdr == NULL\n"); + } svc_xprt_enqueue(&xprt->sc_xprt); } svc_rdma_put_context(ctxt, 0); break; default: - BUG_ON(1); printk(KERN_ERR "svcrdma: unexpected completion type, " "opcode=%d\n", ctxt->wr_op); @@ -513,7 +516,10 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt) buflen = 0; ctxt->direction = DMA_FROM_DEVICE; for (sge_no = 0; buflen < xprt->sc_max_req_size; sge_no++) { - BUG_ON(sge_no >= xprt->sc_max_sge); + if (sge_no >= xprt->sc_max_sge) { + pr_err("svcrdma: Too many sges (%d)\n", sge_no); + goto err_put_ctxt; + } page = svc_rdma_get_page(); ctxt->pages[sge_no] = page; pa = ib_dma_map_page(xprt->sc_cm_id->device, @@ -687,7 +693,6 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, { struct rdma_cm_id *listen_id; struct svcxprt_rdma *cma_xprt; - struct svc_xprt *xprt; int ret; dprintk("svcrdma: Creating RDMA socket\n"); @@ -698,7 +703,6 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, cma_xprt = rdma_create_xprt(serv, 1); if (!cma_xprt) return ERR_PTR(-ENOMEM); - xprt = &cma_xprt->sc_xprt; listen_id = rdma_create_id(rdma_listen_handler, cma_xprt, RDMA_PS_TCP, IB_QPT_RC); @@ -822,7 +826,7 @@ void svc_rdma_put_frmr(struct svcxprt_rdma *rdma, if (frmr) { frmr_unmap_dma(rdma, frmr); spin_lock_bh(&rdma->sc_frmr_q_lock); - BUG_ON(!list_empty(&frmr->frmr_list)); + WARN_ON_ONCE(!list_empty(&frmr->frmr_list)); list_add(&frmr->frmr_list, &rdma->sc_frmr_q); spin_unlock_bh(&rdma->sc_frmr_q_lock); } @@ -970,10 +974,12 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) * NB: iWARP requires remote write access for the data sink * of an RDMA_READ. IB does not. 
*/ + newxprt->sc_reader = rdma_read_chunk_lcl; if (devattr.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) { newxprt->sc_frmr_pg_list_len = devattr.max_fast_reg_page_list_len; newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_FAST_REG; + newxprt->sc_reader = rdma_read_chunk_frmr; } /* @@ -1125,7 +1131,9 @@ static void __svc_rdma_free(struct work_struct *work) dprintk("svcrdma: svc_rdma_free(%p)\n", rdma); /* We should only be called from kref_put */ - BUG_ON(atomic_read(&rdma->sc_xprt.xpt_ref.refcount) != 0); + if (atomic_read(&rdma->sc_xprt.xpt_ref.refcount) != 0) + pr_err("svcrdma: sc_xprt still in use? (%d)\n", + atomic_read(&rdma->sc_xprt.xpt_ref.refcount)); /* * Destroy queued, but not processed read completions. Note @@ -1153,8 +1161,12 @@ static void __svc_rdma_free(struct work_struct *work) } /* Warn if we leaked a resource or under-referenced */ - WARN_ON(atomic_read(&rdma->sc_ctxt_used) != 0); - WARN_ON(atomic_read(&rdma->sc_dma_used) != 0); + if (atomic_read(&rdma->sc_ctxt_used) != 0) + pr_err("svcrdma: ctxt still in use? (%d)\n", + atomic_read(&rdma->sc_ctxt_used)); + if (atomic_read(&rdma->sc_dma_used) != 0) + pr_err("svcrdma: dma still in use? (%d)\n", + atomic_read(&rdma->sc_dma_used)); /* De-allocate fastreg mr */ rdma_dealloc_frmr_q(rdma); @@ -1254,7 +1266,6 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr) if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags)) return -ENOTCONN; - BUG_ON(wr->send_flags != IB_SEND_SIGNALED); wr_count = 1; for (n_wr = wr->next; n_wr; n_wr = n_wr->next) wr_count++; diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index bbd6155d3e34..2e192baa59f3 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -200,9 +200,9 @@ xprt_rdma_free_addresses(struct rpc_xprt *xprt) static void xprt_rdma_connect_worker(struct work_struct *work) { - struct rpcrdma_xprt *r_xprt = - container_of(work, struct rpcrdma_xprt, rdma_connect.work); - struct rpc_xprt *xprt = &r_xprt->xprt; + struct rpcrdma_xprt *r_xprt = container_of(work, struct rpcrdma_xprt, + rx_connect_worker.work); + struct rpc_xprt *xprt = &r_xprt->rx_xprt; int rc = 0; xprt_clear_connected(xprt); @@ -235,7 +235,7 @@ xprt_rdma_destroy(struct rpc_xprt *xprt) dprintk("RPC: %s: called\n", __func__); - cancel_delayed_work_sync(&r_xprt->rdma_connect); + cancel_delayed_work_sync(&r_xprt->rx_connect_worker); xprt_clear_connected(xprt); @@ -364,8 +364,7 @@ xprt_setup_rdma(struct xprt_create *args) * any inline data. Also specify any padding which will be provided * from a preregistered zero buffer. */ - rc = rpcrdma_buffer_create(&new_xprt->rx_buf, new_ep, &new_xprt->rx_ia, - &new_xprt->rx_data); + rc = rpcrdma_buffer_create(new_xprt); if (rc) goto out3; @@ -374,9 +373,8 @@ xprt_setup_rdma(struct xprt_create *args) * connection loss notification is async. We also catch connection loss * when reaping receives. 
*/ - INIT_DELAYED_WORK(&new_xprt->rdma_connect, xprt_rdma_connect_worker); - new_ep->rep_func = rpcrdma_conn_func; - new_ep->rep_xprt = xprt; + INIT_DELAYED_WORK(&new_xprt->rx_connect_worker, + xprt_rdma_connect_worker); xprt_rdma_format_addresses(xprt); xprt->max_payload = rpcrdma_max_payload(new_xprt); @@ -434,94 +432,101 @@ xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task) if (r_xprt->rx_ep.rep_connected != 0) { /* Reconnect */ - schedule_delayed_work(&r_xprt->rdma_connect, - xprt->reestablish_timeout); + schedule_delayed_work(&r_xprt->rx_connect_worker, + xprt->reestablish_timeout); xprt->reestablish_timeout <<= 1; if (xprt->reestablish_timeout > RPCRDMA_MAX_REEST_TO) xprt->reestablish_timeout = RPCRDMA_MAX_REEST_TO; else if (xprt->reestablish_timeout < RPCRDMA_INIT_REEST_TO) xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO; } else { - schedule_delayed_work(&r_xprt->rdma_connect, 0); + schedule_delayed_work(&r_xprt->rx_connect_worker, 0); if (!RPC_IS_ASYNC(task)) - flush_delayed_work(&r_xprt->rdma_connect); + flush_delayed_work(&r_xprt->rx_connect_worker); } } /* * The RDMA allocate/free functions need the task structure as a place * to hide the struct rpcrdma_req, which is necessary for the actual send/recv - * sequence. For this reason, the recv buffers are attached to send - * buffers for portions of the RPC. Note that the RPC layer allocates - * both send and receive buffers in the same call. We may register - * the receive buffer portion when using reply chunks. + * sequence. + * + * The RPC layer allocates both send and receive buffers in the same call + * (rq_send_buf and rq_rcv_buf are both part of a single contiguous buffer). + * We may register rq_rcv_buf when using reply chunks. */ static void * xprt_rdma_allocate(struct rpc_task *task, size_t size) { struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt; - struct rpcrdma_req *req, *nreq; + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); + struct rpcrdma_regbuf *rb; + struct rpcrdma_req *req; + size_t min_size; + gfp_t flags; - req = rpcrdma_buffer_get(&rpcx_to_rdmax(xprt)->rx_buf); + req = rpcrdma_buffer_get(&r_xprt->rx_buf); if (req == NULL) return NULL; - if (size > req->rl_size) { - dprintk("RPC: %s: size %zd too large for buffer[%zd]: " - "prog %d vers %d proc %d\n", - __func__, size, req->rl_size, - task->tk_client->cl_prog, task->tk_client->cl_vers, - task->tk_msg.rpc_proc->p_proc); - /* - * Outgoing length shortage. Our inline write max must have - * been configured to perform direct i/o. - * - * This is therefore a large metadata operation, and the - * allocate call was made on the maximum possible message, - * e.g. containing long filename(s) or symlink data. In - * fact, while these metadata operations *might* carry - * large outgoing payloads, they rarely *do*. However, we - * have to commit to the request here, so reallocate and - * register it now. The data path will never require this - * reallocation. - * - * If the allocation or registration fails, the RPC framework - * will (doggedly) retry. 
- */ - if (task->tk_flags & RPC_TASK_SWAPPER) - nreq = kmalloc(sizeof *req + size, GFP_ATOMIC); - else - nreq = kmalloc(sizeof *req + size, GFP_NOFS); - if (nreq == NULL) - goto outfail; - - if (rpcrdma_register_internal(&rpcx_to_rdmax(xprt)->rx_ia, - nreq->rl_base, size + sizeof(struct rpcrdma_req) - - offsetof(struct rpcrdma_req, rl_base), - &nreq->rl_handle, &nreq->rl_iov)) { - kfree(nreq); - goto outfail; - } - rpcx_to_rdmax(xprt)->rx_stats.hardway_register_count += size; - nreq->rl_size = size; - nreq->rl_niovs = 0; - nreq->rl_nchunks = 0; - nreq->rl_buffer = (struct rpcrdma_buffer *)req; - nreq->rl_reply = req->rl_reply; - memcpy(nreq->rl_segments, - req->rl_segments, sizeof nreq->rl_segments); - /* flag the swap with an unused field */ - nreq->rl_iov.length = 0; - req->rl_reply = NULL; - req = nreq; - } + flags = GFP_NOIO | __GFP_NOWARN; + if (RPC_IS_SWAPPER(task)) + flags = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN; + + if (req->rl_rdmabuf == NULL) + goto out_rdmabuf; + if (req->rl_sendbuf == NULL) + goto out_sendbuf; + if (size > req->rl_sendbuf->rg_size) + goto out_sendbuf; + +out: dprintk("RPC: %s: size %zd, request 0x%p\n", __func__, size, req); req->rl_connect_cookie = 0; /* our reserved value */ - return req->rl_xdr_buf; - -outfail: + return req->rl_sendbuf->rg_base; + +out_rdmabuf: + min_size = RPCRDMA_INLINE_WRITE_THRESHOLD(task->tk_rqstp); + rb = rpcrdma_alloc_regbuf(&r_xprt->rx_ia, min_size, flags); + if (IS_ERR(rb)) + goto out_fail; + req->rl_rdmabuf = rb; + +out_sendbuf: + /* XDR encoding and RPC/RDMA marshaling of this request has not + * yet occurred. Thus a lower bound is needed to prevent buffer + * overrun during marshaling. + * + * RPC/RDMA marshaling may choose to send payload bearing ops + * inline, if the result is smaller than the inline threshold. + * The value of the "size" argument accounts for header + * requirements but not for the payload in these cases. + * + * Likewise, allocate enough space to receive a reply up to the + * size of the inline threshold. + * + * It's unlikely that both the send header and the received + * reply will be large, but slush is provided here to allow + * flexibility when marshaling. + */ + min_size = RPCRDMA_INLINE_READ_THRESHOLD(task->tk_rqstp); + min_size += RPCRDMA_INLINE_WRITE_THRESHOLD(task->tk_rqstp); + if (size < min_size) + size = min_size; + + rb = rpcrdma_alloc_regbuf(&r_xprt->rx_ia, size, flags); + if (IS_ERR(rb)) + goto out_fail; + rb->rg_owner = req; + + r_xprt->rx_stats.hardway_register_count += size; + rpcrdma_free_regbuf(&r_xprt->rx_ia, req->rl_sendbuf); + req->rl_sendbuf = rb; + goto out; + +out_fail: rpcrdma_buffer_put(req); - rpcx_to_rdmax(xprt)->rx_stats.failed_marshal_count++; + r_xprt->rx_stats.failed_marshal_count++; return NULL; } @@ -533,47 +538,24 @@ xprt_rdma_free(void *buffer) { struct rpcrdma_req *req; struct rpcrdma_xprt *r_xprt; - struct rpcrdma_rep *rep; + struct rpcrdma_regbuf *rb; int i; if (buffer == NULL) return; - req = container_of(buffer, struct rpcrdma_req, rl_xdr_buf[0]); - if (req->rl_iov.length == 0) { /* see allocate above */ - r_xprt = container_of(((struct rpcrdma_req *) req->rl_buffer)->rl_buffer, - struct rpcrdma_xprt, rx_buf); - } else - r_xprt = container_of(req->rl_buffer, struct rpcrdma_xprt, rx_buf); - rep = req->rl_reply; + rb = container_of(buffer, struct rpcrdma_regbuf, rg_base[0]); + req = rb->rg_owner; + r_xprt = container_of(req->rl_buffer, struct rpcrdma_xprt, rx_buf); - dprintk("RPC: %s: called on 0x%p%s\n", - __func__, rep, (rep && rep->rr_func) ? 
" (with waiter)" : ""); + dprintk("RPC: %s: called on 0x%p\n", __func__, req->rl_reply); - /* - * Finish the deregistration. The process is considered - * complete when the rr_func vector becomes NULL - this - * was put in place during rpcrdma_reply_handler() - the wait - * call below will not block if the dereg is "done". If - * interrupted, our framework will clean up. - */ for (i = 0; req->rl_nchunks;) { --req->rl_nchunks; i += rpcrdma_deregister_external( &req->rl_segments[i], r_xprt); } - if (req->rl_iov.length == 0) { /* see allocate above */ - struct rpcrdma_req *oreq = (struct rpcrdma_req *)req->rl_buffer; - oreq->rl_reply = req->rl_reply; - (void) rpcrdma_deregister_internal(&r_xprt->rx_ia, - req->rl_handle, - &req->rl_iov); - kfree(req); - req = oreq; - } - - /* Put back request+reply buffers */ rpcrdma_buffer_put(req); } diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index c98e40643910..124676c13780 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -49,6 +49,7 @@ #include <linux/interrupt.h> #include <linux/slab.h> +#include <linux/prefetch.h> #include <asm/bitops.h> #include "xprt_rdma.h" @@ -153,7 +154,7 @@ rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context) event->device->name, context); if (ep->rep_connected == 1) { ep->rep_connected = -EIO; - ep->rep_func(ep); + rpcrdma_conn_func(ep); wake_up_all(&ep->rep_connect_wait); } } @@ -168,23 +169,59 @@ rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context) event->device->name, context); if (ep->rep_connected == 1) { ep->rep_connected = -EIO; - ep->rep_func(ep); + rpcrdma_conn_func(ep); wake_up_all(&ep->rep_connect_wait); } } +static const char * const wc_status[] = { + "success", + "local length error", + "local QP operation error", + "local EE context operation error", + "local protection error", + "WR flushed", + "memory management operation error", + "bad response error", + "local access error", + "remote invalid request error", + "remote access error", + "remote operation error", + "transport retry counter exceeded", + "RNR retrycounter exceeded", + "local RDD violation error", + "remove invalid RD request", + "operation aborted", + "invalid EE context number", + "invalid EE context state", + "fatal error", + "response timeout error", + "general error", +}; + +#define COMPLETION_MSG(status) \ + ((status) < ARRAY_SIZE(wc_status) ? 
\ + wc_status[(status)] : "unexpected completion error") + static void rpcrdma_sendcq_process_wc(struct ib_wc *wc) { - struct rpcrdma_mw *frmr = (struct rpcrdma_mw *)(unsigned long)wc->wr_id; + if (likely(wc->status == IB_WC_SUCCESS)) + return; - dprintk("RPC: %s: frmr %p status %X opcode %d\n", - __func__, frmr, wc->status, wc->opcode); + /* WARNING: Only wr_id and status are reliable at this point */ + if (wc->wr_id == 0ULL) { + if (wc->status != IB_WC_WR_FLUSH_ERR) + pr_err("RPC: %s: SEND: %s\n", + __func__, COMPLETION_MSG(wc->status)); + } else { + struct rpcrdma_mw *r; - if (wc->wr_id == 0ULL) - return; - if (wc->status != IB_WC_SUCCESS) - frmr->r.frmr.fr_state = FRMR_IS_STALE; + r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id; + r->r.frmr.fr_state = FRMR_IS_STALE; + pr_err("RPC: %s: frmr %p (stale): %s\n", + __func__, r, COMPLETION_MSG(wc->status)); + } } static int @@ -248,33 +285,32 @@ rpcrdma_recvcq_process_wc(struct ib_wc *wc, struct list_head *sched_list) struct rpcrdma_rep *rep = (struct rpcrdma_rep *)(unsigned long)wc->wr_id; - dprintk("RPC: %s: rep %p status %X opcode %X length %u\n", - __func__, rep, wc->status, wc->opcode, wc->byte_len); + /* WARNING: Only wr_id and status are reliable at this point */ + if (wc->status != IB_WC_SUCCESS) + goto out_fail; - if (wc->status != IB_WC_SUCCESS) { - rep->rr_len = ~0U; - goto out_schedule; - } + /* status == SUCCESS means all fields in wc are trustworthy */ if (wc->opcode != IB_WC_RECV) return; + dprintk("RPC: %s: rep %p opcode 'recv', length %u: success\n", + __func__, rep, wc->byte_len); + rep->rr_len = wc->byte_len; ib_dma_sync_single_for_cpu(rdmab_to_ia(rep->rr_buffer)->ri_id->device, - rep->rr_iov.addr, rep->rr_len, DMA_FROM_DEVICE); - - if (rep->rr_len >= 16) { - struct rpcrdma_msg *p = (struct rpcrdma_msg *)rep->rr_base; - unsigned int credits = ntohl(p->rm_credit); - - if (credits == 0) - credits = 1; /* don't deadlock */ - else if (credits > rep->rr_buffer->rb_max_requests) - credits = rep->rr_buffer->rb_max_requests; - atomic_set(&rep->rr_buffer->rb_credits, credits); - } + rdmab_addr(rep->rr_rdmabuf), + rep->rr_len, DMA_FROM_DEVICE); + prefetch(rdmab_to_msg(rep->rr_rdmabuf)); out_schedule: list_add_tail(&rep->rr_list, sched_list); + return; +out_fail: + if (wc->status != IB_WC_WR_FLUSH_ERR) + pr_err("RPC: %s: rep %p: %s\n", + __func__, rep, COMPLETION_MSG(wc->status)); + rep->rr_len = ~0U; + goto out_schedule; } static int @@ -390,8 +426,8 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) struct sockaddr_in *addr = (struct sockaddr_in *) &ep->rep_remote_addr; #endif - struct ib_qp_attr attr; - struct ib_qp_init_attr iattr; + struct ib_qp_attr *attr = &ia->ri_qp_attr; + struct ib_qp_init_attr *iattr = &ia->ri_qp_init_attr; int connstate = 0; switch (event->event) { @@ -414,12 +450,13 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) break; case RDMA_CM_EVENT_ESTABLISHED: connstate = 1; - ib_query_qp(ia->ri_id->qp, &attr, - IB_QP_MAX_QP_RD_ATOMIC | IB_QP_MAX_DEST_RD_ATOMIC, - &iattr); + ib_query_qp(ia->ri_id->qp, attr, + IB_QP_MAX_QP_RD_ATOMIC | IB_QP_MAX_DEST_RD_ATOMIC, + iattr); dprintk("RPC: %s: %d responder resources" " (%d initiator)\n", - __func__, attr.max_dest_rd_atomic, attr.max_rd_atomic); + __func__, attr->max_dest_rd_atomic, + attr->max_rd_atomic); goto connected; case RDMA_CM_EVENT_CONNECT_ERROR: connstate = -ENOTCONN; @@ -436,11 +473,10 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) case 
RDMA_CM_EVENT_DEVICE_REMOVAL: connstate = -ENODEV; connected: - atomic_set(&rpcx_to_rdmax(ep->rep_xprt)->rx_buf.rb_credits, 1); dprintk("RPC: %s: %sconnected\n", __func__, connstate > 0 ? "" : "dis"); ep->rep_connected = connstate; - ep->rep_func(ep); + rpcrdma_conn_func(ep); wake_up_all(&ep->rep_connect_wait); /*FALLTHROUGH*/ default: @@ -453,7 +489,7 @@ connected: #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) if (connstate == 1) { - int ird = attr.max_dest_rd_atomic; + int ird = attr->max_dest_rd_atomic; int tird = ep->rep_remote_cma.responder_resources; printk(KERN_INFO "rpcrdma: connection to %pI4:%u " "on %s, memreg %d slots %d ird %d%s\n", @@ -554,8 +590,8 @@ int rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) { int rc, mem_priv; - struct ib_device_attr devattr; struct rpcrdma_ia *ia = &xprt->rx_ia; + struct ib_device_attr *devattr = &ia->ri_devattr; ia->ri_id = rpcrdma_create_id(xprt, ia, addr); if (IS_ERR(ia->ri_id)) { @@ -571,26 +607,21 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) goto out2; } - /* - * Query the device to determine if the requested memory - * registration strategy is supported. If it isn't, set the - * strategy to a globally supported model. - */ - rc = ib_query_device(ia->ri_id->device, &devattr); + rc = ib_query_device(ia->ri_id->device, devattr); if (rc) { dprintk("RPC: %s: ib_query_device failed %d\n", __func__, rc); - goto out2; + goto out3; } - if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) { + if (devattr->device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) { ia->ri_have_dma_lkey = 1; ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey; } if (memreg == RPCRDMA_FRMR) { /* Requires both frmr reg and local dma lkey */ - if ((devattr.device_cap_flags & + if ((devattr->device_cap_flags & (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) != (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) { dprintk("RPC: %s: FRMR registration " @@ -600,7 +631,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) /* Mind the ia limit on FRMR page list depth */ ia->ri_max_frmr_depth = min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS, - devattr.max_fast_reg_page_list_len); + devattr->max_fast_reg_page_list_len); } } if (memreg == RPCRDMA_MTHCAFMR) { @@ -638,14 +669,14 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) "phys register failed with %lX\n", __func__, PTR_ERR(ia->ri_bind_mem)); rc = -ENOMEM; - goto out2; + goto out3; } break; default: printk(KERN_ERR "RPC: Unsupported memory " "registration mode: %d\n", memreg); rc = -ENOMEM; - goto out2; + goto out3; } dprintk("RPC: %s: memory registration strategy is %d\n", __func__, memreg); @@ -655,6 +686,10 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) rwlock_init(&ia->ri_qplock); return 0; + +out3: + ib_dealloc_pd(ia->ri_pd); + ia->ri_pd = NULL; out2: rdma_destroy_id(ia->ri_id); ia->ri_id = NULL; @@ -698,20 +733,13 @@ int rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, struct rpcrdma_create_data_internal *cdata) { - struct ib_device_attr devattr; + struct ib_device_attr *devattr = &ia->ri_devattr; struct ib_cq *sendcq, *recvcq; int rc, err; - rc = ib_query_device(ia->ri_id->device, &devattr); - if (rc) { - dprintk("RPC: %s: ib_query_device failed %d\n", - __func__, rc); - return rc; - } - /* check provider's send/recv wr limits */ - if (cdata->max_requests > devattr.max_qp_wr) - cdata->max_requests = devattr.max_qp_wr; + if (cdata->max_requests > 
devattr->max_qp_wr) + cdata->max_requests = devattr->max_qp_wr; ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall; ep->rep_attr.qp_context = ep; @@ -746,8 +774,8 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, } ep->rep_attr.cap.max_send_wr *= depth; - if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) { - cdata->max_requests = devattr.max_qp_wr / depth; + if (ep->rep_attr.cap.max_send_wr > devattr->max_qp_wr) { + cdata->max_requests = devattr->max_qp_wr / depth; if (!cdata->max_requests) return -EINVAL; ep->rep_attr.cap.max_send_wr = cdata->max_requests * @@ -766,6 +794,14 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, ep->rep_attr.qp_type = IB_QPT_RC; ep->rep_attr.port_num = ~0; + if (cdata->padding) { + ep->rep_padbuf = rpcrdma_alloc_regbuf(ia, cdata->padding, + GFP_KERNEL); + if (IS_ERR(ep->rep_padbuf)) + return PTR_ERR(ep->rep_padbuf); + } else + ep->rep_padbuf = NULL; + dprintk("RPC: %s: requested max: dtos: send %d recv %d; " "iovs: send %d recv %d\n", __func__, @@ -781,7 +817,6 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, else if (ep->rep_cqinit <= 2) ep->rep_cqinit = 0; INIT_CQCOUNT(ep); - ep->rep_ia = ia; init_waitqueue_head(&ep->rep_connect_wait); INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker); @@ -831,10 +866,11 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, /* Client offers RDMA Read but does not initiate */ ep->rep_remote_cma.initiator_depth = 0; - if (devattr.max_qp_rd_atom > 32) /* arbitrary but <= 255 */ + if (devattr->max_qp_rd_atom > 32) /* arbitrary but <= 255 */ ep->rep_remote_cma.responder_resources = 32; else - ep->rep_remote_cma.responder_resources = devattr.max_qp_rd_atom; + ep->rep_remote_cma.responder_resources = + devattr->max_qp_rd_atom; ep->rep_remote_cma.retry_count = 7; ep->rep_remote_cma.flow_control = 0; @@ -848,6 +884,7 @@ out2: dprintk("RPC: %s: ib_destroy_cq returned %i\n", __func__, err); out1: + rpcrdma_free_regbuf(ia, ep->rep_padbuf); return rc; } @@ -874,11 +911,7 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) ia->ri_id->qp = NULL; } - /* padding - could be done in rpcrdma_buffer_destroy... 
*/ - if (ep->rep_pad_mr) { - rpcrdma_deregister_internal(ia, ep->rep_pad_mr, &ep->rep_pad); - ep->rep_pad_mr = NULL; - } + rpcrdma_free_regbuf(ia, ep->rep_padbuf); rpcrdma_clean_cq(ep->rep_attr.recv_cq); rc = ib_destroy_cq(ep->rep_attr.recv_cq); @@ -1048,6 +1081,48 @@ rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) } } +static struct rpcrdma_req * +rpcrdma_create_req(struct rpcrdma_xprt *r_xprt) +{ + struct rpcrdma_req *req; + + req = kzalloc(sizeof(*req), GFP_KERNEL); + if (req == NULL) + return ERR_PTR(-ENOMEM); + + req->rl_buffer = &r_xprt->rx_buf; + return req; +} + +static struct rpcrdma_rep * +rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt) +{ + struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data; + struct rpcrdma_ia *ia = &r_xprt->rx_ia; + struct rpcrdma_rep *rep; + int rc; + + rc = -ENOMEM; + rep = kzalloc(sizeof(*rep), GFP_KERNEL); + if (rep == NULL) + goto out; + + rep->rr_rdmabuf = rpcrdma_alloc_regbuf(ia, cdata->inline_rsize, + GFP_KERNEL); + if (IS_ERR(rep->rr_rdmabuf)) { + rc = PTR_ERR(rep->rr_rdmabuf); + goto out_free; + } + + rep->rr_buffer = &r_xprt->rx_buf; + return rep; + +out_free: + kfree(rep); +out: + return ERR_PTR(rc); +} + static int rpcrdma_init_fmrs(struct rpcrdma_ia *ia, struct rpcrdma_buffer *buf) { @@ -1134,27 +1209,26 @@ out_free: } int -rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, - struct rpcrdma_ia *ia, struct rpcrdma_create_data_internal *cdata) +rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) { + struct rpcrdma_buffer *buf = &r_xprt->rx_buf; + struct rpcrdma_ia *ia = &r_xprt->rx_ia; + struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data; char *p; - size_t len, rlen, wlen; + size_t len; int i, rc; buf->rb_max_requests = cdata->max_requests; spin_lock_init(&buf->rb_lock); - atomic_set(&buf->rb_credits, 1); /* Need to allocate: * 1. arrays for send and recv pointers * 2. arrays of struct rpcrdma_req to fill in pointers * 3. array of struct rpcrdma_rep for replies - * 4. padding, if any * Send/recv buffers in req/rep need to be registered */ len = buf->rb_max_requests * (sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *)); - len += cdata->padding; p = kzalloc(len, GFP_KERNEL); if (p == NULL) { @@ -1170,17 +1244,6 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, buf->rb_recv_bufs = (struct rpcrdma_rep **) p; p = (char *) &buf->rb_recv_bufs[buf->rb_max_requests]; - /* - * Register the zeroed pad buffer, if any. - */ - if (cdata->padding) { - rc = rpcrdma_register_internal(ia, p, cdata->padding, - &ep->rep_pad_mr, &ep->rep_pad); - if (rc) - goto out; - } - p += cdata->padding; - INIT_LIST_HEAD(&buf->rb_mws); INIT_LIST_HEAD(&buf->rb_all); switch (ia->ri_memreg_strategy) { @@ -1198,62 +1261,29 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, break; } - /* - * Allocate/init the request/reply buffers. Doing this - * using kmalloc for now -- one for each buf. 
- */ - wlen = 1 << fls(cdata->inline_wsize + sizeof(struct rpcrdma_req)); - rlen = 1 << fls(cdata->inline_rsize + sizeof(struct rpcrdma_rep)); - dprintk("RPC: %s: wlen = %zu, rlen = %zu\n", - __func__, wlen, rlen); - for (i = 0; i < buf->rb_max_requests; i++) { struct rpcrdma_req *req; struct rpcrdma_rep *rep; - req = kmalloc(wlen, GFP_KERNEL); - if (req == NULL) { + req = rpcrdma_create_req(r_xprt); + if (IS_ERR(req)) { dprintk("RPC: %s: request buffer %d alloc" " failed\n", __func__, i); - rc = -ENOMEM; + rc = PTR_ERR(req); goto out; } - memset(req, 0, sizeof(struct rpcrdma_req)); buf->rb_send_bufs[i] = req; - buf->rb_send_bufs[i]->rl_buffer = buf; - rc = rpcrdma_register_internal(ia, req->rl_base, - wlen - offsetof(struct rpcrdma_req, rl_base), - &buf->rb_send_bufs[i]->rl_handle, - &buf->rb_send_bufs[i]->rl_iov); - if (rc) - goto out; - - buf->rb_send_bufs[i]->rl_size = wlen - - sizeof(struct rpcrdma_req); - - rep = kmalloc(rlen, GFP_KERNEL); - if (rep == NULL) { + rep = rpcrdma_create_rep(r_xprt); + if (IS_ERR(rep)) { dprintk("RPC: %s: reply buffer %d alloc failed\n", __func__, i); - rc = -ENOMEM; + rc = PTR_ERR(rep); goto out; } - memset(rep, 0, sizeof(struct rpcrdma_rep)); buf->rb_recv_bufs[i] = rep; - buf->rb_recv_bufs[i]->rr_buffer = buf; - - rc = rpcrdma_register_internal(ia, rep->rr_base, - rlen - offsetof(struct rpcrdma_rep, rr_base), - &buf->rb_recv_bufs[i]->rr_handle, - &buf->rb_recv_bufs[i]->rr_iov); - if (rc) - goto out; - } - dprintk("RPC: %s: max_requests %d\n", - __func__, buf->rb_max_requests); - /* done */ + return 0; out: rpcrdma_buffer_destroy(buf); @@ -1261,6 +1291,27 @@ out: } static void +rpcrdma_destroy_rep(struct rpcrdma_ia *ia, struct rpcrdma_rep *rep) +{ + if (!rep) + return; + + rpcrdma_free_regbuf(ia, rep->rr_rdmabuf); + kfree(rep); +} + +static void +rpcrdma_destroy_req(struct rpcrdma_ia *ia, struct rpcrdma_req *req) +{ + if (!req) + return; + + rpcrdma_free_regbuf(ia, req->rl_sendbuf); + rpcrdma_free_regbuf(ia, req->rl_rdmabuf); + kfree(req); +} + +static void rpcrdma_destroy_fmrs(struct rpcrdma_buffer *buf) { struct rpcrdma_mw *r; @@ -1315,18 +1366,10 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) dprintk("RPC: %s: entering\n", __func__); for (i = 0; i < buf->rb_max_requests; i++) { - if (buf->rb_recv_bufs && buf->rb_recv_bufs[i]) { - rpcrdma_deregister_internal(ia, - buf->rb_recv_bufs[i]->rr_handle, - &buf->rb_recv_bufs[i]->rr_iov); - kfree(buf->rb_recv_bufs[i]); - } - if (buf->rb_send_bufs && buf->rb_send_bufs[i]) { - rpcrdma_deregister_internal(ia, - buf->rb_send_bufs[i]->rl_handle, - &buf->rb_send_bufs[i]->rl_iov); - kfree(buf->rb_send_bufs[i]); - } + if (buf->rb_recv_bufs) + rpcrdma_destroy_rep(ia, buf->rb_recv_bufs[i]); + if (buf->rb_send_bufs) + rpcrdma_destroy_req(ia, buf->rb_send_bufs[i]); } switch (ia->ri_memreg_strategy) { @@ -1450,8 +1493,8 @@ rpcrdma_buffer_put_mrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf) int i; for (i = 1, seg++; i < RPCRDMA_MAX_SEGS; seg++, i++) - rpcrdma_buffer_put_mr(&seg->mr_chunk.rl_mw, buf); - rpcrdma_buffer_put_mr(&seg1->mr_chunk.rl_mw, buf); + rpcrdma_buffer_put_mr(&seg->rl_mw, buf); + rpcrdma_buffer_put_mr(&seg1->rl_mw, buf); } static void @@ -1537,7 +1580,7 @@ rpcrdma_buffer_get_frmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf, list_add(&r->mw_list, stale); continue; } - req->rl_segments[i].mr_chunk.rl_mw = r; + req->rl_segments[i].rl_mw = r; if (unlikely(i-- == 0)) return req; /* Success */ } @@ -1559,7 +1602,7 @@ rpcrdma_buffer_get_fmrs(struct rpcrdma_req *req, struct rpcrdma_buffer 
*buf) r = list_entry(buf->rb_mws.next, struct rpcrdma_mw, mw_list); list_del(&r->mw_list); - req->rl_segments[i].mr_chunk.rl_mw = r; + req->rl_segments[i].rl_mw = r; if (unlikely(i-- == 0)) return req; /* Success */ } @@ -1658,8 +1701,6 @@ rpcrdma_recv_buffer_get(struct rpcrdma_req *req) struct rpcrdma_buffer *buffers = req->rl_buffer; unsigned long flags; - if (req->rl_iov.length == 0) /* special case xprt_rdma_allocate() */ - buffers = ((struct rpcrdma_req *) buffers)->rl_buffer; spin_lock_irqsave(&buffers->rb_lock, flags); if (buffers->rb_recv_index < buffers->rb_max_requests) { req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index]; @@ -1688,7 +1729,7 @@ rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep) * Wrappers for internal-use kmalloc memory registration, used by buffer code. */ -int +static int rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len, struct ib_mr **mrp, struct ib_sge *iov) { @@ -1739,7 +1780,7 @@ rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len, return rc; } -int +static int rpcrdma_deregister_internal(struct rpcrdma_ia *ia, struct ib_mr *mr, struct ib_sge *iov) { @@ -1757,6 +1798,61 @@ rpcrdma_deregister_internal(struct rpcrdma_ia *ia, return rc; } +/** + * rpcrdma_alloc_regbuf - kmalloc and register memory for SEND/RECV buffers + * @ia: controlling rpcrdma_ia + * @size: size of buffer to be allocated, in bytes + * @flags: GFP flags + * + * Returns pointer to private header of an area of internally + * registered memory, or an ERR_PTR. The registered buffer follows + * the end of the private header. + * + * xprtrdma uses a regbuf for posting an outgoing RDMA SEND, or for + * receiving the payload of RDMA RECV operations. regbufs are not + * used for RDMA READ/WRITE operations, thus are registered only for + * LOCAL access. + */ +struct rpcrdma_regbuf * +rpcrdma_alloc_regbuf(struct rpcrdma_ia *ia, size_t size, gfp_t flags) +{ + struct rpcrdma_regbuf *rb; + int rc; + + rc = -ENOMEM; + rb = kmalloc(sizeof(*rb) + size, flags); + if (rb == NULL) + goto out; + + rb->rg_size = size; + rb->rg_owner = NULL; + rc = rpcrdma_register_internal(ia, rb->rg_base, size, + &rb->rg_mr, &rb->rg_iov); + if (rc) + goto out_free; + + return rb; + +out_free: + kfree(rb); +out: + return ERR_PTR(rc); +} + +/** + * rpcrdma_free_regbuf - deregister and free registered buffer + * @ia: controlling rpcrdma_ia + * @rb: regbuf to be deregistered and freed + */ +void +rpcrdma_free_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb) +{ + if (rb) { + rpcrdma_deregister_internal(ia, rb->rg_mr, &rb->rg_iov); + kfree(rb); + } +} + /* * Wrappers for chunk registration, shared by read/write chunk code. 
*/ @@ -1799,7 +1895,7 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, struct rpcrdma_xprt *r_xprt) { struct rpcrdma_mr_seg *seg1 = seg; - struct rpcrdma_mw *mw = seg1->mr_chunk.rl_mw; + struct rpcrdma_mw *mw = seg1->rl_mw; struct rpcrdma_frmr *frmr = &mw->r.frmr; struct ib_mr *mr = frmr->fr_mr; struct ib_send_wr fastreg_wr, *bad_wr; @@ -1888,12 +1984,12 @@ rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg, struct ib_send_wr invalidate_wr, *bad_wr; int rc; - seg1->mr_chunk.rl_mw->r.frmr.fr_state = FRMR_IS_INVALID; + seg1->rl_mw->r.frmr.fr_state = FRMR_IS_INVALID; memset(&invalidate_wr, 0, sizeof invalidate_wr); - invalidate_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw; + invalidate_wr.wr_id = (unsigned long)(void *)seg1->rl_mw; invalidate_wr.opcode = IB_WR_LOCAL_INV; - invalidate_wr.ex.invalidate_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey; + invalidate_wr.ex.invalidate_rkey = seg1->rl_mw->r.frmr.fr_mr->rkey; DECR_CQCOUNT(&r_xprt->rx_ep); read_lock(&ia->ri_qplock); @@ -1903,7 +1999,7 @@ rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg, read_unlock(&ia->ri_qplock); if (rc) { /* Force rpcrdma_buffer_get() to retry */ - seg1->mr_chunk.rl_mw->r.frmr.fr_state = FRMR_IS_STALE; + seg1->rl_mw->r.frmr.fr_state = FRMR_IS_STALE; dprintk("RPC: %s: failed ib_post_send for invalidate," " status %i\n", __func__, rc); } @@ -1935,8 +2031,7 @@ rpcrdma_register_fmr_external(struct rpcrdma_mr_seg *seg, offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) break; } - rc = ib_map_phys_fmr(seg1->mr_chunk.rl_mw->r.fmr, - physaddrs, i, seg1->mr_dma); + rc = ib_map_phys_fmr(seg1->rl_mw->r.fmr, physaddrs, i, seg1->mr_dma); if (rc) { dprintk("RPC: %s: failed ib_map_phys_fmr " "%u@0x%llx+%i (%d)... status %i\n", __func__, @@ -1945,7 +2040,7 @@ rpcrdma_register_fmr_external(struct rpcrdma_mr_seg *seg, while (i--) rpcrdma_unmap_one(ia, --seg); } else { - seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.fmr->rkey; + seg1->mr_rkey = seg1->rl_mw->r.fmr->rkey; seg1->mr_base = seg1->mr_dma + pageoff; seg1->mr_nsegs = i; seg1->mr_len = len; @@ -1962,7 +2057,7 @@ rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg, LIST_HEAD(l); int rc; - list_add(&seg1->mr_chunk.rl_mw->r.fmr->list, &l); + list_add(&seg1->rl_mw->r.fmr->list, &l); rc = ib_unmap_fmr(&l); read_lock(&ia->ri_qplock); while (seg1->mr_nsegs--) @@ -2104,11 +2199,13 @@ rpcrdma_ep_post_recv(struct rpcrdma_ia *ia, recv_wr.next = NULL; recv_wr.wr_id = (u64) (unsigned long) rep; - recv_wr.sg_list = &rep->rr_iov; + recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov; recv_wr.num_sge = 1; ib_dma_sync_single_for_cpu(ia->ri_id->device, - rep->rr_iov.addr, rep->rr_iov.length, DMA_BIDIRECTIONAL); + rdmab_addr(rep->rr_rdmabuf), + rdmab_length(rep->rr_rdmabuf), + DMA_BIDIRECTIONAL); rc = ib_post_recv(ia->ri_id->qp, &recv_wr, &recv_wr_fail); diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index b799041b75bf..d1b70397c60f 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -70,6 +70,9 @@ struct rpcrdma_ia { int ri_async_rc; enum rpcrdma_memreg ri_memreg_strategy; unsigned int ri_max_frmr_depth; + struct ib_device_attr ri_devattr; + struct ib_qp_attr ri_qp_attr; + struct ib_qp_init_attr ri_qp_init_attr; }; /* @@ -83,13 +86,9 @@ struct rpcrdma_ep { atomic_t rep_cqcount; int rep_cqinit; int rep_connected; - struct rpcrdma_ia *rep_ia; struct ib_qp_init_attr rep_attr; wait_queue_head_t rep_connect_wait; - struct ib_sge rep_pad; /* holds zeroed pad */ - struct ib_mr *rep_pad_mr; /* holds 
zeroed pad */ - void (*rep_func)(struct rpcrdma_ep *); - struct rpc_xprt *rep_xprt; /* for rep_func */ + struct rpcrdma_regbuf *rep_padbuf; struct rdma_conn_param rep_remote_cma; struct sockaddr_storage rep_remote_addr; struct delayed_work rep_connect_worker; @@ -106,6 +105,44 @@ struct rpcrdma_ep { #define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit) #define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount) +/* Registered buffer -- registered kmalloc'd memory for RDMA SEND/RECV + * + * The below structure appears at the front of a large region of kmalloc'd + * memory, which always starts on a good alignment boundary. + */ + +struct rpcrdma_regbuf { + size_t rg_size; + struct rpcrdma_req *rg_owner; + struct ib_mr *rg_mr; + struct ib_sge rg_iov; + __be32 rg_base[0] __attribute__ ((aligned(256))); +}; + +static inline u64 +rdmab_addr(struct rpcrdma_regbuf *rb) +{ + return rb->rg_iov.addr; +} + +static inline u32 +rdmab_length(struct rpcrdma_regbuf *rb) +{ + return rb->rg_iov.length; +} + +static inline u32 +rdmab_lkey(struct rpcrdma_regbuf *rb) +{ + return rb->rg_iov.lkey; +} + +static inline struct rpcrdma_msg * +rdmab_to_msg(struct rpcrdma_regbuf *rb) +{ + return (struct rpcrdma_msg *)rb->rg_base; +} + enum rpcrdma_chunktype { rpcrdma_noch = 0, rpcrdma_readch, @@ -134,22 +171,16 @@ enum rpcrdma_chunktype { /* temporary static scatter/gather max */ #define RPCRDMA_MAX_DATA_SEGS (64) /* max scatter/gather */ #define RPCRDMA_MAX_SEGS (RPCRDMA_MAX_DATA_SEGS + 2) /* head+tail = 2 */ -#define MAX_RPCRDMAHDR (\ - /* max supported RPC/RDMA header */ \ - sizeof(struct rpcrdma_msg) + (2 * sizeof(u32)) + \ - (sizeof(struct rpcrdma_read_chunk) * RPCRDMA_MAX_SEGS) + sizeof(u32)) struct rpcrdma_buffer; struct rpcrdma_rep { - unsigned int rr_len; /* actual received reply length */ - struct rpcrdma_buffer *rr_buffer; /* home base for this structure */ - struct rpc_xprt *rr_xprt; /* needed for request/reply matching */ - void (*rr_func)(struct rpcrdma_rep *);/* called by tasklet in softint */ - struct list_head rr_list; /* tasklet list */ - struct ib_sge rr_iov; /* for posting */ - struct ib_mr *rr_handle; /* handle for mem in rr_iov */ - char rr_base[MAX_RPCRDMAHDR]; /* minimal inline receive buffer */ + unsigned int rr_len; + struct rpcrdma_buffer *rr_buffer; + struct rpc_xprt *rr_xprt; + void (*rr_func)(struct rpcrdma_rep *); + struct list_head rr_list; + struct rpcrdma_regbuf *rr_rdmabuf; }; /* @@ -211,10 +242,7 @@ struct rpcrdma_mw { */ struct rpcrdma_mr_seg { /* chunk descriptors */ - union { /* chunk memory handles */ - struct ib_mr *rl_mr; /* if registered directly */ - struct rpcrdma_mw *rl_mw; /* if registered from region */ - } mr_chunk; + struct rpcrdma_mw *rl_mw; /* registered MR */ u64 mr_base; /* registration result */ u32 mr_rkey; /* registration result */ u32 mr_len; /* length of chunk or segment */ @@ -227,22 +255,27 @@ struct rpcrdma_mr_seg { /* chunk descriptors */ }; struct rpcrdma_req { - size_t rl_size; /* actual length of buffer */ unsigned int rl_niovs; /* 0, 2 or 4 */ unsigned int rl_nchunks; /* non-zero if chunks */ unsigned int rl_connect_cookie; /* retry detection */ enum rpcrdma_chunktype rl_rtype, rl_wtype; struct rpcrdma_buffer *rl_buffer; /* home base for this structure */ struct rpcrdma_rep *rl_reply;/* holder for reply buffer */ - struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS];/* chunk segments */ struct ib_sge rl_send_iov[4]; /* for active requests */ - struct ib_sge rl_iov; /* for posting */ - struct ib_mr *rl_handle; /* handle for mem 
in rl_iov */ - char rl_base[MAX_RPCRDMAHDR]; /* start of actual buffer */ - __u32 rl_xdr_buf[0]; /* start of returned rpc rq_buffer */ + struct rpcrdma_regbuf *rl_rdmabuf; + struct rpcrdma_regbuf *rl_sendbuf; + struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS]; }; -#define rpcr_to_rdmar(r) \ - container_of((r)->rq_buffer, struct rpcrdma_req, rl_xdr_buf[0]) + +static inline struct rpcrdma_req * +rpcr_to_rdmar(struct rpc_rqst *rqst) +{ + void *buffer = rqst->rq_buffer; + struct rpcrdma_regbuf *rb; + + rb = container_of(buffer, struct rpcrdma_regbuf, rg_base); + return rb->rg_owner; +} /* * struct rpcrdma_buffer -- holds list/queue of pre-registered memory for @@ -252,7 +285,6 @@ struct rpcrdma_req { */ struct rpcrdma_buffer { spinlock_t rb_lock; /* protects indexes */ - atomic_t rb_credits; /* most recent server credits */ int rb_max_requests;/* client max requests */ struct list_head rb_mws; /* optional memory windows/fmrs/frmrs */ struct list_head rb_all; @@ -318,16 +350,16 @@ struct rpcrdma_stats { * during unmount. */ struct rpcrdma_xprt { - struct rpc_xprt xprt; + struct rpc_xprt rx_xprt; struct rpcrdma_ia rx_ia; struct rpcrdma_ep rx_ep; struct rpcrdma_buffer rx_buf; struct rpcrdma_create_data_internal rx_data; - struct delayed_work rdma_connect; + struct delayed_work rx_connect_worker; struct rpcrdma_stats rx_stats; }; -#define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, xprt) +#define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, rx_xprt) #define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data) /* Setting this to 0 ensures interoperability with early servers. @@ -358,9 +390,7 @@ int rpcrdma_ep_post_recv(struct rpcrdma_ia *, struct rpcrdma_ep *, /* * Buffer calls - xprtrdma/verbs.c */ -int rpcrdma_buffer_create(struct rpcrdma_buffer *, struct rpcrdma_ep *, - struct rpcrdma_ia *, - struct rpcrdma_create_data_internal *); +int rpcrdma_buffer_create(struct rpcrdma_xprt *); void rpcrdma_buffer_destroy(struct rpcrdma_buffer *); struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *); @@ -368,16 +398,16 @@ void rpcrdma_buffer_put(struct rpcrdma_req *); void rpcrdma_recv_buffer_get(struct rpcrdma_req *); void rpcrdma_recv_buffer_put(struct rpcrdma_rep *); -int rpcrdma_register_internal(struct rpcrdma_ia *, void *, int, - struct ib_mr **, struct ib_sge *); -int rpcrdma_deregister_internal(struct rpcrdma_ia *, - struct ib_mr *, struct ib_sge *); - int rpcrdma_register_external(struct rpcrdma_mr_seg *, int, int, struct rpcrdma_xprt *); int rpcrdma_deregister_external(struct rpcrdma_mr_seg *, struct rpcrdma_xprt *); +struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(struct rpcrdma_ia *, + size_t, gfp_t); +void rpcrdma_free_regbuf(struct rpcrdma_ia *, + struct rpcrdma_regbuf *); + /* * RPC/RDMA connection management calls - xprtrdma/rpc_rdma.c */ diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 87ce7e8bb8dc..66891e32c5e3 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -63,6 +63,8 @@ static unsigned int xprt_max_tcp_slot_table_entries = RPC_MAX_SLOT_TABLE; static unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT; static unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT; +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) + #define XS_TCP_LINGER_TO (15U * HZ) static unsigned int xs_tcp_fin_timeout __read_mostly = XS_TCP_LINGER_TO; @@ -75,8 +77,6 @@ static unsigned int xs_tcp_fin_timeout __read_mostly = XS_TCP_LINGER_TO; * someone else's file names! 
*/ -#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) - static unsigned int min_slot_table_size = RPC_MIN_SLOT_TABLE; static unsigned int max_slot_table_size = RPC_MAX_SLOT_TABLE; static unsigned int max_tcp_slot_table_limit = RPC_MAX_SLOT_TABLE_LIMIT; @@ -627,7 +627,7 @@ process_status: * @xprt: transport * * Initiates a graceful shutdown of the TCP socket by calling the - * equivalent of shutdown(SHUT_WR); + * equivalent of shutdown(SHUT_RDWR); */ static void xs_tcp_shutdown(struct rpc_xprt *xprt) { @@ -635,7 +635,7 @@ static void xs_tcp_shutdown(struct rpc_xprt *xprt) struct socket *sock = transport->sock; if (sock != NULL) { - kernel_sock_shutdown(sock, SHUT_WR); + kernel_sock_shutdown(sock, SHUT_RDWR); trace_rpc_socket_shutdown(xprt, sock); } } @@ -718,9 +718,9 @@ static int xs_tcp_send_request(struct rpc_task *task) dprintk("RPC: sendmsg returned unrecognized error %d\n", -status); case -ECONNRESET: - xs_tcp_shutdown(xprt); case -ECONNREFUSED: case -ENOTCONN: + case -EADDRINUSE: case -EPIPE: clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); } @@ -773,6 +773,21 @@ static void xs_restore_old_callbacks(struct sock_xprt *transport, struct sock *s sk->sk_error_report = transport->old_error_report; } +static void xs_sock_reset_connection_flags(struct rpc_xprt *xprt) +{ + smp_mb__before_atomic(); + clear_bit(XPRT_CLOSE_WAIT, &xprt->state); + clear_bit(XPRT_CLOSING, &xprt->state); + smp_mb__after_atomic(); +} + +static void xs_sock_mark_closed(struct rpc_xprt *xprt) +{ + xs_sock_reset_connection_flags(xprt); + /* Mark transport as closed and wake up all pending tasks */ + xprt_disconnect_done(xprt); +} + /** * xs_error_report - callback to handle TCP socket state errors * @sk: socket @@ -792,11 +807,12 @@ static void xs_error_report(struct sock *sk) err = -sk->sk_err; if (err == 0) goto out; + /* Is this a reset event? 
*/ + if (sk->sk_state == TCP_CLOSE) + xs_sock_mark_closed(xprt); dprintk("RPC: xs_error_report client %p, error=%d...\n", xprt, -err); trace_rpc_socket_error(xprt, sk->sk_socket, err); - if (test_bit(XPRT_CONNECTION_REUSE, &xprt->state)) - goto out; xprt_wake_pending_tasks(xprt, err); out: read_unlock_bh(&sk->sk_callback_lock); @@ -806,12 +822,11 @@ static void xs_reset_transport(struct sock_xprt *transport) { struct socket *sock = transport->sock; struct sock *sk = transport->inet; + struct rpc_xprt *xprt = &transport->xprt; if (sk == NULL) return; - transport->srcport = 0; - write_lock_bh(&sk->sk_callback_lock); transport->inet = NULL; transport->sock = NULL; @@ -820,8 +835,9 @@ static void xs_reset_transport(struct sock_xprt *transport) xs_restore_old_callbacks(transport, sk); write_unlock_bh(&sk->sk_callback_lock); + xs_sock_reset_connection_flags(xprt); - trace_rpc_socket_close(&transport->xprt, sock); + trace_rpc_socket_close(xprt, sock); sock_release(sock); } @@ -841,27 +857,12 @@ static void xs_close(struct rpc_xprt *xprt) dprintk("RPC: xs_close xprt %p\n", xprt); - cancel_delayed_work_sync(&transport->connect_worker); - xs_reset_transport(transport); xprt->reestablish_timeout = 0; - smp_mb__before_atomic(); - clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); - clear_bit(XPRT_CLOSE_WAIT, &xprt->state); - clear_bit(XPRT_CLOSING, &xprt->state); - smp_mb__after_atomic(); xprt_disconnect_done(xprt); } -static void xs_tcp_close(struct rpc_xprt *xprt) -{ - if (test_and_clear_bit(XPRT_CONNECTION_CLOSE, &xprt->state)) - xs_close(xprt); - else - xs_tcp_shutdown(xprt); -} - static void xs_xprt_free(struct rpc_xprt *xprt) { xs_free_peer_addresses(xprt); @@ -1032,7 +1033,6 @@ static void xs_udp_data_ready(struct sock *sk) */ static void xs_tcp_force_close(struct rpc_xprt *xprt) { - set_bit(XPRT_CONNECTION_CLOSE, &xprt->state); xprt_force_disconnect(xprt); } @@ -1425,54 +1425,6 @@ out: read_unlock_bh(&sk->sk_callback_lock); } -/* - * Do the equivalent of linger/linger2 handling for dealing with - * broken servers that don't close the socket in a timely - * fashion - */ -static void xs_tcp_schedule_linger_timeout(struct rpc_xprt *xprt, - unsigned long timeout) -{ - struct sock_xprt *transport; - - if (xprt_test_and_set_connecting(xprt)) - return; - set_bit(XPRT_CONNECTION_ABORT, &xprt->state); - transport = container_of(xprt, struct sock_xprt, xprt); - queue_delayed_work(rpciod_workqueue, &transport->connect_worker, - timeout); -} - -static void xs_tcp_cancel_linger_timeout(struct rpc_xprt *xprt) -{ - struct sock_xprt *transport; - - transport = container_of(xprt, struct sock_xprt, xprt); - - if (!test_bit(XPRT_CONNECTION_ABORT, &xprt->state) || - !cancel_delayed_work(&transport->connect_worker)) - return; - clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); - xprt_clear_connecting(xprt); -} - -static void xs_sock_reset_connection_flags(struct rpc_xprt *xprt) -{ - smp_mb__before_atomic(); - clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); - clear_bit(XPRT_CONNECTION_CLOSE, &xprt->state); - clear_bit(XPRT_CLOSE_WAIT, &xprt->state); - clear_bit(XPRT_CLOSING, &xprt->state); - smp_mb__after_atomic(); -} - -static void xs_sock_mark_closed(struct rpc_xprt *xprt) -{ - xs_sock_reset_connection_flags(xprt); - /* Mark transport as closed and wake up all pending tasks */ - xprt_disconnect_done(xprt); -} - /** * xs_tcp_state_change - callback to handle TCP socket state changes * @sk: socket whose state has changed @@ -1521,7 +1473,6 @@ static void xs_tcp_state_change(struct sock *sk) clear_bit(XPRT_CONNECTED, 
&xprt->state); clear_bit(XPRT_CLOSE_WAIT, &xprt->state); smp_mb__after_atomic(); - xs_tcp_schedule_linger_timeout(xprt, xs_tcp_fin_timeout); break; case TCP_CLOSE_WAIT: /* The server initiated a shutdown of the socket */ @@ -1538,13 +1489,11 @@ static void xs_tcp_state_change(struct sock *sk) break; case TCP_LAST_ACK: set_bit(XPRT_CLOSING, &xprt->state); - xs_tcp_schedule_linger_timeout(xprt, xs_tcp_fin_timeout); smp_mb__before_atomic(); clear_bit(XPRT_CONNECTED, &xprt->state); smp_mb__after_atomic(); break; case TCP_CLOSE: - xs_tcp_cancel_linger_timeout(xprt); xs_sock_mark_closed(xprt); } out: @@ -1667,6 +1616,40 @@ static unsigned short xs_get_random_port(void) } /** + * xs_set_reuseaddr_port - set the socket's port and address reuse options + * @sock: socket + * + * Note that this function has to be called on all sockets that share the + * same port, and it must be called before binding. + */ +static void xs_sock_set_reuseport(struct socket *sock) +{ + int opt = 1; + + kernel_setsockopt(sock, SOL_SOCKET, SO_REUSEPORT, + (char *)&opt, sizeof(opt)); +} + +static unsigned short xs_sock_getport(struct socket *sock) +{ + struct sockaddr_storage buf; + int buflen; + unsigned short port = 0; + + if (kernel_getsockname(sock, (struct sockaddr *)&buf, &buflen) < 0) + goto out; + switch (buf.ss_family) { + case AF_INET6: + port = ntohs(((struct sockaddr_in6 *)&buf)->sin6_port); + break; + case AF_INET: + port = ntohs(((struct sockaddr_in *)&buf)->sin_port); + } +out: + return port; +} + +/** * xs_set_port - reset the port number in the remote endpoint address * @xprt: generic transport * @port: new port number @@ -1680,6 +1663,12 @@ static void xs_set_port(struct rpc_xprt *xprt, unsigned short port) xs_update_peer_port(xprt); } +static void xs_set_srcport(struct sock_xprt *transport, struct socket *sock) +{ + if (transport->srcport == 0) + transport->srcport = xs_sock_getport(sock); +} + static unsigned short xs_get_srcport(struct sock_xprt *transport) { unsigned short port = transport->srcport; @@ -1833,7 +1822,8 @@ static void xs_dummy_setup_socket(struct work_struct *work) } static struct socket *xs_create_sock(struct rpc_xprt *xprt, - struct sock_xprt *transport, int family, int type, int protocol) + struct sock_xprt *transport, int family, int type, + int protocol, bool reuseport) { struct socket *sock; int err; @@ -1846,6 +1836,9 @@ static struct socket *xs_create_sock(struct rpc_xprt *xprt, } xs_reclassify_socket(family, sock); + if (reuseport) + xs_sock_set_reuseport(sock); + err = xs_bind(transport, sock); if (err) { sock_release(sock); @@ -1903,7 +1896,6 @@ static int xs_local_setup_socket(struct sock_xprt *transport) struct socket *sock; int status = -EIO; - clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); status = __sock_create(xprt->xprt_net, AF_LOCAL, SOCK_STREAM, 0, &sock, 1); if (status < 0) { @@ -2044,10 +2036,9 @@ static void xs_udp_setup_socket(struct work_struct *work) struct socket *sock = transport->sock; int status = -EIO; - /* Start by resetting any existing state */ - xs_reset_transport(transport); sock = xs_create_sock(xprt, transport, - xs_addr(xprt)->sa_family, SOCK_DGRAM, IPPROTO_UDP); + xs_addr(xprt)->sa_family, SOCK_DGRAM, + IPPROTO_UDP, false); if (IS_ERR(sock)) goto out; @@ -2061,61 +2052,11 @@ static void xs_udp_setup_socket(struct work_struct *work) trace_rpc_socket_connect(xprt, sock, 0); status = 0; out: + xprt_unlock_connect(xprt, transport); xprt_clear_connecting(xprt); xprt_wake_pending_tasks(xprt, status); } -/* - * We need to preserve the port number so the 
reply cache on the server can - * find our cached RPC replies when we get around to reconnecting. - */ -static void xs_abort_connection(struct sock_xprt *transport) -{ - int result; - struct sockaddr any; - - dprintk("RPC: disconnecting xprt %p to reuse port\n", transport); - - /* - * Disconnect the transport socket by doing a connect operation - * with AF_UNSPEC. This should return immediately... - */ - memset(&any, 0, sizeof(any)); - any.sa_family = AF_UNSPEC; - result = kernel_connect(transport->sock, &any, sizeof(any), 0); - trace_rpc_socket_reset_connection(&transport->xprt, - transport->sock, result); - if (!result) - xs_sock_reset_connection_flags(&transport->xprt); - dprintk("RPC: AF_UNSPEC connect return code %d\n", result); -} - -static void xs_tcp_reuse_connection(struct sock_xprt *transport) -{ - unsigned int state = transport->inet->sk_state; - - if (state == TCP_CLOSE && transport->sock->state == SS_UNCONNECTED) { - /* we don't need to abort the connection if the socket - * hasn't undergone a shutdown - */ - if (transport->inet->sk_shutdown == 0) - return; - dprintk("RPC: %s: TCP_CLOSEd and sk_shutdown set to %d\n", - __func__, transport->inet->sk_shutdown); - } - if ((1 << state) & (TCPF_ESTABLISHED|TCPF_SYN_SENT)) { - /* we don't need to abort the connection if the socket - * hasn't undergone a shutdown - */ - if (transport->inet->sk_shutdown == 0) - return; - dprintk("RPC: %s: ESTABLISHED/SYN_SENT " - "sk_shutdown set to %d\n", - __func__, transport->inet->sk_shutdown); - } - xs_abort_connection(transport); -} - static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) { struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); @@ -2149,9 +2090,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) sk->sk_allocation = GFP_ATOMIC; /* socket options */ - sk->sk_userlocks |= SOCK_BINDPORT_LOCK; sock_reset_flag(sk, SOCK_LINGER); - tcp_sk(sk)->linger2 = 0; tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF; xprt_clear_connected(xprt); @@ -2174,6 +2113,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) ret = kernel_connect(sock, xs_addr(xprt), xprt->addrlen, O_NONBLOCK); switch (ret) { case 0: + xs_set_srcport(transport, sock); case -EINPROGRESS: /* SYN_SENT! 
*/ if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO) @@ -2200,25 +2140,13 @@ static void xs_tcp_setup_socket(struct work_struct *work) int status = -EIO; if (!sock) { - clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); sock = xs_create_sock(xprt, transport, - xs_addr(xprt)->sa_family, SOCK_STREAM, IPPROTO_TCP); + xs_addr(xprt)->sa_family, SOCK_STREAM, + IPPROTO_TCP, true); if (IS_ERR(sock)) { status = PTR_ERR(sock); goto out; } - } else { - int abort_and_exit; - - abort_and_exit = test_and_clear_bit(XPRT_CONNECTION_ABORT, - &xprt->state); - /* "close" the socket, preserving the local port */ - set_bit(XPRT_CONNECTION_REUSE, &xprt->state); - xs_tcp_reuse_connection(transport); - clear_bit(XPRT_CONNECTION_REUSE, &xprt->state); - - if (abort_and_exit) - goto out_eagain; } dprintk("RPC: worker connecting xprt %p via %s to " @@ -2245,6 +2173,7 @@ static void xs_tcp_setup_socket(struct work_struct *work) case 0: case -EINPROGRESS: case -EALREADY: + xprt_unlock_connect(xprt, transport); xprt_clear_connecting(xprt); return; case -EINVAL: @@ -2254,13 +2183,15 @@ static void xs_tcp_setup_socket(struct work_struct *work) case -ECONNREFUSED: case -ECONNRESET: case -ENETUNREACH: + case -EADDRINUSE: case -ENOBUFS: /* retry with existing socket, after a delay */ + xs_tcp_force_close(xprt); goto out; } -out_eagain: status = -EAGAIN; out: + xprt_unlock_connect(xprt, transport); xprt_clear_connecting(xprt); xprt_wake_pending_tasks(xprt, status); } @@ -2283,6 +2214,11 @@ static void xs_connect(struct rpc_xprt *xprt, struct rpc_task *task) { struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); + WARN_ON_ONCE(!xprt_lock_connect(xprt, task, transport)); + + /* Start by resetting any existing state */ + xs_reset_transport(transport); + if (transport->sock != NULL && !RPC_IS_SOFTCONN(task)) { dprintk("RPC: xs_connect delayed xprt %p for %lu " "seconds\n", @@ -2559,7 +2495,7 @@ static struct rpc_xprt_ops xs_tcp_ops = { .buf_free = rpc_free, .send_request = xs_tcp_send_request, .set_retrans_timeout = xprt_set_retrans_timeout_def, - .close = xs_tcp_close, + .close = xs_tcp_shutdown, .destroy = xs_destroy, .print_stats = xs_tcp_print_stats, }; diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index d162b21b14bd..8c1e558db118 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -11,6 +11,8 @@ #include <linux/kernel.h> #include <linux/types.h> #include <linux/init.h> +#include <linux/mutex.h> +#include <linux/notifier.h> #include <linux/netdevice.h> #include <net/switchdev.h> @@ -50,3 +52,176 @@ int netdev_switch_port_stp_update(struct net_device *dev, u8 state) return ops->ndo_switch_port_stp_update(dev, state); } EXPORT_SYMBOL(netdev_switch_port_stp_update); + +static DEFINE_MUTEX(netdev_switch_mutex); +static RAW_NOTIFIER_HEAD(netdev_switch_notif_chain); + +/** + * register_netdev_switch_notifier - Register nofifier + * @nb: notifier_block + * + * Register switch device notifier. This should be used by code + * which needs to monitor events happening in particular device. + * Return values are same as for atomic_notifier_chain_register(). 
+ */ +int register_netdev_switch_notifier(struct notifier_block *nb) +{ + int err; + + mutex_lock(&netdev_switch_mutex); + err = raw_notifier_chain_register(&netdev_switch_notif_chain, nb); + mutex_unlock(&netdev_switch_mutex); + return err; +} +EXPORT_SYMBOL(register_netdev_switch_notifier); + +/** + * unregister_netdev_switch_notifier - Unregister nofifier + * @nb: notifier_block + * + * Unregister switch device notifier. + * Return values are same as for atomic_notifier_chain_unregister(). + */ +int unregister_netdev_switch_notifier(struct notifier_block *nb) +{ + int err; + + mutex_lock(&netdev_switch_mutex); + err = raw_notifier_chain_unregister(&netdev_switch_notif_chain, nb); + mutex_unlock(&netdev_switch_mutex); + return err; +} +EXPORT_SYMBOL(unregister_netdev_switch_notifier); + +/** + * call_netdev_switch_notifiers - Call nofifiers + * @val: value passed unmodified to notifier function + * @dev: port device + * @info: notifier information data + * + * Call all network notifier blocks. This should be called by driver + * when it needs to propagate hardware event. + * Return values are same as for atomic_notifier_call_chain(). + */ +int call_netdev_switch_notifiers(unsigned long val, struct net_device *dev, + struct netdev_switch_notifier_info *info) +{ + int err; + + info->dev = dev; + mutex_lock(&netdev_switch_mutex); + err = raw_notifier_call_chain(&netdev_switch_notif_chain, val, info); + mutex_unlock(&netdev_switch_mutex); + return err; +} +EXPORT_SYMBOL(call_netdev_switch_notifiers); + +/** + * netdev_switch_port_bridge_setlink - Notify switch device port of bridge + * port attributes + * + * @dev: port device + * @nlh: netlink msg with bridge port attributes + * @flags: bridge setlink flags + * + * Notify switch device port of bridge port attributes + */ +int netdev_switch_port_bridge_setlink(struct net_device *dev, + struct nlmsghdr *nlh, u16 flags) +{ + const struct net_device_ops *ops = dev->netdev_ops; + + if (!(dev->features & NETIF_F_HW_SWITCH_OFFLOAD)) + return 0; + + if (!ops->ndo_bridge_setlink) + return -EOPNOTSUPP; + + return ops->ndo_bridge_setlink(dev, nlh, flags); +} +EXPORT_SYMBOL(netdev_switch_port_bridge_setlink); + +/** + * netdev_switch_port_bridge_dellink - Notify switch device port of bridge + * port attribute delete + * + * @dev: port device + * @nlh: netlink msg with bridge port attributes + * @flags: bridge setlink flags + * + * Notify switch device port of bridge port attribute delete + */ +int netdev_switch_port_bridge_dellink(struct net_device *dev, + struct nlmsghdr *nlh, u16 flags) +{ + const struct net_device_ops *ops = dev->netdev_ops; + + if (!(dev->features & NETIF_F_HW_SWITCH_OFFLOAD)) + return 0; + + if (!ops->ndo_bridge_dellink) + return -EOPNOTSUPP; + + return ops->ndo_bridge_dellink(dev, nlh, flags); +} +EXPORT_SYMBOL(netdev_switch_port_bridge_dellink); + +/** + * ndo_dflt_netdev_switch_port_bridge_setlink - default ndo bridge setlink + * op for master devices + * + * @dev: port device + * @nlh: netlink msg with bridge port attributes + * @flags: bridge setlink flags + * + * Notify master device slaves of bridge port attributes + */ +int ndo_dflt_netdev_switch_port_bridge_setlink(struct net_device *dev, + struct nlmsghdr *nlh, u16 flags) +{ + struct net_device *lower_dev; + struct list_head *iter; + int ret = 0, err = 0; + + if (!(dev->features & NETIF_F_HW_SWITCH_OFFLOAD)) + return ret; + + netdev_for_each_lower_dev(dev, lower_dev, iter) { + err = netdev_switch_port_bridge_setlink(lower_dev, nlh, flags); + if (err && err != 
-EOPNOTSUPP) + ret = err; + } + + return ret; +} +EXPORT_SYMBOL(ndo_dflt_netdev_switch_port_bridge_setlink); + +/** + * ndo_dflt_netdev_switch_port_bridge_dellink - default ndo bridge dellink + * op for master devices + * + * @dev: port device + * @nlh: netlink msg with bridge port attributes + * @flags: bridge dellink flags + * + * Notify master device slaves of bridge port attribute deletes + */ +int ndo_dflt_netdev_switch_port_bridge_dellink(struct net_device *dev, + struct nlmsghdr *nlh, u16 flags) +{ + struct net_device *lower_dev; + struct list_head *iter; + int ret = 0, err = 0; + + if (!(dev->features & NETIF_F_HW_SWITCH_OFFLOAD)) + return ret; + + netdev_for_each_lower_dev(dev, lower_dev, iter) { + err = netdev_switch_port_bridge_dellink(lower_dev, nlh, flags); + if (err && err != -EOPNOTSUPP) + ret = err; + } + + return ret; +} +EXPORT_SYMBOL(ndo_dflt_netdev_switch_port_bridge_dellink); diff --git a/net/tipc/Kconfig b/net/tipc/Kconfig index c890848f9d56..91c8a8e031db 100644 --- a/net/tipc/Kconfig +++ b/net/tipc/Kconfig @@ -20,18 +20,6 @@ menuconfig TIPC If in doubt, say N. -config TIPC_PORTS - int "Maximum number of ports in a node" - depends on TIPC - range 127 65535 - default "8191" - help - Specifies how many ports can be supported by a node. - Can range from 127 to 65535 ports; default is 8191. - - Setting this to a smaller value saves some memory, - setting it to higher allows for more ports. - config TIPC_MEDIA_IB bool "InfiniBand media type support" depends on TIPC && INFINIBAND_IPOIB diff --git a/net/tipc/Makefile b/net/tipc/Makefile index 333e4592772c..599b1a540d2b 100644 --- a/net/tipc/Makefile +++ b/net/tipc/Makefile @@ -4,11 +4,11 @@ obj-$(CONFIG_TIPC) := tipc.o -tipc-y += addr.o bcast.o bearer.o config.o \ +tipc-y += addr.o bcast.o bearer.o \ core.o link.o discover.o msg.o \ name_distr.o subscr.o name_table.o net.o \ - netlink.o node.o socket.o log.o eth_media.o \ - server.o + netlink.o netlink_compat.o node.o socket.o eth_media.o \ + server.o socket.o tipc-$(CONFIG_TIPC_MEDIA_IB) += ib_media.o tipc-$(CONFIG_SYSCTL) += sysctl.o diff --git a/net/tipc/addr.c b/net/tipc/addr.c index 357b74b26f9e..48fd3b5a73fb 100644 --- a/net/tipc/addr.c +++ b/net/tipc/addr.c @@ -34,8 +34,51 @@ * POSSIBILITY OF SUCH DAMAGE. */ -#include "core.h" +#include <linux/kernel.h> #include "addr.h" +#include "core.h" + +/** + * in_own_cluster - test for cluster inclusion; <0.0.0> always matches + */ +int in_own_cluster(struct net *net, u32 addr) +{ + return in_own_cluster_exact(net, addr) || !addr; +} + +int in_own_cluster_exact(struct net *net, u32 addr) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + + return !((addr ^ tn->own_addr) >> 12); +} + +/** + * in_own_node - test for node inclusion; <0.0.0> always matches + */ +int in_own_node(struct net *net, u32 addr) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + + return (addr == tn->own_addr) || !addr; +} + +/** + * addr_domain - convert 2-bit scope value to equivalent message lookup domain + * + * Needed when address of a named message must be looked up a second time + * after a network hop. 
+ */ +u32 addr_domain(struct net *net, u32 sc) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + + if (likely(sc == TIPC_NODE_SCOPE)) + return tn->own_addr; + if (sc == TIPC_CLUSTER_SCOPE) + return tipc_cluster_mask(tn->own_addr); + return tipc_zone_mask(tn->own_addr); +} /** * tipc_addr_domain_valid - validates a network domain address diff --git a/net/tipc/addr.h b/net/tipc/addr.h index a74acf9ee804..c700c2d28e09 100644 --- a/net/tipc/addr.h +++ b/net/tipc/addr.h @@ -37,7 +37,10 @@ #ifndef _TIPC_ADDR_H #define _TIPC_ADDR_H -#include "core.h" +#include <linux/types.h> +#include <linux/tipc.h> +#include <net/net_namespace.h> +#include <net/netns/generic.h> #define TIPC_ZONE_MASK 0xff000000u #define TIPC_CLUSTER_MASK 0xfffff000u @@ -52,42 +55,10 @@ static inline u32 tipc_cluster_mask(u32 addr) return addr & TIPC_CLUSTER_MASK; } -static inline int in_own_cluster_exact(u32 addr) -{ - return !((addr ^ tipc_own_addr) >> 12); -} - -/** - * in_own_node - test for node inclusion; <0.0.0> always matches - */ -static inline int in_own_node(u32 addr) -{ - return (addr == tipc_own_addr) || !addr; -} - -/** - * in_own_cluster - test for cluster inclusion; <0.0.0> always matches - */ -static inline int in_own_cluster(u32 addr) -{ - return in_own_cluster_exact(addr) || !addr; -} - -/** - * addr_domain - convert 2-bit scope value to equivalent message lookup domain - * - * Needed when address of a named message must be looked up a second time - * after a network hop. - */ -static inline u32 addr_domain(u32 sc) -{ - if (likely(sc == TIPC_NODE_SCOPE)) - return tipc_own_addr; - if (sc == TIPC_CLUSTER_SCOPE) - return tipc_cluster_mask(tipc_own_addr); - return tipc_zone_mask(tipc_own_addr); -} - +int in_own_cluster(struct net *net, u32 addr); +int in_own_cluster_exact(struct net *net, u32 addr); +int in_own_node(struct net *net, u32 addr); +u32 addr_domain(struct net *net, u32 sc); int tipc_addr_domain_valid(u32); int tipc_addr_node_valid(u32 addr); int tipc_in_scope(u32 domain, u32 addr); diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 96ceefeb9daf..3e41704832de 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -1,7 +1,7 @@ /* * net/tipc/bcast.c: TIPC broadcast code * - * Copyright (c) 2004-2006, 2014, Ericsson AB + * Copyright (c) 2004-2006, 2014-2015, Ericsson AB * Copyright (c) 2004, Intel Corporation. * Copyright (c) 2005, 2010-2011, Wind River Systems * All rights reserved. @@ -35,77 +35,14 @@ * POSSIBILITY OF SUCH DAMAGE. */ -#include "core.h" -#include "link.h" #include "socket.h" #include "msg.h" #include "bcast.h" #include "name_distr.h" +#include "core.h" #define MAX_PKT_DEFAULT_MCAST 1500 /* bcast link max packet size (fixed) */ #define BCLINK_WIN_DEFAULT 20 /* bcast link window size (default) */ -#define BCBEARER MAX_BEARERS - -/** - * struct tipc_bcbearer_pair - a pair of bearers used by broadcast link - * @primary: pointer to primary bearer - * @secondary: pointer to secondary bearer - * - * Bearers must have same priority and same set of reachable destinations - * to be paired. 
- */ - -struct tipc_bcbearer_pair { - struct tipc_bearer *primary; - struct tipc_bearer *secondary; -}; - -/** - * struct tipc_bcbearer - bearer used by broadcast link - * @bearer: (non-standard) broadcast bearer structure - * @media: (non-standard) broadcast media structure - * @bpairs: array of bearer pairs - * @bpairs_temp: temporary array of bearer pairs used by tipc_bcbearer_sort() - * @remains: temporary node map used by tipc_bcbearer_send() - * @remains_new: temporary node map used tipc_bcbearer_send() - * - * Note: The fields labelled "temporary" are incorporated into the bearer - * to avoid consuming potentially limited stack space through the use of - * large local variables within multicast routines. Concurrent access is - * prevented through use of the spinlock "bclink_lock". - */ -struct tipc_bcbearer { - struct tipc_bearer bearer; - struct tipc_media media; - struct tipc_bcbearer_pair bpairs[MAX_BEARERS]; - struct tipc_bcbearer_pair bpairs_temp[TIPC_MAX_LINK_PRI + 1]; - struct tipc_node_map remains; - struct tipc_node_map remains_new; -}; - -/** - * struct tipc_bclink - link used for broadcast messages - * @lock: spinlock governing access to structure - * @link: (non-standard) broadcast link structure - * @node: (non-standard) node structure representing b'cast link's peer node - * @flags: represent bclink states - * @bcast_nodes: map of broadcast-capable nodes - * @retransmit_to: node that most recently requested a retransmit - * - * Handles sequence numbering, fragmentation, bundling, etc. - */ -struct tipc_bclink { - spinlock_t lock; - struct tipc_link link; - struct tipc_node node; - unsigned int flags; - struct tipc_node_map bcast_nodes; - struct tipc_node *retransmit_to; -}; - -static struct tipc_bcbearer *bcbearer; -static struct tipc_bclink *bclink; -static struct tipc_link *bcl; const char tipc_bclink_name[] = "broadcast-link"; @@ -115,38 +52,50 @@ static void tipc_nmap_diff(struct tipc_node_map *nm_a, static void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 node); static void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node); -static void tipc_bclink_lock(void) +static void tipc_bclink_lock(struct net *net) { - spin_lock_bh(&bclink->lock); + struct tipc_net *tn = net_generic(net, tipc_net_id); + + spin_lock_bh(&tn->bclink->lock); } -static void tipc_bclink_unlock(void) +static void tipc_bclink_unlock(struct net *net) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_node *node = NULL; - if (likely(!bclink->flags)) { - spin_unlock_bh(&bclink->lock); + if (likely(!tn->bclink->flags)) { + spin_unlock_bh(&tn->bclink->lock); return; } - if (bclink->flags & TIPC_BCLINK_RESET) { - bclink->flags &= ~TIPC_BCLINK_RESET; - node = tipc_bclink_retransmit_to(); + if (tn->bclink->flags & TIPC_BCLINK_RESET) { + tn->bclink->flags &= ~TIPC_BCLINK_RESET; + node = tipc_bclink_retransmit_to(net); } - spin_unlock_bh(&bclink->lock); + spin_unlock_bh(&tn->bclink->lock); if (node) tipc_link_reset_all(node); } +void tipc_bclink_input(struct net *net) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + + tipc_sk_mcast_rcv(net, &tn->bclink->arrvq, &tn->bclink->inputq); +} + uint tipc_bclink_get_mtu(void) { return MAX_PKT_DEFAULT_MCAST; } -void tipc_bclink_set_flags(unsigned int flags) +void tipc_bclink_set_flags(struct net *net, unsigned int flags) { - bclink->flags |= flags; + struct tipc_net *tn = net_generic(net, tipc_net_id); + + tn->bclink->flags |= flags; } static u32 bcbuf_acks(struct sk_buff *buf) @@ -164,31 +113,40 @@ static void 
bcbuf_decr_acks(struct sk_buff *buf) bcbuf_set_acks(buf, bcbuf_acks(buf) - 1); } -void tipc_bclink_add_node(u32 addr) +void tipc_bclink_add_node(struct net *net, u32 addr) { - tipc_bclink_lock(); - tipc_nmap_add(&bclink->bcast_nodes, addr); - tipc_bclink_unlock(); + struct tipc_net *tn = net_generic(net, tipc_net_id); + + tipc_bclink_lock(net); + tipc_nmap_add(&tn->bclink->bcast_nodes, addr); + tipc_bclink_unlock(net); } -void tipc_bclink_remove_node(u32 addr) +void tipc_bclink_remove_node(struct net *net, u32 addr) { - tipc_bclink_lock(); - tipc_nmap_remove(&bclink->bcast_nodes, addr); - tipc_bclink_unlock(); + struct tipc_net *tn = net_generic(net, tipc_net_id); + + tipc_bclink_lock(net); + tipc_nmap_remove(&tn->bclink->bcast_nodes, addr); + tipc_bclink_unlock(net); } -static void bclink_set_last_sent(void) +static void bclink_set_last_sent(struct net *net) { + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_link *bcl = tn->bcl; + if (bcl->next_out) bcl->fsm_msg_cnt = mod(buf_seqno(bcl->next_out) - 1); else bcl->fsm_msg_cnt = mod(bcl->next_out_no - 1); } -u32 tipc_bclink_get_last_sent(void) +u32 tipc_bclink_get_last_sent(struct net *net) { - return bcl->fsm_msg_cnt; + struct tipc_net *tn = net_generic(net, tipc_net_id); + + return tn->bcl->fsm_msg_cnt; } static void bclink_update_last_sent(struct tipc_node *node, u32 seqno) @@ -203,9 +161,11 @@ static void bclink_update_last_sent(struct tipc_node *node, u32 seqno) * * Called with bclink_lock locked */ -struct tipc_node *tipc_bclink_retransmit_to(void) +struct tipc_node *tipc_bclink_retransmit_to(struct net *net) { - return bclink->retransmit_to; + struct tipc_net *tn = net_generic(net, tipc_net_id); + + return tn->bclink->retransmit_to; } /** @@ -215,15 +175,17 @@ struct tipc_node *tipc_bclink_retransmit_to(void) * * Called with bclink_lock locked */ -static void bclink_retransmit_pkt(u32 after, u32 to) +static void bclink_retransmit_pkt(struct tipc_net *tn, u32 after, u32 to) { struct sk_buff *skb; + struct tipc_link *bcl = tn->bcl; skb_queue_walk(&bcl->outqueue, skb) { - if (more(buf_seqno(skb), after)) + if (more(buf_seqno(skb), after)) { + tipc_link_retransmit(bcl, skb, mod(to - after)); break; + } } - tipc_link_retransmit(bcl, skb, mod(to - after)); } /** @@ -231,13 +193,11 @@ static void bclink_retransmit_pkt(u32 after, u32 to) * * Called with no locks taken */ -void tipc_bclink_wakeup_users(void) +void tipc_bclink_wakeup_users(struct net *net) { - struct sk_buff *skb; - - while ((skb = skb_dequeue(&bclink->link.waiting_sks))) - tipc_sk_rcv(skb); + struct tipc_net *tn = net_generic(net, tipc_net_id); + tipc_sk_rcv(net, &tn->bclink->link.wakeupq); } /** @@ -252,10 +212,12 @@ void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked) struct sk_buff *skb, *tmp; struct sk_buff *next; unsigned int released = 0; + struct net *net = n_ptr->net; + struct tipc_net *tn = net_generic(net, tipc_net_id); - tipc_bclink_lock(); + tipc_bclink_lock(net); /* Bail out if tx queue is empty (no clean up is required) */ - skb = skb_peek(&bcl->outqueue); + skb = skb_peek(&tn->bcl->outqueue); if (!skb) goto exit; @@ -266,43 +228,43 @@ void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked) * acknowledge sent messages only (if other nodes still exist) * or both sent and unsent messages (otherwise) */ - if (bclink->bcast_nodes.count) - acked = bcl->fsm_msg_cnt; + if (tn->bclink->bcast_nodes.count) + acked = tn->bcl->fsm_msg_cnt; else - acked = bcl->next_out_no; + acked = tn->bcl->next_out_no; } else { /* * Bail out if 
specified sequence number does not correspond * to a message that has been sent and not yet acknowledged */ if (less(acked, buf_seqno(skb)) || - less(bcl->fsm_msg_cnt, acked) || + less(tn->bcl->fsm_msg_cnt, acked) || less_eq(acked, n_ptr->bclink.acked)) goto exit; } /* Skip over packets that node has previously acknowledged */ - skb_queue_walk(&bcl->outqueue, skb) { + skb_queue_walk(&tn->bcl->outqueue, skb) { if (more(buf_seqno(skb), n_ptr->bclink.acked)) break; } /* Update packets that node is now acknowledging */ - skb_queue_walk_from_safe(&bcl->outqueue, skb, tmp) { + skb_queue_walk_from_safe(&tn->bcl->outqueue, skb, tmp) { if (more(buf_seqno(skb), acked)) break; - next = tipc_skb_queue_next(&bcl->outqueue, skb); - if (skb != bcl->next_out) { + next = tipc_skb_queue_next(&tn->bcl->outqueue, skb); + if (skb != tn->bcl->next_out) { bcbuf_decr_acks(skb); } else { bcbuf_set_acks(skb, 0); - bcl->next_out = next; - bclink_set_last_sent(); + tn->bcl->next_out = next; + bclink_set_last_sent(net); } if (bcbuf_acks(skb) == 0) { - __skb_unlink(skb, &bcl->outqueue); + __skb_unlink(skb, &tn->bcl->outqueue); kfree_skb(skb); released = 1; } @@ -310,15 +272,14 @@ void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked) n_ptr->bclink.acked = acked; /* Try resolving broadcast link congestion, if necessary */ - if (unlikely(bcl->next_out)) { - tipc_link_push_packets(bcl); - bclink_set_last_sent(); + if (unlikely(tn->bcl->next_out)) { + tipc_link_push_packets(tn->bcl); + bclink_set_last_sent(net); } - if (unlikely(released && !skb_queue_empty(&bcl->waiting_sks))) + if (unlikely(released && !skb_queue_empty(&tn->bcl->wakeupq))) n_ptr->action_flags |= TIPC_WAKEUP_BCAST_USERS; - exit: - tipc_bclink_unlock(); + tipc_bclink_unlock(net); } /** @@ -326,9 +287,12 @@ exit: * * RCU and node lock set */ -void tipc_bclink_update_link_state(struct tipc_node *n_ptr, u32 last_sent) +void tipc_bclink_update_link_state(struct tipc_node *n_ptr, + u32 last_sent) { struct sk_buff *buf; + struct net *net = n_ptr->net; + struct tipc_net *tn = net_generic(net, tipc_net_id); /* Ignore "stale" link state info */ if (less_eq(last_sent, n_ptr->bclink.last_in)) @@ -358,18 +322,18 @@ void tipc_bclink_update_link_state(struct tipc_node *n_ptr, u32 last_sent) struct sk_buff *skb = skb_peek(&n_ptr->bclink.deferred_queue); u32 to = skb ? buf_seqno(skb) - 1 : n_ptr->bclink.last_sent; - tipc_msg_init(msg, BCAST_PROTOCOL, STATE_MSG, + tipc_msg_init(tn->own_addr, msg, BCAST_PROTOCOL, STATE_MSG, INT_H_SIZE, n_ptr->addr); msg_set_non_seq(msg, 1); - msg_set_mc_netid(msg, tipc_net_id); + msg_set_mc_netid(msg, tn->net_id); msg_set_bcast_ack(msg, n_ptr->bclink.last_in); msg_set_bcgap_after(msg, n_ptr->bclink.last_in); msg_set_bcgap_to(msg, to); - tipc_bclink_lock(); - tipc_bearer_send(MAX_BEARERS, buf, NULL); - bcl->stats.sent_nacks++; - tipc_bclink_unlock(); + tipc_bclink_lock(net); + tipc_bearer_send(net, MAX_BEARERS, buf, NULL); + tn->bcl->stats.sent_nacks++; + tipc_bclink_unlock(net); kfree_skb(buf); n_ptr->bclink.oos_state++; @@ -382,9 +346,9 @@ void tipc_bclink_update_link_state(struct tipc_node *n_ptr, u32 last_sent) * Delay any upcoming NACK by this node if another node has already * requested the first message this node is going to ask for. 
*/ -static void bclink_peek_nack(struct tipc_msg *msg) +static void bclink_peek_nack(struct net *net, struct tipc_msg *msg) { - struct tipc_node *n_ptr = tipc_node_find(msg_destnode(msg)); + struct tipc_node *n_ptr = tipc_node_find(net, msg_destnode(msg)); if (unlikely(!n_ptr)) return; @@ -399,17 +363,23 @@ static void bclink_peek_nack(struct tipc_msg *msg) tipc_node_unlock(n_ptr); } -/* tipc_bclink_xmit - broadcast buffer chain to all nodes in cluster +/* tipc_bclink_xmit - deliver buffer chain to all nodes in cluster * and to identified node local sockets + * @net: the applicable net namespace * @list: chain of buffers containing message * Consumes the buffer chain, except when returning -ELINKCONG * Returns 0 if success, otherwise errno: -ELINKCONG,-EHOSTUNREACH,-EMSGSIZE */ -int tipc_bclink_xmit(struct sk_buff_head *list) +int tipc_bclink_xmit(struct net *net, struct sk_buff_head *list) { + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_link *bcl = tn->bcl; + struct tipc_bclink *bclink = tn->bclink; int rc = 0; int bc = 0; struct sk_buff *skb; + struct sk_buff_head arrvq; + struct sk_buff_head inputq; /* Prepare clone of message for local node */ skb = tipc_msg_reassemble(list); @@ -418,32 +388,35 @@ int tipc_bclink_xmit(struct sk_buff_head *list) return -EHOSTUNREACH; } - /* Broadcast to all other nodes */ + /* Broadcast to all nodes */ if (likely(bclink)) { - tipc_bclink_lock(); + tipc_bclink_lock(net); if (likely(bclink->bcast_nodes.count)) { - rc = __tipc_link_xmit(bcl, list); + rc = __tipc_link_xmit(net, bcl, list); if (likely(!rc)) { u32 len = skb_queue_len(&bcl->outqueue); - bclink_set_last_sent(); + bclink_set_last_sent(net); bcl->stats.queue_sz_counts++; bcl->stats.accu_queue_sz += len; } bc = 1; } - tipc_bclink_unlock(); + tipc_bclink_unlock(net); } if (unlikely(!bc)) __skb_queue_purge(list); - /* Deliver message clone */ - if (likely(!rc)) - tipc_sk_mcast_rcv(skb); - else + if (unlikely(rc)) { kfree_skb(skb); - + return rc; + } + /* Deliver message clone */ + __skb_queue_head_init(&arrvq); + skb_queue_head_init(&inputq); + __skb_queue_tail(&arrvq, skb); + tipc_sk_mcast_rcv(net, &arrvq, &inputq); return rc; } @@ -454,19 +427,21 @@ int tipc_bclink_xmit(struct sk_buff_head *list) */ static void bclink_accept_pkt(struct tipc_node *node, u32 seqno) { + struct tipc_net *tn = net_generic(node->net, tipc_net_id); + bclink_update_last_sent(node, seqno); node->bclink.last_in = seqno; node->bclink.oos_state = 0; - bcl->stats.recv_info++; + tn->bcl->stats.recv_info++; /* * Unicast an ACK periodically, ensuring that * all nodes in the cluster don't ACK at the same time */ - if (((seqno - tipc_own_addr) % TIPC_MIN_LINK_WIN) == 0) { + if (((seqno - tn->own_addr) % TIPC_MIN_LINK_WIN) == 0) { tipc_link_proto_xmit(node->active_links[node->addr & 1], STATE_MSG, 0, 0, 0, 0, 0); - bcl->stats.sent_acks++; + tn->bcl->stats.sent_acks++; } } @@ -475,19 +450,24 @@ static void bclink_accept_pkt(struct tipc_node *node, u32 seqno) * * RCU is locked, no other locks set */ -void tipc_bclink_rcv(struct sk_buff *buf) +void tipc_bclink_rcv(struct net *net, struct sk_buff *buf) { + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_link *bcl = tn->bcl; struct tipc_msg *msg = buf_msg(buf); struct tipc_node *node; u32 next_in; u32 seqno; int deferred = 0; + int pos = 0; + struct sk_buff *iskb; + struct sk_buff_head *arrvq, *inputq; /* Screen out unwanted broadcast messages */ - if (msg_mc_netid(msg) != tipc_net_id) + if (msg_mc_netid(msg) != tn->net_id) goto exit; - node = 
tipc_node_find(msg_prevnode(msg)); + node = tipc_node_find(net, msg_prevnode(msg)); if (unlikely(!node)) goto exit; @@ -499,18 +479,18 @@ void tipc_bclink_rcv(struct sk_buff *buf) if (unlikely(msg_user(msg) == BCAST_PROTOCOL)) { if (msg_type(msg) != STATE_MSG) goto unlock; - if (msg_destnode(msg) == tipc_own_addr) { + if (msg_destnode(msg) == tn->own_addr) { tipc_bclink_acknowledge(node, msg_bcast_ack(msg)); tipc_node_unlock(node); - tipc_bclink_lock(); + tipc_bclink_lock(net); bcl->stats.recv_nacks++; - bclink->retransmit_to = node; - bclink_retransmit_pkt(msg_bcgap_after(msg), + tn->bclink->retransmit_to = node; + bclink_retransmit_pkt(tn, msg_bcgap_after(msg), msg_bcgap_to(msg)); - tipc_bclink_unlock(); + tipc_bclink_unlock(net); } else { tipc_node_unlock(node); - bclink_peek_nack(msg); + bclink_peek_nack(net, msg); } goto exit; } @@ -518,52 +498,54 @@ void tipc_bclink_rcv(struct sk_buff *buf) /* Handle in-sequence broadcast message */ seqno = msg_seqno(msg); next_in = mod(node->bclink.last_in + 1); + arrvq = &tn->bclink->arrvq; + inputq = &tn->bclink->inputq; if (likely(seqno == next_in)) { receive: /* Deliver message to destination */ if (likely(msg_isdata(msg))) { - tipc_bclink_lock(); + tipc_bclink_lock(net); bclink_accept_pkt(node, seqno); - tipc_bclink_unlock(); + spin_lock_bh(&inputq->lock); + __skb_queue_tail(arrvq, buf); + spin_unlock_bh(&inputq->lock); + node->action_flags |= TIPC_BCAST_MSG_EVT; + tipc_bclink_unlock(net); tipc_node_unlock(node); - if (likely(msg_mcast(msg))) - tipc_sk_mcast_rcv(buf); - else - kfree_skb(buf); } else if (msg_user(msg) == MSG_BUNDLER) { - tipc_bclink_lock(); + tipc_bclink_lock(net); bclink_accept_pkt(node, seqno); bcl->stats.recv_bundles++; bcl->stats.recv_bundled += msg_msgcnt(msg); - tipc_bclink_unlock(); + pos = 0; + while (tipc_msg_extract(buf, &iskb, &pos)) { + spin_lock_bh(&inputq->lock); + __skb_queue_tail(arrvq, iskb); + spin_unlock_bh(&inputq->lock); + } + node->action_flags |= TIPC_BCAST_MSG_EVT; + tipc_bclink_unlock(net); tipc_node_unlock(node); - tipc_link_bundle_rcv(buf); } else if (msg_user(msg) == MSG_FRAGMENTER) { tipc_buf_append(&node->bclink.reasm_buf, &buf); if (unlikely(!buf && !node->bclink.reasm_buf)) goto unlock; - tipc_bclink_lock(); + tipc_bclink_lock(net); bclink_accept_pkt(node, seqno); bcl->stats.recv_fragments++; if (buf) { bcl->stats.recv_fragmented++; msg = buf_msg(buf); - tipc_bclink_unlock(); + tipc_bclink_unlock(net); goto receive; } - tipc_bclink_unlock(); + tipc_bclink_unlock(net); tipc_node_unlock(node); - } else if (msg_user(msg) == NAME_DISTRIBUTOR) { - tipc_bclink_lock(); - bclink_accept_pkt(node, seqno); - tipc_bclink_unlock(); - tipc_node_unlock(node); - tipc_named_rcv(buf); } else { - tipc_bclink_lock(); + tipc_bclink_lock(net); bclink_accept_pkt(node, seqno); - tipc_bclink_unlock(); + tipc_bclink_unlock(net); tipc_node_unlock(node); kfree_skb(buf); } @@ -601,14 +583,14 @@ receive: buf = NULL; } - tipc_bclink_lock(); + tipc_bclink_lock(net); if (deferred) bcl->stats.deferred_recv++; else bcl->stats.duplicates++; - tipc_bclink_unlock(); + tipc_bclink_unlock(net); unlock: tipc_node_unlock(node); @@ -619,7 +601,7 @@ exit: u32 tipc_bclink_acks_missing(struct tipc_node *n_ptr) { return (n_ptr->bclink.recv_permitted && - (tipc_bclink_get_last_sent() != n_ptr->bclink.acked)); + (tipc_bclink_get_last_sent(n_ptr->net) != n_ptr->bclink.acked)); } @@ -632,11 +614,15 @@ u32 tipc_bclink_acks_missing(struct tipc_node *n_ptr) * Returns 0 (packet sent successfully) under all circumstances, * since the broadcast link's 
pseudo-bearer never blocks */ -static int tipc_bcbearer_send(struct sk_buff *buf, struct tipc_bearer *unused1, +static int tipc_bcbearer_send(struct net *net, struct sk_buff *buf, + struct tipc_bearer *unused1, struct tipc_media_addr *unused2) { int bp_index; struct tipc_msg *msg = buf_msg(buf); + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_bcbearer *bcbearer = tn->bcbearer; + struct tipc_bclink *bclink = tn->bclink; /* Prepare broadcast link message for reliable transmission, * if first time trying to send it; @@ -646,8 +632,8 @@ static int tipc_bcbearer_send(struct sk_buff *buf, struct tipc_bearer *unused1, if (likely(!msg_non_seq(buf_msg(buf)))) { bcbuf_set_acks(buf, bclink->bcast_nodes.count); msg_set_non_seq(msg, 1); - msg_set_mc_netid(msg, tipc_net_id); - bcl->stats.sent_info++; + msg_set_mc_netid(msg, tn->net_id); + tn->bcl->stats.sent_info++; if (WARN_ON(!bclink->bcast_nodes.count)) { dump_stack(); @@ -676,13 +662,14 @@ static int tipc_bcbearer_send(struct sk_buff *buf, struct tipc_bearer *unused1, if (bp_index == 0) { /* Use original buffer for first bearer */ - tipc_bearer_send(b->identity, buf, &b->bcast_addr); + tipc_bearer_send(net, b->identity, buf, &b->bcast_addr); } else { /* Avoid concurrent buffer access */ tbuf = pskb_copy_for_clone(buf, GFP_ATOMIC); if (!tbuf) break; - tipc_bearer_send(b->identity, tbuf, &b->bcast_addr); + tipc_bearer_send(net, b->identity, tbuf, + &b->bcast_addr); kfree_skb(tbuf); /* Bearer keeps a clone */ } if (bcbearer->remains_new.count == 0) @@ -697,15 +684,18 @@ static int tipc_bcbearer_send(struct sk_buff *buf, struct tipc_bearer *unused1, /** * tipc_bcbearer_sort - create sets of bearer pairs used by broadcast bearer */ -void tipc_bcbearer_sort(struct tipc_node_map *nm_ptr, u32 node, bool action) +void tipc_bcbearer_sort(struct net *net, struct tipc_node_map *nm_ptr, + u32 node, bool action) { + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_bcbearer *bcbearer = tn->bcbearer; struct tipc_bcbearer_pair *bp_temp = bcbearer->bpairs_temp; struct tipc_bcbearer_pair *bp_curr; struct tipc_bearer *b; int b_index; int pri; - tipc_bclink_lock(); + tipc_bclink_lock(net); if (action) tipc_nmap_add(nm_ptr, node); @@ -717,7 +707,7 @@ void tipc_bcbearer_sort(struct tipc_node_map *nm_ptr, u32 node, bool action) rcu_read_lock(); for (b_index = 0; b_index < MAX_BEARERS; b_index++) { - b = rcu_dereference_rtnl(bearer_list[b_index]); + b = rcu_dereference_rtnl(tn->bearer_list[b_index]); if (!b || !b->nodes.count) continue; @@ -752,7 +742,7 @@ void tipc_bcbearer_sort(struct tipc_node_map *nm_ptr, u32 node, bool action) bp_curr++; } - tipc_bclink_unlock(); + tipc_bclink_unlock(net); } static int __tipc_nl_add_bc_link_stat(struct sk_buff *skb, @@ -806,19 +796,21 @@ msg_full: return -EMSGSIZE; } -int tipc_nl_add_bc_link(struct tipc_nl_msg *msg) +int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg) { int err; void *hdr; struct nlattr *attrs; struct nlattr *prop; + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_link *bcl = tn->bcl; if (!bcl) return 0; - tipc_bclink_lock(); + tipc_bclink_lock(net); - hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_v2_family, + hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, NLM_F_MULTI, TIPC_NL_LINK_GET); if (!hdr) return -EMSGSIZE; @@ -851,7 +843,7 @@ int tipc_nl_add_bc_link(struct tipc_nl_msg *msg) if (err) goto attr_msg_full; - tipc_bclink_unlock(); + tipc_bclink_unlock(net); nla_nest_end(msg->skb, attrs); 
genlmsg_end(msg->skb, hdr); @@ -862,79 +854,49 @@ prop_msg_full: attr_msg_full: nla_nest_cancel(msg->skb, attrs); msg_full: - tipc_bclink_unlock(); + tipc_bclink_unlock(net); genlmsg_cancel(msg->skb, hdr); return -EMSGSIZE; } -int tipc_bclink_stats(char *buf, const u32 buf_size) +int tipc_bclink_reset_stats(struct net *net) { - int ret; - struct tipc_stats *s; + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_link *bcl = tn->bcl; if (!bcl) - return 0; - - tipc_bclink_lock(); - - s = &bcl->stats; - - ret = tipc_snprintf(buf, buf_size, "Link <%s>\n" - " Window:%u packets\n", - bcl->name, bcl->queue_limit[0]); - ret += tipc_snprintf(buf + ret, buf_size - ret, - " RX packets:%u fragments:%u/%u bundles:%u/%u\n", - s->recv_info, s->recv_fragments, - s->recv_fragmented, s->recv_bundles, - s->recv_bundled); - ret += tipc_snprintf(buf + ret, buf_size - ret, - " TX packets:%u fragments:%u/%u bundles:%u/%u\n", - s->sent_info, s->sent_fragments, - s->sent_fragmented, s->sent_bundles, - s->sent_bundled); - ret += tipc_snprintf(buf + ret, buf_size - ret, - " RX naks:%u defs:%u dups:%u\n", - s->recv_nacks, s->deferred_recv, s->duplicates); - ret += tipc_snprintf(buf + ret, buf_size - ret, - " TX naks:%u acks:%u dups:%u\n", - s->sent_nacks, s->sent_acks, s->retransmitted); - ret += tipc_snprintf(buf + ret, buf_size - ret, - " Congestion link:%u Send queue max:%u avg:%u\n", - s->link_congs, s->max_queue_sz, - s->queue_sz_counts ? - (s->accu_queue_sz / s->queue_sz_counts) : 0); - - tipc_bclink_unlock(); - return ret; -} - -int tipc_bclink_reset_stats(void) -{ - if (!bcl) return -ENOPROTOOPT; - tipc_bclink_lock(); + tipc_bclink_lock(net); memset(&bcl->stats, 0, sizeof(bcl->stats)); - tipc_bclink_unlock(); + tipc_bclink_unlock(net); return 0; } -int tipc_bclink_set_queue_limits(u32 limit) +int tipc_bclink_set_queue_limits(struct net *net, u32 limit) { + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_link *bcl = tn->bcl; + if (!bcl) return -ENOPROTOOPT; if ((limit < TIPC_MIN_LINK_WIN) || (limit > TIPC_MAX_LINK_WIN)) return -EINVAL; - tipc_bclink_lock(); + tipc_bclink_lock(net); tipc_link_set_queue_limits(bcl, limit); - tipc_bclink_unlock(); + tipc_bclink_unlock(net); return 0; } -int tipc_bclink_init(void) +int tipc_bclink_init(struct net *net) { + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_bcbearer *bcbearer; + struct tipc_bclink *bclink; + struct tipc_link *bcl; + bcbearer = kzalloc(sizeof(*bcbearer), GFP_ATOMIC); if (!bcbearer) return -ENOMEM; @@ -953,30 +915,39 @@ int tipc_bclink_init(void) spin_lock_init(&bclink->lock); __skb_queue_head_init(&bcl->outqueue); __skb_queue_head_init(&bcl->deferred_queue); - skb_queue_head_init(&bcl->waiting_sks); + skb_queue_head_init(&bcl->wakeupq); bcl->next_out_no = 1; spin_lock_init(&bclink->node.lock); - __skb_queue_head_init(&bclink->node.waiting_sks); + __skb_queue_head_init(&bclink->arrvq); + skb_queue_head_init(&bclink->inputq); bcl->owner = &bclink->node; + bcl->owner->net = net; bcl->max_pkt = MAX_PKT_DEFAULT_MCAST; tipc_link_set_queue_limits(bcl, BCLINK_WIN_DEFAULT); bcl->bearer_id = MAX_BEARERS; - rcu_assign_pointer(bearer_list[MAX_BEARERS], &bcbearer->bearer); + rcu_assign_pointer(tn->bearer_list[MAX_BEARERS], &bcbearer->bearer); bcl->state = WORKING_WORKING; + bcl->pmsg = (struct tipc_msg *)&bcl->proto_msg; + msg_set_prevnode(bcl->pmsg, tn->own_addr); strlcpy(bcl->name, tipc_bclink_name, TIPC_MAX_LINK_NAME); + tn->bcbearer = bcbearer; + tn->bclink = bclink; + tn->bcl = bcl; return 0; } -void 
tipc_bclink_stop(void) +void tipc_bclink_stop(struct net *net) { - tipc_bclink_lock(); - tipc_link_purge_queues(bcl); - tipc_bclink_unlock(); + struct tipc_net *tn = net_generic(net, tipc_net_id); + + tipc_bclink_lock(net); + tipc_link_purge_queues(tn->bcl); + tipc_bclink_unlock(net); - RCU_INIT_POINTER(bearer_list[BCBEARER], NULL); + RCU_INIT_POINTER(tn->bearer_list[BCBEARER], NULL); synchronize_net(); - kfree(bcbearer); - kfree(bclink); + kfree(tn->bcbearer); + kfree(tn->bclink); } /** @@ -1036,50 +1007,3 @@ static void tipc_nmap_diff(struct tipc_node_map *nm_a, } } } - -/** - * tipc_port_list_add - add a port to a port list, ensuring no duplicates - */ -void tipc_port_list_add(struct tipc_port_list *pl_ptr, u32 port) -{ - struct tipc_port_list *item = pl_ptr; - int i; - int item_sz = PLSIZE; - int cnt = pl_ptr->count; - - for (; ; cnt -= item_sz, item = item->next) { - if (cnt < PLSIZE) - item_sz = cnt; - for (i = 0; i < item_sz; i++) - if (item->ports[i] == port) - return; - if (i < PLSIZE) { - item->ports[i] = port; - pl_ptr->count++; - return; - } - if (!item->next) { - item->next = kmalloc(sizeof(*item), GFP_ATOMIC); - if (!item->next) { - pr_warn("Incomplete multicast delivery, no memory\n"); - return; - } - item->next->next = NULL; - } - } -} - -/** - * tipc_port_list_free - free dynamically created entries in port_list chain - * - */ -void tipc_port_list_free(struct tipc_port_list *pl_ptr) -{ - struct tipc_port_list *item; - struct tipc_port_list *next; - - for (item = pl_ptr->next; item; item = next) { - next = item->next; - kfree(item); - } -} diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index 644d79129fba..43f397fbac55 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -1,7 +1,7 @@ /* * net/tipc/bcast.h: Include file for TIPC broadcast code * - * Copyright (c) 2003-2006, 2014, Ericsson AB + * Copyright (c) 2003-2006, 2014-2015, Ericsson AB * Copyright (c) 2005, 2010-2011, Wind River Systems * All rights reserved. * @@ -37,39 +37,73 @@ #ifndef _TIPC_BCAST_H #define _TIPC_BCAST_H -#include "netlink.h" - -#define MAX_NODES 4096 -#define WSIZE 32 -#define TIPC_BCLINK_RESET 1 +#include <linux/tipc_config.h> +#include "link.h" +#include "node.h" /** - * struct tipc_node_map - set of node identifiers - * @count: # of nodes in set - * @map: bitmap of node identifiers that are in the set + * struct tipc_bcbearer_pair - a pair of bearers used by broadcast link + * @primary: pointer to primary bearer + * @secondary: pointer to secondary bearer + * + * Bearers must have same priority and same set of reachable destinations + * to be paired. 
*/ -struct tipc_node_map { - u32 count; - u32 map[MAX_NODES / WSIZE]; + +struct tipc_bcbearer_pair { + struct tipc_bearer *primary; + struct tipc_bearer *secondary; }; -#define PLSIZE 32 +#define TIPC_BCLINK_RESET 1 +#define BCBEARER MAX_BEARERS /** - * struct tipc_port_list - set of node local destination ports - * @count: # of ports in set (only valid for first entry in list) - * @next: pointer to next entry in list - * @ports: array of port references + * struct tipc_bcbearer - bearer used by broadcast link + * @bearer: (non-standard) broadcast bearer structure + * @media: (non-standard) broadcast media structure + * @bpairs: array of bearer pairs + * @bpairs_temp: temporary array of bearer pairs used by tipc_bcbearer_sort() + * @remains: temporary node map used by tipc_bcbearer_send() + * @remains_new: temporary node map used by tipc_bcbearer_send() + * + * Note: The fields labelled "temporary" are incorporated into the bearer + * to avoid consuming potentially limited stack space through the use of + * large local variables within multicast routines. Concurrent access is + * prevented through use of the spinlock "bclink_lock". */ -struct tipc_port_list { - int count; - struct tipc_port_list *next; - u32 ports[PLSIZE]; +struct tipc_bcbearer { + struct tipc_bearer bearer; + struct tipc_media media; + struct tipc_bcbearer_pair bpairs[MAX_BEARERS]; + struct tipc_bcbearer_pair bpairs_temp[TIPC_MAX_LINK_PRI + 1]; + struct tipc_node_map remains; + struct tipc_node_map remains_new; }; +/** + * struct tipc_bclink - link used for broadcast messages + * @lock: spinlock governing access to structure + * @link: (non-standard) broadcast link structure + * @node: (non-standard) node structure representing b'cast link's peer node + * @flags: represent bclink states + * @arrvq: arrival queue for received broadcast messages awaiting delivery + * @inputq: input queue carrying messages to be delivered to local sockets + * @bcast_nodes: map of broadcast-capable nodes + * @retransmit_to: node that most recently requested a retransmit + * + * Handles sequence numbering, fragmentation, bundling, etc.
+ */ +struct tipc_bclink { + spinlock_t lock; + struct tipc_link link; + struct tipc_node node; + unsigned int flags; + struct sk_buff_head arrvq; + struct sk_buff_head inputq; + struct tipc_node_map bcast_nodes; + struct tipc_node *retransmit_to; +}; struct tipc_node; - extern const char tipc_bclink_name[]; /** @@ -81,27 +115,26 @@ static inline int tipc_nmap_equal(struct tipc_node_map *nm_a, return !memcmp(nm_a, nm_b, sizeof(*nm_a)); } -void tipc_port_list_add(struct tipc_port_list *pl_ptr, u32 port); -void tipc_port_list_free(struct tipc_port_list *pl_ptr); - -int tipc_bclink_init(void); -void tipc_bclink_stop(void); -void tipc_bclink_set_flags(unsigned int flags); -void tipc_bclink_add_node(u32 addr); -void tipc_bclink_remove_node(u32 addr); -struct tipc_node *tipc_bclink_retransmit_to(void); +int tipc_bclink_init(struct net *net); +void tipc_bclink_stop(struct net *net); +void tipc_bclink_set_flags(struct net *tn, unsigned int flags); +void tipc_bclink_add_node(struct net *net, u32 addr); +void tipc_bclink_remove_node(struct net *net, u32 addr); +struct tipc_node *tipc_bclink_retransmit_to(struct net *tn); void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked); -void tipc_bclink_rcv(struct sk_buff *buf); -u32 tipc_bclink_get_last_sent(void); +void tipc_bclink_rcv(struct net *net, struct sk_buff *buf); +u32 tipc_bclink_get_last_sent(struct net *net); u32 tipc_bclink_acks_missing(struct tipc_node *n_ptr); -void tipc_bclink_update_link_state(struct tipc_node *n_ptr, u32 last_sent); -int tipc_bclink_stats(char *stats_buf, const u32 buf_size); -int tipc_bclink_reset_stats(void); -int tipc_bclink_set_queue_limits(u32 limit); -void tipc_bcbearer_sort(struct tipc_node_map *nm_ptr, u32 node, bool action); +void tipc_bclink_update_link_state(struct tipc_node *node, + u32 last_sent); +int tipc_bclink_reset_stats(struct net *net); +int tipc_bclink_set_queue_limits(struct net *net, u32 limit); +void tipc_bcbearer_sort(struct net *net, struct tipc_node_map *nm_ptr, + u32 node, bool action); uint tipc_bclink_get_mtu(void); -int tipc_bclink_xmit(struct sk_buff_head *list); -void tipc_bclink_wakeup_users(void); -int tipc_nl_add_bc_link(struct tipc_nl_msg *msg); +int tipc_bclink_xmit(struct net *net, struct sk_buff_head *list); +void tipc_bclink_wakeup_users(struct net *net); +int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg); +void tipc_bclink_input(struct net *net); #endif diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 463db5b15b8b..48852c2dcc03 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -34,11 +34,12 @@ * POSSIBILITY OF SUCH DAMAGE. 
*/ +#include <net/sock.h> #include "core.h" -#include "config.h" #include "bearer.h" #include "link.h" #include "discover.h" +#include "bcast.h" #define MAX_ADDR_STR 60 @@ -67,9 +68,8 @@ static const struct nla_policy tipc_nl_media_policy[TIPC_NLA_MEDIA_MAX + 1] = { [TIPC_NLA_MEDIA_PROP] = { .type = NLA_NESTED } }; -struct tipc_bearer __rcu *bearer_list[MAX_BEARERS + 1]; - -static void bearer_disable(struct tipc_bearer *b_ptr, bool shutting_down); +static void bearer_disable(struct net *net, struct tipc_bearer *b_ptr, + bool shutting_down); /** * tipc_media_find - locates specified media object by name @@ -111,38 +111,18 @@ void tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a) m_ptr = media_find_id(a->media_id); if (m_ptr && !m_ptr->addr2str(a, addr_str, sizeof(addr_str))) - ret = tipc_snprintf(buf, len, "%s(%s)", m_ptr->name, addr_str); + ret = scnprintf(buf, len, "%s(%s)", m_ptr->name, addr_str); else { u32 i; - ret = tipc_snprintf(buf, len, "UNKNOWN(%u)", a->media_id); + ret = scnprintf(buf, len, "UNKNOWN(%u)", a->media_id); for (i = 0; i < sizeof(a->value); i++) - ret += tipc_snprintf(buf - ret, len + ret, + ret += scnprintf(buf - ret, len + ret, "-%02x", a->value[i]); } } /** - * tipc_media_get_names - record names of registered media in buffer - */ -struct sk_buff *tipc_media_get_names(void) -{ - struct sk_buff *buf; - int i; - - buf = tipc_cfg_reply_alloc(MAX_MEDIA * TLV_SPACE(TIPC_MAX_MEDIA_NAME)); - if (!buf) - return NULL; - - for (i = 0; media_info_array[i] != NULL; i++) { - tipc_cfg_append_tlv(buf, TIPC_TLV_MEDIA_NAME, - media_info_array[i]->name, - strlen(media_info_array[i]->name) + 1); - } - return buf; -} - -/** * bearer_name_validate - validate & (optionally) deconstruct bearer name * @name: ptr to bearer name string * @name_parts: ptr to area for bearer name components (or NULL if not needed) @@ -190,68 +170,43 @@ static int bearer_name_validate(const char *name, /** * tipc_bearer_find - locates bearer object with matching bearer name */ -struct tipc_bearer *tipc_bearer_find(const char *name) +struct tipc_bearer *tipc_bearer_find(struct net *net, const char *name) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_bearer *b_ptr; u32 i; for (i = 0; i < MAX_BEARERS; i++) { - b_ptr = rtnl_dereference(bearer_list[i]); + b_ptr = rtnl_dereference(tn->bearer_list[i]); if (b_ptr && (!strcmp(b_ptr->name, name))) return b_ptr; } return NULL; } -/** - * tipc_bearer_get_names - record names of bearers in buffer - */ -struct sk_buff *tipc_bearer_get_names(void) -{ - struct sk_buff *buf; - struct tipc_bearer *b; - int i, j; - - buf = tipc_cfg_reply_alloc(MAX_BEARERS * TLV_SPACE(TIPC_MAX_BEARER_NAME)); - if (!buf) - return NULL; - - for (i = 0; media_info_array[i] != NULL; i++) { - for (j = 0; j < MAX_BEARERS; j++) { - b = rtnl_dereference(bearer_list[j]); - if (!b) - continue; - if (b->media == media_info_array[i]) { - tipc_cfg_append_tlv(buf, TIPC_TLV_BEARER_NAME, - b->name, - strlen(b->name) + 1); - } - } - } - return buf; -} - -void tipc_bearer_add_dest(u32 bearer_id, u32 dest) +void tipc_bearer_add_dest(struct net *net, u32 bearer_id, u32 dest) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_bearer *b_ptr; rcu_read_lock(); - b_ptr = rcu_dereference_rtnl(bearer_list[bearer_id]); + b_ptr = rcu_dereference_rtnl(tn->bearer_list[bearer_id]); if (b_ptr) { - tipc_bcbearer_sort(&b_ptr->nodes, dest, true); + tipc_bcbearer_sort(net, &b_ptr->nodes, dest, true); tipc_disc_add_dest(b_ptr->link_req); } rcu_read_unlock(); } -void 
tipc_bearer_remove_dest(u32 bearer_id, u32 dest) +void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_bearer *b_ptr; rcu_read_lock(); - b_ptr = rcu_dereference_rtnl(bearer_list[bearer_id]); + b_ptr = rcu_dereference_rtnl(tn->bearer_list[bearer_id]); if (b_ptr) { - tipc_bcbearer_sort(&b_ptr->nodes, dest, false); + tipc_bcbearer_sort(net, &b_ptr->nodes, dest, false); tipc_disc_remove_dest(b_ptr->link_req); } rcu_read_unlock(); @@ -260,8 +215,10 @@ void tipc_bearer_remove_dest(u32 bearer_id, u32 dest) /** * tipc_enable_bearer - enable bearer with the given name */ -int tipc_enable_bearer(const char *name, u32 disc_domain, u32 priority) +static int tipc_enable_bearer(struct net *net, const char *name, + u32 disc_domain, u32 priority) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_bearer *b_ptr; struct tipc_media *m_ptr; struct tipc_bearer_names b_names; @@ -271,7 +228,7 @@ int tipc_enable_bearer(const char *name, u32 disc_domain, u32 priority) u32 i; int res = -EINVAL; - if (!tipc_own_addr) { + if (!tn->own_addr) { pr_warn("Bearer <%s> rejected, not supported in standalone mode\n", name); return -ENOPROTOOPT; @@ -281,11 +238,11 @@ int tipc_enable_bearer(const char *name, u32 disc_domain, u32 priority) return -EINVAL; } if (tipc_addr_domain_valid(disc_domain) && - (disc_domain != tipc_own_addr)) { - if (tipc_in_scope(disc_domain, tipc_own_addr)) { - disc_domain = tipc_own_addr & TIPC_CLUSTER_MASK; + (disc_domain != tn->own_addr)) { + if (tipc_in_scope(disc_domain, tn->own_addr)) { + disc_domain = tn->own_addr & TIPC_CLUSTER_MASK; res = 0; /* accept any node in own cluster */ - } else if (in_own_cluster_exact(disc_domain)) + } else if (in_own_cluster_exact(net, disc_domain)) res = 0; /* accept specified node in own cluster */ } if (res) { @@ -313,7 +270,7 @@ restart: bearer_id = MAX_BEARERS; with_this_prio = 1; for (i = MAX_BEARERS; i-- != 0; ) { - b_ptr = rtnl_dereference(bearer_list[i]); + b_ptr = rtnl_dereference(tn->bearer_list[i]); if (!b_ptr) { bearer_id = i; continue; @@ -347,7 +304,7 @@ restart: strcpy(b_ptr->name, name); b_ptr->media = m_ptr; - res = m_ptr->enable_media(b_ptr); + res = m_ptr->enable_media(net, b_ptr); if (res) { pr_warn("Bearer <%s> rejected, enable failure (%d)\n", name, -res); @@ -361,15 +318,15 @@ restart: b_ptr->net_plane = bearer_id + 'A'; b_ptr->priority = priority; - res = tipc_disc_create(b_ptr, &b_ptr->bcast_addr); + res = tipc_disc_create(net, b_ptr, &b_ptr->bcast_addr); if (res) { - bearer_disable(b_ptr, false); + bearer_disable(net, b_ptr, false); pr_warn("Bearer <%s> rejected, discovery object creation failed\n", name); return -EINVAL; } - rcu_assign_pointer(bearer_list[bearer_id], b_ptr); + rcu_assign_pointer(tn->bearer_list[bearer_id], b_ptr); pr_info("Enabled bearer <%s>, discovery domain %s, priority %u\n", name, @@ -380,11 +337,11 @@ restart: /** * tipc_reset_bearer - Reset all links established over this bearer */ -static int tipc_reset_bearer(struct tipc_bearer *b_ptr) +static int tipc_reset_bearer(struct net *net, struct tipc_bearer *b_ptr) { pr_info("Resetting bearer <%s>\n", b_ptr->name); - tipc_link_reset_list(b_ptr->identity); - tipc_disc_reset(b_ptr); + tipc_link_reset_list(net, b_ptr->identity); + tipc_disc_reset(net, b_ptr); return 0; } @@ -393,49 +350,35 @@ static int tipc_reset_bearer(struct tipc_bearer *b_ptr) * * Note: This routine assumes caller holds RTNL lock. 
*/ -static void bearer_disable(struct tipc_bearer *b_ptr, bool shutting_down) +static void bearer_disable(struct net *net, struct tipc_bearer *b_ptr, + bool shutting_down) { + struct tipc_net *tn = net_generic(net, tipc_net_id); u32 i; pr_info("Disabling bearer <%s>\n", b_ptr->name); b_ptr->media->disable_media(b_ptr); - tipc_link_delete_list(b_ptr->identity, shutting_down); + tipc_link_delete_list(net, b_ptr->identity, shutting_down); if (b_ptr->link_req) tipc_disc_delete(b_ptr->link_req); for (i = 0; i < MAX_BEARERS; i++) { - if (b_ptr == rtnl_dereference(bearer_list[i])) { - RCU_INIT_POINTER(bearer_list[i], NULL); + if (b_ptr == rtnl_dereference(tn->bearer_list[i])) { + RCU_INIT_POINTER(tn->bearer_list[i], NULL); break; } } kfree_rcu(b_ptr, rcu); } -int tipc_disable_bearer(const char *name) -{ - struct tipc_bearer *b_ptr; - int res; - - b_ptr = tipc_bearer_find(name); - if (b_ptr == NULL) { - pr_warn("Attempt to disable unknown bearer <%s>\n", name); - res = -EINVAL; - } else { - bearer_disable(b_ptr, false); - res = 0; - } - return res; -} - -int tipc_enable_l2_media(struct tipc_bearer *b) +int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b) { struct net_device *dev; char *driver_name = strchr((const char *)b->name, ':') + 1; /* Find device with specified name */ - dev = dev_get_by_name(&init_net, driver_name); + dev = dev_get_by_name(net, driver_name); if (!dev) return -ENODEV; @@ -474,8 +417,8 @@ void tipc_disable_l2_media(struct tipc_bearer *b) * @b_ptr: the bearer through which the packet is to be sent * @dest: peer destination address */ -int tipc_l2_send_msg(struct sk_buff *buf, struct tipc_bearer *b, - struct tipc_media_addr *dest) +int tipc_l2_send_msg(struct net *net, struct sk_buff *buf, + struct tipc_bearer *b, struct tipc_media_addr *dest) { struct sk_buff *clone; struct net_device *dev; @@ -511,15 +454,16 @@ int tipc_l2_send_msg(struct sk_buff *buf, struct tipc_bearer *b, * The media send routine must not alter the buffer being passed in * as it may be needed for later retransmission! 
*/ -void tipc_bearer_send(u32 bearer_id, struct sk_buff *buf, +void tipc_bearer_send(struct net *net, u32 bearer_id, struct sk_buff *buf, struct tipc_media_addr *dest) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_bearer *b_ptr; rcu_read_lock(); - b_ptr = rcu_dereference_rtnl(bearer_list[bearer_id]); + b_ptr = rcu_dereference_rtnl(tn->bearer_list[bearer_id]); if (likely(b_ptr)) - b_ptr->media->send_msg(buf, b_ptr, dest); + b_ptr->media->send_msg(net, buf, b_ptr, dest); rcu_read_unlock(); } @@ -539,17 +483,12 @@ static int tipc_l2_rcv_msg(struct sk_buff *buf, struct net_device *dev, { struct tipc_bearer *b_ptr; - if (!net_eq(dev_net(dev), &init_net)) { - kfree_skb(buf); - return NET_RX_DROP; - } - rcu_read_lock(); b_ptr = rcu_dereference_rtnl(dev->tipc_ptr); if (likely(b_ptr)) { if (likely(buf->pkt_type <= PACKET_BROADCAST)) { buf->next = NULL; - tipc_rcv(buf, b_ptr); + tipc_rcv(dev_net(dev), buf, b_ptr); rcu_read_unlock(); return NET_RX_SUCCESS; } @@ -572,11 +511,9 @@ static int tipc_l2_rcv_msg(struct sk_buff *buf, struct net_device *dev, static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt, void *ptr) { - struct tipc_bearer *b_ptr; struct net_device *dev = netdev_notifier_info_to_dev(ptr); - - if (!net_eq(dev_net(dev), &init_net)) - return NOTIFY_DONE; + struct net *net = dev_net(dev); + struct tipc_bearer *b_ptr; b_ptr = rtnl_dereference(dev->tipc_ptr); if (!b_ptr) @@ -590,16 +527,16 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt, break; case NETDEV_DOWN: case NETDEV_CHANGEMTU: - tipc_reset_bearer(b_ptr); + tipc_reset_bearer(net, b_ptr); break; case NETDEV_CHANGEADDR: b_ptr->media->raw2addr(b_ptr, &b_ptr->addr, (char *)dev->dev_addr); - tipc_reset_bearer(b_ptr); + tipc_reset_bearer(net, b_ptr); break; case NETDEV_UNREGISTER: case NETDEV_CHANGENAME: - bearer_disable(b_ptr, false); + bearer_disable(dev_net(dev), b_ptr, false); break; } return NOTIFY_OK; @@ -632,16 +569,17 @@ void tipc_bearer_cleanup(void) dev_remove_pack(&tipc_packet_type); } -void tipc_bearer_stop(void) +void tipc_bearer_stop(struct net *net) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_bearer *b_ptr; u32 i; for (i = 0; i < MAX_BEARERS; i++) { - b_ptr = rtnl_dereference(bearer_list[i]); + b_ptr = rtnl_dereference(tn->bearer_list[i]); if (b_ptr) { - bearer_disable(b_ptr, true); - bearer_list[i] = NULL; + bearer_disable(net, b_ptr, true); + tn->bearer_list[i] = NULL; } } } @@ -654,7 +592,7 @@ static int __tipc_nl_add_bearer(struct tipc_nl_msg *msg, struct nlattr *attrs; struct nlattr *prop; - hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_v2_family, + hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, NLM_F_MULTI, TIPC_NL_BEARER_GET); if (!hdr) return -EMSGSIZE; @@ -698,6 +636,8 @@ int tipc_nl_bearer_dump(struct sk_buff *skb, struct netlink_callback *cb) int i = cb->args[0]; struct tipc_bearer *bearer; struct tipc_nl_msg msg; + struct net *net = sock_net(skb->sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); if (i == MAX_BEARERS) return 0; @@ -708,7 +648,7 @@ int tipc_nl_bearer_dump(struct sk_buff *skb, struct netlink_callback *cb) rtnl_lock(); for (i = 0; i < MAX_BEARERS; i++) { - bearer = rtnl_dereference(bearer_list[i]); + bearer = rtnl_dereference(tn->bearer_list[i]); if (!bearer) continue; @@ -730,6 +670,7 @@ int tipc_nl_bearer_get(struct sk_buff *skb, struct genl_info *info) struct tipc_bearer *bearer; struct tipc_nl_msg msg; struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; 
+ struct net *net = genl_info_net(info); if (!info->attrs[TIPC_NLA_BEARER]) return -EINVAL; @@ -753,7 +694,7 @@ int tipc_nl_bearer_get(struct sk_buff *skb, struct genl_info *info) msg.seq = info->snd_seq; rtnl_lock(); - bearer = tipc_bearer_find(name); + bearer = tipc_bearer_find(net, name); if (!bearer) { err = -EINVAL; goto err_out; @@ -778,6 +719,7 @@ int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info) char *name; struct tipc_bearer *bearer; struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; + struct net *net = sock_net(skb->sk); if (!info->attrs[TIPC_NLA_BEARER]) return -EINVAL; @@ -794,13 +736,13 @@ int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info) name = nla_data(attrs[TIPC_NLA_BEARER_NAME]); rtnl_lock(); - bearer = tipc_bearer_find(name); + bearer = tipc_bearer_find(net, name); if (!bearer) { rtnl_unlock(); return -EINVAL; } - bearer_disable(bearer, false); + bearer_disable(net, bearer, false); rtnl_unlock(); return 0; @@ -811,11 +753,13 @@ int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info) int err; char *bearer; struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; + struct net *net = sock_net(skb->sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); u32 domain; u32 prio; prio = TIPC_MEDIA_LINK_PRI; - domain = tipc_own_addr & TIPC_CLUSTER_MASK; + domain = tn->own_addr & TIPC_CLUSTER_MASK; if (!info->attrs[TIPC_NLA_BEARER]) return -EINVAL; @@ -847,7 +791,7 @@ int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info) } rtnl_lock(); - err = tipc_enable_bearer(bearer, domain, prio); + err = tipc_enable_bearer(net, bearer, domain, prio); if (err) { rtnl_unlock(); return err; @@ -863,6 +807,7 @@ int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info) char *name; struct tipc_bearer *b; struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; + struct net *net = genl_info_net(info); if (!info->attrs[TIPC_NLA_BEARER]) return -EINVAL; @@ -878,7 +823,7 @@ int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info) name = nla_data(attrs[TIPC_NLA_BEARER_NAME]); rtnl_lock(); - b = tipc_bearer_find(name); + b = tipc_bearer_find(net, name); if (!b) { rtnl_unlock(); return -EINVAL; @@ -913,7 +858,7 @@ static int __tipc_nl_add_media(struct tipc_nl_msg *msg, struct nlattr *attrs; struct nlattr *prop; - hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_v2_family, + hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, NLM_F_MULTI, TIPC_NL_MEDIA_GET); if (!hdr) return -EMSGSIZE; diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 2c1230ac5dfe..6b17795ff8bc 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -37,12 +37,13 @@ #ifndef _TIPC_BEARER_H #define _TIPC_BEARER_H -#include "bcast.h" #include "netlink.h" #include <net/genetlink.h> #define MAX_BEARERS 2 #define MAX_MEDIA 2 +#define MAX_NODES 4096 +#define WSIZE 32 /* Identifiers associated with TIPC message header media address info * - address info field is 32 bytes long @@ -59,6 +60,16 @@ #define TIPC_MEDIA_TYPE_IB 2 /** + * struct tipc_node_map - set of node identifiers + * @count: # of nodes in set + * @map: bitmap of node identifiers that are in the set + */ +struct tipc_node_map { + u32 count; + u32 map[MAX_NODES / WSIZE]; +}; + +/** * struct tipc_media_addr - destination address used by TIPC bearers * @value: address info (format defined by media) * @media_id: TIPC media type identifier @@ -89,10 +100,10 @@ struct tipc_bearer; * @name: media name */ struct tipc_media { - int (*send_msg)(struct sk_buff *buf, + int 
(*send_msg)(struct net *net, struct sk_buff *buf, struct tipc_bearer *b_ptr, struct tipc_media_addr *dest); - int (*enable_media)(struct tipc_bearer *b_ptr); + int (*enable_media)(struct net *net, struct tipc_bearer *b_ptr); void (*disable_media)(struct tipc_bearer *b_ptr); int (*addr2str)(struct tipc_media_addr *addr, char *strbuf, @@ -157,17 +168,11 @@ struct tipc_bearer_names { char if_name[TIPC_MAX_IF_NAME]; }; -struct tipc_link; - -extern struct tipc_bearer __rcu *bearer_list[]; - /* * TIPC routines available to supported media types */ -void tipc_rcv(struct sk_buff *skb, struct tipc_bearer *tb_ptr); -int tipc_enable_bearer(const char *bearer_name, u32 disc_domain, u32 priority); -int tipc_disable_bearer(const char *name); +void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr); /* * Routines made available to TIPC by supported media types @@ -191,21 +196,19 @@ int tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info); int tipc_media_set_priority(const char *name, u32 new_value); int tipc_media_set_window(const char *name, u32 new_value); void tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a); -struct sk_buff *tipc_media_get_names(void); -int tipc_enable_l2_media(struct tipc_bearer *b); +int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b); void tipc_disable_l2_media(struct tipc_bearer *b); -int tipc_l2_send_msg(struct sk_buff *buf, struct tipc_bearer *b, - struct tipc_media_addr *dest); +int tipc_l2_send_msg(struct net *net, struct sk_buff *buf, + struct tipc_bearer *b, struct tipc_media_addr *dest); -struct sk_buff *tipc_bearer_get_names(void); -void tipc_bearer_add_dest(u32 bearer_id, u32 dest); -void tipc_bearer_remove_dest(u32 bearer_id, u32 dest); -struct tipc_bearer *tipc_bearer_find(const char *name); +void tipc_bearer_add_dest(struct net *net, u32 bearer_id, u32 dest); +void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest); +struct tipc_bearer *tipc_bearer_find(struct net *net, const char *name); struct tipc_media *tipc_media_find(const char *name); int tipc_bearer_setup(void); void tipc_bearer_cleanup(void); -void tipc_bearer_stop(void); -void tipc_bearer_send(u32 bearer_id, struct sk_buff *buf, +void tipc_bearer_stop(struct net *net); +void tipc_bearer_send(struct net *net, u32 bearer_id, struct sk_buff *buf, struct tipc_media_addr *dest); #endif /* _TIPC_BEARER_H */ diff --git a/net/tipc/config.c b/net/tipc/config.c deleted file mode 100644 index 876f4c6a2631..000000000000 --- a/net/tipc/config.c +++ /dev/null @@ -1,342 +0,0 @@ -/* - * net/tipc/config.c: TIPC configuration management code - * - * Copyright (c) 2002-2006, Ericsson AB - * Copyright (c) 2004-2007, 2010-2013, Wind River Systems - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the names of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. 
- * - * Alternatively, this software may be distributed under the terms of the - * GNU General Public License ("GPL") version 2 as published by the Free - * Software Foundation. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "core.h" -#include "socket.h" -#include "name_table.h" -#include "config.h" -#include "server.h" - -#define REPLY_TRUNCATED "<truncated>\n" - -static const void *req_tlv_area; /* request message TLV area */ -static int req_tlv_space; /* request message TLV area size */ -static int rep_headroom; /* reply message headroom to use */ - -struct sk_buff *tipc_cfg_reply_alloc(int payload_size) -{ - struct sk_buff *buf; - - buf = alloc_skb(rep_headroom + payload_size, GFP_ATOMIC); - if (buf) - skb_reserve(buf, rep_headroom); - return buf; -} - -int tipc_cfg_append_tlv(struct sk_buff *buf, int tlv_type, - void *tlv_data, int tlv_data_size) -{ - struct tlv_desc *tlv = (struct tlv_desc *)skb_tail_pointer(buf); - int new_tlv_space = TLV_SPACE(tlv_data_size); - - if (skb_tailroom(buf) < new_tlv_space) - return 0; - skb_put(buf, new_tlv_space); - tlv->tlv_type = htons(tlv_type); - tlv->tlv_len = htons(TLV_LENGTH(tlv_data_size)); - if (tlv_data_size && tlv_data) - memcpy(TLV_DATA(tlv), tlv_data, tlv_data_size); - return 1; -} - -static struct sk_buff *tipc_cfg_reply_unsigned_type(u16 tlv_type, u32 value) -{ - struct sk_buff *buf; - __be32 value_net; - - buf = tipc_cfg_reply_alloc(TLV_SPACE(sizeof(value))); - if (buf) { - value_net = htonl(value); - tipc_cfg_append_tlv(buf, tlv_type, &value_net, - sizeof(value_net)); - } - return buf; -} - -static struct sk_buff *tipc_cfg_reply_unsigned(u32 value) -{ - return tipc_cfg_reply_unsigned_type(TIPC_TLV_UNSIGNED, value); -} - -struct sk_buff *tipc_cfg_reply_string_type(u16 tlv_type, char *string) -{ - struct sk_buff *buf; - int string_len = strlen(string) + 1; - - buf = tipc_cfg_reply_alloc(TLV_SPACE(string_len)); - if (buf) - tipc_cfg_append_tlv(buf, tlv_type, string, string_len); - return buf; -} - -static struct sk_buff *tipc_show_stats(void) -{ - struct sk_buff *buf; - struct tlv_desc *rep_tlv; - char *pb; - int pb_len; - int str_len; - u32 value; - - if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED)) - return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - - value = ntohl(*(u32 *)TLV_DATA(req_tlv_area)); - if (value != 0) - return tipc_cfg_reply_error_string("unsupported argument"); - - buf = tipc_cfg_reply_alloc(TLV_SPACE(ULTRA_STRING_MAX_LEN)); - if (buf == NULL) - return NULL; - - rep_tlv = (struct tlv_desc *)buf->data; - pb = TLV_DATA(rep_tlv); - pb_len = ULTRA_STRING_MAX_LEN; - - str_len = tipc_snprintf(pb, pb_len, "TIPC version " TIPC_MOD_VER "\n"); - str_len += 1; /* for "\0" */ - skb_put(buf, TLV_SPACE(str_len)); - TLV_SET(rep_tlv, TIPC_TLV_ULTRA_STRING, 
NULL, str_len); - - return buf; -} - -static struct sk_buff *cfg_enable_bearer(void) -{ - struct tipc_bearer_config *args; - - if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_BEARER_CONFIG)) - return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - - args = (struct tipc_bearer_config *)TLV_DATA(req_tlv_area); - if (tipc_enable_bearer(args->name, - ntohl(args->disc_domain), - ntohl(args->priority))) - return tipc_cfg_reply_error_string("unable to enable bearer"); - - return tipc_cfg_reply_none(); -} - -static struct sk_buff *cfg_disable_bearer(void) -{ - if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_BEARER_NAME)) - return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - - if (tipc_disable_bearer((char *)TLV_DATA(req_tlv_area))) - return tipc_cfg_reply_error_string("unable to disable bearer"); - - return tipc_cfg_reply_none(); -} - -static struct sk_buff *cfg_set_own_addr(void) -{ - u32 addr; - - if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_NET_ADDR)) - return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - - addr = ntohl(*(__be32 *)TLV_DATA(req_tlv_area)); - if (addr == tipc_own_addr) - return tipc_cfg_reply_none(); - if (!tipc_addr_node_valid(addr)) - return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE - " (node address)"); - if (tipc_own_addr) - return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED - " (cannot change node address once assigned)"); - if (!tipc_net_start(addr)) - return tipc_cfg_reply_none(); - - return tipc_cfg_reply_error_string("cannot change to network mode"); -} - -static struct sk_buff *cfg_set_max_ports(void) -{ - u32 value; - - if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED)) - return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - value = ntohl(*(__be32 *)TLV_DATA(req_tlv_area)); - if (value == tipc_max_ports) - return tipc_cfg_reply_none(); - if (value < 127 || value > 65535) - return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE - " (max ports must be 127-65535)"); - return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED - " (cannot change max ports while TIPC is active)"); -} - -static struct sk_buff *cfg_set_netid(void) -{ - u32 value; - - if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED)) - return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - value = ntohl(*(__be32 *)TLV_DATA(req_tlv_area)); - if (value == tipc_net_id) - return tipc_cfg_reply_none(); - if (value < 1 || value > 9999) - return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE - " (network id must be 1-9999)"); - if (tipc_own_addr) - return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED - " (cannot change network id once TIPC has joined a network)"); - tipc_net_id = value; - return tipc_cfg_reply_none(); -} - -struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area, - int request_space, int reply_headroom) -{ - struct sk_buff *rep_tlv_buf; - - rtnl_lock(); - - /* Save request and reply details in a well-known location */ - req_tlv_area = request_area; - req_tlv_space = request_space; - rep_headroom = reply_headroom; - - /* Check command authorization */ - if (likely(in_own_node(orig_node))) { - /* command is permitted */ - } else { - rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED - " (cannot be done remotely)"); - goto exit; - } - - /* Call appropriate processing routine */ - switch (cmd) { - case TIPC_CMD_NOOP: - rep_tlv_buf = tipc_cfg_reply_none(); - break; - case TIPC_CMD_GET_NODES: - rep_tlv_buf = tipc_node_get_nodes(req_tlv_area, req_tlv_space); - break; - case 
TIPC_CMD_GET_LINKS: - rep_tlv_buf = tipc_node_get_links(req_tlv_area, req_tlv_space); - break; - case TIPC_CMD_SHOW_LINK_STATS: - rep_tlv_buf = tipc_link_cmd_show_stats(req_tlv_area, req_tlv_space); - break; - case TIPC_CMD_RESET_LINK_STATS: - rep_tlv_buf = tipc_link_cmd_reset_stats(req_tlv_area, req_tlv_space); - break; - case TIPC_CMD_SHOW_NAME_TABLE: - rep_tlv_buf = tipc_nametbl_get(req_tlv_area, req_tlv_space); - break; - case TIPC_CMD_GET_BEARER_NAMES: - rep_tlv_buf = tipc_bearer_get_names(); - break; - case TIPC_CMD_GET_MEDIA_NAMES: - rep_tlv_buf = tipc_media_get_names(); - break; - case TIPC_CMD_SHOW_PORTS: - rep_tlv_buf = tipc_sk_socks_show(); - break; - case TIPC_CMD_SHOW_STATS: - rep_tlv_buf = tipc_show_stats(); - break; - case TIPC_CMD_SET_LINK_TOL: - case TIPC_CMD_SET_LINK_PRI: - case TIPC_CMD_SET_LINK_WINDOW: - rep_tlv_buf = tipc_link_cmd_config(req_tlv_area, req_tlv_space, cmd); - break; - case TIPC_CMD_ENABLE_BEARER: - rep_tlv_buf = cfg_enable_bearer(); - break; - case TIPC_CMD_DISABLE_BEARER: - rep_tlv_buf = cfg_disable_bearer(); - break; - case TIPC_CMD_SET_NODE_ADDR: - rep_tlv_buf = cfg_set_own_addr(); - break; - case TIPC_CMD_SET_MAX_PORTS: - rep_tlv_buf = cfg_set_max_ports(); - break; - case TIPC_CMD_SET_NETID: - rep_tlv_buf = cfg_set_netid(); - break; - case TIPC_CMD_GET_MAX_PORTS: - rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_max_ports); - break; - case TIPC_CMD_GET_NETID: - rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_net_id); - break; - case TIPC_CMD_NOT_NET_ADMIN: - rep_tlv_buf = - tipc_cfg_reply_error_string(TIPC_CFG_NOT_NET_ADMIN); - break; - case TIPC_CMD_SET_MAX_ZONES: - case TIPC_CMD_GET_MAX_ZONES: - case TIPC_CMD_SET_MAX_SLAVES: - case TIPC_CMD_GET_MAX_SLAVES: - case TIPC_CMD_SET_MAX_CLUSTERS: - case TIPC_CMD_GET_MAX_CLUSTERS: - case TIPC_CMD_SET_MAX_NODES: - case TIPC_CMD_GET_MAX_NODES: - case TIPC_CMD_SET_MAX_SUBSCR: - case TIPC_CMD_GET_MAX_SUBSCR: - case TIPC_CMD_SET_MAX_PUBL: - case TIPC_CMD_GET_MAX_PUBL: - case TIPC_CMD_SET_LOG_SIZE: - case TIPC_CMD_SET_REMOTE_MNG: - case TIPC_CMD_GET_REMOTE_MNG: - case TIPC_CMD_DUMP_LOG: - rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED - " (obsolete command)"); - break; - default: - rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED - " (unknown command)"); - break; - } - - WARN_ON(rep_tlv_buf->len > TLV_SPACE(ULTRA_STRING_MAX_LEN)); - - /* Append an error message if we cannot return all requested data */ - if (rep_tlv_buf->len == TLV_SPACE(ULTRA_STRING_MAX_LEN)) { - if (*(rep_tlv_buf->data + ULTRA_STRING_MAX_LEN) != '\0') - sprintf(rep_tlv_buf->data + rep_tlv_buf->len - - sizeof(REPLY_TRUNCATED) - 1, REPLY_TRUNCATED); - } - - /* Return reply buffer */ -exit: - rtnl_unlock(); - return rep_tlv_buf; -} diff --git a/net/tipc/config.h b/net/tipc/config.h deleted file mode 100644 index 47b1bf181612..000000000000 --- a/net/tipc/config.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * net/tipc/config.h: Include file for TIPC configuration service code - * - * Copyright (c) 2003-2006, Ericsson AB - * Copyright (c) 2005, Wind River Systems - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the names of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * Alternatively, this software may be distributed under the terms of the - * GNU General Public License ("GPL") version 2 as published by the Free - * Software Foundation. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _TIPC_CONFIG_H -#define _TIPC_CONFIG_H - -/* ---------------------------------------------------------------------- */ - -#include "link.h" - -struct sk_buff *tipc_cfg_reply_alloc(int payload_size); -int tipc_cfg_append_tlv(struct sk_buff *buf, int tlv_type, - void *tlv_data, int tlv_data_size); -struct sk_buff *tipc_cfg_reply_string_type(u16 tlv_type, char *string); - -static inline struct sk_buff *tipc_cfg_reply_none(void) -{ - return tipc_cfg_reply_alloc(0); -} - -static inline struct sk_buff *tipc_cfg_reply_error_string(char *string) -{ - return tipc_cfg_reply_string_type(TIPC_TLV_ERROR_STRING, string); -} - -static inline struct sk_buff *tipc_cfg_reply_ultra_string(char *string) -{ - return tipc_cfg_reply_string_type(TIPC_TLV_ULTRA_STRING, string); -} - -struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, - const void *req_tlv_area, int req_tlv_space, - int headroom); -#endif diff --git a/net/tipc/core.c b/net/tipc/core.c index a5737b8407dd..935205e6bcfe 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -34,82 +34,88 @@ * POSSIBILITY OF SUCH DAMAGE. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include "core.h" #include "name_table.h" #include "subscr.h" -#include "config.h" +#include "bearer.h" +#include "net.h" #include "socket.h" #include <linux/module.h> -/* global variables used by multiple sub-systems within TIPC */ -int tipc_random __read_mostly; - /* configurable TIPC parameters */ -u32 tipc_own_addr __read_mostly; -int tipc_max_ports __read_mostly; int tipc_net_id __read_mostly; int sysctl_tipc_rmem[3] __read_mostly; /* min/default/max */ -/** - * tipc_buf_acquire - creates a TIPC message buffer - * @size: message size (including TIPC header) - * - * Returns a new buffer with data pointers set to the specified size. - * - * NOTE: Headroom is reserved to allow prepending of a data link header. - * There may also be unrequested tailroom present at the buffer's end. 
- */ -struct sk_buff *tipc_buf_acquire(u32 size) +static int __net_init tipc_init_net(struct net *net) { - struct sk_buff *skb; - unsigned int buf_size = (BUF_HEADROOM + size + 3) & ~3u; - - skb = alloc_skb_fclone(buf_size, GFP_ATOMIC); - if (skb) { - skb_reserve(skb, BUF_HEADROOM); - skb_put(skb, size); - skb->next = NULL; - } - return skb; + struct tipc_net *tn = net_generic(net, tipc_net_id); + int err; + + tn->net_id = 4711; + tn->own_addr = 0; + get_random_bytes(&tn->random, sizeof(int)); + INIT_LIST_HEAD(&tn->node_list); + spin_lock_init(&tn->node_list_lock); + + err = tipc_sk_rht_init(net); + if (err) + goto out_sk_rht; + + err = tipc_nametbl_init(net); + if (err) + goto out_nametbl; + + err = tipc_subscr_start(net); + if (err) + goto out_subscr; + return 0; + +out_subscr: + tipc_nametbl_stop(net); +out_nametbl: + tipc_sk_rht_destroy(net); +out_sk_rht: + return err; } -/** - * tipc_core_stop - switch TIPC from SINGLE NODE to NOT RUNNING mode - */ -static void tipc_core_stop(void) +static void __net_exit tipc_exit_net(struct net *net) { - tipc_net_stop(); - tipc_bearer_cleanup(); - tipc_netlink_stop(); - tipc_subscr_stop(); - tipc_nametbl_stop(); - tipc_sk_ref_table_stop(); - tipc_socket_stop(); - tipc_unregister_sysctl(); + tipc_subscr_stop(net); + tipc_net_stop(net); + tipc_nametbl_stop(net); + tipc_sk_rht_destroy(net); } -/** - * tipc_core_start - switch TIPC from NOT RUNNING to SINGLE NODE mode - */ -static int tipc_core_start(void) +static struct pernet_operations tipc_net_ops = { + .init = tipc_init_net, + .exit = tipc_exit_net, + .id = &tipc_net_id, + .size = sizeof(struct tipc_net), +}; + +static int __init tipc_init(void) { int err; - get_random_bytes(&tipc_random, sizeof(tipc_random)); - - err = tipc_sk_ref_table_init(tipc_max_ports, tipc_random); - if (err) - goto out_reftbl; + pr_info("Activated (version " TIPC_MOD_VER ")\n"); - err = tipc_nametbl_init(); - if (err) - goto out_nametbl; + sysctl_tipc_rmem[0] = TIPC_CONN_OVERLOAD_LIMIT >> 4 << + TIPC_LOW_IMPORTANCE; + sysctl_tipc_rmem[1] = TIPC_CONN_OVERLOAD_LIMIT >> 4 << + TIPC_CRITICAL_IMPORTANCE; + sysctl_tipc_rmem[2] = TIPC_CONN_OVERLOAD_LIMIT; err = tipc_netlink_start(); if (err) goto out_netlink; + err = tipc_netlink_compat_start(); + if (err) + goto out_netlink_compat; + err = tipc_socket_init(); if (err) goto out_socket; @@ -118,58 +124,40 @@ static int tipc_core_start(void) if (err) goto out_sysctl; - err = tipc_subscr_start(); + err = register_pernet_subsys(&tipc_net_ops); if (err) - goto out_subscr; + goto out_pernet; err = tipc_bearer_setup(); if (err) goto out_bearer; + pr_info("Started in single node mode\n"); return 0; out_bearer: - tipc_subscr_stop(); -out_subscr: + unregister_pernet_subsys(&tipc_net_ops); +out_pernet: tipc_unregister_sysctl(); out_sysctl: tipc_socket_stop(); out_socket: + tipc_netlink_compat_stop(); +out_netlink_compat: tipc_netlink_stop(); out_netlink: - tipc_nametbl_stop(); -out_nametbl: - tipc_sk_ref_table_stop(); -out_reftbl: + pr_err("Unable to start in single node mode\n"); return err; } -static int __init tipc_init(void) -{ - int res; - - pr_info("Activated (version " TIPC_MOD_VER ")\n"); - - tipc_own_addr = 0; - tipc_max_ports = CONFIG_TIPC_PORTS; - tipc_net_id = 4711; - - sysctl_tipc_rmem[0] = TIPC_CONN_OVERLOAD_LIMIT >> 4 << - TIPC_LOW_IMPORTANCE; - sysctl_tipc_rmem[1] = TIPC_CONN_OVERLOAD_LIMIT >> 4 << - TIPC_CRITICAL_IMPORTANCE; - sysctl_tipc_rmem[2] = TIPC_CONN_OVERLOAD_LIMIT; - - res = tipc_core_start(); - if (res) - pr_err("Unable to start in single node mode\n"); - else - 
pr_info("Started in single node mode\n"); - return res; -} - static void __exit tipc_exit(void) { - tipc_core_stop(); + tipc_bearer_cleanup(); + tipc_netlink_stop(); + tipc_netlink_compat_stop(); + tipc_socket_stop(); + tipc_unregister_sysctl(); + unregister_pernet_subsys(&tipc_net_ops); + pr_info("Deactivated\n"); } diff --git a/net/tipc/core.h b/net/tipc/core.h index 84602137ce20..3dc68c7a966d 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -37,8 +37,6 @@ #ifndef _TIPC_CORE_H #define _TIPC_CORE_H -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - #include <linux/tipc.h> #include <linux/tipc_config.h> #include <linux/tipc_netlink.h> @@ -59,47 +57,54 @@ #include <linux/vmalloc.h> #include <linux/rtnetlink.h> #include <linux/etherdevice.h> +#include <net/netns/generic.h> +#include <linux/rhashtable.h> + +#include "node.h" +#include "bearer.h" +#include "bcast.h" +#include "netlink.h" +#include "link.h" +#include "node.h" +#include "msg.h" #define TIPC_MOD_VER "2.0.0" -#define ULTRA_STRING_MAX_LEN 32768 -#define TIPC_MAX_SUBSCRIPTIONS 65535 -#define TIPC_MAX_PUBLICATIONS 65535 +extern int tipc_net_id __read_mostly; +extern int sysctl_tipc_rmem[3] __read_mostly; +extern int sysctl_tipc_named_timeout __read_mostly; -struct tipc_msg; /* msg.h */ +struct tipc_net { + u32 own_addr; + int net_id; + int random; -int tipc_snprintf(char *buf, int len, const char *fmt, ...); + /* Node table and node list */ + spinlock_t node_list_lock; + struct hlist_head node_htable[NODE_HTABLE_SIZE]; + struct list_head node_list; + u32 num_nodes; + u32 num_links; -/* - * TIPC-specific error codes - */ -#define ELINKCONG EAGAIN /* link congestion <=> resource unavailable */ + /* Bearer list */ + struct tipc_bearer __rcu *bearer_list[MAX_BEARERS + 1]; -/* - * Global configuration variables - */ -extern u32 tipc_own_addr __read_mostly; -extern int tipc_max_ports __read_mostly; -extern int tipc_net_id __read_mostly; -extern int sysctl_tipc_rmem[3] __read_mostly; -extern int sysctl_tipc_named_timeout __read_mostly; + /* Broadcast link */ + struct tipc_bcbearer *bcbearer; + struct tipc_bclink *bclink; + struct tipc_link *bcl; -/* - * Other global variables - */ -extern int tipc_random __read_mostly; + /* Socket hash table */ + struct rhashtable sk_rht; -/* - * Routines available to privileged subsystems - */ -int tipc_netlink_start(void); -void tipc_netlink_stop(void); -int tipc_socket_init(void); -void tipc_socket_stop(void); -int tipc_sock_create_local(int type, struct socket **res); -void tipc_sock_release_local(struct socket *sock); -int tipc_sock_accept_local(struct socket *sock, struct socket **newsock, - int flags); + /* Name table */ + spinlock_t nametbl_lock; + struct name_table *nametbl; + + /* Topology subscription server */ + struct tipc_server *topsrv; + atomic_t subscription_count; +}; #ifdef CONFIG_SYSCTL int tipc_register_sysctl(void); @@ -108,102 +113,4 @@ void tipc_unregister_sysctl(void); #define tipc_register_sysctl() 0 #define tipc_unregister_sysctl() #endif - -/* - * TIPC timer code - */ -typedef void (*Handler) (unsigned long); - -/** - * k_init_timer - initialize a timer - * @timer: pointer to timer structure - * @routine: pointer to routine to invoke when timer expires - * @argument: value to pass to routine when timer expires - * - * Timer must be initialized before use (and terminated when no longer needed). 
- */ -static inline void k_init_timer(struct timer_list *timer, Handler routine, - unsigned long argument) -{ - setup_timer(timer, routine, argument); -} - -/** - * k_start_timer - start a timer - * @timer: pointer to timer structure - * @msec: time to delay (in ms) - * - * Schedules a previously initialized timer for later execution. - * If timer is already running, the new timeout overrides the previous request. - * - * To ensure the timer doesn't expire before the specified delay elapses, - * the amount of delay is rounded up when converting to the jiffies - * then an additional jiffy is added to account for the fact that - * the starting time may be in the middle of the current jiffy. - */ -static inline void k_start_timer(struct timer_list *timer, unsigned long msec) -{ - mod_timer(timer, jiffies + msecs_to_jiffies(msec) + 1); -} - -/** - * k_cancel_timer - cancel a timer - * @timer: pointer to timer structure - * - * Cancels a previously initialized timer. - * Can be called safely even if the timer is already inactive. - * - * WARNING: Must not be called when holding locks required by the timer's - * timeout routine, otherwise deadlock can occur on SMP systems! - */ -static inline void k_cancel_timer(struct timer_list *timer) -{ - del_timer_sync(timer); -} - -/** - * k_term_timer - terminate a timer - * @timer: pointer to timer structure - * - * Prevents further use of a previously initialized timer. - * - * WARNING: Caller must ensure timer isn't currently running. - * - * (Do not "enhance" this routine to automatically cancel an active timer, - * otherwise deadlock can arise when a timeout routine calls k_term_timer.) - */ -static inline void k_term_timer(struct timer_list *timer) -{ -} - -/* - * TIPC message buffer code - * - * TIPC message buffer headroom reserves space for the worst-case - * link-level device header (in case the message is sent off-node). - * - * Note: Headroom should be a multiple of 4 to ensure the TIPC header fields - * are word aligned for quicker access - */ -#define BUF_HEADROOM LL_MAX_HEADER - -struct tipc_skb_cb { - void *handle; - struct sk_buff *tail; - bool deferred; - bool wakeup_pending; - bool bundling; - u16 chain_sz; - u16 chain_imp; -}; - -#define TIPC_SKB_CB(__skb) ((struct tipc_skb_cb *)&((__skb)->cb[0])) - -static inline struct tipc_msg *buf_msg(struct sk_buff *skb) -{ - return (struct tipc_msg *)skb->data; -} - -struct sk_buff *tipc_buf_acquire(u32 size); - #endif diff --git a/net/tipc/discover.c b/net/tipc/discover.c index aa722a42ef8b..feef3753615d 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -1,7 +1,7 @@ /* * net/tipc/discover.c * - * Copyright (c) 2003-2006, 2014, Ericsson AB + * Copyright (c) 2003-2006, 2014-2015, Ericsson AB * Copyright (c) 2005-2006, 2010-2011, Wind River Systems * All rights reserved. 
* @@ -38,15 +38,19 @@ #include "link.h" #include "discover.h" -#define TIPC_LINK_REQ_INIT 125 /* min delay during bearer start up */ -#define TIPC_LINK_REQ_FAST 1000 /* max delay if bearer has no links */ -#define TIPC_LINK_REQ_SLOW 60000 /* max delay if bearer has links */ -#define TIPC_LINK_REQ_INACTIVE 0xffffffff /* indicates no timer in use */ - +/* min delay during bearer start up */ +#define TIPC_LINK_REQ_INIT msecs_to_jiffies(125) +/* max delay if bearer has no links */ +#define TIPC_LINK_REQ_FAST msecs_to_jiffies(1000) +/* max delay if bearer has links */ +#define TIPC_LINK_REQ_SLOW msecs_to_jiffies(60000) +/* indicates no timer in use */ +#define TIPC_LINK_REQ_INACTIVE 0xffffffff /** * struct tipc_link_req - information about an ongoing link setup request * @bearer_id: identity of bearer issuing requests + * @net: network namespace instance * @dest: destination address for request messages * @domain: network domain to which links can be established * @num_nodes: number of nodes currently discovered (i.e. with an active link) @@ -58,31 +62,35 @@ struct tipc_link_req { u32 bearer_id; struct tipc_media_addr dest; + struct net *net; u32 domain; int num_nodes; spinlock_t lock; struct sk_buff *buf; struct timer_list timer; - unsigned int timer_intv; + unsigned long timer_intv; }; /** * tipc_disc_init_msg - initialize a link setup message + * @net: the applicable net namespace * @type: message type (request or response) * @b_ptr: ptr to bearer issuing message */ -static void tipc_disc_init_msg(struct sk_buff *buf, u32 type, +static void tipc_disc_init_msg(struct net *net, struct sk_buff *buf, u32 type, struct tipc_bearer *b_ptr) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_msg *msg; u32 dest_domain = b_ptr->domain; msg = buf_msg(buf); - tipc_msg_init(msg, LINK_CONFIG, type, INT_H_SIZE, dest_domain); + tipc_msg_init(tn->own_addr, msg, LINK_CONFIG, type, + INT_H_SIZE, dest_domain); msg_set_non_seq(msg, 1); - msg_set_node_sig(msg, tipc_random); + msg_set_node_sig(msg, tn->random); msg_set_dest_domain(msg, dest_domain); - msg_set_bc_netid(msg, tipc_net_id); + msg_set_bc_netid(msg, tn->net_id); b_ptr->media->addr2msg(msg_media_addr(msg), &b_ptr->addr); } @@ -107,11 +115,14 @@ static void disc_dupl_alert(struct tipc_bearer *b_ptr, u32 node_addr, /** * tipc_disc_rcv - handle incoming discovery message (request or response) + * @net: the applicable net namespace * @buf: buffer containing message * @bearer: bearer that message arrived on */ -void tipc_disc_rcv(struct sk_buff *buf, struct tipc_bearer *bearer) +void tipc_disc_rcv(struct net *net, struct sk_buff *buf, + struct tipc_bearer *bearer) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_node *node; struct tipc_link *link; struct tipc_media_addr maddr; @@ -133,7 +144,7 @@ void tipc_disc_rcv(struct sk_buff *buf, struct tipc_bearer *bearer) kfree_skb(buf); /* Ensure message from node is valid and communication is permitted */ - if (net_id != tipc_net_id) + if (net_id != tn->net_id) return; if (maddr.broadcast) return; @@ -142,23 +153,19 @@ void tipc_disc_rcv(struct sk_buff *buf, struct tipc_bearer *bearer) if (!tipc_addr_node_valid(onode)) return; - if (in_own_node(onode)) { + if (in_own_node(net, onode)) { if (memcmp(&maddr, &bearer->addr, sizeof(maddr))) - disc_dupl_alert(bearer, tipc_own_addr, &maddr); + disc_dupl_alert(bearer, tn->own_addr, &maddr); return; } - if (!tipc_in_scope(ddom, tipc_own_addr)) + if (!tipc_in_scope(ddom, tn->own_addr)) return; if (!tipc_in_scope(bearer->domain, onode)) 
return; - /* Locate, or if necessary, create, node: */ - node = tipc_node_find(onode); - if (!node) - node = tipc_node_create(onode); + node = tipc_node_create(net, onode); if (!node) return; - tipc_node_lock(node); link = node->links[bearer->identity]; @@ -244,8 +251,8 @@ void tipc_disc_rcv(struct sk_buff *buf, struct tipc_bearer *bearer) if (respond && (mtyp == DSC_REQ_MSG)) { rbuf = tipc_buf_acquire(INT_H_SIZE); if (rbuf) { - tipc_disc_init_msg(rbuf, DSC_RESP_MSG, bearer); - tipc_bearer_send(bearer->identity, rbuf, &maddr); + tipc_disc_init_msg(net, rbuf, DSC_RESP_MSG, bearer); + tipc_bearer_send(net, bearer->identity, rbuf, &maddr); kfree_skb(rbuf); } } @@ -265,7 +272,7 @@ static void disc_update(struct tipc_link_req *req) if ((req->timer_intv == TIPC_LINK_REQ_INACTIVE) || (req->timer_intv > TIPC_LINK_REQ_FAST)) { req->timer_intv = TIPC_LINK_REQ_INIT; - k_start_timer(&req->timer, req->timer_intv); + mod_timer(&req->timer, jiffies + req->timer_intv); } } } @@ -295,12 +302,13 @@ void tipc_disc_remove_dest(struct tipc_link_req *req) /** * disc_timeout - send a periodic link setup request - * @req: ptr to link request structure + * @data: ptr to link request structure * * Called whenever a link setup request timer associated with a bearer expires. */ -static void disc_timeout(struct tipc_link_req *req) +static void disc_timeout(unsigned long data) { + struct tipc_link_req *req = (struct tipc_link_req *)data; int max_delay; spin_lock_bh(&req->lock); @@ -318,7 +326,7 @@ static void disc_timeout(struct tipc_link_req *req) * hold at fast polling rate if don't have any associated nodes, * otherwise hold at slow polling rate */ - tipc_bearer_send(req->bearer_id, req->buf, &req->dest); + tipc_bearer_send(req->net, req->bearer_id, req->buf, &req->dest); req->timer_intv *= 2; @@ -329,20 +337,22 @@ static void disc_timeout(struct tipc_link_req *req) if (req->timer_intv > max_delay) req->timer_intv = max_delay; - k_start_timer(&req->timer, req->timer_intv); + mod_timer(&req->timer, jiffies + req->timer_intv); exit: spin_unlock_bh(&req->lock); } /** * tipc_disc_create - create object to send periodic link setup requests + * @net: the applicable net namespace * @b_ptr: ptr to bearer issuing requests * @dest: destination address for request messages * @dest_domain: network domain to which links can be established * * Returns 0 if successful, otherwise -errno. 
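For reference, the timer handling that the discovery hunks above converge on is a standard kernel-timer pattern with jiffies-based intervals; a minimal sketch follows (names mirror the patch, and max_delay stands for whichever of TIPC_LINK_REQ_FAST/TIPC_LINK_REQ_SLOW applies to the bearer's current state):

	/* arm at create/reset time; TIPC_LINK_REQ_INIT is msecs_to_jiffies(125) */
	setup_timer(&req->timer, disc_timeout, (unsigned long)req);
	mod_timer(&req->timer, jiffies + TIPC_LINK_REQ_INIT);

	/* in disc_timeout(): double the interval, capped at max_delay */
	req->timer_intv *= 2;
	if (req->timer_intv > max_delay)
		req->timer_intv = max_delay;
	mod_timer(&req->timer, jiffies + req->timer_intv);

	/* tear down in tipc_disc_delete() */
	del_timer_sync(&req->timer);
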
*/ -int tipc_disc_create(struct tipc_bearer *b_ptr, struct tipc_media_addr *dest) +int tipc_disc_create(struct net *net, struct tipc_bearer *b_ptr, + struct tipc_media_addr *dest) { struct tipc_link_req *req; @@ -356,17 +366,18 @@ int tipc_disc_create(struct tipc_bearer *b_ptr, struct tipc_media_addr *dest) return -ENOMEM; } - tipc_disc_init_msg(req->buf, DSC_REQ_MSG, b_ptr); + tipc_disc_init_msg(net, req->buf, DSC_REQ_MSG, b_ptr); memcpy(&req->dest, dest, sizeof(*dest)); + req->net = net; req->bearer_id = b_ptr->identity; req->domain = b_ptr->domain; req->num_nodes = 0; req->timer_intv = TIPC_LINK_REQ_INIT; spin_lock_init(&req->lock); - k_init_timer(&req->timer, (Handler)disc_timeout, (unsigned long)req); - k_start_timer(&req->timer, req->timer_intv); + setup_timer(&req->timer, disc_timeout, (unsigned long)req); + mod_timer(&req->timer, jiffies + req->timer_intv); b_ptr->link_req = req; - tipc_bearer_send(req->bearer_id, req->buf, &req->dest); + tipc_bearer_send(net, req->bearer_id, req->buf, &req->dest); return 0; } @@ -376,28 +387,29 @@ int tipc_disc_create(struct tipc_bearer *b_ptr, struct tipc_media_addr *dest) */ void tipc_disc_delete(struct tipc_link_req *req) { - k_cancel_timer(&req->timer); - k_term_timer(&req->timer); + del_timer_sync(&req->timer); kfree_skb(req->buf); kfree(req); } /** * tipc_disc_reset - reset object to send periodic link setup requests + * @net: the applicable net namespace * @b_ptr: ptr to bearer issuing requests * @dest_domain: network domain to which links can be established */ -void tipc_disc_reset(struct tipc_bearer *b_ptr) +void tipc_disc_reset(struct net *net, struct tipc_bearer *b_ptr) { struct tipc_link_req *req = b_ptr->link_req; spin_lock_bh(&req->lock); - tipc_disc_init_msg(req->buf, DSC_REQ_MSG, b_ptr); + tipc_disc_init_msg(net, req->buf, DSC_REQ_MSG, b_ptr); + req->net = net; req->bearer_id = b_ptr->identity; req->domain = b_ptr->domain; req->num_nodes = 0; req->timer_intv = TIPC_LINK_REQ_INIT; - k_start_timer(&req->timer, req->timer_intv); - tipc_bearer_send(req->bearer_id, req->buf, &req->dest); + mod_timer(&req->timer, jiffies + req->timer_intv); + tipc_bearer_send(net, req->bearer_id, req->buf, &req->dest); spin_unlock_bh(&req->lock); } diff --git a/net/tipc/discover.h b/net/tipc/discover.h index 515b57392f4d..c9b12770c5ed 100644 --- a/net/tipc/discover.h +++ b/net/tipc/discover.h @@ -39,11 +39,13 @@ struct tipc_link_req; -int tipc_disc_create(struct tipc_bearer *b_ptr, struct tipc_media_addr *dest); +int tipc_disc_create(struct net *net, struct tipc_bearer *b_ptr, + struct tipc_media_addr *dest); void tipc_disc_delete(struct tipc_link_req *req); -void tipc_disc_reset(struct tipc_bearer *b_ptr); +void tipc_disc_reset(struct net *net, struct tipc_bearer *b_ptr); void tipc_disc_add_dest(struct tipc_link_req *req); void tipc_disc_remove_dest(struct tipc_link_req *req); -void tipc_disc_rcv(struct sk_buff *buf, struct tipc_bearer *b_ptr); +void tipc_disc_rcv(struct net *net, struct sk_buff *buf, + struct tipc_bearer *b_ptr); #endif diff --git a/net/tipc/link.c b/net/tipc/link.c index 23bcc1132365..a4cf364316de 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -40,7 +40,6 @@ #include "socket.h" #include "name_distr.h" #include "discover.h" -#include "config.h" #include "netlink.h" #include <linux/pkt_sched.h> @@ -101,19 +100,20 @@ static const struct nla_policy tipc_nl_prop_policy[TIPC_NLA_PROP_MAX + 1] = { */ #define START_CHANGEOVER 100000u -static void link_handle_out_of_seq_msg(struct tipc_link *l_ptr, - struct sk_buff *buf); -static void 
tipc_link_proto_rcv(struct tipc_link *l_ptr, struct sk_buff *buf); -static int tipc_link_tunnel_rcv(struct tipc_node *n_ptr, - struct sk_buff **buf); -static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tolerance); +static void link_handle_out_of_seq_msg(struct tipc_link *link, + struct sk_buff *skb); +static void tipc_link_proto_rcv(struct tipc_link *link, + struct sk_buff *skb); +static int tipc_link_tunnel_rcv(struct tipc_node *node, + struct sk_buff **skb); +static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tol); static void link_state_event(struct tipc_link *l_ptr, u32 event); static void link_reset_statistics(struct tipc_link *l_ptr); static void link_print(struct tipc_link *l_ptr, const char *str); static void tipc_link_sync_xmit(struct tipc_link *l); static void tipc_link_sync_rcv(struct tipc_node *n, struct sk_buff *buf); -static int tipc_link_input(struct tipc_link *l, struct sk_buff *buf); -static int tipc_link_prepare_input(struct tipc_link *l, struct sk_buff **buf); +static void tipc_link_input(struct tipc_link *l, struct sk_buff *skb); +static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb); /* * Simple link routines @@ -123,13 +123,30 @@ static unsigned int align(unsigned int i) return (i + 3) & ~3u; } +static void tipc_link_release(struct kref *kref) +{ + kfree(container_of(kref, struct tipc_link, ref)); +} + +static void tipc_link_get(struct tipc_link *l_ptr) +{ + kref_get(&l_ptr->ref); +} + +static void tipc_link_put(struct tipc_link *l_ptr) +{ + kref_put(&l_ptr->ref, tipc_link_release); +} + static void link_init_max_pkt(struct tipc_link *l_ptr) { + struct tipc_node *node = l_ptr->owner; + struct tipc_net *tn = net_generic(node->net, tipc_net_id); struct tipc_bearer *b_ptr; u32 max_pkt; rcu_read_lock(); - b_ptr = rcu_dereference_rtnl(bearer_list[l_ptr->bearer_id]); + b_ptr = rcu_dereference_rtnl(tn->bearer_list[l_ptr->bearer_id]); if (!b_ptr) { rcu_read_unlock(); return; @@ -169,8 +186,9 @@ int tipc_link_is_active(struct tipc_link *l_ptr) * link_timeout - handle expiration of link timer * @l_ptr: pointer to link */ -static void link_timeout(struct tipc_link *l_ptr) +static void link_timeout(unsigned long data) { + struct tipc_link *l_ptr = (struct tipc_link *)data; struct sk_buff *skb; tipc_node_lock(l_ptr->owner); @@ -215,11 +233,13 @@ static void link_timeout(struct tipc_link *l_ptr) tipc_link_push_packets(l_ptr); tipc_node_unlock(l_ptr->owner); + tipc_link_put(l_ptr); } -static void link_set_timer(struct tipc_link *l_ptr, u32 time) +static void link_set_timer(struct tipc_link *link, unsigned long time) { - k_start_timer(&l_ptr->timer, time); + if (!mod_timer(&link->timer, jiffies + time)) + tipc_link_get(link); } /** @@ -234,6 +254,7 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, struct tipc_bearer *b_ptr, const struct tipc_media_addr *media_addr) { + struct tipc_net *tn = net_generic(n_ptr->net, tipc_net_id); struct tipc_link *l_ptr; struct tipc_msg *msg; char *if_name; @@ -259,12 +280,12 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, pr_warn("Link creation failed, no memory\n"); return NULL; } - + kref_init(&l_ptr->ref); l_ptr->addr = peer; if_name = strchr(b_ptr->name, ':') + 1; sprintf(l_ptr->name, "%u.%u.%u:%s-%u.%u.%u:unknown", - tipc_zone(tipc_own_addr), tipc_cluster(tipc_own_addr), - tipc_node(tipc_own_addr), + tipc_zone(tn->own_addr), tipc_cluster(tn->own_addr), + tipc_node(tn->own_addr), if_name, tipc_zone(peer), tipc_cluster(peer), tipc_node(peer)); /* note: peer i/f name is 
updated by reset/activate message */ @@ -278,9 +299,10 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, l_ptr->pmsg = (struct tipc_msg *)&l_ptr->proto_msg; msg = l_ptr->pmsg; - tipc_msg_init(msg, LINK_PROTOCOL, RESET_MSG, INT_H_SIZE, l_ptr->addr); + tipc_msg_init(tn->own_addr, msg, LINK_PROTOCOL, RESET_MSG, INT_H_SIZE, + l_ptr->addr); msg_set_size(msg, sizeof(l_ptr->proto_msg)); - msg_set_session(msg, (tipc_random & 0xffff)); + msg_set_session(msg, (tn->random & 0xffff)); msg_set_bearer_id(msg, b_ptr->identity); strcpy((char *)msg_data(msg), if_name); @@ -293,48 +315,52 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, l_ptr->next_out_no = 1; __skb_queue_head_init(&l_ptr->outqueue); __skb_queue_head_init(&l_ptr->deferred_queue); - skb_queue_head_init(&l_ptr->waiting_sks); - + skb_queue_head_init(&l_ptr->wakeupq); + skb_queue_head_init(&l_ptr->inputq); + skb_queue_head_init(&l_ptr->namedq); link_reset_statistics(l_ptr); - tipc_node_attach_link(n_ptr, l_ptr); - - k_init_timer(&l_ptr->timer, (Handler)link_timeout, - (unsigned long)l_ptr); - + setup_timer(&l_ptr->timer, link_timeout, (unsigned long)l_ptr); link_state_event(l_ptr, STARTING_EVT); return l_ptr; } -void tipc_link_delete_list(unsigned int bearer_id, bool shutting_down) +/** + * link_delete - Conditional deletion of link. + * If timer still running, real delete is done when it expires + * @link: link to be deleted + */ +void tipc_link_delete(struct tipc_link *link) +{ + tipc_link_reset_fragments(link); + tipc_node_detach_link(link->owner, link); + tipc_link_put(link); +} + +void tipc_link_delete_list(struct net *net, unsigned int bearer_id, + bool shutting_down) { - struct tipc_link *l_ptr; - struct tipc_node *n_ptr; + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_link *link; + struct tipc_node *node; rcu_read_lock(); - list_for_each_entry_rcu(n_ptr, &tipc_node_list, list) { - tipc_node_lock(n_ptr); - l_ptr = n_ptr->links[bearer_id]; - if (l_ptr) { - tipc_link_reset(l_ptr); - if (shutting_down || !tipc_node_is_up(n_ptr)) { - tipc_node_detach_link(l_ptr->owner, l_ptr); - tipc_link_reset_fragments(l_ptr); - tipc_node_unlock(n_ptr); - - /* Nobody else can access this link now: */ - del_timer_sync(&l_ptr->timer); - kfree(l_ptr); - } else { - /* Detach/delete when failover is finished: */ - l_ptr->flags |= LINK_STOPPED; - tipc_node_unlock(n_ptr); - del_timer_sync(&l_ptr->timer); - } + list_for_each_entry_rcu(node, &tn->node_list, list) { + tipc_node_lock(node); + link = node->links[bearer_id]; + if (!link) { + tipc_node_unlock(node); continue; } - tipc_node_unlock(n_ptr); + tipc_link_reset(link); + if (del_timer(&link->timer)) + tipc_link_put(link); + link->flags |= LINK_STOPPED; + /* Delete link now, or when failover is finished: */ + if (shutting_down || !tipc_node_is_up(node)) + tipc_link_delete(link); + tipc_node_unlock(node); } rcu_read_unlock(); } @@ -352,13 +378,14 @@ static bool link_schedule_user(struct tipc_link *link, u32 oport, { struct sk_buff *buf; - buf = tipc_msg_create(SOCK_WAKEUP, 0, INT_H_SIZE, 0, tipc_own_addr, - tipc_own_addr, oport, 0, 0); + buf = tipc_msg_create(SOCK_WAKEUP, 0, INT_H_SIZE, 0, + link_own_addr(link), link_own_addr(link), + oport, 0, 0); if (!buf) return false; TIPC_SKB_CB(buf)->chain_sz = chain_sz; TIPC_SKB_CB(buf)->chain_imp = imp; - skb_queue_tail(&link->waiting_sks, buf); + skb_queue_tail(&link->wakeupq, buf); link->stats.link_congs++; return true; } @@ -369,17 +396,19 @@ static bool link_schedule_user(struct tipc_link *link, u32 oport, * Move a 
number of waiting users, as permitted by available space in * the send queue, from link wait queue to node wait queue for wakeup */ -static void link_prepare_wakeup(struct tipc_link *link) +void link_prepare_wakeup(struct tipc_link *link) { uint pend_qsz = skb_queue_len(&link->outqueue); struct sk_buff *skb, *tmp; - skb_queue_walk_safe(&link->waiting_sks, skb, tmp) { + skb_queue_walk_safe(&link->wakeupq, skb, tmp) { if (pend_qsz >= link->queue_limit[TIPC_SKB_CB(skb)->chain_imp]) break; pend_qsz += TIPC_SKB_CB(skb)->chain_sz; - skb_unlink(skb, &link->waiting_sks); - skb_queue_tail(&link->owner->waiting_sks, skb); + skb_unlink(skb, &link->wakeupq); + skb_queue_tail(&link->inputq, skb); + link->owner->inputq = &link->inputq; + link->owner->action_flags |= TIPC_MSG_EVT; } } @@ -425,20 +454,20 @@ void tipc_link_reset(struct tipc_link *l_ptr) return; tipc_node_link_down(l_ptr->owner, l_ptr); - tipc_bearer_remove_dest(l_ptr->bearer_id, l_ptr->addr); + tipc_bearer_remove_dest(owner->net, l_ptr->bearer_id, l_ptr->addr); if (was_active_link && tipc_node_active_links(l_ptr->owner)) { l_ptr->reset_checkpoint = checkpoint; l_ptr->exp_msg_count = START_CHANGEOVER; } - /* Clean up all queues: */ + /* Clean up all queues, except inputq: */ __skb_queue_purge(&l_ptr->outqueue); __skb_queue_purge(&l_ptr->deferred_queue); - if (!skb_queue_empty(&l_ptr->waiting_sks)) { - skb_queue_splice_init(&l_ptr->waiting_sks, &owner->waiting_sks); - owner->action_flags |= TIPC_WAKEUP_USERS; - } + skb_queue_splice_init(&l_ptr->wakeupq, &l_ptr->inputq); + if (!skb_queue_empty(&l_ptr->inputq)) + owner->action_flags |= TIPC_MSG_EVT; + owner->inputq = &l_ptr->inputq; l_ptr->next_out = NULL; l_ptr->unacked_window = 0; l_ptr->checkpoint = 1; @@ -448,13 +477,14 @@ void tipc_link_reset(struct tipc_link *l_ptr) link_reset_statistics(l_ptr); } -void tipc_link_reset_list(unsigned int bearer_id) +void tipc_link_reset_list(struct net *net, unsigned int bearer_id) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_link *l_ptr; struct tipc_node *n_ptr; rcu_read_lock(); - list_for_each_entry_rcu(n_ptr, &tipc_node_list, list) { + list_for_each_entry_rcu(n_ptr, &tn->node_list, list) { tipc_node_lock(n_ptr); l_ptr = n_ptr->links[bearer_id]; if (l_ptr) @@ -464,11 +494,14 @@ void tipc_link_reset_list(unsigned int bearer_id) rcu_read_unlock(); } -static void link_activate(struct tipc_link *l_ptr) +static void link_activate(struct tipc_link *link) { - l_ptr->next_in_no = l_ptr->stats.recv_info = 1; - tipc_node_link_up(l_ptr->owner, l_ptr); - tipc_bearer_add_dest(l_ptr->bearer_id, l_ptr->addr); + struct tipc_node *node = link->owner; + + link->next_in_no = 1; + link->stats.recv_info = 1; + tipc_node_link_up(node, link); + tipc_bearer_add_dest(node->net, link->bearer_id, link->addr); } /** @@ -479,7 +512,7 @@ static void link_activate(struct tipc_link *l_ptr) static void link_state_event(struct tipc_link *l_ptr, unsigned int event) { struct tipc_link *other; - u32 cont_intv = l_ptr->continuity_interval; + unsigned long cont_intv = l_ptr->cont_intv; if (l_ptr->flags & LINK_STOPPED) return; @@ -522,8 +555,8 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) link_set_timer(l_ptr, cont_intv / 4); break; case RESET_MSG: - pr_info("%s<%s>, requested by peer\n", link_rst_msg, - l_ptr->name); + pr_debug("%s<%s>, requested by peer\n", + link_rst_msg, l_ptr->name); tipc_link_reset(l_ptr); l_ptr->state = RESET_RESET; l_ptr->fsm_msg_cnt = 0; @@ -533,7 +566,7 @@ static void link_state_event(struct tipc_link *l_ptr, 
unsigned int event) link_set_timer(l_ptr, cont_intv); break; default: - pr_err("%s%u in WW state\n", link_unk_evt, event); + pr_debug("%s%u in WW state\n", link_unk_evt, event); } break; case WORKING_UNKNOWN: @@ -545,8 +578,8 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) link_set_timer(l_ptr, cont_intv); break; case RESET_MSG: - pr_info("%s<%s>, requested by peer while probing\n", - link_rst_msg, l_ptr->name); + pr_debug("%s<%s>, requested by peer while probing\n", + link_rst_msg, l_ptr->name); tipc_link_reset(l_ptr); l_ptr->state = RESET_RESET; l_ptr->fsm_msg_cnt = 0; @@ -572,8 +605,8 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) l_ptr->fsm_msg_cnt++; link_set_timer(l_ptr, cont_intv / 4); } else { /* Link has failed */ - pr_warn("%s<%s>, peer not responding\n", - link_rst_msg, l_ptr->name); + pr_debug("%s<%s>, peer not responding\n", + link_rst_msg, l_ptr->name); tipc_link_reset(l_ptr); l_ptr->state = RESET_UNKNOWN; l_ptr->fsm_msg_cnt = 0; @@ -614,7 +647,9 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) break; case STARTING_EVT: l_ptr->flags |= LINK_STARTED; - /* fall through */ + l_ptr->fsm_msg_cnt++; + link_set_timer(l_ptr, cont_intv); + break; case TIMEOUT_EVT: tipc_link_proto_xmit(l_ptr, RESET_MSG, 0, 0, 0, 0, 0); l_ptr->fsm_msg_cnt++; @@ -700,7 +735,8 @@ drop: * Only the socket functions tipc_send_stream() and tipc_send_packet() need * to act on the return value, since they may need to do more send attempts. */ -int __tipc_link_xmit(struct tipc_link *link, struct sk_buff_head *list) +int __tipc_link_xmit(struct net *net, struct tipc_link *link, + struct sk_buff_head *list) { struct tipc_msg *msg = buf_msg(skb_peek(list)); uint psz = msg_size(msg); @@ -733,7 +769,8 @@ int __tipc_link_xmit(struct tipc_link *link, struct sk_buff_head *list) if (skb_queue_len(outqueue) < sndlim) { __skb_queue_tail(outqueue, skb); - tipc_bearer_send(link->bearer_id, skb, addr); + tipc_bearer_send(net, link->bearer_id, + skb, addr); link->next_out = NULL; link->unacked_window = 0; } else if (tipc_msg_bundle(outqueue, skb, mtu)) { @@ -758,7 +795,7 @@ int __tipc_link_xmit(struct tipc_link *link, struct sk_buff_head *list) static void skb2list(struct sk_buff *skb, struct sk_buff_head *list) { - __skb_queue_head_init(list); + skb_queue_head_init(list); __skb_queue_tail(list, skb); } @@ -767,19 +804,21 @@ static int __tipc_link_xmit_skb(struct tipc_link *link, struct sk_buff *skb) struct sk_buff_head head; skb2list(skb, &head); - return __tipc_link_xmit(link, &head); + return __tipc_link_xmit(link->owner->net, link, &head); } -int tipc_link_xmit_skb(struct sk_buff *skb, u32 dnode, u32 selector) +int tipc_link_xmit_skb(struct net *net, struct sk_buff *skb, u32 dnode, + u32 selector) { struct sk_buff_head head; skb2list(skb, &head); - return tipc_link_xmit(&head, dnode, selector); + return tipc_link_xmit(net, &head, dnode, selector); } /** * tipc_link_xmit() is the general link level function for message sending + * @net: the applicable net namespace * @list: chain of buffers containing message * @dsz: amount of user data to be sent * @dnode: address of destination node @@ -787,33 +826,28 @@ int tipc_link_xmit_skb(struct sk_buff *skb, u32 dnode, u32 selector) * Consumes the buffer chain, except when returning -ELINKCONG * Returns 0 if success, otherwise errno: -ELINKCONG,-EHOSTUNREACH,-EMSGSIZE */ -int tipc_link_xmit(struct sk_buff_head *list, u32 dnode, u32 selector) +int tipc_link_xmit(struct net *net, struct sk_buff_head 
*list, u32 dnode, + u32 selector) { struct tipc_link *link = NULL; struct tipc_node *node; int rc = -EHOSTUNREACH; - node = tipc_node_find(dnode); + node = tipc_node_find(net, dnode); if (node) { tipc_node_lock(node); link = node->active_links[selector & 1]; if (link) - rc = __tipc_link_xmit(link, list); + rc = __tipc_link_xmit(net, link, list); tipc_node_unlock(node); } - if (link) return rc; - if (likely(in_own_node(dnode))) { - /* As a node local message chain never contains more than one - * buffer, we just need to dequeue one SKB buffer from the - * head list. - */ - return tipc_sk_rcv(__skb_dequeue(list)); - } - __skb_queue_purge(list); + if (likely(in_own_node(net, dnode))) + return tipc_sk_rcv(net, list); + __skb_queue_purge(list); return rc; } @@ -835,7 +869,8 @@ static void tipc_link_sync_xmit(struct tipc_link *link) return; msg = buf_msg(skb); - tipc_msg_init(msg, BCAST_PROTOCOL, STATE_MSG, INT_H_SIZE, link->addr); + tipc_msg_init(link_own_addr(link), msg, BCAST_PROTOCOL, STATE_MSG, + INT_H_SIZE, link->addr); msg_set_last_bcast(msg, link->owner->bclink.acked); __tipc_link_xmit_skb(link, skb); } @@ -890,7 +925,8 @@ void tipc_link_push_packets(struct tipc_link *l_ptr) msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in); if (msg_user(msg) == MSG_BUNDLER) TIPC_SKB_CB(skb)->bundling = false; - tipc_bearer_send(l_ptr->bearer_id, skb, + tipc_bearer_send(l_ptr->owner->net, + l_ptr->bearer_id, skb, &l_ptr->media_addr); l_ptr->next_out = tipc_skb_queue_next(outqueue, skb); } else { @@ -923,6 +959,7 @@ static void link_retransmit_failure(struct tipc_link *l_ptr, struct sk_buff *buf) { struct tipc_msg *msg = buf_msg(buf); + struct net *net = l_ptr->owner->net; pr_warn("Retransmission failure on link <%s>\n", l_ptr->name); @@ -940,7 +977,7 @@ static void link_retransmit_failure(struct tipc_link *l_ptr, pr_cont("Outstanding acks: %lu\n", (unsigned long) TIPC_SKB_CB(buf)->handle); - n_ptr = tipc_bclink_retransmit_to(); + n_ptr = tipc_bclink_retransmit_to(net); tipc_node_lock(n_ptr); tipc_addr_string_fill(addr_string, n_ptr->addr); @@ -955,7 +992,7 @@ static void link_retransmit_failure(struct tipc_link *l_ptr, tipc_node_unlock(n_ptr); - tipc_bclink_set_flags(TIPC_BCLINK_RESET); + tipc_bclink_set_flags(net, TIPC_BCLINK_RESET); l_ptr->stale_count = 0; } } @@ -987,7 +1024,8 @@ void tipc_link_retransmit(struct tipc_link *l_ptr, struct sk_buff *skb, msg = buf_msg(skb); msg_set_ack(msg, mod(l_ptr->next_in_no - 1)); msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in); - tipc_bearer_send(l_ptr->bearer_id, skb, &l_ptr->media_addr); + tipc_bearer_send(l_ptr->owner->net, l_ptr->bearer_id, skb, + &l_ptr->media_addr); retransmits--; l_ptr->stats.retransmitted++; } @@ -1063,14 +1101,16 @@ static int link_recv_buf_validate(struct sk_buff *buf) /** * tipc_rcv - process TIPC packets/messages arriving from off-node + * @net: the applicable net namespace * @skb: TIPC packet * @b_ptr: pointer to bearer message arrived on * * Invoked with no locks held. Bearer pointer must point to a valid bearer * structure (i.e. cannot be NULL), but bearer can be inactive. 
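The struct net * argument that tipc_rcv(), tipc_link_xmit() and friends now take exists to reach the per-namespace TIPC state registered through tipc_net_ops; a minimal sketch of the lookup idiom used throughout these hunks (the helper name example_own_addr is hypothetical, the rest mirrors the patch):

	#include <net/net_namespace.h>
	#include <net/netns/generic.h>

	extern int tipc_net_id;	/* allocated when register_pernet_subsys() runs */

	static u32 example_own_addr(struct net *net)
	{
		struct tipc_net *tn = net_generic(net, tipc_net_id);

		/* what used to be the global tipc_own_addr is now per namespace */
		return tn->own_addr;
	}
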
*/ -void tipc_rcv(struct sk_buff *skb, struct tipc_bearer *b_ptr) +void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct sk_buff_head head; struct tipc_node *n_ptr; struct tipc_link *l_ptr; @@ -1096,19 +1136,19 @@ void tipc_rcv(struct sk_buff *skb, struct tipc_bearer *b_ptr) if (unlikely(msg_non_seq(msg))) { if (msg_user(msg) == LINK_CONFIG) - tipc_disc_rcv(skb, b_ptr); + tipc_disc_rcv(net, skb, b_ptr); else - tipc_bclink_rcv(skb); + tipc_bclink_rcv(net, skb); continue; } /* Discard unicast link messages destined for another node */ if (unlikely(!msg_short(msg) && - (msg_destnode(msg) != tipc_own_addr))) + (msg_destnode(msg) != tn->own_addr))) goto discard; /* Locate neighboring node that sent message */ - n_ptr = tipc_node_find(msg_prevnode(msg)); + n_ptr = tipc_node_find(net, msg_prevnode(msg)); if (unlikely(!n_ptr)) goto discard; tipc_node_lock(n_ptr); @@ -1116,7 +1156,7 @@ void tipc_rcv(struct sk_buff *skb, struct tipc_bearer *b_ptr) /* Locate unicast link endpoint that should handle message */ l_ptr = n_ptr->links[b_ptr->identity]; if (unlikely(!l_ptr)) - goto unlock_discard; + goto unlock; /* Verify that communication with node is currently allowed */ if ((n_ptr->action_flags & TIPC_WAIT_PEER_LINKS_DOWN) && @@ -1127,7 +1167,7 @@ void tipc_rcv(struct sk_buff *skb, struct tipc_bearer *b_ptr) n_ptr->action_flags &= ~TIPC_WAIT_PEER_LINKS_DOWN; if (tipc_node_blocked(n_ptr)) - goto unlock_discard; + goto unlock; /* Validate message sequence number info */ seq_no = msg_seqno(msg); @@ -1151,18 +1191,16 @@ void tipc_rcv(struct sk_buff *skb, struct tipc_bearer *b_ptr) if (unlikely(l_ptr->next_out)) tipc_link_push_packets(l_ptr); - if (released && !skb_queue_empty(&l_ptr->waiting_sks)) { + if (released && !skb_queue_empty(&l_ptr->wakeupq)) link_prepare_wakeup(l_ptr); - l_ptr->owner->action_flags |= TIPC_WAKEUP_USERS; - } /* Process the incoming packet */ if (unlikely(!link_working_working(l_ptr))) { if (msg_user(msg) == LINK_PROTOCOL) { tipc_link_proto_rcv(l_ptr, skb); link_retrieve_defq(l_ptr, &head); - tipc_node_unlock(n_ptr); - continue; + skb = NULL; + goto unlock; } /* Traffic message. 
Conditionally activate link */ @@ -1171,18 +1209,18 @@ void tipc_rcv(struct sk_buff *skb, struct tipc_bearer *b_ptr) if (link_working_working(l_ptr)) { /* Re-insert buffer in front of queue */ __skb_queue_head(&head, skb); - tipc_node_unlock(n_ptr); - continue; + skb = NULL; + goto unlock; } - goto unlock_discard; + goto unlock; } /* Link is now in state WORKING_WORKING */ if (unlikely(seq_no != mod(l_ptr->next_in_no))) { link_handle_out_of_seq_msg(l_ptr, skb); link_retrieve_defq(l_ptr, &head); - tipc_node_unlock(n_ptr); - continue; + skb = NULL; + goto unlock; } l_ptr->next_in_no++; if (unlikely(!skb_queue_empty(&l_ptr->deferred_queue))) @@ -1192,95 +1230,102 @@ void tipc_rcv(struct sk_buff *skb, struct tipc_bearer *b_ptr) l_ptr->stats.sent_acks++; tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, 0, 0, 0, 0); } - - if (tipc_link_prepare_input(l_ptr, &skb)) { - tipc_node_unlock(n_ptr); - continue; - } - tipc_node_unlock(n_ptr); - - if (tipc_link_input(l_ptr, skb) != 0) - goto discard; - continue; -unlock_discard: + tipc_link_input(l_ptr, skb); + skb = NULL; +unlock: tipc_node_unlock(n_ptr); discard: - kfree_skb(skb); + if (unlikely(skb)) + kfree_skb(skb); } } -/** - * tipc_link_prepare_input - process TIPC link messages - * - * returns nonzero if the message was consumed +/* tipc_data_input - deliver data and name distr msgs to upper layer * + * Consumes buffer if message is of right type * Node lock must be held */ -static int tipc_link_prepare_input(struct tipc_link *l, struct sk_buff **buf) +static bool tipc_data_input(struct tipc_link *link, struct sk_buff *skb) { - struct tipc_node *n; - struct tipc_msg *msg; - int res = -EINVAL; + struct tipc_node *node = link->owner; + struct tipc_msg *msg = buf_msg(skb); + u32 dport = msg_destport(msg); - n = l->owner; - msg = buf_msg(*buf); switch (msg_user(msg)) { - case CHANGEOVER_PROTOCOL: - if (tipc_link_tunnel_rcv(n, buf)) - res = 0; - break; - case MSG_FRAGMENTER: - l->stats.recv_fragments++; - if (tipc_buf_append(&l->reasm_buf, buf)) { - l->stats.recv_fragmented++; - res = 0; - } else if (!l->reasm_buf) { - tipc_link_reset(l); + case TIPC_LOW_IMPORTANCE: + case TIPC_MEDIUM_IMPORTANCE: + case TIPC_HIGH_IMPORTANCE: + case TIPC_CRITICAL_IMPORTANCE: + case CONN_MANAGER: + if (tipc_skb_queue_tail(&link->inputq, skb, dport)) { + node->inputq = &link->inputq; + node->action_flags |= TIPC_MSG_EVT; } - break; - case MSG_BUNDLER: - l->stats.recv_bundles++; - l->stats.recv_bundled += msg_msgcnt(msg); - res = 0; - break; + return true; case NAME_DISTRIBUTOR: - n->bclink.recv_permitted = true; - res = 0; - break; + node->bclink.recv_permitted = true; + node->namedq = &link->namedq; + skb_queue_tail(&link->namedq, skb); + if (skb_queue_len(&link->namedq) == 1) + node->action_flags |= TIPC_NAMED_MSG_EVT; + return true; + case MSG_BUNDLER: + case CHANGEOVER_PROTOCOL: + case MSG_FRAGMENTER: case BCAST_PROTOCOL: - tipc_link_sync_rcv(n, *buf); - break; + return false; default: - res = 0; - } - return res; + pr_warn("Dropping received illegal msg type\n"); + kfree_skb(skb); + return false; + }; } -/** - * tipc_link_input - Deliver message too higher layers + +/* tipc_link_input - process packet that has passed link protocol check + * + * Consumes buffer + * Node lock must be held */ -static int tipc_link_input(struct tipc_link *l, struct sk_buff *buf) +static void tipc_link_input(struct tipc_link *link, struct sk_buff *skb) { - struct tipc_msg *msg = buf_msg(buf); - int res = 0; + struct tipc_node *node = link->owner; + struct tipc_msg *msg = buf_msg(skb); + struct 
sk_buff *iskb; + int pos = 0; + + if (likely(tipc_data_input(link, skb))) + return; switch (msg_user(msg)) { - case TIPC_LOW_IMPORTANCE: - case TIPC_MEDIUM_IMPORTANCE: - case TIPC_HIGH_IMPORTANCE: - case TIPC_CRITICAL_IMPORTANCE: - case CONN_MANAGER: - tipc_sk_rcv(buf); + case CHANGEOVER_PROTOCOL: + if (!tipc_link_tunnel_rcv(node, &skb)) + break; + if (msg_user(buf_msg(skb)) != MSG_BUNDLER) { + tipc_data_input(link, skb); + break; + } + case MSG_BUNDLER: + link->stats.recv_bundles++; + link->stats.recv_bundled += msg_msgcnt(msg); + + while (tipc_msg_extract(skb, &iskb, &pos)) + tipc_data_input(link, iskb); break; - case NAME_DISTRIBUTOR: - tipc_named_rcv(buf); + case MSG_FRAGMENTER: + link->stats.recv_fragments++; + if (tipc_buf_append(&link->reasm_buf, &skb)) { + link->stats.recv_fragmented++; + tipc_data_input(link, skb); + } else if (!link->reasm_buf) { + tipc_link_reset(link); + } break; - case MSG_BUNDLER: - tipc_link_bundle_rcv(buf); + case BCAST_PROTOCOL: + tipc_link_sync_rcv(node, skb); break; default: - res = -EINVAL; - } - return res; + break; + }; } /** @@ -1381,7 +1426,7 @@ void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int probe_msg, msg_set_type(msg, msg_typ); msg_set_net_plane(msg, l_ptr->net_plane); msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in); - msg_set_last_bcast(msg, tipc_bclink_get_last_sent()); + msg_set_last_bcast(msg, tipc_bclink_get_last_sent(l_ptr->owner->net)); if (msg_typ == STATE_MSG) { u32 next_sent = mod(l_ptr->next_out_no); @@ -1445,7 +1490,8 @@ void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int probe_msg, skb_copy_to_linear_data(buf, msg, sizeof(l_ptr->proto_msg)); buf->priority = TC_PRIO_CONTROL; - tipc_bearer_send(l_ptr->bearer_id, buf, &l_ptr->media_addr); + tipc_bearer_send(l_ptr->owner->net, l_ptr->bearer_id, buf, + &l_ptr->media_addr); l_ptr->unacked_window = 0; kfree_skb(buf); } @@ -1455,7 +1501,8 @@ void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int probe_msg, * Note that network plane id propagates through the network, and may * change at any time. 
The node with lowest address rules */ -static void tipc_link_proto_rcv(struct tipc_link *l_ptr, struct sk_buff *buf) +static void tipc_link_proto_rcv(struct tipc_link *l_ptr, + struct sk_buff *buf) { u32 rec_gap = 0; u32 max_pkt_info; @@ -1468,7 +1515,7 @@ static void tipc_link_proto_rcv(struct tipc_link *l_ptr, struct sk_buff *buf) goto exit; if (l_ptr->net_plane != msg_net_plane(msg)) - if (tipc_own_addr > msg_prevnode(msg)) + if (link_own_addr(l_ptr) > msg_prevnode(msg)) l_ptr->net_plane = msg_net_plane(msg); switch (msg_type(msg)) { @@ -1535,9 +1582,9 @@ static void tipc_link_proto_rcv(struct tipc_link *l_ptr, struct sk_buff *buf) if (msg_linkprio(msg) && (msg_linkprio(msg) != l_ptr->priority)) { - pr_warn("%s<%s>, priority change %u->%u\n", - link_rst_msg, l_ptr->name, l_ptr->priority, - msg_linkprio(msg)); + pr_debug("%s<%s>, priority change %u->%u\n", + link_rst_msg, l_ptr->name, + l_ptr->priority, msg_linkprio(msg)); l_ptr->priority = msg_linkprio(msg); tipc_link_reset(l_ptr); /* Enforce change to take effect */ break; @@ -1636,8 +1683,8 @@ void tipc_link_failover_send_queue(struct tipc_link *l_ptr) if (!tunnel) return; - tipc_msg_init(&tunnel_hdr, CHANGEOVER_PROTOCOL, - ORIGINAL_MSG, INT_H_SIZE, l_ptr->addr); + tipc_msg_init(link_own_addr(l_ptr), &tunnel_hdr, CHANGEOVER_PROTOCOL, + ORIGINAL_MSG, INT_H_SIZE, l_ptr->addr); msg_set_bearer_id(&tunnel_hdr, l_ptr->peer_bearer_id); msg_set_msgcnt(&tunnel_hdr, msgcount); @@ -1694,8 +1741,8 @@ void tipc_link_dup_queue_xmit(struct tipc_link *l_ptr, struct sk_buff *skb; struct tipc_msg tunnel_hdr; - tipc_msg_init(&tunnel_hdr, CHANGEOVER_PROTOCOL, - DUPLICATE_MSG, INT_H_SIZE, l_ptr->addr); + tipc_msg_init(link_own_addr(l_ptr), &tunnel_hdr, CHANGEOVER_PROTOCOL, + DUPLICATE_MSG, INT_H_SIZE, l_ptr->addr); msg_set_msgcnt(&tunnel_hdr, skb_queue_len(&l_ptr->outqueue)); msg_set_bearer_id(&tunnel_hdr, l_ptr->peer_bearer_id); skb_queue_walk(&l_ptr->outqueue, skb) { @@ -1729,7 +1776,7 @@ void tipc_link_dup_queue_xmit(struct tipc_link *l_ptr, * @from_pos: offset to extract from * * Returns a new message buffer containing an embedded message. The - * encapsulating message itself is left unchanged. + * encapsulating buffer is left unchanged. */ static struct sk_buff *buf_extract(struct sk_buff *skb, u32 from_pos) { @@ -1743,8 +1790,6 @@ static struct sk_buff *buf_extract(struct sk_buff *skb, u32 from_pos) return eb; } - - /* tipc_link_dup_rcv(): Receive a tunnelled DUPLICATE_MSG packet. * Owner node is locked. 
*/ @@ -1804,10 +1849,8 @@ static struct sk_buff *tipc_link_failover_rcv(struct tipc_link *l_ptr, } } exit: - if ((l_ptr->exp_msg_count == 0) && (l_ptr->flags & LINK_STOPPED)) { - tipc_node_detach_link(l_ptr->owner, l_ptr); - kfree(l_ptr); - } + if ((!l_ptr->exp_msg_count) && (l_ptr->flags & LINK_STOPPED)) + tipc_link_delete(l_ptr); return buf; } @@ -1845,50 +1888,16 @@ exit: return *buf != NULL; } -/* - * Bundler functionality: - */ -void tipc_link_bundle_rcv(struct sk_buff *buf) +static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tol) { - u32 msgcount = msg_msgcnt(buf_msg(buf)); - u32 pos = INT_H_SIZE; - struct sk_buff *obuf; - struct tipc_msg *omsg; - - while (msgcount--) { - obuf = buf_extract(buf, pos); - if (obuf == NULL) { - pr_warn("Link unable to unbundle message(s)\n"); - break; - } - omsg = buf_msg(obuf); - pos += align(msg_size(omsg)); - if (msg_isdata(omsg)) { - if (unlikely(msg_type(omsg) == TIPC_MCAST_MSG)) - tipc_sk_mcast_rcv(obuf); - else - tipc_sk_rcv(obuf); - } else if (msg_user(omsg) == CONN_MANAGER) { - tipc_sk_rcv(obuf); - } else if (msg_user(omsg) == NAME_DISTRIBUTOR) { - tipc_named_rcv(obuf); - } else { - pr_warn("Illegal bundled msg: %u\n", msg_user(omsg)); - kfree_skb(obuf); - } - } - kfree_skb(buf); -} + unsigned long intv = ((tol / 4) > 500) ? 500 : tol / 4; -static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tolerance) -{ - if ((tolerance < TIPC_MIN_LINK_TOL) || (tolerance > TIPC_MAX_LINK_TOL)) + if ((tol < TIPC_MIN_LINK_TOL) || (tol > TIPC_MAX_LINK_TOL)) return; - l_ptr->tolerance = tolerance; - l_ptr->continuity_interval = - ((tolerance / 4) > 500) ? 500 : tolerance / 4; - l_ptr->abort_limit = tolerance / (l_ptr->continuity_interval / 4); + l_ptr->tolerance = tol; + l_ptr->cont_intv = msecs_to_jiffies(intv); + l_ptr->abort_limit = tol / (jiffies_to_msecs(l_ptr->cont_intv) / 4); } void tipc_link_set_queue_limits(struct tipc_link *l_ptr, u32 window) @@ -1911,22 +1920,25 @@ void tipc_link_set_queue_limits(struct tipc_link *l_ptr, u32 window) } /* tipc_link_find_owner - locate owner node of link by link's name + * @net: the applicable net namespace * @name: pointer to link name string * @bearer_id: pointer to index in 'node->links' array where the link was found. * * Returns pointer to node owning the link, or 0 if no matching link is found. */ -static struct tipc_node *tipc_link_find_owner(const char *link_name, +static struct tipc_node *tipc_link_find_owner(struct net *net, + const char *link_name, unsigned int *bearer_id) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_link *l_ptr; struct tipc_node *n_ptr; - struct tipc_node *found_node = 0; + struct tipc_node *found_node = NULL; int i; *bearer_id = 0; rcu_read_lock(); - list_for_each_entry_rcu(n_ptr, &tipc_node_list, list) { + list_for_each_entry_rcu(n_ptr, &tn->node_list, list) { tipc_node_lock(n_ptr); for (i = 0; i < MAX_BEARERS; i++) { l_ptr = n_ptr->links[i]; @@ -1946,148 +1958,6 @@ static struct tipc_node *tipc_link_find_owner(const char *link_name, } /** - * link_value_is_valid -- validate proposed link tolerance/priority/window - * - * @cmd: value type (TIPC_CMD_SET_LINK_*) - * @new_value: the new value - * - * Returns 1 if value is within range, 0 if not. 
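A worked example may help with the reworked link_set_supervision_props() above, where the continuity interval is now kept in jiffies. Assuming a link tolerance of 1500 ms (the default defined in the TIPC headers), integer arithmetic gives:

	/*
	 * tol         = 1500 ms
	 * intv        = (1500 / 4 > 500) ? 500 : 1500 / 4   ->  375 ms
	 * cont_intv   = msecs_to_jiffies(375)
	 * abort_limit = 1500 / (375 / 4) = 1500 / 93        ->  16 probes
	 */
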
- */ -static int link_value_is_valid(u16 cmd, u32 new_value) -{ - switch (cmd) { - case TIPC_CMD_SET_LINK_TOL: - return (new_value >= TIPC_MIN_LINK_TOL) && - (new_value <= TIPC_MAX_LINK_TOL); - case TIPC_CMD_SET_LINK_PRI: - return (new_value <= TIPC_MAX_LINK_PRI); - case TIPC_CMD_SET_LINK_WINDOW: - return (new_value >= TIPC_MIN_LINK_WIN) && - (new_value <= TIPC_MAX_LINK_WIN); - } - return 0; -} - -/** - * link_cmd_set_value - change priority/tolerance/window for link/bearer/media - * @name: ptr to link, bearer, or media name - * @new_value: new value of link, bearer, or media setting - * @cmd: which link, bearer, or media attribute to set (TIPC_CMD_SET_LINK_*) - * - * Caller must hold RTNL lock to ensure link/bearer/media is not deleted. - * - * Returns 0 if value updated and negative value on error. - */ -static int link_cmd_set_value(const char *name, u32 new_value, u16 cmd) -{ - struct tipc_node *node; - struct tipc_link *l_ptr; - struct tipc_bearer *b_ptr; - struct tipc_media *m_ptr; - int bearer_id; - int res = 0; - - node = tipc_link_find_owner(name, &bearer_id); - if (node) { - tipc_node_lock(node); - l_ptr = node->links[bearer_id]; - - if (l_ptr) { - switch (cmd) { - case TIPC_CMD_SET_LINK_TOL: - link_set_supervision_props(l_ptr, new_value); - tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, 0, - new_value, 0, 0); - break; - case TIPC_CMD_SET_LINK_PRI: - l_ptr->priority = new_value; - tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, 0, - 0, new_value, 0); - break; - case TIPC_CMD_SET_LINK_WINDOW: - tipc_link_set_queue_limits(l_ptr, new_value); - break; - default: - res = -EINVAL; - break; - } - } - tipc_node_unlock(node); - return res; - } - - b_ptr = tipc_bearer_find(name); - if (b_ptr) { - switch (cmd) { - case TIPC_CMD_SET_LINK_TOL: - b_ptr->tolerance = new_value; - break; - case TIPC_CMD_SET_LINK_PRI: - b_ptr->priority = new_value; - break; - case TIPC_CMD_SET_LINK_WINDOW: - b_ptr->window = new_value; - break; - default: - res = -EINVAL; - break; - } - return res; - } - - m_ptr = tipc_media_find(name); - if (!m_ptr) - return -ENODEV; - switch (cmd) { - case TIPC_CMD_SET_LINK_TOL: - m_ptr->tolerance = new_value; - break; - case TIPC_CMD_SET_LINK_PRI: - m_ptr->priority = new_value; - break; - case TIPC_CMD_SET_LINK_WINDOW: - m_ptr->window = new_value; - break; - default: - res = -EINVAL; - break; - } - return res; -} - -struct sk_buff *tipc_link_cmd_config(const void *req_tlv_area, int req_tlv_space, - u16 cmd) -{ - struct tipc_link_config *args; - u32 new_value; - int res; - - if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_LINK_CONFIG)) - return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - - args = (struct tipc_link_config *)TLV_DATA(req_tlv_area); - new_value = ntohl(args->value); - - if (!link_value_is_valid(cmd, new_value)) - return tipc_cfg_reply_error_string( - "cannot change, value invalid"); - - if (!strcmp(args->name, tipc_bclink_name)) { - if ((cmd == TIPC_CMD_SET_LINK_WINDOW) && - (tipc_bclink_set_queue_limits(new_value) == 0)) - return tipc_cfg_reply_none(); - return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED - " (cannot change setting on broadcast link)"); - } - - res = link_cmd_set_value(args->name, new_value, cmd); - if (res) - return tipc_cfg_reply_error_string("cannot change link setting"); - - return tipc_cfg_reply_none(); -} - -/** * link_reset_statistics - reset link statistics * @l_ptr: pointer to link */ @@ -2098,207 +1968,13 @@ static void link_reset_statistics(struct tipc_link *l_ptr) l_ptr->stats.recv_info = l_ptr->next_in_no; } -struct sk_buff 
*tipc_link_cmd_reset_stats(const void *req_tlv_area, int req_tlv_space) -{ - char *link_name; - struct tipc_link *l_ptr; - struct tipc_node *node; - unsigned int bearer_id; - - if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_LINK_NAME)) - return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - - link_name = (char *)TLV_DATA(req_tlv_area); - if (!strcmp(link_name, tipc_bclink_name)) { - if (tipc_bclink_reset_stats()) - return tipc_cfg_reply_error_string("link not found"); - return tipc_cfg_reply_none(); - } - node = tipc_link_find_owner(link_name, &bearer_id); - if (!node) - return tipc_cfg_reply_error_string("link not found"); - - tipc_node_lock(node); - l_ptr = node->links[bearer_id]; - if (!l_ptr) { - tipc_node_unlock(node); - return tipc_cfg_reply_error_string("link not found"); - } - link_reset_statistics(l_ptr); - tipc_node_unlock(node); - return tipc_cfg_reply_none(); -} - -/** - * percent - convert count to a percentage of total (rounding up or down) - */ -static u32 percent(u32 count, u32 total) -{ - return (count * 100 + (total / 2)) / total; -} - -/** - * tipc_link_stats - print link statistics - * @name: link name - * @buf: print buffer area - * @buf_size: size of print buffer area - * - * Returns length of print buffer data string (or 0 if error) - */ -static int tipc_link_stats(const char *name, char *buf, const u32 buf_size) -{ - struct tipc_link *l; - struct tipc_stats *s; - struct tipc_node *node; - char *status; - u32 profile_total = 0; - unsigned int bearer_id; - int ret; - - if (!strcmp(name, tipc_bclink_name)) - return tipc_bclink_stats(buf, buf_size); - - node = tipc_link_find_owner(name, &bearer_id); - if (!node) - return 0; - - tipc_node_lock(node); - - l = node->links[bearer_id]; - if (!l) { - tipc_node_unlock(node); - return 0; - } - - s = &l->stats; - - if (tipc_link_is_active(l)) - status = "ACTIVE"; - else if (tipc_link_is_up(l)) - status = "STANDBY"; - else - status = "DEFUNCT"; - - ret = tipc_snprintf(buf, buf_size, "Link <%s>\n" - " %s MTU:%u Priority:%u Tolerance:%u ms" - " Window:%u packets\n", - l->name, status, l->max_pkt, l->priority, - l->tolerance, l->queue_limit[0]); - - ret += tipc_snprintf(buf + ret, buf_size - ret, - " RX packets:%u fragments:%u/%u bundles:%u/%u\n", - l->next_in_no - s->recv_info, s->recv_fragments, - s->recv_fragmented, s->recv_bundles, - s->recv_bundled); - - ret += tipc_snprintf(buf + ret, buf_size - ret, - " TX packets:%u fragments:%u/%u bundles:%u/%u\n", - l->next_out_no - s->sent_info, s->sent_fragments, - s->sent_fragmented, s->sent_bundles, - s->sent_bundled); - - profile_total = s->msg_length_counts; - if (!profile_total) - profile_total = 1; - - ret += tipc_snprintf(buf + ret, buf_size - ret, - " TX profile sample:%u packets average:%u octets\n" - " 0-64:%u%% -256:%u%% -1024:%u%% -4096:%u%% " - "-16384:%u%% -32768:%u%% -66000:%u%%\n", - s->msg_length_counts, - s->msg_lengths_total / profile_total, - percent(s->msg_length_profile[0], profile_total), - percent(s->msg_length_profile[1], profile_total), - percent(s->msg_length_profile[2], profile_total), - percent(s->msg_length_profile[3], profile_total), - percent(s->msg_length_profile[4], profile_total), - percent(s->msg_length_profile[5], profile_total), - percent(s->msg_length_profile[6], profile_total)); - - ret += tipc_snprintf(buf + ret, buf_size - ret, - " RX states:%u probes:%u naks:%u defs:%u" - " dups:%u\n", s->recv_states, s->recv_probes, - s->recv_nacks, s->deferred_recv, s->duplicates); - - ret += tipc_snprintf(buf + ret, buf_size - ret, - " TX states:%u 
probes:%u naks:%u acks:%u" - " dups:%u\n", s->sent_states, s->sent_probes, - s->sent_nacks, s->sent_acks, s->retransmitted); - - ret += tipc_snprintf(buf + ret, buf_size - ret, - " Congestion link:%u Send queue" - " max:%u avg:%u\n", s->link_congs, - s->max_queue_sz, s->queue_sz_counts ? - (s->accu_queue_sz / s->queue_sz_counts) : 0); - - tipc_node_unlock(node); - return ret; -} - -struct sk_buff *tipc_link_cmd_show_stats(const void *req_tlv_area, int req_tlv_space) -{ - struct sk_buff *buf; - struct tlv_desc *rep_tlv; - int str_len; - int pb_len; - char *pb; - - if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_LINK_NAME)) - return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - - buf = tipc_cfg_reply_alloc(TLV_SPACE(ULTRA_STRING_MAX_LEN)); - if (!buf) - return NULL; - - rep_tlv = (struct tlv_desc *)buf->data; - pb = TLV_DATA(rep_tlv); - pb_len = ULTRA_STRING_MAX_LEN; - str_len = tipc_link_stats((char *)TLV_DATA(req_tlv_area), - pb, pb_len); - if (!str_len) { - kfree_skb(buf); - return tipc_cfg_reply_error_string("link not found"); - } - str_len += 1; /* for "\0" */ - skb_put(buf, TLV_SPACE(str_len)); - TLV_SET(rep_tlv, TIPC_TLV_ULTRA_STRING, NULL, str_len); - - return buf; -} - -/** - * tipc_link_get_max_pkt - get maximum packet size to use when sending to destination - * @dest: network address of destination node - * @selector: used to select from set of active links - * - * If no active link can be found, uses default maximum packet size. - */ -u32 tipc_link_get_max_pkt(u32 dest, u32 selector) -{ - struct tipc_node *n_ptr; - struct tipc_link *l_ptr; - u32 res = MAX_PKT_DEFAULT; - - if (dest == tipc_own_addr) - return MAX_MSG_SIZE; - - n_ptr = tipc_node_find(dest); - if (n_ptr) { - tipc_node_lock(n_ptr); - l_ptr = n_ptr->active_links[selector & 1]; - if (l_ptr) - res = l_ptr->max_pkt; - tipc_node_unlock(n_ptr); - } - return res; -} - static void link_print(struct tipc_link *l_ptr, const char *str) { + struct tipc_net *tn = net_generic(l_ptr->owner->net, tipc_net_id); struct tipc_bearer *b_ptr; rcu_read_lock(); - b_ptr = rcu_dereference_rtnl(bearer_list[l_ptr->bearer_id]); + b_ptr = rcu_dereference_rtnl(tn->bearer_list[l_ptr->bearer_id]); if (b_ptr) pr_info("%s Link %x<%s>:", str, l_ptr->addr, b_ptr->name); rcu_read_unlock(); @@ -2362,6 +2038,7 @@ int tipc_nl_link_set(struct sk_buff *skb, struct genl_info *info) struct tipc_link *link; struct tipc_node *node; struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1]; + struct net *net = sock_net(skb->sk); if (!info->attrs[TIPC_NLA_LINK]) return -EINVAL; @@ -2377,7 +2054,7 @@ int tipc_nl_link_set(struct sk_buff *skb, struct genl_info *info) name = nla_data(attrs[TIPC_NLA_LINK_NAME]); - node = tipc_link_find_owner(name, &bearer_id); + node = tipc_link_find_owner(net, name, &bearer_id); if (!node) return -EINVAL; @@ -2493,14 +2170,16 @@ msg_full: } /* Caller should hold appropriate locks to protect the link */ -static int __tipc_nl_add_link(struct tipc_nl_msg *msg, struct tipc_link *link) +static int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg, + struct tipc_link *link) { int err; void *hdr; struct nlattr *attrs; struct nlattr *prop; + struct tipc_net *tn = net_generic(net, tipc_net_id); - hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_v2_family, + hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, NLM_F_MULTI, TIPC_NL_LINK_GET); if (!hdr) return -EMSGSIZE; @@ -2512,7 +2191,7 @@ static int __tipc_nl_add_link(struct tipc_nl_msg *msg, struct tipc_link *link) if (nla_put_string(msg->skb, 
TIPC_NLA_LINK_NAME, link->name)) goto attr_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_LINK_DEST, - tipc_cluster_mask(tipc_own_addr))) + tipc_cluster_mask(tn->own_addr))) goto attr_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_LINK_MTU, link->max_pkt)) goto attr_msg_full; @@ -2562,9 +2241,8 @@ msg_full: } /* Caller should hold node lock */ -static int __tipc_nl_add_node_links(struct tipc_nl_msg *msg, - struct tipc_node *node, - u32 *prev_link) +static int __tipc_nl_add_node_links(struct net *net, struct tipc_nl_msg *msg, + struct tipc_node *node, u32 *prev_link) { u32 i; int err; @@ -2575,7 +2253,7 @@ static int __tipc_nl_add_node_links(struct tipc_nl_msg *msg, if (!node->links[i]) continue; - err = __tipc_nl_add_link(msg, node->links[i]); + err = __tipc_nl_add_link(net, msg, node->links[i]); if (err) return err; } @@ -2586,6 +2264,8 @@ static int __tipc_nl_add_node_links(struct tipc_nl_msg *msg, int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = sock_net(skb->sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_node *node; struct tipc_nl_msg msg; u32 prev_node = cb->args[0]; @@ -2603,7 +2283,7 @@ int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb) rcu_read_lock(); if (prev_node) { - node = tipc_node_find(prev_node); + node = tipc_node_find(net, prev_node); if (!node) { /* We never set seq or call nl_dump_check_consistent() * this means that setting prev_seq here will cause the @@ -2615,9 +2295,11 @@ int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb) goto out; } - list_for_each_entry_continue_rcu(node, &tipc_node_list, list) { + list_for_each_entry_continue_rcu(node, &tn->node_list, + list) { tipc_node_lock(node); - err = __tipc_nl_add_node_links(&msg, node, &prev_link); + err = __tipc_nl_add_node_links(net, &msg, node, + &prev_link); tipc_node_unlock(node); if (err) goto out; @@ -2625,13 +2307,14 @@ int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb) prev_node = node->addr; } } else { - err = tipc_nl_add_bc_link(&msg); + err = tipc_nl_add_bc_link(net, &msg); if (err) goto out; - list_for_each_entry_rcu(node, &tipc_node_list, list) { + list_for_each_entry_rcu(node, &tn->node_list, list) { tipc_node_lock(node); - err = __tipc_nl_add_node_links(&msg, node, &prev_link); + err = __tipc_nl_add_node_links(net, &msg, node, + &prev_link); tipc_node_unlock(node); if (err) goto out; @@ -2652,6 +2335,7 @@ out: int tipc_nl_link_get(struct sk_buff *skb, struct genl_info *info) { + struct net *net = genl_info_net(info); struct sk_buff *ans_skb; struct tipc_nl_msg msg; struct tipc_link *link; @@ -2664,7 +2348,7 @@ int tipc_nl_link_get(struct sk_buff *skb, struct genl_info *info) return -EINVAL; name = nla_data(info->attrs[TIPC_NLA_LINK_NAME]); - node = tipc_link_find_owner(name, &bearer_id); + node = tipc_link_find_owner(net, name, &bearer_id); if (!node) return -EINVAL; @@ -2683,7 +2367,7 @@ int tipc_nl_link_get(struct sk_buff *skb, struct genl_info *info) goto err_out; } - err = __tipc_nl_add_link(&msg, link); + err = __tipc_nl_add_link(net, &msg, link); if (err) goto err_out; @@ -2706,6 +2390,7 @@ int tipc_nl_link_reset_stats(struct sk_buff *skb, struct genl_info *info) struct tipc_link *link; struct tipc_node *node; struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1]; + struct net *net = sock_net(skb->sk); if (!info->attrs[TIPC_NLA_LINK]) return -EINVAL; @@ -2722,13 +2407,13 @@ int tipc_nl_link_reset_stats(struct sk_buff *skb, struct genl_info *info) link_name = 
nla_data(attrs[TIPC_NLA_LINK_NAME]); if (strcmp(link_name, tipc_bclink_name) == 0) { - err = tipc_bclink_reset_stats(); + err = tipc_bclink_reset_stats(net); if (err) return err; return 0; } - node = tipc_link_find_owner(link_name, &bearer_id); + node = tipc_link_find_owner(net, link_name, &bearer_id); if (!node) return -EINVAL; diff --git a/net/tipc/link.h b/net/tipc/link.h index 55812e87ca1e..7aeb52092bf3 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -41,6 +41,10 @@ #include "msg.h" #include "node.h" +/* TIPC-specific error codes +*/ +#define ELINKCONG EAGAIN /* link congestion <=> resource unavailable */ + /* Out-of-range value for link sequence numbers */ #define INVALID_LINK_SEQ 0x10000 @@ -99,13 +103,14 @@ struct tipc_stats { * @media_addr: media address to use when sending messages over link * @timer: link timer * @owner: pointer to peer node + * @refcnt: reference counter for permanent references (owner node & timer) * @flags: execution state flags for link endpoint instance * @checkpoint: reference point for triggering link continuity checking * @peer_session: link session # being used by peer end of link * @peer_bearer_id: bearer id used by link's peer endpoint * @bearer_id: local bearer id used by link * @tolerance: minimum link continuity loss needed to reset link [in ms] - * @continuity_interval: link continuity testing interval [in ms] + * @cont_intv: link continuity testing interval * @abort_limit: # of unacknowledged continuity probes needed to reset link * @state: current state of link FSM * @fsm_msg_cnt: # of protocol messages link FSM has sent in current state @@ -126,8 +131,10 @@ struct tipc_stats { * @next_in_no: next sequence number to expect for inbound messages * @deferred_queue: deferred queue saved OOS b'cast message received from node * @unacked_window: # of inbound messages rx'd without ack'ing back to peer + * @inputq: buffer queue for messages to be delivered upwards + * @namedq: buffer queue for name table messages to be delivered upwards * @next_out: ptr to first unsent outbound message in queue - * @waiting_sks: linked list of sockets waiting for link congestion to abate + * @wakeupq: linked list of wakeup msgs waiting for link congestion to abate * @long_msg_seq_no: next identifier to use for outbound fragmented messages * @reasm_buf: head of partially reassembled inbound message fragments * @stats: collects statistics regarding link activity @@ -138,6 +145,7 @@ struct tipc_link { struct tipc_media_addr media_addr; struct timer_list timer; struct tipc_node *owner; + struct kref ref; /* Management and link supervision data */ unsigned int flags; @@ -146,7 +154,7 @@ struct tipc_link { u32 peer_bearer_id; u32 bearer_id; u32 tolerance; - u32 continuity_interval; + unsigned long cont_intv; u32 abort_limit; int state; u32 fsm_msg_cnt; @@ -178,10 +186,12 @@ struct tipc_link { u32 next_in_no; struct sk_buff_head deferred_queue; u32 unacked_window; + struct sk_buff_head inputq; + struct sk_buff_head namedq; /* Congestion handling */ struct sk_buff *next_out; - struct sk_buff_head waiting_sks; + struct sk_buff_head wakeupq; /* Fragmentation/reassembly */ u32 long_msg_seq_no; @@ -196,28 +206,24 @@ struct tipc_port; struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, struct tipc_bearer *b_ptr, const struct tipc_media_addr *media_addr); -void tipc_link_delete_list(unsigned int bearer_id, bool shutting_down); +void tipc_link_delete(struct tipc_link *link); +void tipc_link_delete_list(struct net *net, unsigned int bearer_id, + bool shutting_down); void 
tipc_link_failover_send_queue(struct tipc_link *l_ptr); void tipc_link_dup_queue_xmit(struct tipc_link *l_ptr, struct tipc_link *dest); void tipc_link_reset_fragments(struct tipc_link *l_ptr); int tipc_link_is_up(struct tipc_link *l_ptr); int tipc_link_is_active(struct tipc_link *l_ptr); void tipc_link_purge_queues(struct tipc_link *l_ptr); -struct sk_buff *tipc_link_cmd_config(const void *req_tlv_area, - int req_tlv_space, - u16 cmd); -struct sk_buff *tipc_link_cmd_show_stats(const void *req_tlv_area, - int req_tlv_space); -struct sk_buff *tipc_link_cmd_reset_stats(const void *req_tlv_area, - int req_tlv_space); void tipc_link_reset_all(struct tipc_node *node); void tipc_link_reset(struct tipc_link *l_ptr); -void tipc_link_reset_list(unsigned int bearer_id); -int tipc_link_xmit_skb(struct sk_buff *skb, u32 dest, u32 selector); -int tipc_link_xmit(struct sk_buff_head *list, u32 dest, u32 selector); -int __tipc_link_xmit(struct tipc_link *link, struct sk_buff_head *list); -u32 tipc_link_get_max_pkt(u32 dest, u32 selector); -void tipc_link_bundle_rcv(struct sk_buff *buf); +void tipc_link_reset_list(struct net *net, unsigned int bearer_id); +int tipc_link_xmit_skb(struct net *net, struct sk_buff *skb, u32 dest, + u32 selector); +int tipc_link_xmit(struct net *net, struct sk_buff_head *list, u32 dest, + u32 selector); +int __tipc_link_xmit(struct net *net, struct tipc_link *link, + struct sk_buff_head *list); void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int prob, u32 gap, u32 tolerance, u32 priority, u32 acked_mtu); void tipc_link_push_packets(struct tipc_link *l_ptr); @@ -233,6 +239,7 @@ int tipc_nl_link_get(struct sk_buff *skb, struct genl_info *info); int tipc_nl_link_set(struct sk_buff *skb, struct genl_info *info); int tipc_nl_link_reset_stats(struct sk_buff *skb, struct genl_info *info); int tipc_nl_parse_link_prop(struct nlattr *prop, struct nlattr *props[]); +void link_prepare_wakeup(struct tipc_link *l); /* * Link sequence number manipulation routines (uses modulo 2**16 arithmetic) @@ -267,6 +274,10 @@ static inline u32 lesser(u32 left, u32 right) return less_eq(left, right) ? left : right; } +static inline u32 link_own_addr(struct tipc_link *l) +{ + return msg_prevnode(l->pmsg); +} /* * Link status checking routines diff --git a/net/tipc/log.c b/net/tipc/log.c deleted file mode 100644 index abef644f27d8..000000000000 --- a/net/tipc/log.c +++ /dev/null @@ -1,55 +0,0 @@ -/* - * net/tipc/log.c: TIPC print buffer routines for debugging - * - * Copyright (c) 1996-2006, Ericsson AB - * Copyright (c) 2005-2007, Wind River Systems - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the names of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * Alternatively, this software may be distributed under the terms of the - * GNU General Public License ("GPL") version 2 as published by the Free - * Software Foundation. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "core.h" -#include "config.h" - -/** - * tipc_snprintf - append formatted output to print buffer - * @buf: pointer to print buffer - * @len: buffer length - * @fmt: formatted info to be printed - */ -int tipc_snprintf(char *buf, int len, const char *fmt, ...) -{ - int i; - va_list args; - - va_start(args, fmt); - i = vscnprintf(buf, len, fmt, args); - va_end(args); - return i; -} diff --git a/net/tipc/msg.c b/net/tipc/msg.c index a687b30a699c..b6eb90cd3ef7 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -34,6 +34,7 @@ * POSSIBILITY OF SUCH DAMAGE. */ +#include <net/sock.h> #include "core.h" #include "msg.h" #include "addr.h" @@ -46,25 +47,48 @@ static unsigned int align(unsigned int i) return (i + 3) & ~3u; } -void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, u32 hsize, - u32 destnode) +/** + * tipc_buf_acquire - creates a TIPC message buffer + * @size: message size (including TIPC header) + * + * Returns a new buffer with data pointers set to the specified size. + * + * NOTE: Headroom is reserved to allow prepending of a data link header. + * There may also be unrequested tailroom present at the buffer's end. 
+ */ +struct sk_buff *tipc_buf_acquire(u32 size) +{ + struct sk_buff *skb; + unsigned int buf_size = (BUF_HEADROOM + size + 3) & ~3u; + + skb = alloc_skb_fclone(buf_size, GFP_ATOMIC); + if (skb) { + skb_reserve(skb, BUF_HEADROOM); + skb_put(skb, size); + skb->next = NULL; + } + return skb; +} + +void tipc_msg_init(u32 own_node, struct tipc_msg *m, u32 user, u32 type, + u32 hsize, u32 dnode) { memset(m, 0, hsize); msg_set_version(m); msg_set_user(m, user); msg_set_hdr_sz(m, hsize); msg_set_size(m, hsize); - msg_set_prevnode(m, tipc_own_addr); + msg_set_prevnode(m, own_node); msg_set_type(m, type); if (hsize > SHORT_H_SIZE) { - msg_set_orignode(m, tipc_own_addr); - msg_set_destnode(m, destnode); + msg_set_orignode(m, own_node); + msg_set_destnode(m, dnode); } } -struct sk_buff *tipc_msg_create(uint user, uint type, uint hdr_sz, - uint data_sz, u32 dnode, u32 onode, - u32 dport, u32 oport, int errcode) +struct sk_buff *tipc_msg_create(uint user, uint type, + uint hdr_sz, uint data_sz, u32 dnode, + u32 onode, u32 dport, u32 oport, int errcode) { struct tipc_msg *msg; struct sk_buff *buf; @@ -74,9 +98,8 @@ struct sk_buff *tipc_msg_create(uint user, uint type, uint hdr_sz, return NULL; msg = buf_msg(buf); - tipc_msg_init(msg, user, type, hdr_sz, dnode); + tipc_msg_init(onode, msg, user, type, hdr_sz, dnode); msg_set_size(msg, hdr_sz + data_sz); - msg_set_prevnode(msg, onode); msg_set_origport(msg, oport); msg_set_destport(msg, dport); msg_set_errcode(msg, errcode); @@ -163,15 +186,14 @@ err: * tipc_msg_build - create buffer chain containing specified header and data * @mhdr: Message header, to be prepended to data * @m: User message - * @offset: Posision in iov to start copying from * @dsz: Total length of user data * @pktmax: Max packet size that can be used * @list: Buffer or chain of buffers to be returned to caller * * Returns message data size or errno: -ENOMEM, -EFAULT */ -int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset, - int dsz, int pktmax, struct sk_buff_head *list) +int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, + int offset, int dsz, int pktmax, struct sk_buff_head *list) { int mhsz = msg_hdr_sz(mhdr); int msz = mhsz + dsz; @@ -191,19 +213,19 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset, skb = tipc_buf_acquire(msz); if (unlikely(!skb)) return -ENOMEM; + skb_orphan(skb); __skb_queue_tail(list, skb); skb_copy_to_linear_data(skb, mhdr, mhsz); pktpos = skb->data + mhsz; - if (!dsz || !memcpy_fromiovecend(pktpos, m->msg_iter.iov, offset, - dsz)) + if (copy_from_iter(pktpos, dsz, &m->msg_iter) == dsz) return dsz; rc = -EFAULT; goto error; } /* Prepare reusable fragment header */ - tipc_msg_init(&pkthdr, MSG_FRAGMENTER, FIRST_FRAGMENT, - INT_H_SIZE, msg_destnode(mhdr)); + tipc_msg_init(msg_prevnode(mhdr), &pkthdr, MSG_FRAGMENTER, + FIRST_FRAGMENT, INT_H_SIZE, msg_destnode(mhdr)); msg_set_size(&pkthdr, pktmax); msg_set_fragm_no(&pkthdr, pktno); @@ -211,6 +233,7 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset, skb = tipc_buf_acquire(pktmax); if (!skb) return -ENOMEM; + skb_orphan(skb); __skb_queue_tail(list, skb); pktpos = skb->data; skb_copy_to_linear_data(skb, &pkthdr, INT_H_SIZE); @@ -224,12 +247,11 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset, if (drem < pktrem) pktrem = drem; - if (memcpy_fromiovecend(pktpos, m->msg_iter.iov, offset, pktrem)) { + if (copy_from_iter(pktpos, pktrem, &m->msg_iter) != pktrem) { rc = -EFAULT; goto error; } drem -= pktrem; - offset += pktrem; 
if (!drem) break; @@ -244,6 +266,7 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset, rc = -ENOMEM; goto error; } + skb_orphan(skb); __skb_queue_tail(list, skb); msg_set_type(&pkthdr, FRAGMENT); msg_set_size(&pkthdr, pktsz); @@ -304,6 +327,40 @@ bool tipc_msg_bundle(struct sk_buff_head *list, struct sk_buff *skb, u32 mtu) } /** + * tipc_msg_extract(): extract bundled inner packet from buffer + * @skb: linear outer buffer, to be extracted from. + * @iskb: extracted inner buffer, to be returned + * @pos: position of msg to be extracted. Returns with pointer of next msg + * Consumes outer buffer when last packet extracted + * Returns true when when there is an extracted buffer, otherwise false + */ +bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos) +{ + struct tipc_msg *msg = buf_msg(skb); + int imsz; + struct tipc_msg *imsg = (struct tipc_msg *)(msg_data(msg) + *pos); + + /* Is there space left for shortest possible message? */ + if (*pos > (msg_data_sz(msg) - SHORT_H_SIZE)) + goto none; + imsz = msg_size(imsg); + + /* Is there space left for current message ? */ + if ((*pos + imsz) > msg_data_sz(msg)) + goto none; + *iskb = tipc_buf_acquire(imsz); + if (!*iskb) + goto none; + skb_copy_to_linear_data(*iskb, imsg, imsz); + *pos += align(imsz); + return true; +none: + kfree_skb(skb); + *iskb = NULL; + return false; +} + +/** * tipc_msg_make_bundle(): Create bundle buf and append message to its tail * @list: the buffer chain * @skb: buffer to be appended and replaced @@ -312,8 +369,8 @@ bool tipc_msg_bundle(struct sk_buff_head *list, struct sk_buff *skb, u32 mtu) * Replaces buffer if successful * Returns true if success, otherwise false */ -bool tipc_msg_make_bundle(struct sk_buff_head *list, struct sk_buff *skb, - u32 mtu, u32 dnode) +bool tipc_msg_make_bundle(struct sk_buff_head *list, + struct sk_buff *skb, u32 mtu, u32 dnode) { struct sk_buff *bskb; struct tipc_msg *bmsg; @@ -336,7 +393,8 @@ bool tipc_msg_make_bundle(struct sk_buff_head *list, struct sk_buff *skb, skb_trim(bskb, INT_H_SIZE); bmsg = buf_msg(bskb); - tipc_msg_init(bmsg, MSG_BUNDLER, 0, INT_H_SIZE, dnode); + tipc_msg_init(msg_prevnode(msg), bmsg, MSG_BUNDLER, 0, + INT_H_SIZE, dnode); msg_set_seqno(bmsg, msg_seqno(msg)); msg_set_ack(bmsg, msg_ack(msg)); msg_set_bcast_ack(bmsg, msg_bcast_ack(msg)); @@ -353,7 +411,8 @@ bool tipc_msg_make_bundle(struct sk_buff_head *list, struct sk_buff *skb, * Consumes buffer if failure * Returns true if success, otherwise false */ -bool tipc_msg_reverse(struct sk_buff *buf, u32 *dnode, int err) +bool tipc_msg_reverse(u32 own_addr, struct sk_buff *buf, u32 *dnode, + int err) { struct tipc_msg *msg = buf_msg(buf); uint imp = msg_importance(msg); @@ -374,7 +433,7 @@ bool tipc_msg_reverse(struct sk_buff *buf, u32 *dnode, int err) msg_set_errcode(msg, err); msg_set_origport(msg, msg_destport(&ohdr)); msg_set_destport(msg, msg_origport(&ohdr)); - msg_set_prevnode(msg, tipc_own_addr); + msg_set_prevnode(msg, own_addr); if (!msg_short(msg)) { msg_set_orignode(msg, msg_destnode(&ohdr)); msg_set_destnode(msg, msg_orignode(&ohdr)); @@ -386,43 +445,43 @@ bool tipc_msg_reverse(struct sk_buff *buf, u32 *dnode, int err) return true; exit: kfree_skb(buf); + *dnode = 0; return false; } /** - * tipc_msg_eval: determine fate of message that found no destination - * @buf: the buffer containing the message. 
- * @dnode: return value: next-hop node, if message to be forwarded - * @err: error code to use, if message to be rejected - * + * tipc_msg_lookup_dest(): try to find new destination for named message + * @skb: the buffer containing the message. + * @dnode: return value: next-hop node, if destination found + * @err: return value: error code to use, if message to be rejected * Does not consume buffer - * Returns 0 (TIPC_OK) if message ok and we can try again, -TIPC error - * code if message to be rejected + * Returns true if a destination is found, false otherwise */ -int tipc_msg_eval(struct sk_buff *buf, u32 *dnode) +bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, + u32 *dnode, int *err) { - struct tipc_msg *msg = buf_msg(buf); + struct tipc_msg *msg = buf_msg(skb); u32 dport; - if (msg_type(msg) != TIPC_NAMED_MSG) - return -TIPC_ERR_NO_PORT; - if (skb_linearize(buf)) - return -TIPC_ERR_NO_NAME; - if (msg_data_sz(msg) > MAX_FORWARD_SIZE) - return -TIPC_ERR_NO_NAME; + if (!msg_isdata(msg)) + return false; + if (!msg_named(msg)) + return false; + *err = -TIPC_ERR_NO_NAME; + if (skb_linearize(skb)) + return false; if (msg_reroute_cnt(msg) > 0) - return -TIPC_ERR_NO_NAME; - - *dnode = addr_domain(msg_lookup_scope(msg)); - dport = tipc_nametbl_translate(msg_nametype(msg), - msg_nameinst(msg), - dnode); + return false; + *dnode = addr_domain(net, msg_lookup_scope(msg)); + dport = tipc_nametbl_translate(net, msg_nametype(msg), + msg_nameinst(msg), dnode); if (!dport) - return -TIPC_ERR_NO_NAME; + return false; msg_incr_reroute_cnt(msg); msg_set_destnode(msg, *dnode); msg_set_destport(msg, dport); - return TIPC_OK; + *err = TIPC_OK; + return true; } /* tipc_msg_reassemble() - clone a buffer chain of fragments and diff --git a/net/tipc/msg.h b/net/tipc/msg.h index d5c83d7ecb47..9ace47f44a69 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -37,7 +37,7 @@ #ifndef _TIPC_MSG_H #define _TIPC_MSG_H -#include "bearer.h" +#include <linux/tipc.h> /* * Constants and routines used to read and write TIPC payload message headers @@ -45,6 +45,7 @@ * Note: Some items are also used with TIPC internal message headers */ #define TIPC_VERSION 2 +struct plist; /* * Payload message users are defined in TIPC's public API: @@ -77,11 +78,37 @@ #define TIPC_MEDIA_ADDR_OFFSET 5 +/** + * TIPC message buffer code + * + * TIPC message buffer headroom reserves space for the worst-case + * link-level device header (in case the message is sent off-node). 
+ * + * Note: Headroom should be a multiple of 4 to ensure the TIPC header fields + * are word aligned for quicker access + */ +#define BUF_HEADROOM LL_MAX_HEADER + +struct tipc_skb_cb { + void *handle; + struct sk_buff *tail; + bool deferred; + bool wakeup_pending; + bool bundling; + u16 chain_sz; + u16 chain_imp; +}; + +#define TIPC_SKB_CB(__skb) ((struct tipc_skb_cb *)&((__skb)->cb[0])) struct tipc_msg { __be32 hdr[15]; }; +static inline struct tipc_msg *buf_msg(struct sk_buff *skb) +{ + return (struct tipc_msg *)skb->data; +} static inline u32 msg_word(struct tipc_msg *m, u32 pos) { @@ -721,27 +748,111 @@ static inline u32 msg_tot_origport(struct tipc_msg *m) return msg_origport(m); } -bool tipc_msg_reverse(struct sk_buff *buf, u32 *dnode, int err); - -int tipc_msg_eval(struct sk_buff *buf, u32 *dnode); - -void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, u32 hsize, - u32 destnode); - +struct sk_buff *tipc_buf_acquire(u32 size); +bool tipc_msg_reverse(u32 own_addr, struct sk_buff *buf, u32 *dnode, + int err); +void tipc_msg_init(u32 own_addr, struct tipc_msg *m, u32 user, u32 type, + u32 hsize, u32 destnode); struct sk_buff *tipc_msg_create(uint user, uint type, uint hdr_sz, uint data_sz, u32 dnode, u32 onode, u32 dport, u32 oport, int errcode); - int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf); - bool tipc_msg_bundle(struct sk_buff_head *list, struct sk_buff *skb, u32 mtu); +bool tipc_msg_make_bundle(struct sk_buff_head *list, + struct sk_buff *skb, u32 mtu, u32 dnode); +bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos); +int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, + int offset, int dsz, int mtu, struct sk_buff_head *list); +bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, u32 *dnode, + int *err); +struct sk_buff *tipc_msg_reassemble(struct sk_buff_head *list); -bool tipc_msg_make_bundle(struct sk_buff_head *list, struct sk_buff *skb, - u32 mtu, u32 dnode); - -int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset, - int dsz, int mtu, struct sk_buff_head *list); +/* tipc_skb_peek(): peek and reserve first buffer in list + * @list: list to be peeked in + * Returns pointer to first buffer in list, if any + */ +static inline struct sk_buff *tipc_skb_peek(struct sk_buff_head *list, + spinlock_t *lock) +{ + struct sk_buff *skb; + + spin_lock_bh(lock); + skb = skb_peek(list); + if (skb) + skb_get(skb); + spin_unlock_bh(lock); + return skb; +} + +/* tipc_skb_peek_port(): find a destination port, ignoring all destinations + * up to and including 'filter'. + * Note: ignoring previously tried destinations minimizes the risk of + * contention on the socket lock + * @list: list to be peeked in + * @filter: last destination to be ignored from search + * Returns a destination port number, of applicable. 
+ */ +static inline u32 tipc_skb_peek_port(struct sk_buff_head *list, u32 filter) +{ + struct sk_buff *skb; + u32 dport = 0; + bool ignore = true; + + spin_lock_bh(&list->lock); + skb_queue_walk(list, skb) { + dport = msg_destport(buf_msg(skb)); + if (!filter || skb_queue_is_last(list, skb)) + break; + if (dport == filter) + ignore = false; + else if (!ignore) + break; + } + spin_unlock_bh(&list->lock); + return dport; +} + +/* tipc_skb_dequeue(): unlink first buffer with dest 'dport' from list + * @list: list to be unlinked from + * @dport: selection criteria for buffer to unlink + */ +static inline struct sk_buff *tipc_skb_dequeue(struct sk_buff_head *list, + u32 dport) +{ + struct sk_buff *_skb, *tmp, *skb = NULL; + + spin_lock_bh(&list->lock); + skb_queue_walk_safe(list, _skb, tmp) { + if (msg_destport(buf_msg(_skb)) == dport) { + __skb_unlink(_skb, list); + skb = _skb; + break; + } + } + spin_unlock_bh(&list->lock); + return skb; +} + +/* tipc_skb_queue_tail(): add buffer to tail of list; + * @list: list to be appended to + * @skb: buffer to append. Always appended + * @dport: the destination port of the buffer + * returns true if dport differs from previous destination + */ +static inline bool tipc_skb_queue_tail(struct sk_buff_head *list, + struct sk_buff *skb, u32 dport) +{ + struct sk_buff *_skb = NULL; + bool rv = false; -struct sk_buff *tipc_msg_reassemble(struct sk_buff_head *list); + spin_lock_bh(&list->lock); + _skb = skb_peek_tail(list); + if (!_skb || (msg_destport(buf_msg(_skb)) != dport) || + (skb_queue_len(list) > 32)) + rv = true; + __skb_queue_tail(list, skb); + spin_unlock_bh(&list->lock); + return rv; +} #endif diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index ba6083dca95b..fcb07915aaac 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -68,29 +68,33 @@ static void publ_to_item(struct distr_item *i, struct publication *p) /** * named_prepare_buf - allocate & initialize a publication message */ -static struct sk_buff *named_prepare_buf(u32 type, u32 size, u32 dest) +static struct sk_buff *named_prepare_buf(struct net *net, u32 type, u32 size, + u32 dest) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct sk_buff *buf = tipc_buf_acquire(INT_H_SIZE + size); struct tipc_msg *msg; if (buf != NULL) { msg = buf_msg(buf); - tipc_msg_init(msg, NAME_DISTRIBUTOR, type, INT_H_SIZE, dest); + tipc_msg_init(tn->own_addr, msg, NAME_DISTRIBUTOR, type, + INT_H_SIZE, dest); msg_set_size(msg, INT_H_SIZE + size); } return buf; } -void named_cluster_distribute(struct sk_buff *skb) +void named_cluster_distribute(struct net *net, struct sk_buff *skb) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct sk_buff *oskb; struct tipc_node *node; u32 dnode; rcu_read_lock(); - list_for_each_entry_rcu(node, &tipc_node_list, list) { + list_for_each_entry_rcu(node, &tn->node_list, list) { dnode = node->addr; - if (in_own_node(dnode)) + if (in_own_node(net, dnode)) continue; if (!tipc_node_active_links(node)) continue; @@ -98,7 +102,7 @@ void named_cluster_distribute(struct sk_buff *skb) if (!oskb) break; msg_set_destnode(buf_msg(oskb), dnode); - tipc_link_xmit_skb(oskb, dnode, dnode); + tipc_link_xmit_skb(net, oskb, dnode, dnode); } rcu_read_unlock(); @@ -108,18 +112,19 @@ void named_cluster_distribute(struct sk_buff *skb) /** * tipc_named_publish - tell other nodes about a new publication by this node */ -struct sk_buff *tipc_named_publish(struct publication *publ) +struct sk_buff *tipc_named_publish(struct net *net, struct publication *publ) { + 
struct tipc_net *tn = net_generic(net, tipc_net_id); struct sk_buff *buf; struct distr_item *item; list_add_tail_rcu(&publ->local_list, - &tipc_nametbl->publ_list[publ->scope]); + &tn->nametbl->publ_list[publ->scope]); if (publ->scope == TIPC_NODE_SCOPE) return NULL; - buf = named_prepare_buf(PUBLICATION, ITEM_SIZE, 0); + buf = named_prepare_buf(net, PUBLICATION, ITEM_SIZE, 0); if (!buf) { pr_warn("Publication distribution failure\n"); return NULL; @@ -133,7 +138,7 @@ struct sk_buff *tipc_named_publish(struct publication *publ) /** * tipc_named_withdraw - tell other nodes about a withdrawn publication by this node */ -struct sk_buff *tipc_named_withdraw(struct publication *publ) +struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *publ) { struct sk_buff *buf; struct distr_item *item; @@ -143,7 +148,7 @@ struct sk_buff *tipc_named_withdraw(struct publication *publ) if (publ->scope == TIPC_NODE_SCOPE) return NULL; - buf = named_prepare_buf(WITHDRAWAL, ITEM_SIZE, 0); + buf = named_prepare_buf(net, WITHDRAWAL, ITEM_SIZE, 0); if (!buf) { pr_warn("Withdrawal distribution failure\n"); return NULL; @@ -160,19 +165,21 @@ struct sk_buff *tipc_named_withdraw(struct publication *publ) * @dnode: node to be updated * @pls: linked list of publication items to be packed into buffer chain */ -static void named_distribute(struct sk_buff_head *list, u32 dnode, - struct list_head *pls) +static void named_distribute(struct net *net, struct sk_buff_head *list, + u32 dnode, struct list_head *pls) { struct publication *publ; struct sk_buff *skb = NULL; struct distr_item *item = NULL; - uint msg_dsz = (tipc_node_get_mtu(dnode, 0) / ITEM_SIZE) * ITEM_SIZE; + uint msg_dsz = (tipc_node_get_mtu(net, dnode, 0) / ITEM_SIZE) * + ITEM_SIZE; uint msg_rem = msg_dsz; list_for_each_entry(publ, pls, local_list) { /* Prepare next buffer: */ if (!skb) { - skb = named_prepare_buf(PUBLICATION, msg_rem, dnode); + skb = named_prepare_buf(net, PUBLICATION, msg_rem, + dnode); if (!skb) { pr_warn("Bulk publication failure\n"); return; @@ -202,30 +209,32 @@ static void named_distribute(struct sk_buff_head *list, u32 dnode, /** * tipc_named_node_up - tell specified node about all publications by this node */ -void tipc_named_node_up(u32 dnode) +void tipc_named_node_up(struct net *net, u32 dnode) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct sk_buff_head head; __skb_queue_head_init(&head); rcu_read_lock(); - named_distribute(&head, dnode, - &tipc_nametbl->publ_list[TIPC_CLUSTER_SCOPE]); - named_distribute(&head, dnode, - &tipc_nametbl->publ_list[TIPC_ZONE_SCOPE]); + named_distribute(net, &head, dnode, + &tn->nametbl->publ_list[TIPC_CLUSTER_SCOPE]); + named_distribute(net, &head, dnode, + &tn->nametbl->publ_list[TIPC_ZONE_SCOPE]); rcu_read_unlock(); - tipc_link_xmit(&head, dnode, dnode); + tipc_link_xmit(net, &head, dnode, dnode); } -static void tipc_publ_subscribe(struct publication *publ, u32 addr) +static void tipc_publ_subscribe(struct net *net, struct publication *publ, + u32 addr) { struct tipc_node *node; - if (in_own_node(addr)) + if (in_own_node(net, addr)) return; - node = tipc_node_find(addr); + node = tipc_node_find(net, addr); if (!node) { pr_warn("Node subscription rejected, unknown node 0x%x\n", addr); @@ -237,11 +246,12 @@ static void tipc_publ_subscribe(struct publication *publ, u32 addr) tipc_node_unlock(node); } -static void tipc_publ_unsubscribe(struct publication *publ, u32 addr) +static void tipc_publ_unsubscribe(struct net *net, struct publication *publ, + u32 addr) { struct 
tipc_node *node; - node = tipc_node_find(addr); + node = tipc_node_find(net, addr); if (!node) return; @@ -256,16 +266,17 @@ static void tipc_publ_unsubscribe(struct publication *publ, u32 addr) * Invoked for each publication issued by a newly failed node. * Removes publication structure from name table & deletes it. */ -static void tipc_publ_purge(struct publication *publ, u32 addr) +static void tipc_publ_purge(struct net *net, struct publication *publ, u32 addr) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct publication *p; - spin_lock_bh(&tipc_nametbl_lock); - p = tipc_nametbl_remove_publ(publ->type, publ->lower, + spin_lock_bh(&tn->nametbl_lock); + p = tipc_nametbl_remove_publ(net, publ->type, publ->lower, publ->node, publ->ref, publ->key); if (p) - tipc_publ_unsubscribe(p, addr); - spin_unlock_bh(&tipc_nametbl_lock); + tipc_publ_unsubscribe(net, p, addr); + spin_unlock_bh(&tn->nametbl_lock); if (p != publ) { pr_err("Unable to remove publication from failed node\n" @@ -277,12 +288,12 @@ static void tipc_publ_purge(struct publication *publ, u32 addr) kfree_rcu(p, rcu); } -void tipc_publ_notify(struct list_head *nsub_list, u32 addr) +void tipc_publ_notify(struct net *net, struct list_head *nsub_list, u32 addr) { struct publication *publ, *tmp; list_for_each_entry_safe(publ, tmp, nsub_list, nodesub_list) - tipc_publ_purge(publ, addr); + tipc_publ_purge(net, publ, addr); } /** @@ -292,25 +303,28 @@ void tipc_publ_notify(struct list_head *nsub_list, u32 addr) * tipc_nametbl_lock must be held. * Returns the publication item if successful, otherwise NULL. */ -static bool tipc_update_nametbl(struct distr_item *i, u32 node, u32 dtype) +static bool tipc_update_nametbl(struct net *net, struct distr_item *i, + u32 node, u32 dtype) { struct publication *publ = NULL; if (dtype == PUBLICATION) { - publ = tipc_nametbl_insert_publ(ntohl(i->type), ntohl(i->lower), + publ = tipc_nametbl_insert_publ(net, ntohl(i->type), + ntohl(i->lower), ntohl(i->upper), TIPC_CLUSTER_SCOPE, node, ntohl(i->ref), ntohl(i->key)); if (publ) { - tipc_publ_subscribe(publ, node); + tipc_publ_subscribe(net, publ, node); return true; } } else if (dtype == WITHDRAWAL) { - publ = tipc_nametbl_remove_publ(ntohl(i->type), ntohl(i->lower), + publ = tipc_nametbl_remove_publ(net, ntohl(i->type), + ntohl(i->lower), node, ntohl(i->ref), ntohl(i->key)); if (publ) { - tipc_publ_unsubscribe(publ, node); + tipc_publ_unsubscribe(net, publ, node); kfree_rcu(publ, rcu); return true; } @@ -343,7 +357,7 @@ static void tipc_named_add_backlog(struct distr_item *i, u32 type, u32 node) * tipc_named_process_backlog - try to process any pending name table updates * from the network. 
*/ -void tipc_named_process_backlog(void) +void tipc_named_process_backlog(struct net *net) { struct distr_queue_item *e, *tmp; char addr[16]; @@ -351,7 +365,7 @@ void tipc_named_process_backlog(void) list_for_each_entry_safe(e, tmp, &tipc_dist_queue, next) { if (time_after(e->expires, now)) { - if (!tipc_update_nametbl(&e->i, e->node, e->dtype)) + if (!tipc_update_nametbl(net, &e->i, e->node, e->dtype)) continue; } else { tipc_addr_string_fill(addr, e->node); @@ -367,24 +381,34 @@ void tipc_named_process_backlog(void) } /** - * tipc_named_rcv - process name table update message sent by another node + * tipc_named_rcv - process name table update messages sent by another node */ -void tipc_named_rcv(struct sk_buff *buf) +void tipc_named_rcv(struct net *net, struct sk_buff_head *inputq) { - struct tipc_msg *msg = buf_msg(buf); - struct distr_item *item = (struct distr_item *)msg_data(msg); - u32 count = msg_data_sz(msg) / ITEM_SIZE; - u32 node = msg_orignode(msg); - - spin_lock_bh(&tipc_nametbl_lock); - while (count--) { - if (!tipc_update_nametbl(item, node, msg_type(msg))) - tipc_named_add_backlog(item, msg_type(msg), node); - item++; + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_msg *msg; + struct distr_item *item; + uint count; + u32 node; + struct sk_buff *skb; + int mtype; + + spin_lock_bh(&tn->nametbl_lock); + for (skb = skb_dequeue(inputq); skb; skb = skb_dequeue(inputq)) { + msg = buf_msg(skb); + mtype = msg_type(msg); + item = (struct distr_item *)msg_data(msg); + count = msg_data_sz(msg) / ITEM_SIZE; + node = msg_orignode(msg); + while (count--) { + if (!tipc_update_nametbl(net, item, node, mtype)) + tipc_named_add_backlog(item, mtype, node); + item++; + } + kfree_skb(skb); + tipc_named_process_backlog(net); } - tipc_named_process_backlog(); - spin_unlock_bh(&tipc_nametbl_lock); - kfree_skb(buf); + spin_unlock_bh(&tn->nametbl_lock); } /** @@ -394,17 +418,18 @@ void tipc_named_rcv(struct sk_buff *buf) * All name table entries published by this node are updated to reflect * the node's new network address. 
*/ -void tipc_named_reinit(void) +void tipc_named_reinit(struct net *net) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct publication *publ; int scope; - spin_lock_bh(&tipc_nametbl_lock); + spin_lock_bh(&tn->nametbl_lock); for (scope = TIPC_ZONE_SCOPE; scope <= TIPC_NODE_SCOPE; scope++) - list_for_each_entry_rcu(publ, &tipc_nametbl->publ_list[scope], + list_for_each_entry_rcu(publ, &tn->nametbl->publ_list[scope], local_list) - publ->node = tipc_own_addr; + publ->node = tn->own_addr; - spin_unlock_bh(&tipc_nametbl_lock); + spin_unlock_bh(&tn->nametbl_lock); } diff --git a/net/tipc/name_distr.h b/net/tipc/name_distr.h index cef55cedcfb2..dd2d9fd80da2 100644 --- a/net/tipc/name_distr.h +++ b/net/tipc/name_distr.h @@ -67,13 +67,13 @@ struct distr_item { __be32 key; }; -struct sk_buff *tipc_named_publish(struct publication *publ); -struct sk_buff *tipc_named_withdraw(struct publication *publ); -void named_cluster_distribute(struct sk_buff *buf); -void tipc_named_node_up(u32 dnode); -void tipc_named_rcv(struct sk_buff *buf); -void tipc_named_reinit(void); -void tipc_named_process_backlog(void); -void tipc_publ_notify(struct list_head *nsub_list, u32 addr); +struct sk_buff *tipc_named_publish(struct net *net, struct publication *publ); +struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *publ); +void named_cluster_distribute(struct net *net, struct sk_buff *buf); +void tipc_named_node_up(struct net *net, u32 dnode); +void tipc_named_rcv(struct net *net, struct sk_buff_head *msg_queue); +void tipc_named_reinit(struct net *net); +void tipc_named_process_backlog(struct net *net); +void tipc_publ_notify(struct net *net, struct list_head *nsub_list, u32 addr); #endif diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index c8df0223371a..105ba7adf06f 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -1,7 +1,7 @@ /* * net/tipc/name_table.c: TIPC name table code * - * Copyright (c) 2000-2006, 2014, Ericsson AB + * Copyright (c) 2000-2006, 2014-2015, Ericsson AB * Copyright (c) 2004-2008, 2010-2014, Wind River Systems * All rights reserved. * @@ -34,11 +34,15 @@ * POSSIBILITY OF SUCH DAMAGE. 
*/ +#include <net/sock.h> #include "core.h" -#include "config.h" +#include "netlink.h" #include "name_table.h" #include "name_distr.h" #include "subscr.h" +#include "bcast.h" +#include "addr.h" +#include <net/genetlink.h> #define TIPC_NAMETBL_SIZE 1024 /* must be a power of 2 */ @@ -105,9 +109,6 @@ struct name_seq { struct rcu_head rcu; }; -struct name_table *tipc_nametbl; -DEFINE_SPINLOCK(tipc_nametbl_lock); - static int hash(int x) { return x & (TIPC_NAMETBL_SIZE - 1); @@ -228,9 +229,11 @@ static u32 nameseq_locate_subseq(struct name_seq *nseq, u32 instance) /** * tipc_nameseq_insert_publ */ -static struct publication *tipc_nameseq_insert_publ(struct name_seq *nseq, - u32 type, u32 lower, u32 upper, - u32 scope, u32 node, u32 port, u32 key) +static struct publication *tipc_nameseq_insert_publ(struct net *net, + struct name_seq *nseq, + u32 type, u32 lower, + u32 upper, u32 scope, + u32 node, u32 port, u32 key) { struct tipc_subscription *s; struct tipc_subscription *st; @@ -315,12 +318,12 @@ static struct publication *tipc_nameseq_insert_publ(struct name_seq *nseq, list_add(&publ->zone_list, &info->zone_list); info->zone_list_size++; - if (in_own_cluster(node)) { + if (in_own_cluster(net, node)) { list_add(&publ->cluster_list, &info->cluster_list); info->cluster_list_size++; } - if (in_own_node(node)) { + if (in_own_node(net, node)) { list_add(&publ->node_list, &info->node_list); info->node_list_size++; } @@ -349,8 +352,10 @@ static struct publication *tipc_nameseq_insert_publ(struct name_seq *nseq, * A failed withdraw request simply returns a failure indication and lets the * caller issue any error or warning messages associated with such a problem. */ -static struct publication *tipc_nameseq_remove_publ(struct name_seq *nseq, u32 inst, - u32 node, u32 ref, u32 key) +static struct publication *tipc_nameseq_remove_publ(struct net *net, + struct name_seq *nseq, + u32 inst, u32 node, + u32 ref, u32 key) { struct publication *publ; struct sub_seq *sseq = nameseq_find_subseq(nseq, inst); @@ -378,13 +383,13 @@ found: info->zone_list_size--; /* Remove publication from cluster scope list, if present */ - if (in_own_cluster(node)) { + if (in_own_cluster(net, node)) { list_del(&publ->cluster_list); info->cluster_list_size--; } /* Remove publication from node scope list, if present */ - if (in_own_node(node)) { + if (in_own_node(net, node)) { list_del(&publ->node_list); info->node_list_size--; } @@ -447,12 +452,13 @@ static void tipc_nameseq_subscribe(struct name_seq *nseq, } } -static struct name_seq *nametbl_find_seq(u32 type) +static struct name_seq *nametbl_find_seq(struct net *net, u32 type) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct hlist_head *seq_head; struct name_seq *ns; - seq_head = &tipc_nametbl->seq_hlist[hash(type)]; + seq_head = &tn->nametbl->seq_hlist[hash(type)]; hlist_for_each_entry_rcu(ns, seq_head, ns_list) { if (ns->type == type) return ns; @@ -461,11 +467,13 @@ static struct name_seq *nametbl_find_seq(u32 type) return NULL; }; -struct publication *tipc_nametbl_insert_publ(u32 type, u32 lower, u32 upper, - u32 scope, u32 node, u32 port, u32 key) +struct publication *tipc_nametbl_insert_publ(struct net *net, u32 type, + u32 lower, u32 upper, u32 scope, + u32 node, u32 port, u32 key) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct publication *publ; - struct name_seq *seq = nametbl_find_seq(type); + struct name_seq *seq = nametbl_find_seq(net, type); int index = hash(type); if ((scope < TIPC_ZONE_SCOPE) || (scope > TIPC_NODE_SCOPE) || @@ 
-476,29 +484,29 @@ struct publication *tipc_nametbl_insert_publ(u32 type, u32 lower, u32 upper, } if (!seq) - seq = tipc_nameseq_create(type, - &tipc_nametbl->seq_hlist[index]); + seq = tipc_nameseq_create(type, &tn->nametbl->seq_hlist[index]); if (!seq) return NULL; spin_lock_bh(&seq->lock); - publ = tipc_nameseq_insert_publ(seq, type, lower, upper, + publ = tipc_nameseq_insert_publ(net, seq, type, lower, upper, scope, node, port, key); spin_unlock_bh(&seq->lock); return publ; } -struct publication *tipc_nametbl_remove_publ(u32 type, u32 lower, - u32 node, u32 ref, u32 key) +struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type, + u32 lower, u32 node, u32 ref, + u32 key) { struct publication *publ; - struct name_seq *seq = nametbl_find_seq(type); + struct name_seq *seq = nametbl_find_seq(net, type); if (!seq) return NULL; spin_lock_bh(&seq->lock); - publ = tipc_nameseq_remove_publ(seq, lower, node, ref, key); + publ = tipc_nameseq_remove_publ(net, seq, lower, node, ref, key); if (!seq->first_free && list_empty(&seq->subscriptions)) { hlist_del_init_rcu(&seq->ns_list); kfree(seq->sseqs); @@ -523,8 +531,10 @@ struct publication *tipc_nametbl_remove_publ(u32 type, u32 lower, * - if name translation is attempted and fails, sets 'destnode' to 0 * and returns 0 */ -u32 tipc_nametbl_translate(u32 type, u32 instance, u32 *destnode) +u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance, + u32 *destnode) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct sub_seq *sseq; struct name_info *info; struct publication *publ; @@ -532,11 +542,11 @@ u32 tipc_nametbl_translate(u32 type, u32 instance, u32 *destnode) u32 ref = 0; u32 node = 0; - if (!tipc_in_scope(*destnode, tipc_own_addr)) + if (!tipc_in_scope(*destnode, tn->own_addr)) return 0; rcu_read_lock(); - seq = nametbl_find_seq(type); + seq = nametbl_find_seq(net, type); if (unlikely(!seq)) goto not_found; spin_lock_bh(&seq->lock); @@ -569,13 +579,13 @@ u32 tipc_nametbl_translate(u32 type, u32 instance, u32 *destnode) } /* Round-Robin Algorithm */ - else if (*destnode == tipc_own_addr) { + else if (*destnode == tn->own_addr) { if (list_empty(&info->node_list)) goto no_match; publ = list_first_entry(&info->node_list, struct publication, node_list); list_move_tail(&publ->node_list, &info->node_list); - } else if (in_own_cluster_exact(*destnode)) { + } else if (in_own_cluster_exact(net, *destnode)) { if (list_empty(&info->cluster_list)) goto no_match; publ = list_first_entry(&info->cluster_list, struct publication, @@ -609,8 +619,8 @@ not_found: * * Returns non-zero if any off-node ports overlap */ -int tipc_nametbl_mc_translate(u32 type, u32 lower, u32 upper, u32 limit, - struct tipc_port_list *dports) +int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper, + u32 limit, struct tipc_plist *dports) { struct name_seq *seq; struct sub_seq *sseq; @@ -619,7 +629,7 @@ int tipc_nametbl_mc_translate(u32 type, u32 lower, u32 upper, u32 limit, int res = 0; rcu_read_lock(); - seq = nametbl_find_seq(type); + seq = nametbl_find_seq(net, type); if (!seq) goto exit; @@ -635,7 +645,7 @@ int tipc_nametbl_mc_translate(u32 type, u32 lower, u32 upper, u32 limit, info = sseq->info; list_for_each_entry(publ, &info->node_list, node_list) { if (publ->scope <= limit) - tipc_port_list_add(dports, publ->ref); + tipc_plist_push(dports, publ->ref); } if (info->cluster_list_size != info->node_list_size) @@ -650,50 +660,55 @@ exit: /* * tipc_nametbl_publish - add name publication to network name tables */ -struct 
publication *tipc_nametbl_publish(u32 type, u32 lower, u32 upper, - u32 scope, u32 port_ref, u32 key) +struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower, + u32 upper, u32 scope, u32 port_ref, + u32 key) { struct publication *publ; struct sk_buff *buf = NULL; + struct tipc_net *tn = net_generic(net, tipc_net_id); - spin_lock_bh(&tipc_nametbl_lock); - if (tipc_nametbl->local_publ_count >= TIPC_MAX_PUBLICATIONS) { + spin_lock_bh(&tn->nametbl_lock); + if (tn->nametbl->local_publ_count >= TIPC_MAX_PUBLICATIONS) { pr_warn("Publication failed, local publication limit reached (%u)\n", TIPC_MAX_PUBLICATIONS); - spin_unlock_bh(&tipc_nametbl_lock); + spin_unlock_bh(&tn->nametbl_lock); return NULL; } - publ = tipc_nametbl_insert_publ(type, lower, upper, scope, - tipc_own_addr, port_ref, key); + publ = tipc_nametbl_insert_publ(net, type, lower, upper, scope, + tn->own_addr, port_ref, key); if (likely(publ)) { - tipc_nametbl->local_publ_count++; - buf = tipc_named_publish(publ); + tn->nametbl->local_publ_count++; + buf = tipc_named_publish(net, publ); /* Any pending external events? */ - tipc_named_process_backlog(); + tipc_named_process_backlog(net); } - spin_unlock_bh(&tipc_nametbl_lock); + spin_unlock_bh(&tn->nametbl_lock); if (buf) - named_cluster_distribute(buf); + named_cluster_distribute(net, buf); return publ; } /** * tipc_nametbl_withdraw - withdraw name publication from network name tables */ -int tipc_nametbl_withdraw(u32 type, u32 lower, u32 ref, u32 key) +int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 ref, + u32 key) { struct publication *publ; struct sk_buff *skb = NULL; + struct tipc_net *tn = net_generic(net, tipc_net_id); - spin_lock_bh(&tipc_nametbl_lock); - publ = tipc_nametbl_remove_publ(type, lower, tipc_own_addr, ref, key); + spin_lock_bh(&tn->nametbl_lock); + publ = tipc_nametbl_remove_publ(net, type, lower, tn->own_addr, + ref, key); if (likely(publ)) { - tipc_nametbl->local_publ_count--; - skb = tipc_named_withdraw(publ); + tn->nametbl->local_publ_count--; + skb = tipc_named_withdraw(net, publ); /* Any pending external events? 
*/ - tipc_named_process_backlog(); + tipc_named_process_backlog(net); list_del_init(&publ->pport_list); kfree_rcu(publ, rcu); } else { @@ -701,10 +716,10 @@ int tipc_nametbl_withdraw(u32 type, u32 lower, u32 ref, u32 key) "(type=%u, lower=%u, ref=%u, key=%u)\n", type, lower, ref, key); } - spin_unlock_bh(&tipc_nametbl_lock); + spin_unlock_bh(&tn->nametbl_lock); if (skb) { - named_cluster_distribute(skb); + named_cluster_distribute(net, skb); return 1; } return 0; @@ -715,15 +730,15 @@ int tipc_nametbl_withdraw(u32 type, u32 lower, u32 ref, u32 key) */ void tipc_nametbl_subscribe(struct tipc_subscription *s) { + struct tipc_net *tn = net_generic(s->net, tipc_net_id); u32 type = s->seq.type; int index = hash(type); struct name_seq *seq; - spin_lock_bh(&tipc_nametbl_lock); - seq = nametbl_find_seq(type); + spin_lock_bh(&tn->nametbl_lock); + seq = nametbl_find_seq(s->net, type); if (!seq) - seq = tipc_nameseq_create(type, - &tipc_nametbl->seq_hlist[index]); + seq = tipc_nameseq_create(type, &tn->nametbl->seq_hlist[index]); if (seq) { spin_lock_bh(&seq->lock); tipc_nameseq_subscribe(seq, s); @@ -732,7 +747,7 @@ void tipc_nametbl_subscribe(struct tipc_subscription *s) pr_warn("Failed to create subscription for {%u,%u,%u}\n", s->seq.type, s->seq.lower, s->seq.upper); } - spin_unlock_bh(&tipc_nametbl_lock); + spin_unlock_bh(&tn->nametbl_lock); } /** @@ -740,10 +755,11 @@ void tipc_nametbl_subscribe(struct tipc_subscription *s) */ void tipc_nametbl_unsubscribe(struct tipc_subscription *s) { + struct tipc_net *tn = net_generic(s->net, tipc_net_id); struct name_seq *seq; - spin_lock_bh(&tipc_nametbl_lock); - seq = nametbl_find_seq(s->seq.type); + spin_lock_bh(&tn->nametbl_lock); + seq = nametbl_find_seq(s->net, s->seq.type); if (seq != NULL) { spin_lock_bh(&seq->lock); list_del_init(&s->nameseq_list); @@ -756,193 +772,13 @@ void tipc_nametbl_unsubscribe(struct tipc_subscription *s) spin_unlock_bh(&seq->lock); } } - spin_unlock_bh(&tipc_nametbl_lock); -} - -/** - * subseq_list - print specified sub-sequence contents into the given buffer - */ -static int subseq_list(struct sub_seq *sseq, char *buf, int len, u32 depth, - u32 index) -{ - char portIdStr[27]; - const char *scope_str[] = {"", " zone", " cluster", " node"}; - struct publication *publ; - struct name_info *info; - int ret; - - ret = tipc_snprintf(buf, len, "%-10u %-10u ", sseq->lower, sseq->upper); - - if (depth == 2) { - ret += tipc_snprintf(buf - ret, len + ret, "\n"); - return ret; - } - - info = sseq->info; - - list_for_each_entry(publ, &info->zone_list, zone_list) { - sprintf(portIdStr, "<%u.%u.%u:%u>", - tipc_zone(publ->node), tipc_cluster(publ->node), - tipc_node(publ->node), publ->ref); - ret += tipc_snprintf(buf + ret, len - ret, "%-26s ", portIdStr); - if (depth > 3) { - ret += tipc_snprintf(buf + ret, len - ret, "%-10u %s", - publ->key, scope_str[publ->scope]); - } - if (!list_is_last(&publ->zone_list, &info->zone_list)) - ret += tipc_snprintf(buf + ret, len - ret, - "\n%33s", " "); - } - - ret += tipc_snprintf(buf + ret, len - ret, "\n"); - return ret; -} - -/** - * nameseq_list - print specified name sequence contents into the given buffer - */ -static int nameseq_list(struct name_seq *seq, char *buf, int len, u32 depth, - u32 type, u32 lowbound, u32 upbound, u32 index) -{ - struct sub_seq *sseq; - char typearea[11]; - int ret = 0; - - if (seq->first_free == 0) - return 0; - - sprintf(typearea, "%-10u", seq->type); - - if (depth == 1) { - ret += tipc_snprintf(buf, len, "%s\n", typearea); - return ret; - } - - for (sseq = 
seq->sseqs; sseq != &seq->sseqs[seq->first_free]; sseq++) { - if ((lowbound <= sseq->upper) && (upbound >= sseq->lower)) { - ret += tipc_snprintf(buf + ret, len - ret, "%s ", - typearea); - spin_lock_bh(&seq->lock); - ret += subseq_list(sseq, buf + ret, len - ret, - depth, index); - spin_unlock_bh(&seq->lock); - sprintf(typearea, "%10s", " "); - } - } - return ret; -} - -/** - * nametbl_header - print name table header into the given buffer - */ -static int nametbl_header(char *buf, int len, u32 depth) -{ - const char *header[] = { - "Type ", - "Lower Upper ", - "Port Identity ", - "Publication Scope" - }; - - int i; - int ret = 0; - - if (depth > 4) - depth = 4; - for (i = 0; i < depth; i++) - ret += tipc_snprintf(buf + ret, len - ret, header[i]); - ret += tipc_snprintf(buf + ret, len - ret, "\n"); - return ret; -} - -/** - * nametbl_list - print specified name table contents into the given buffer - */ -static int nametbl_list(char *buf, int len, u32 depth_info, - u32 type, u32 lowbound, u32 upbound) -{ - struct hlist_head *seq_head; - struct name_seq *seq; - int all_types; - int ret = 0; - u32 depth; - u32 i; - - all_types = (depth_info & TIPC_NTQ_ALLTYPES); - depth = (depth_info & ~TIPC_NTQ_ALLTYPES); - - if (depth == 0) - return 0; - - if (all_types) { - /* display all entries in name table to specified depth */ - ret += nametbl_header(buf, len, depth); - lowbound = 0; - upbound = ~0; - for (i = 0; i < TIPC_NAMETBL_SIZE; i++) { - seq_head = &tipc_nametbl->seq_hlist[i]; - hlist_for_each_entry_rcu(seq, seq_head, ns_list) { - ret += nameseq_list(seq, buf + ret, len - ret, - depth, seq->type, - lowbound, upbound, i); - } - } - } else { - /* display only the sequence that matches the specified type */ - if (upbound < lowbound) { - ret += tipc_snprintf(buf + ret, len - ret, - "invalid name sequence specified\n"); - return ret; - } - ret += nametbl_header(buf + ret, len - ret, depth); - i = hash(type); - seq_head = &tipc_nametbl->seq_hlist[i]; - hlist_for_each_entry_rcu(seq, seq_head, ns_list) { - if (seq->type == type) { - ret += nameseq_list(seq, buf + ret, len - ret, - depth, type, - lowbound, upbound, i); - break; - } - } - } - return ret; + spin_unlock_bh(&tn->nametbl_lock); } -struct sk_buff *tipc_nametbl_get(const void *req_tlv_area, int req_tlv_space) -{ - struct sk_buff *buf; - struct tipc_name_table_query *argv; - struct tlv_desc *rep_tlv; - char *pb; - int pb_len; - int str_len; - - if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_NAME_TBL_QUERY)) - return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - - buf = tipc_cfg_reply_alloc(TLV_SPACE(ULTRA_STRING_MAX_LEN)); - if (!buf) - return NULL; - - rep_tlv = (struct tlv_desc *)buf->data; - pb = TLV_DATA(rep_tlv); - pb_len = ULTRA_STRING_MAX_LEN; - argv = (struct tipc_name_table_query *)TLV_DATA(req_tlv_area); - rcu_read_lock(); - str_len = nametbl_list(pb, pb_len, ntohl(argv->depth), - ntohl(argv->type), - ntohl(argv->lowbound), ntohl(argv->upbound)); - rcu_read_unlock(); - str_len += 1; /* for "\0" */ - skb_put(buf, TLV_SPACE(str_len)); - TLV_SET(rep_tlv, TIPC_TLV_ULTRA_STRING, NULL, str_len); - - return buf; -} - -int tipc_nametbl_init(void) +int tipc_nametbl_init(struct net *net) { + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct name_table *tipc_nametbl; int i; tipc_nametbl = kzalloc(sizeof(*tipc_nametbl), GFP_ATOMIC); @@ -955,6 +791,8 @@ int tipc_nametbl_init(void) INIT_LIST_HEAD(&tipc_nametbl->publ_list[TIPC_ZONE_SCOPE]); INIT_LIST_HEAD(&tipc_nametbl->publ_list[TIPC_CLUSTER_SCOPE]); 
INIT_LIST_HEAD(&tipc_nametbl->publ_list[TIPC_NODE_SCOPE]); + tn->nametbl = tipc_nametbl; + spin_lock_init(&tn->nametbl_lock); return 0; } @@ -963,7 +801,7 @@ int tipc_nametbl_init(void) * * tipc_nametbl_lock must be held when calling this function */ -static void tipc_purge_publications(struct name_seq *seq) +static void tipc_purge_publications(struct net *net, struct name_seq *seq) { struct publication *publ, *safe; struct sub_seq *sseq; @@ -973,8 +811,8 @@ static void tipc_purge_publications(struct name_seq *seq) sseq = seq->sseqs; info = sseq->info; list_for_each_entry_safe(publ, safe, &info->zone_list, zone_list) { - tipc_nametbl_remove_publ(publ->type, publ->lower, publ->node, - publ->ref, publ->key); + tipc_nametbl_remove_publ(net, publ->type, publ->lower, + publ->node, publ->ref, publ->key); kfree_rcu(publ, rcu); } hlist_del_init_rcu(&seq->ns_list); @@ -984,25 +822,27 @@ static void tipc_purge_publications(struct name_seq *seq) kfree_rcu(seq, rcu); } -void tipc_nametbl_stop(void) +void tipc_nametbl_stop(struct net *net) { u32 i; struct name_seq *seq; struct hlist_head *seq_head; + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct name_table *tipc_nametbl = tn->nametbl; /* Verify name table is empty and purge any lingering * publications, then release the name table */ - spin_lock_bh(&tipc_nametbl_lock); + spin_lock_bh(&tn->nametbl_lock); for (i = 0; i < TIPC_NAMETBL_SIZE; i++) { if (hlist_empty(&tipc_nametbl->seq_hlist[i])) continue; seq_head = &tipc_nametbl->seq_hlist[i]; hlist_for_each_entry_rcu(seq, seq_head, ns_list) { - tipc_purge_publications(seq); + tipc_purge_publications(net, seq); } } - spin_unlock_bh(&tipc_nametbl_lock); + spin_unlock_bh(&tn->nametbl_lock); synchronize_net(); kfree(tipc_nametbl); @@ -1033,7 +873,7 @@ static int __tipc_nl_add_nametable_publ(struct tipc_nl_msg *msg, *last_publ = p->key; hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, - &tipc_genl_v2_family, NLM_F_MULTI, + &tipc_genl_family, NLM_F_MULTI, TIPC_NL_NAME_TABLE_GET); if (!hdr) return -EMSGSIZE; @@ -1106,9 +946,10 @@ static int __tipc_nl_subseq_list(struct tipc_nl_msg *msg, struct name_seq *seq, return 0; } -static int __tipc_nl_seq_list(struct tipc_nl_msg *msg, u32 *last_type, - u32 *last_lower, u32 *last_publ) +static int tipc_nl_seq_list(struct net *net, struct tipc_nl_msg *msg, + u32 *last_type, u32 *last_lower, u32 *last_publ) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct hlist_head *seq_head; struct name_seq *seq = NULL; int err; @@ -1120,10 +961,10 @@ static int __tipc_nl_seq_list(struct tipc_nl_msg *msg, u32 *last_type, i = 0; for (; i < TIPC_NAMETBL_SIZE; i++) { - seq_head = &tipc_nametbl->seq_hlist[i]; + seq_head = &tn->nametbl->seq_hlist[i]; if (*last_type) { - seq = nametbl_find_seq(*last_type); + seq = nametbl_find_seq(net, *last_type); if (!seq) return -EPIPE; } else { @@ -1157,6 +998,7 @@ int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb) u32 last_type = cb->args[0]; u32 last_lower = cb->args[1]; u32 last_publ = cb->args[2]; + struct net *net = sock_net(skb->sk); struct tipc_nl_msg msg; if (done) @@ -1167,7 +1009,7 @@ int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb) msg.seq = cb->nlh->nlmsg_seq; rcu_read_lock(); - err = __tipc_nl_seq_list(&msg, &last_type, &last_lower, &last_publ); + err = tipc_nl_seq_list(net, &msg, &last_type, &last_lower, &last_publ); if (!err) { done = 1; } else if (err != -EMSGSIZE) { @@ -1188,3 +1030,41 @@ int tipc_nl_name_table_dump(struct sk_buff *skb, struct 
netlink_callback *cb) return skb->len; } + +void tipc_plist_push(struct tipc_plist *pl, u32 port) +{ + struct tipc_plist *nl; + + if (likely(!pl->port)) { + pl->port = port; + return; + } + if (pl->port == port) + return; + list_for_each_entry(nl, &pl->list, list) { + if (nl->port == port) + return; + } + nl = kmalloc(sizeof(*nl), GFP_ATOMIC); + if (nl) { + nl->port = port; + list_add(&nl->list, &pl->list); + } +} + +u32 tipc_plist_pop(struct tipc_plist *pl) +{ + struct tipc_plist *nl; + u32 port = 0; + + if (likely(list_empty(&pl->list))) { + port = pl->port; + pl->port = 0; + return port; + } + nl = list_first_entry(&pl->list, typeof(*nl), list); + port = nl->port; + list_del(&nl->list); + kfree(nl); + return port; +} diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h index 5f0dee92010d..1524a73830f7 100644 --- a/net/tipc/name_table.h +++ b/net/tipc/name_table.h @@ -1,7 +1,7 @@ /* * net/tipc/name_table.h: Include file for TIPC name table code * - * Copyright (c) 2000-2006, 2014, Ericsson AB + * Copyright (c) 2000-2006, 2014-2015, Ericsson AB * Copyright (c) 2004-2005, 2010-2011, Wind River Systems * All rights reserved. * @@ -38,7 +38,7 @@ #define _TIPC_NAME_TABLE_H struct tipc_subscription; -struct tipc_port_list; +struct tipc_plist; /* * TIPC name types reserved for internal TIPC use (both current and planned) @@ -95,26 +95,39 @@ struct name_table { u32 local_publ_count; }; -extern spinlock_t tipc_nametbl_lock; -extern struct name_table *tipc_nametbl; - int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb); -struct sk_buff *tipc_nametbl_get(const void *req_tlv_area, int req_tlv_space); -u32 tipc_nametbl_translate(u32 type, u32 instance, u32 *node); -int tipc_nametbl_mc_translate(u32 type, u32 lower, u32 upper, u32 limit, - struct tipc_port_list *dports); -struct publication *tipc_nametbl_publish(u32 type, u32 lower, u32 upper, - u32 scope, u32 port_ref, u32 key); -int tipc_nametbl_withdraw(u32 type, u32 lower, u32 ref, u32 key); -struct publication *tipc_nametbl_insert_publ(u32 type, u32 lower, u32 upper, - u32 scope, u32 node, u32 ref, +u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance, u32 *node); +int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper, + u32 limit, struct tipc_plist *dports); +struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower, + u32 upper, u32 scope, u32 port_ref, + u32 key); +int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 ref, + u32 key); +struct publication *tipc_nametbl_insert_publ(struct net *net, u32 type, + u32 lower, u32 upper, u32 scope, + u32 node, u32 ref, u32 key); +struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type, + u32 lower, u32 node, u32 ref, u32 key); -struct publication *tipc_nametbl_remove_publ(u32 type, u32 lower, u32 node, - u32 ref, u32 key); void tipc_nametbl_subscribe(struct tipc_subscription *s); void tipc_nametbl_unsubscribe(struct tipc_subscription *s); -int tipc_nametbl_init(void); -void tipc_nametbl_stop(void); +int tipc_nametbl_init(struct net *net); +void tipc_nametbl_stop(struct net *net); + +struct tipc_plist { + struct list_head list; + u32 port; +}; + +static inline void tipc_plist_init(struct tipc_plist *pl) +{ + INIT_LIST_HEAD(&pl->list); + pl->port = 0; +} + +void tipc_plist_push(struct tipc_plist *pl, u32 port); +u32 tipc_plist_pop(struct tipc_plist *pl); #endif diff --git a/net/tipc/net.c b/net/tipc/net.c index cf13df3cde8f..a54f3cbe2246 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c 
@@ -40,7 +40,6 @@ #include "subscr.h" #include "socket.h" #include "node.h" -#include "config.h" static const struct nla_policy tipc_nl_net_policy[TIPC_NLA_NET_MAX + 1] = { [TIPC_NLA_NET_UNSPEC] = { .type = NLA_UNSPEC }, @@ -108,48 +107,54 @@ static const struct nla_policy tipc_nl_net_policy[TIPC_NLA_NET_MAX + 1] = { * - A local spin_lock protecting the queue of subscriber events. */ -int tipc_net_start(u32 addr) +int tipc_net_start(struct net *net, u32 addr) { + struct tipc_net *tn = net_generic(net, tipc_net_id); char addr_string[16]; int res; - tipc_own_addr = addr; - tipc_named_reinit(); - tipc_sk_reinit(); - res = tipc_bclink_init(); + tn->own_addr = addr; + tipc_named_reinit(net); + tipc_sk_reinit(net); + res = tipc_bclink_init(net); if (res) return res; - tipc_nametbl_publish(TIPC_CFG_SRV, tipc_own_addr, tipc_own_addr, - TIPC_ZONE_SCOPE, 0, tipc_own_addr); + tipc_nametbl_publish(net, TIPC_CFG_SRV, tn->own_addr, tn->own_addr, + TIPC_ZONE_SCOPE, 0, tn->own_addr); pr_info("Started in network mode\n"); pr_info("Own node address %s, network identity %u\n", - tipc_addr_string_fill(addr_string, tipc_own_addr), tipc_net_id); + tipc_addr_string_fill(addr_string, tn->own_addr), + tn->net_id); return 0; } -void tipc_net_stop(void) +void tipc_net_stop(struct net *net) { - if (!tipc_own_addr) + struct tipc_net *tn = net_generic(net, tipc_net_id); + + if (!tn->own_addr) return; - tipc_nametbl_withdraw(TIPC_CFG_SRV, tipc_own_addr, 0, tipc_own_addr); + tipc_nametbl_withdraw(net, TIPC_CFG_SRV, tn->own_addr, 0, + tn->own_addr); rtnl_lock(); - tipc_bearer_stop(); - tipc_bclink_stop(); - tipc_node_stop(); + tipc_bearer_stop(net); + tipc_bclink_stop(net); + tipc_node_stop(net); rtnl_unlock(); pr_info("Left network mode\n"); } -static int __tipc_nl_add_net(struct tipc_nl_msg *msg) +static int __tipc_nl_add_net(struct net *net, struct tipc_nl_msg *msg) { + struct tipc_net *tn = net_generic(net, tipc_net_id); void *hdr; struct nlattr *attrs; - hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_v2_family, + hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, NLM_F_MULTI, TIPC_NL_NET_GET); if (!hdr) return -EMSGSIZE; @@ -158,7 +163,7 @@ static int __tipc_nl_add_net(struct tipc_nl_msg *msg) if (!attrs) goto msg_full; - if (nla_put_u32(msg->skb, TIPC_NLA_NET_ID, tipc_net_id)) + if (nla_put_u32(msg->skb, TIPC_NLA_NET_ID, tn->net_id)) goto attr_msg_full; nla_nest_end(msg->skb, attrs); @@ -176,6 +181,7 @@ msg_full: int tipc_nl_net_dump(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = sock_net(skb->sk); int err; int done = cb->args[0]; struct tipc_nl_msg msg; @@ -187,7 +193,7 @@ int tipc_nl_net_dump(struct sk_buff *skb, struct netlink_callback *cb) msg.portid = NETLINK_CB(cb->skb).portid; msg.seq = cb->nlh->nlmsg_seq; - err = __tipc_nl_add_net(&msg); + err = __tipc_nl_add_net(net, &msg); if (err) goto out; @@ -200,8 +206,10 @@ out: int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info) { - int err; + struct net *net = sock_net(skb->sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); struct nlattr *attrs[TIPC_NLA_NET_MAX + 1]; + int err; if (!info->attrs[TIPC_NLA_NET]) return -EINVAL; @@ -216,21 +224,21 @@ int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info) u32 val; /* Can't change net id once TIPC has joined a network */ - if (tipc_own_addr) + if (tn->own_addr) return -EPERM; val = nla_get_u32(attrs[TIPC_NLA_NET_ID]); if (val < 1 || val > 9999) return -EINVAL; - tipc_net_id = val; + tn->net_id = val; } if (attrs[TIPC_NLA_NET_ADDR]) 
{ u32 addr; /* Can't change net addr once TIPC has joined a network */ - if (tipc_own_addr) + if (tn->own_addr) return -EPERM; addr = nla_get_u32(attrs[TIPC_NLA_NET_ADDR]); @@ -238,7 +246,7 @@ int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info) return -EINVAL; rtnl_lock(); - tipc_net_start(addr); + tipc_net_start(net, addr); rtnl_unlock(); } diff --git a/net/tipc/net.h b/net/tipc/net.h index a81c1b9eb150..77a7a118911d 100644 --- a/net/tipc/net.h +++ b/net/tipc/net.h @@ -39,9 +39,9 @@ #include <net/genetlink.h> -int tipc_net_start(u32 addr); +int tipc_net_start(struct net *net, u32 addr); -void tipc_net_stop(void); +void tipc_net_stop(struct net *net); int tipc_nl_net_dump(struct sk_buff *skb, struct netlink_callback *cb); int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info); diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index b891e3905bc4..7f6475efc984 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -35,7 +35,6 @@ */ #include "core.h" -#include "config.h" #include "socket.h" #include "name_table.h" #include "bearer.h" @@ -44,36 +43,6 @@ #include "net.h" #include <net/genetlink.h> -static int handle_cmd(struct sk_buff *skb, struct genl_info *info) -{ - struct sk_buff *rep_buf; - struct nlmsghdr *rep_nlh; - struct nlmsghdr *req_nlh = info->nlhdr; - struct tipc_genlmsghdr *req_userhdr = info->userhdr; - int hdr_space = nlmsg_total_size(GENL_HDRLEN + TIPC_GENL_HDRLEN); - u16 cmd; - - if ((req_userhdr->cmd & 0xC000) && (!netlink_capable(skb, CAP_NET_ADMIN))) - cmd = TIPC_CMD_NOT_NET_ADMIN; - else - cmd = req_userhdr->cmd; - - rep_buf = tipc_cfg_do_cmd(req_userhdr->dest, cmd, - nlmsg_data(req_nlh) + GENL_HDRLEN + TIPC_GENL_HDRLEN, - nlmsg_attrlen(req_nlh, GENL_HDRLEN + TIPC_GENL_HDRLEN), - hdr_space); - - if (rep_buf) { - skb_push(rep_buf, hdr_space); - rep_nlh = nlmsg_hdr(rep_buf); - memcpy(rep_nlh, req_nlh, hdr_space); - rep_nlh->nlmsg_len = rep_buf->len; - genlmsg_unicast(&init_net, rep_buf, NETLINK_CB(skb).portid); - } - - return 0; -} - static const struct nla_policy tipc_nl_policy[TIPC_NLA_MAX + 1] = { [TIPC_NLA_UNSPEC] = { .type = NLA_UNSPEC, }, [TIPC_NLA_BEARER] = { .type = NLA_NESTED, }, @@ -86,32 +55,16 @@ static const struct nla_policy tipc_nl_policy[TIPC_NLA_MAX + 1] = { [TIPC_NLA_NAME_TABLE] = { .type = NLA_NESTED, } }; -/* Legacy ASCII API */ -static struct genl_family tipc_genl_family = { - .id = GENL_ID_GENERATE, - .name = TIPC_GENL_NAME, - .version = TIPC_GENL_VERSION, - .hdrsize = TIPC_GENL_HDRLEN, - .maxattr = 0, -}; - -/* Legacy ASCII API */ -static struct genl_ops tipc_genl_ops[] = { - { - .cmd = TIPC_GENL_CMD, - .doit = handle_cmd, - }, -}; - /* Users of the legacy API (tipc-config) can't handle that we add operations, * so we have a separate genl handling for the new API. 
*/ -struct genl_family tipc_genl_v2_family = { +struct genl_family tipc_genl_family = { .id = GENL_ID_GENERATE, .name = TIPC_GENL_V2_NAME, .version = TIPC_GENL_V2_VERSION, .hdrsize = 0, .maxattr = TIPC_NLA_MAX, + .netnsok = true, }; static const struct genl_ops tipc_genl_v2_ops[] = { @@ -197,9 +150,9 @@ static const struct genl_ops tipc_genl_v2_ops[] = { int tipc_nlmsg_parse(const struct nlmsghdr *nlh, struct nlattr ***attr) { - u32 maxattr = tipc_genl_v2_family.maxattr; + u32 maxattr = tipc_genl_family.maxattr; - *attr = tipc_genl_v2_family.attrbuf; + *attr = tipc_genl_family.attrbuf; if (!*attr) return -EOPNOTSUPP; @@ -210,13 +163,7 @@ int tipc_netlink_start(void) { int res; - res = genl_register_family_with_ops(&tipc_genl_family, tipc_genl_ops); - if (res) { - pr_err("Failed to register legacy interface\n"); - return res; - } - - res = genl_register_family_with_ops(&tipc_genl_v2_family, + res = genl_register_family_with_ops(&tipc_genl_family, tipc_genl_v2_ops); if (res) { pr_err("Failed to register netlink interface\n"); @@ -228,5 +175,4 @@ int tipc_netlink_start(void) void tipc_netlink_stop(void) { genl_unregister_family(&tipc_genl_family); - genl_unregister_family(&tipc_genl_v2_family); } diff --git a/net/tipc/netlink.h b/net/tipc/netlink.h index 1425c6869de0..08a1db67b927 100644 --- a/net/tipc/netlink.h +++ b/net/tipc/netlink.h @@ -36,7 +36,7 @@ #ifndef _TIPC_NETLINK_H #define _TIPC_NETLINK_H -extern struct genl_family tipc_genl_v2_family; +extern struct genl_family tipc_genl_family; int tipc_nlmsg_parse(const struct nlmsghdr *nlh, struct nlattr ***buf); struct tipc_nl_msg { @@ -45,4 +45,9 @@ struct tipc_nl_msg { u32 seq; }; +int tipc_netlink_start(void); +int tipc_netlink_compat_start(void); +void tipc_netlink_stop(void); +void tipc_netlink_compat_stop(void); + #endif diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c new file mode 100644 index 000000000000..ce9121e8e990 --- /dev/null +++ b/net/tipc/netlink_compat.c @@ -0,0 +1,1084 @@ +/* + * Copyright (c) 2014, Ericsson AB + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "core.h" +#include "bearer.h" +#include "link.h" +#include "name_table.h" +#include "socket.h" +#include "node.h" +#include "net.h" +#include <net/genetlink.h> +#include <linux/tipc_config.h> + +/* The legacy API had an artificial message length limit called + * ULTRA_STRING_MAX_LEN. + */ +#define ULTRA_STRING_MAX_LEN 32768 + +#define TIPC_SKB_MAX TLV_SPACE(ULTRA_STRING_MAX_LEN) + +#define REPLY_TRUNCATED "<truncated>\n" + +struct tipc_nl_compat_msg { + u16 cmd; + int rep_type; + int rep_size; + int req_type; + struct sk_buff *rep; + struct tlv_desc *req; + struct sock *dst_sk; +}; + +struct tipc_nl_compat_cmd_dump { + int (*header)(struct tipc_nl_compat_msg *); + int (*dumpit)(struct sk_buff *, struct netlink_callback *); + int (*format)(struct tipc_nl_compat_msg *msg, struct nlattr **attrs); +}; + +struct tipc_nl_compat_cmd_doit { + int (*doit)(struct sk_buff *skb, struct genl_info *info); + int (*transcode)(struct sk_buff *skb, struct tipc_nl_compat_msg *msg); +}; + +static int tipc_skb_tailroom(struct sk_buff *skb) +{ + int tailroom; + int limit; + + tailroom = skb_tailroom(skb); + limit = TIPC_SKB_MAX - skb->len; + + if (tailroom < limit) + return tailroom; + + return limit; +} + +static int tipc_add_tlv(struct sk_buff *skb, u16 type, void *data, u16 len) +{ + struct tlv_desc *tlv = (struct tlv_desc *)skb_tail_pointer(skb); + + if (tipc_skb_tailroom(skb) < TLV_SPACE(len)) + return -EMSGSIZE; + + skb_put(skb, TLV_SPACE(len)); + tlv->tlv_type = htons(type); + tlv->tlv_len = htons(TLV_LENGTH(len)); + if (len && data) + memcpy(TLV_DATA(tlv), data, len); + + return 0; +} + +static void tipc_tlv_init(struct sk_buff *skb, u16 type) +{ + struct tlv_desc *tlv = (struct tlv_desc *)skb->data; + + TLV_SET_LEN(tlv, 0); + TLV_SET_TYPE(tlv, type); + skb_put(skb, sizeof(struct tlv_desc)); +} + +static int tipc_tlv_sprintf(struct sk_buff *skb, const char *fmt, ...) 
+{ + int n; + u16 len; + u32 rem; + char *buf; + struct tlv_desc *tlv; + va_list args; + + rem = tipc_skb_tailroom(skb); + + tlv = (struct tlv_desc *)skb->data; + len = TLV_GET_LEN(tlv); + buf = TLV_DATA(tlv) + len; + + va_start(args, fmt); + n = vscnprintf(buf, rem, fmt, args); + va_end(args); + + TLV_SET_LEN(tlv, n + len); + skb_put(skb, n); + + return n; +} + +static struct sk_buff *tipc_tlv_alloc(int size) +{ + int hdr_len; + struct sk_buff *buf; + + size = TLV_SPACE(size); + hdr_len = nlmsg_total_size(GENL_HDRLEN + TIPC_GENL_HDRLEN); + + buf = alloc_skb(hdr_len + size, GFP_KERNEL); + if (!buf) + return NULL; + + skb_reserve(buf, hdr_len); + + return buf; +} + +static struct sk_buff *tipc_get_err_tlv(char *str) +{ + int str_len = strlen(str) + 1; + struct sk_buff *buf; + + buf = tipc_tlv_alloc(TLV_SPACE(str_len)); + if (buf) + tipc_add_tlv(buf, TIPC_TLV_ERROR_STRING, str, str_len); + + return buf; +} + +static int __tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd, + struct tipc_nl_compat_msg *msg, + struct sk_buff *arg) +{ + int len = 0; + int err; + struct sk_buff *buf; + struct nlmsghdr *nlmsg; + struct netlink_callback cb; + + memset(&cb, 0, sizeof(cb)); + cb.nlh = (struct nlmsghdr *)arg->data; + cb.skb = arg; + + buf = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + buf->sk = msg->dst_sk; + + do { + int rem; + + len = (*cmd->dumpit)(buf, &cb); + + nlmsg_for_each_msg(nlmsg, nlmsg_hdr(buf), len, rem) { + struct nlattr **attrs; + + err = tipc_nlmsg_parse(nlmsg, &attrs); + if (err) + goto err_out; + + err = (*cmd->format)(msg, attrs); + if (err) + goto err_out; + + if (tipc_skb_tailroom(msg->rep) <= 1) { + err = -EMSGSIZE; + goto err_out; + } + } + + skb_reset_tail_pointer(buf); + buf->len = 0; + + } while (len); + + err = 0; + +err_out: + kfree_skb(buf); + + if (err == -EMSGSIZE) { + /* The legacy API only considered messages filling + * "ULTRA_STRING_MAX_LEN" to be truncated. 
+ */ + if ((TIPC_SKB_MAX - msg->rep->len) <= 1) { + char *tail = skb_tail_pointer(msg->rep); + + if (*tail != '\0') + sprintf(tail - sizeof(REPLY_TRUNCATED) - 1, + REPLY_TRUNCATED); + } + + return 0; + } + + return err; +} + +static int tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd, + struct tipc_nl_compat_msg *msg) +{ + int err; + struct sk_buff *arg; + + if (msg->req_type && !TLV_CHECK_TYPE(msg->req, msg->req_type)) + return -EINVAL; + + msg->rep = tipc_tlv_alloc(msg->rep_size); + if (!msg->rep) + return -ENOMEM; + + if (msg->rep_type) + tipc_tlv_init(msg->rep, msg->rep_type); + + if (cmd->header) + (*cmd->header)(msg); + + arg = nlmsg_new(0, GFP_KERNEL); + if (!arg) { + kfree_skb(msg->rep); + return -ENOMEM; + } + + err = __tipc_nl_compat_dumpit(cmd, msg, arg); + if (err) + kfree_skb(msg->rep); + + kfree_skb(arg); + + return err; +} + +static int __tipc_nl_compat_doit(struct tipc_nl_compat_cmd_doit *cmd, + struct tipc_nl_compat_msg *msg) +{ + int err; + struct sk_buff *doit_buf; + struct sk_buff *trans_buf; + struct nlattr **attrbuf; + struct genl_info info; + + trans_buf = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!trans_buf) + return -ENOMEM; + + err = (*cmd->transcode)(trans_buf, msg); + if (err) + goto trans_out; + + attrbuf = kmalloc((tipc_genl_family.maxattr + 1) * + sizeof(struct nlattr *), GFP_KERNEL); + if (!attrbuf) { + err = -ENOMEM; + goto trans_out; + } + + err = nla_parse(attrbuf, tipc_genl_family.maxattr, + (const struct nlattr *)trans_buf->data, + trans_buf->len, NULL); + if (err) + goto parse_out; + + doit_buf = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!doit_buf) { + err = -ENOMEM; + goto parse_out; + } + + doit_buf->sk = msg->dst_sk; + + memset(&info, 0, sizeof(info)); + info.attrs = attrbuf; + + err = (*cmd->doit)(doit_buf, &info); + + kfree_skb(doit_buf); +parse_out: + kfree(attrbuf); +trans_out: + kfree_skb(trans_buf); + + return err; +} + +static int tipc_nl_compat_doit(struct tipc_nl_compat_cmd_doit *cmd, + struct tipc_nl_compat_msg *msg) +{ + int err; + + if (msg->req_type && !TLV_CHECK_TYPE(msg->req, msg->req_type)) + return -EINVAL; + + err = __tipc_nl_compat_doit(cmd, msg); + if (err) + return err; + + /* The legacy API considered an empty message a success message */ + msg->rep = tipc_tlv_alloc(0); + if (!msg->rep) + return -ENOMEM; + + return 0; +} + +static int tipc_nl_compat_bearer_dump(struct tipc_nl_compat_msg *msg, + struct nlattr **attrs) +{ + struct nlattr *bearer[TIPC_NLA_BEARER_MAX + 1]; + + nla_parse_nested(bearer, TIPC_NLA_BEARER_MAX, attrs[TIPC_NLA_BEARER], + NULL); + + return tipc_add_tlv(msg->rep, TIPC_TLV_BEARER_NAME, + nla_data(bearer[TIPC_NLA_BEARER_NAME]), + nla_len(bearer[TIPC_NLA_BEARER_NAME])); +} + +static int tipc_nl_compat_bearer_enable(struct sk_buff *skb, + struct tipc_nl_compat_msg *msg) +{ + struct nlattr *prop; + struct nlattr *bearer; + struct tipc_bearer_config *b; + + b = (struct tipc_bearer_config *)TLV_DATA(msg->req); + + bearer = nla_nest_start(skb, TIPC_NLA_BEARER); + if (!bearer) + return -EMSGSIZE; + + if (nla_put_string(skb, TIPC_NLA_BEARER_NAME, b->name)) + return -EMSGSIZE; + + if (nla_put_u32(skb, TIPC_NLA_BEARER_DOMAIN, ntohl(b->disc_domain))) + return -EMSGSIZE; + + if (ntohl(b->priority) <= TIPC_MAX_LINK_PRI) { + prop = nla_nest_start(skb, TIPC_NLA_BEARER_PROP); + if (!prop) + return -EMSGSIZE; + if (nla_put_u32(skb, TIPC_NLA_PROP_PRIO, ntohl(b->priority))) + return -EMSGSIZE; + nla_nest_end(skb, prop); + } + nla_nest_end(skb, bearer); + + return 0; +} + +static int 
tipc_nl_compat_bearer_disable(struct sk_buff *skb, + struct tipc_nl_compat_msg *msg) +{ + char *name; + struct nlattr *bearer; + + name = (char *)TLV_DATA(msg->req); + + bearer = nla_nest_start(skb, TIPC_NLA_BEARER); + if (!bearer) + return -EMSGSIZE; + + if (nla_put_string(skb, TIPC_NLA_BEARER_NAME, name)) + return -EMSGSIZE; + + nla_nest_end(skb, bearer); + + return 0; +} + +static inline u32 perc(u32 count, u32 total) +{ + return (count * 100 + (total / 2)) / total; +} + +static void __fill_bc_link_stat(struct tipc_nl_compat_msg *msg, + struct nlattr *prop[], struct nlattr *stats[]) +{ + tipc_tlv_sprintf(msg->rep, " Window:%u packets\n", + nla_get_u32(prop[TIPC_NLA_PROP_WIN])); + + tipc_tlv_sprintf(msg->rep, + " RX packets:%u fragments:%u/%u bundles:%u/%u\n", + nla_get_u32(stats[TIPC_NLA_STATS_RX_INFO]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_FRAGMENTS]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_FRAGMENTED]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_BUNDLES]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_BUNDLED])); + + tipc_tlv_sprintf(msg->rep, + " TX packets:%u fragments:%u/%u bundles:%u/%u\n", + nla_get_u32(stats[TIPC_NLA_STATS_TX_INFO]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_FRAGMENTS]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_FRAGMENTED]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_BUNDLES]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_BUNDLED])); + + tipc_tlv_sprintf(msg->rep, " RX naks:%u defs:%u dups:%u\n", + nla_get_u32(stats[TIPC_NLA_STATS_RX_NACKS]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_DEFERRED]), + nla_get_u32(stats[TIPC_NLA_STATS_DUPLICATES])); + + tipc_tlv_sprintf(msg->rep, " TX naks:%u acks:%u dups:%u\n", + nla_get_u32(stats[TIPC_NLA_STATS_TX_NACKS]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_ACKS]), + nla_get_u32(stats[TIPC_NLA_STATS_RETRANSMITTED])); + + tipc_tlv_sprintf(msg->rep, + " Congestion link:%u Send queue max:%u avg:%u", + nla_get_u32(stats[TIPC_NLA_STATS_LINK_CONGS]), + nla_get_u32(stats[TIPC_NLA_STATS_MAX_QUEUE]), + nla_get_u32(stats[TIPC_NLA_STATS_AVG_QUEUE])); +} + +static int tipc_nl_compat_link_stat_dump(struct tipc_nl_compat_msg *msg, + struct nlattr **attrs) +{ + char *name; + struct nlattr *link[TIPC_NLA_LINK_MAX + 1]; + struct nlattr *prop[TIPC_NLA_PROP_MAX + 1]; + struct nlattr *stats[TIPC_NLA_STATS_MAX + 1]; + + nla_parse_nested(link, TIPC_NLA_LINK_MAX, attrs[TIPC_NLA_LINK], NULL); + + nla_parse_nested(prop, TIPC_NLA_PROP_MAX, link[TIPC_NLA_LINK_PROP], + NULL); + + nla_parse_nested(stats, TIPC_NLA_STATS_MAX, link[TIPC_NLA_LINK_STATS], + NULL); + + name = (char *)TLV_DATA(msg->req); + if (strcmp(name, nla_data(link[TIPC_NLA_LINK_NAME])) != 0) + return 0; + + tipc_tlv_sprintf(msg->rep, "\nLink <%s>\n", + nla_data(link[TIPC_NLA_LINK_NAME])); + + if (link[TIPC_NLA_LINK_BROADCAST]) { + __fill_bc_link_stat(msg, prop, stats); + return 0; + } + + if (link[TIPC_NLA_LINK_ACTIVE]) + tipc_tlv_sprintf(msg->rep, " ACTIVE"); + else if (link[TIPC_NLA_LINK_UP]) + tipc_tlv_sprintf(msg->rep, " STANDBY"); + else + tipc_tlv_sprintf(msg->rep, " DEFUNCT"); + + tipc_tlv_sprintf(msg->rep, " MTU:%u Priority:%u", + nla_get_u32(link[TIPC_NLA_LINK_MTU]), + nla_get_u32(prop[TIPC_NLA_PROP_PRIO])); + + tipc_tlv_sprintf(msg->rep, " Tolerance:%u ms Window:%u packets\n", + nla_get_u32(prop[TIPC_NLA_PROP_TOL]), + nla_get_u32(prop[TIPC_NLA_PROP_WIN])); + + tipc_tlv_sprintf(msg->rep, + " RX packets:%u fragments:%u/%u bundles:%u/%u\n", + nla_get_u32(link[TIPC_NLA_LINK_RX]) - + nla_get_u32(stats[TIPC_NLA_STATS_RX_INFO]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_FRAGMENTS]), + 
nla_get_u32(stats[TIPC_NLA_STATS_RX_FRAGMENTED]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_BUNDLES]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_BUNDLED])); + + tipc_tlv_sprintf(msg->rep, + " TX packets:%u fragments:%u/%u bundles:%u/%u\n", + nla_get_u32(link[TIPC_NLA_LINK_TX]) - + nla_get_u32(stats[TIPC_NLA_STATS_TX_INFO]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_FRAGMENTS]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_FRAGMENTED]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_BUNDLES]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_BUNDLED])); + + tipc_tlv_sprintf(msg->rep, + " TX profile sample:%u packets average:%u octets\n", + nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_CNT]), + nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_TOT]) / + nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT])); + + tipc_tlv_sprintf(msg->rep, + " 0-64:%u%% -256:%u%% -1024:%u%% -4096:%u%% ", + perc(nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_P0]), + nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT])), + perc(nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_P1]), + nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT])), + perc(nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_P2]), + nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT])), + perc(nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_P3]), + nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT]))); + + tipc_tlv_sprintf(msg->rep, "-16384:%u%% -32768:%u%% -66000:%u%%\n", + perc(nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_P4]), + nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT])), + perc(nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_P5]), + nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT])), + perc(nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_P6]), + nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT]))); + + tipc_tlv_sprintf(msg->rep, + " RX states:%u probes:%u naks:%u defs:%u dups:%u\n", + nla_get_u32(stats[TIPC_NLA_STATS_RX_STATES]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_PROBES]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_NACKS]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_DEFERRED]), + nla_get_u32(stats[TIPC_NLA_STATS_DUPLICATES])); + + tipc_tlv_sprintf(msg->rep, + " TX states:%u probes:%u naks:%u acks:%u dups:%u\n", + nla_get_u32(stats[TIPC_NLA_STATS_TX_STATES]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_PROBES]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_NACKS]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_ACKS]), + nla_get_u32(stats[TIPC_NLA_STATS_RETRANSMITTED])); + + tipc_tlv_sprintf(msg->rep, + " Congestion link:%u Send queue max:%u avg:%u", + nla_get_u32(stats[TIPC_NLA_STATS_LINK_CONGS]), + nla_get_u32(stats[TIPC_NLA_STATS_MAX_QUEUE]), + nla_get_u32(stats[TIPC_NLA_STATS_AVG_QUEUE])); + + return 0; +} + +static int tipc_nl_compat_link_dump(struct tipc_nl_compat_msg *msg, + struct nlattr **attrs) +{ + struct nlattr *link[TIPC_NLA_LINK_MAX + 1]; + struct tipc_link_info link_info; + + nla_parse_nested(link, TIPC_NLA_LINK_MAX, attrs[TIPC_NLA_LINK], NULL); + + link_info.dest = nla_get_flag(link[TIPC_NLA_LINK_DEST]); + link_info.up = htonl(nla_get_flag(link[TIPC_NLA_LINK_UP])); + strcpy(link_info.str, nla_data(link[TIPC_NLA_LINK_NAME])); + + return tipc_add_tlv(msg->rep, TIPC_TLV_LINK_INFO, + &link_info, sizeof(link_info)); +} + +static int tipc_nl_compat_link_set(struct sk_buff *skb, + struct tipc_nl_compat_msg *msg) +{ + struct nlattr *link; + struct nlattr *prop; + struct tipc_link_config *lc; + + lc = (struct tipc_link_config *)TLV_DATA(msg->req); + + link = nla_nest_start(skb, TIPC_NLA_LINK); + if (!link) + return -EMSGSIZE; + + if (nla_put_string(skb, TIPC_NLA_LINK_NAME, lc->name)) + return -EMSGSIZE; + + prop = nla_nest_start(skb, TIPC_NLA_LINK_PROP); + if (!prop) + return 
-EMSGSIZE; + + if (msg->cmd == TIPC_CMD_SET_LINK_PRI) { + if (nla_put_u32(skb, TIPC_NLA_PROP_PRIO, ntohl(lc->value))) + return -EMSGSIZE; + } else if (msg->cmd == TIPC_CMD_SET_LINK_TOL) { + if (nla_put_u32(skb, TIPC_NLA_PROP_TOL, ntohl(lc->value))) + return -EMSGSIZE; + } else if (msg->cmd == TIPC_CMD_SET_LINK_WINDOW) { + if (nla_put_u32(skb, TIPC_NLA_PROP_WIN, ntohl(lc->value))) + return -EMSGSIZE; + } + + nla_nest_end(skb, prop); + nla_nest_end(skb, link); + + return 0; +} + +static int tipc_nl_compat_link_reset_stats(struct sk_buff *skb, + struct tipc_nl_compat_msg *msg) +{ + char *name; + struct nlattr *link; + + name = (char *)TLV_DATA(msg->req); + + link = nla_nest_start(skb, TIPC_NLA_LINK); + if (!link) + return -EMSGSIZE; + + if (nla_put_string(skb, TIPC_NLA_LINK_NAME, name)) + return -EMSGSIZE; + + nla_nest_end(skb, link); + + return 0; +} + +static int tipc_nl_compat_name_table_dump_header(struct tipc_nl_compat_msg *msg) +{ + int i; + u32 depth; + struct tipc_name_table_query *ntq; + static const char * const header[] = { + "Type ", + "Lower Upper ", + "Port Identity ", + "Publication Scope" + }; + + ntq = (struct tipc_name_table_query *)TLV_DATA(msg->req); + + depth = ntohl(ntq->depth); + + if (depth > 4) + depth = 4; + for (i = 0; i < depth; i++) + tipc_tlv_sprintf(msg->rep, header[i]); + tipc_tlv_sprintf(msg->rep, "\n"); + + return 0; +} + +static int tipc_nl_compat_name_table_dump(struct tipc_nl_compat_msg *msg, + struct nlattr **attrs) +{ + char port_str[27]; + struct tipc_name_table_query *ntq; + struct nlattr *nt[TIPC_NLA_NAME_TABLE_MAX + 1]; + struct nlattr *publ[TIPC_NLA_PUBL_MAX + 1]; + u32 node, depth, type, lowbound, upbound; + static const char * const scope_str[] = {"", " zone", " cluster", + " node"}; + + nla_parse_nested(nt, TIPC_NLA_NAME_TABLE_MAX, + attrs[TIPC_NLA_NAME_TABLE], NULL); + + nla_parse_nested(publ, TIPC_NLA_PUBL_MAX, nt[TIPC_NLA_NAME_TABLE_PUBL], + NULL); + + ntq = (struct tipc_name_table_query *)TLV_DATA(msg->req); + + depth = ntohl(ntq->depth); + type = ntohl(ntq->type); + lowbound = ntohl(ntq->lowbound); + upbound = ntohl(ntq->upbound); + + if (!(depth & TIPC_NTQ_ALLTYPES) && + (type != nla_get_u32(publ[TIPC_NLA_PUBL_TYPE]))) + return 0; + if (lowbound && (lowbound > nla_get_u32(publ[TIPC_NLA_PUBL_UPPER]))) + return 0; + if (upbound && (upbound < nla_get_u32(publ[TIPC_NLA_PUBL_LOWER]))) + return 0; + + tipc_tlv_sprintf(msg->rep, "%-10u ", + nla_get_u32(publ[TIPC_NLA_PUBL_TYPE])); + + if (depth == 1) + goto out; + + tipc_tlv_sprintf(msg->rep, "%-10u %-10u ", + nla_get_u32(publ[TIPC_NLA_PUBL_LOWER]), + nla_get_u32(publ[TIPC_NLA_PUBL_UPPER])); + + if (depth == 2) + goto out; + + node = nla_get_u32(publ[TIPC_NLA_PUBL_NODE]); + sprintf(port_str, "<%u.%u.%u:%u>", tipc_zone(node), tipc_cluster(node), + tipc_node(node), nla_get_u32(publ[TIPC_NLA_PUBL_REF])); + tipc_tlv_sprintf(msg->rep, "%-26s ", port_str); + + if (depth == 3) + goto out; + + tipc_tlv_sprintf(msg->rep, "%-10u %s", + nla_get_u32(publ[TIPC_NLA_PUBL_REF]), + scope_str[nla_get_u32(publ[TIPC_NLA_PUBL_SCOPE])]); +out: + tipc_tlv_sprintf(msg->rep, "\n"); + + return 0; +} + +static int __tipc_nl_compat_publ_dump(struct tipc_nl_compat_msg *msg, + struct nlattr **attrs) +{ + u32 type, lower, upper; + struct nlattr *publ[TIPC_NLA_PUBL_MAX + 1]; + + nla_parse_nested(publ, TIPC_NLA_PUBL_MAX, attrs[TIPC_NLA_PUBL], NULL); + + type = nla_get_u32(publ[TIPC_NLA_PUBL_TYPE]); + lower = nla_get_u32(publ[TIPC_NLA_PUBL_LOWER]); + upper = nla_get_u32(publ[TIPC_NLA_PUBL_UPPER]); + + if (lower == upper) + 
tipc_tlv_sprintf(msg->rep, " {%u,%u}", type, lower); + else + tipc_tlv_sprintf(msg->rep, " {%u,%u,%u}", type, lower, upper); + + return 0; +} + +static int tipc_nl_compat_publ_dump(struct tipc_nl_compat_msg *msg, u32 sock) +{ + int err; + void *hdr; + struct nlattr *nest; + struct sk_buff *args; + struct tipc_nl_compat_cmd_dump dump; + + args = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!args) + return -ENOMEM; + + hdr = genlmsg_put(args, 0, 0, &tipc_genl_family, NLM_F_MULTI, + TIPC_NL_PUBL_GET); + + nest = nla_nest_start(args, TIPC_NLA_SOCK); + if (!nest) { + kfree_skb(args); + return -EMSGSIZE; + } + + if (nla_put_u32(args, TIPC_NLA_SOCK_REF, sock)) { + kfree_skb(args); + return -EMSGSIZE; + } + + nla_nest_end(args, nest); + genlmsg_end(args, hdr); + + dump.dumpit = tipc_nl_publ_dump; + dump.format = __tipc_nl_compat_publ_dump; + + err = __tipc_nl_compat_dumpit(&dump, msg, args); + + kfree_skb(args); + + return err; +} + +static int tipc_nl_compat_sk_dump(struct tipc_nl_compat_msg *msg, + struct nlattr **attrs) +{ + int err; + u32 sock_ref; + struct nlattr *sock[TIPC_NLA_SOCK_MAX + 1]; + + nla_parse_nested(sock, TIPC_NLA_SOCK_MAX, attrs[TIPC_NLA_SOCK], NULL); + + sock_ref = nla_get_u32(sock[TIPC_NLA_SOCK_REF]); + tipc_tlv_sprintf(msg->rep, "%u:", sock_ref); + + if (sock[TIPC_NLA_SOCK_CON]) { + u32 node; + struct nlattr *con[TIPC_NLA_CON_MAX + 1]; + + nla_parse_nested(con, TIPC_NLA_CON_MAX, sock[TIPC_NLA_SOCK_CON], + NULL); + + node = nla_get_u32(con[TIPC_NLA_CON_NODE]); + tipc_tlv_sprintf(msg->rep, " connected to <%u.%u.%u:%u>", + tipc_zone(node), + tipc_cluster(node), + tipc_node(node), + nla_get_u32(con[TIPC_NLA_CON_SOCK])); + + if (con[TIPC_NLA_CON_FLAG]) + tipc_tlv_sprintf(msg->rep, " via {%u,%u}\n", + nla_get_u32(con[TIPC_NLA_CON_TYPE]), + nla_get_u32(con[TIPC_NLA_CON_INST])); + else + tipc_tlv_sprintf(msg->rep, "\n"); + } else if (sock[TIPC_NLA_SOCK_HAS_PUBL]) { + tipc_tlv_sprintf(msg->rep, " bound to"); + + err = tipc_nl_compat_publ_dump(msg, sock_ref); + if (err) + return err; + } + tipc_tlv_sprintf(msg->rep, "\n"); + + return 0; +} + +static int tipc_nl_compat_media_dump(struct tipc_nl_compat_msg *msg, + struct nlattr **attrs) +{ + struct nlattr *media[TIPC_NLA_MEDIA_MAX + 1]; + + nla_parse_nested(media, TIPC_NLA_MEDIA_MAX, attrs[TIPC_NLA_MEDIA], + NULL); + + return tipc_add_tlv(msg->rep, TIPC_TLV_MEDIA_NAME, + nla_data(media[TIPC_NLA_MEDIA_NAME]), + nla_len(media[TIPC_NLA_MEDIA_NAME])); +} + +static int tipc_nl_compat_node_dump(struct tipc_nl_compat_msg *msg, + struct nlattr **attrs) +{ + struct tipc_node_info node_info; + struct nlattr *node[TIPC_NLA_NODE_MAX + 1]; + + nla_parse_nested(node, TIPC_NLA_NODE_MAX, attrs[TIPC_NLA_NODE], NULL); + + node_info.addr = htonl(nla_get_u32(node[TIPC_NLA_NODE_ADDR])); + node_info.up = htonl(nla_get_flag(node[TIPC_NLA_NODE_UP])); + + return tipc_add_tlv(msg->rep, TIPC_TLV_NODE_INFO, &node_info, + sizeof(node_info)); +} + +static int tipc_nl_compat_net_set(struct sk_buff *skb, + struct tipc_nl_compat_msg *msg) +{ + u32 val; + struct nlattr *net; + + val = ntohl(*(__be32 *)TLV_DATA(msg->req)); + + net = nla_nest_start(skb, TIPC_NLA_NET); + if (!net) + return -EMSGSIZE; + + if (msg->cmd == TIPC_CMD_SET_NODE_ADDR) { + if (nla_put_u32(skb, TIPC_NLA_NET_ADDR, val)) + return -EMSGSIZE; + } else if (msg->cmd == TIPC_CMD_SET_NETID) { + if (nla_put_u32(skb, TIPC_NLA_NET_ID, val)) + return -EMSGSIZE; + } + nla_nest_end(skb, net); + + return 0; +} + +static int tipc_nl_compat_net_dump(struct tipc_nl_compat_msg *msg, + struct nlattr **attrs) +{ + 
__be32 id; + struct nlattr *net[TIPC_NLA_NET_MAX + 1]; + + nla_parse_nested(net, TIPC_NLA_NET_MAX, attrs[TIPC_NLA_NET], NULL); + id = htonl(nla_get_u32(net[TIPC_NLA_NET_ID])); + + return tipc_add_tlv(msg->rep, TIPC_TLV_UNSIGNED, &id, sizeof(id)); +} + +static int tipc_cmd_show_stats_compat(struct tipc_nl_compat_msg *msg) +{ + msg->rep = tipc_tlv_alloc(ULTRA_STRING_MAX_LEN); + if (!msg->rep) + return -ENOMEM; + + tipc_tlv_init(msg->rep, TIPC_TLV_ULTRA_STRING); + tipc_tlv_sprintf(msg->rep, "TIPC version " TIPC_MOD_VER "\n"); + + return 0; +} + +static int tipc_nl_compat_handle(struct tipc_nl_compat_msg *msg) +{ + struct tipc_nl_compat_cmd_dump dump; + struct tipc_nl_compat_cmd_doit doit; + + memset(&dump, 0, sizeof(dump)); + memset(&doit, 0, sizeof(doit)); + + switch (msg->cmd) { + case TIPC_CMD_NOOP: + msg->rep = tipc_tlv_alloc(0); + if (!msg->rep) + return -ENOMEM; + return 0; + case TIPC_CMD_GET_BEARER_NAMES: + msg->rep_size = MAX_BEARERS * TLV_SPACE(TIPC_MAX_BEARER_NAME); + dump.dumpit = tipc_nl_bearer_dump; + dump.format = tipc_nl_compat_bearer_dump; + return tipc_nl_compat_dumpit(&dump, msg); + case TIPC_CMD_ENABLE_BEARER: + msg->req_type = TIPC_TLV_BEARER_CONFIG; + doit.doit = tipc_nl_bearer_enable; + doit.transcode = tipc_nl_compat_bearer_enable; + return tipc_nl_compat_doit(&doit, msg); + case TIPC_CMD_DISABLE_BEARER: + msg->req_type = TIPC_TLV_BEARER_NAME; + doit.doit = tipc_nl_bearer_disable; + doit.transcode = tipc_nl_compat_bearer_disable; + return tipc_nl_compat_doit(&doit, msg); + case TIPC_CMD_SHOW_LINK_STATS: + msg->req_type = TIPC_TLV_LINK_NAME; + msg->rep_size = ULTRA_STRING_MAX_LEN; + msg->rep_type = TIPC_TLV_ULTRA_STRING; + dump.dumpit = tipc_nl_link_dump; + dump.format = tipc_nl_compat_link_stat_dump; + return tipc_nl_compat_dumpit(&dump, msg); + case TIPC_CMD_GET_LINKS: + msg->req_type = TIPC_TLV_NET_ADDR; + msg->rep_size = ULTRA_STRING_MAX_LEN; + dump.dumpit = tipc_nl_link_dump; + dump.format = tipc_nl_compat_link_dump; + return tipc_nl_compat_dumpit(&dump, msg); + case TIPC_CMD_SET_LINK_TOL: + case TIPC_CMD_SET_LINK_PRI: + case TIPC_CMD_SET_LINK_WINDOW: + msg->req_type = TIPC_TLV_LINK_CONFIG; + doit.doit = tipc_nl_link_set; + doit.transcode = tipc_nl_compat_link_set; + return tipc_nl_compat_doit(&doit, msg); + case TIPC_CMD_RESET_LINK_STATS: + msg->req_type = TIPC_TLV_LINK_NAME; + doit.doit = tipc_nl_link_reset_stats; + doit.transcode = tipc_nl_compat_link_reset_stats; + return tipc_nl_compat_doit(&doit, msg); + case TIPC_CMD_SHOW_NAME_TABLE: + msg->req_type = TIPC_TLV_NAME_TBL_QUERY; + msg->rep_size = ULTRA_STRING_MAX_LEN; + msg->rep_type = TIPC_TLV_ULTRA_STRING; + dump.header = tipc_nl_compat_name_table_dump_header; + dump.dumpit = tipc_nl_name_table_dump; + dump.format = tipc_nl_compat_name_table_dump; + return tipc_nl_compat_dumpit(&dump, msg); + case TIPC_CMD_SHOW_PORTS: + msg->rep_size = ULTRA_STRING_MAX_LEN; + msg->rep_type = TIPC_TLV_ULTRA_STRING; + dump.dumpit = tipc_nl_sk_dump; + dump.format = tipc_nl_compat_sk_dump; + return tipc_nl_compat_dumpit(&dump, msg); + case TIPC_CMD_GET_MEDIA_NAMES: + msg->rep_size = MAX_MEDIA * TLV_SPACE(TIPC_MAX_MEDIA_NAME); + dump.dumpit = tipc_nl_media_dump; + dump.format = tipc_nl_compat_media_dump; + return tipc_nl_compat_dumpit(&dump, msg); + case TIPC_CMD_GET_NODES: + msg->rep_size = ULTRA_STRING_MAX_LEN; + dump.dumpit = tipc_nl_node_dump; + dump.format = tipc_nl_compat_node_dump; + return tipc_nl_compat_dumpit(&dump, msg); + case TIPC_CMD_SET_NODE_ADDR: + msg->req_type = TIPC_TLV_NET_ADDR; + doit.doit = tipc_nl_net_set; + 
doit.transcode = tipc_nl_compat_net_set; + return tipc_nl_compat_doit(&doit, msg); + case TIPC_CMD_SET_NETID: + msg->req_type = TIPC_TLV_UNSIGNED; + doit.doit = tipc_nl_net_set; + doit.transcode = tipc_nl_compat_net_set; + return tipc_nl_compat_doit(&doit, msg); + case TIPC_CMD_GET_NETID: + msg->rep_size = sizeof(u32); + dump.dumpit = tipc_nl_net_dump; + dump.format = tipc_nl_compat_net_dump; + return tipc_nl_compat_dumpit(&dump, msg); + case TIPC_CMD_SHOW_STATS: + return tipc_cmd_show_stats_compat(msg); + } + + return -EOPNOTSUPP; +} + +static int tipc_nl_compat_recv(struct sk_buff *skb, struct genl_info *info) +{ + int err; + int len; + struct tipc_nl_compat_msg msg; + struct nlmsghdr *req_nlh; + struct nlmsghdr *rep_nlh; + struct tipc_genlmsghdr *req_userhdr = info->userhdr; + struct net *net = genl_info_net(info); + + memset(&msg, 0, sizeof(msg)); + + req_nlh = (struct nlmsghdr *)skb->data; + msg.req = nlmsg_data(req_nlh) + GENL_HDRLEN + TIPC_GENL_HDRLEN; + msg.cmd = req_userhdr->cmd; + msg.dst_sk = info->dst_sk; + + if ((msg.cmd & 0xC000) && (!netlink_net_capable(skb, CAP_NET_ADMIN))) { + msg.rep = tipc_get_err_tlv(TIPC_CFG_NOT_NET_ADMIN); + err = -EACCES; + goto send; + } + + len = nlmsg_attrlen(req_nlh, GENL_HDRLEN + TIPC_GENL_HDRLEN); + if (TLV_GET_LEN(msg.req) && !TLV_OK(msg.req, len)) { + msg.rep = tipc_get_err_tlv(TIPC_CFG_NOT_SUPPORTED); + err = -EOPNOTSUPP; + goto send; + } + + err = tipc_nl_compat_handle(&msg); + if (err == -EOPNOTSUPP) + msg.rep = tipc_get_err_tlv(TIPC_CFG_NOT_SUPPORTED); + else if (err == -EINVAL) + msg.rep = tipc_get_err_tlv(TIPC_CFG_TLV_ERROR); +send: + if (!msg.rep) + return err; + + len = nlmsg_total_size(GENL_HDRLEN + TIPC_GENL_HDRLEN); + skb_push(msg.rep, len); + rep_nlh = nlmsg_hdr(msg.rep); + memcpy(rep_nlh, info->nlhdr, len); + rep_nlh->nlmsg_len = msg.rep->len; + genlmsg_unicast(net, msg.rep, NETLINK_CB(skb).portid); + + return err; +} + +static struct genl_family tipc_genl_compat_family = { + .id = GENL_ID_GENERATE, + .name = TIPC_GENL_NAME, + .version = TIPC_GENL_VERSION, + .hdrsize = TIPC_GENL_HDRLEN, + .maxattr = 0, + .netnsok = true, +}; + +static struct genl_ops tipc_genl_compat_ops[] = { + { + .cmd = TIPC_GENL_CMD, + .doit = tipc_nl_compat_recv, + }, +}; + +int tipc_netlink_compat_start(void) +{ + int res; + + res = genl_register_family_with_ops(&tipc_genl_compat_family, + tipc_genl_compat_ops); + if (res) { + pr_err("Failed to register legacy compat interface\n"); + return res; + } + + return 0; +} + +void tipc_netlink_compat_stop(void) +{ + genl_unregister_family(&tipc_genl_compat_family); +} diff --git a/net/tipc/node.c b/net/tipc/node.c index 8d353ec77a66..86152de8248d 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -35,22 +35,14 @@ */ #include "core.h" -#include "config.h" +#include "link.h" #include "node.h" #include "name_distr.h" #include "socket.h" -#define NODE_HTABLE_SIZE 512 - static void node_lost_contact(struct tipc_node *n_ptr); static void node_established_contact(struct tipc_node *n_ptr); -static struct hlist_head node_htable[NODE_HTABLE_SIZE]; -LIST_HEAD(tipc_node_list); -static u32 tipc_num_nodes; -static u32 tipc_num_links; -static DEFINE_SPINLOCK(node_list_lock); - struct tipc_sock_conn { u32 port; u32 peer_port; @@ -78,15 +70,17 @@ static unsigned int tipc_hashfn(u32 addr) /* * tipc_node_find - locate specified node object, if it exists */ -struct tipc_node *tipc_node_find(u32 addr) +struct tipc_node *tipc_node_find(struct net *net, u32 addr) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct 
tipc_node *node; - if (unlikely(!in_own_cluster_exact(addr))) + if (unlikely(!in_own_cluster_exact(net, addr))) return NULL; rcu_read_lock(); - hlist_for_each_entry_rcu(node, &node_htable[tipc_hashfn(addr)], hash) { + hlist_for_each_entry_rcu(node, &tn->node_htable[tipc_hashfn(addr)], + hash) { if (node->addr == addr) { rcu_read_unlock(); return node; @@ -96,72 +90,68 @@ struct tipc_node *tipc_node_find(u32 addr) return NULL; } -struct tipc_node *tipc_node_create(u32 addr) +struct tipc_node *tipc_node_create(struct net *net, u32 addr) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_node *n_ptr, *temp_node; - spin_lock_bh(&node_list_lock); - + spin_lock_bh(&tn->node_list_lock); + n_ptr = tipc_node_find(net, addr); + if (n_ptr) + goto exit; n_ptr = kzalloc(sizeof(*n_ptr), GFP_ATOMIC); if (!n_ptr) { - spin_unlock_bh(&node_list_lock); pr_warn("Node creation failed, no memory\n"); - return NULL; + goto exit; } - n_ptr->addr = addr; + n_ptr->net = net; spin_lock_init(&n_ptr->lock); INIT_HLIST_NODE(&n_ptr->hash); INIT_LIST_HEAD(&n_ptr->list); INIT_LIST_HEAD(&n_ptr->publ_list); INIT_LIST_HEAD(&n_ptr->conn_sks); - skb_queue_head_init(&n_ptr->waiting_sks); __skb_queue_head_init(&n_ptr->bclink.deferred_queue); - - hlist_add_head_rcu(&n_ptr->hash, &node_htable[tipc_hashfn(addr)]); - - list_for_each_entry_rcu(temp_node, &tipc_node_list, list) { + hlist_add_head_rcu(&n_ptr->hash, &tn->node_htable[tipc_hashfn(addr)]); + list_for_each_entry_rcu(temp_node, &tn->node_list, list) { if (n_ptr->addr < temp_node->addr) break; } list_add_tail_rcu(&n_ptr->list, &temp_node->list); n_ptr->action_flags = TIPC_WAIT_PEER_LINKS_DOWN; n_ptr->signature = INVALID_NODE_SIG; - - tipc_num_nodes++; - - spin_unlock_bh(&node_list_lock); +exit: + spin_unlock_bh(&tn->node_list_lock); return n_ptr; } -static void tipc_node_delete(struct tipc_node *n_ptr) +static void tipc_node_delete(struct tipc_net *tn, struct tipc_node *n_ptr) { list_del_rcu(&n_ptr->list); hlist_del_rcu(&n_ptr->hash); kfree_rcu(n_ptr, rcu); - - tipc_num_nodes--; } -void tipc_node_stop(void) +void tipc_node_stop(struct net *net) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_node *node, *t_node; - spin_lock_bh(&node_list_lock); - list_for_each_entry_safe(node, t_node, &tipc_node_list, list) - tipc_node_delete(node); - spin_unlock_bh(&node_list_lock); + spin_lock_bh(&tn->node_list_lock); + list_for_each_entry_safe(node, t_node, &tn->node_list, list) + tipc_node_delete(tn, node); + spin_unlock_bh(&tn->node_list_lock); } -int tipc_node_add_conn(u32 dnode, u32 port, u32 peer_port) +int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port) { struct tipc_node *node; struct tipc_sock_conn *conn; - if (in_own_node(dnode)) + if (in_own_node(net, dnode)) return 0; - node = tipc_node_find(dnode); + node = tipc_node_find(net, dnode); if (!node) { pr_warn("Connecting sock to node 0x%x failed\n", dnode); return -EHOSTUNREACH; @@ -179,15 +169,15 @@ int tipc_node_add_conn(u32 dnode, u32 port, u32 peer_port) return 0; } -void tipc_node_remove_conn(u32 dnode, u32 port) +void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port) { struct tipc_node *node; struct tipc_sock_conn *conn, *safe; - if (in_own_node(dnode)) + if (in_own_node(net, dnode)) return; - node = tipc_node_find(dnode); + node = tipc_node_find(net, dnode); if (!node) return; @@ -201,23 +191,6 @@ void tipc_node_remove_conn(u32 dnode, u32 port) tipc_node_unlock(node); } -void tipc_node_abort_sock_conns(struct list_head *conns) -{ - struct tipc_sock_conn 
*conn, *safe; - struct sk_buff *buf; - - list_for_each_entry_safe(conn, safe, conns, list) { - buf = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, - SHORT_H_SIZE, 0, tipc_own_addr, - conn->peer_node, conn->port, - conn->peer_port, TIPC_ERR_NO_NODE); - if (likely(buf)) - tipc_sk_rcv(buf); - list_del(&conn->list); - kfree(conn); - } -} - /** * tipc_node_link_up - handle addition of link * @@ -231,8 +204,8 @@ void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link *l_ptr) n_ptr->action_flags |= TIPC_NOTIFY_LINK_UP; n_ptr->link_id = l_ptr->peer_bearer_id << 16 | l_ptr->bearer_id; - pr_info("Established link <%s> on network plane %c\n", - l_ptr->name, l_ptr->net_plane); + pr_debug("Established link <%s> on network plane %c\n", + l_ptr->name, l_ptr->net_plane); if (!active[0]) { active[0] = active[1] = l_ptr; @@ -240,7 +213,7 @@ void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link *l_ptr) goto exit; } if (l_ptr->priority < active[0]->priority) { - pr_info("New link <%s> becomes standby\n", l_ptr->name); + pr_debug("New link <%s> becomes standby\n", l_ptr->name); goto exit; } tipc_link_dup_queue_xmit(active[0], l_ptr); @@ -248,9 +221,9 @@ void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link *l_ptr) active[0] = l_ptr; goto exit; } - pr_info("Old link <%s> becomes standby\n", active[0]->name); + pr_debug("Old link <%s> becomes standby\n", active[0]->name); if (active[1] != active[0]) - pr_info("Old link <%s> becomes standby\n", active[1]->name); + pr_debug("Old link <%s> becomes standby\n", active[1]->name); active[0] = active[1] = l_ptr; exit: /* Leave room for changeover header when returning 'mtu' to users: */ @@ -290,6 +263,7 @@ static void node_select_active_links(struct tipc_node *n_ptr) */ void tipc_node_link_down(struct tipc_node *n_ptr, struct tipc_link *l_ptr) { + struct tipc_net *tn = net_generic(n_ptr->net, tipc_net_id); struct tipc_link **active; n_ptr->working_links--; @@ -297,12 +271,12 @@ void tipc_node_link_down(struct tipc_node *n_ptr, struct tipc_link *l_ptr) n_ptr->link_id = l_ptr->peer_bearer_id << 16 | l_ptr->bearer_id; if (!tipc_link_is_active(l_ptr)) { - pr_info("Lost standby link <%s> on network plane %c\n", - l_ptr->name, l_ptr->net_plane); + pr_debug("Lost standby link <%s> on network plane %c\n", + l_ptr->name, l_ptr->net_plane); return; } - pr_info("Lost link <%s> on network plane %c\n", - l_ptr->name, l_ptr->net_plane); + pr_debug("Lost link <%s> on network plane %c\n", + l_ptr->name, l_ptr->net_plane); active = &n_ptr->active_links[0]; if (active[0] == l_ptr) @@ -324,7 +298,7 @@ void tipc_node_link_down(struct tipc_node *n_ptr, struct tipc_link *l_ptr) } /* Loopback link went down? No fragmentation needed from now on. 
*/ - if (n_ptr->addr == tipc_own_addr) { + if (n_ptr->addr == tn->own_addr) { n_ptr->act_mtus[0] = MAX_MSG_SIZE; n_ptr->act_mtus[1] = MAX_MSG_SIZE; } @@ -343,9 +317,6 @@ int tipc_node_is_up(struct tipc_node *n_ptr) void tipc_node_attach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr) { n_ptr->links[l_ptr->bearer_id] = l_ptr; - spin_lock_bh(&node_list_lock); - tipc_num_links++; - spin_unlock_bh(&node_list_lock); n_ptr->link_cnt++; } @@ -357,9 +328,6 @@ void tipc_node_detach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr) if (l_ptr != n_ptr->links[i]) continue; n_ptr->links[i] = NULL; - spin_lock_bh(&node_list_lock); - tipc_num_links--; - spin_unlock_bh(&node_list_lock); n_ptr->link_cnt--; } } @@ -368,17 +336,21 @@ static void node_established_contact(struct tipc_node *n_ptr) { n_ptr->action_flags |= TIPC_NOTIFY_NODE_UP; n_ptr->bclink.oos_state = 0; - n_ptr->bclink.acked = tipc_bclink_get_last_sent(); - tipc_bclink_add_node(n_ptr->addr); + n_ptr->bclink.acked = tipc_bclink_get_last_sent(n_ptr->net); + tipc_bclink_add_node(n_ptr->net, n_ptr->addr); } static void node_lost_contact(struct tipc_node *n_ptr) { char addr_string[16]; - u32 i; + struct tipc_sock_conn *conn, *safe; + struct list_head *conns = &n_ptr->conn_sks; + struct sk_buff *skb; + struct tipc_net *tn = net_generic(n_ptr->net, tipc_net_id); + uint i; - pr_info("Lost contact with %s\n", - tipc_addr_string_fill(addr_string, n_ptr->addr)); + pr_debug("Lost contact with %s\n", + tipc_addr_string_fill(addr_string, n_ptr->addr)); /* Flush broadcast link info associated with lost node */ if (n_ptr->bclink.recv_permitted) { @@ -389,7 +361,7 @@ static void node_lost_contact(struct tipc_node *n_ptr) n_ptr->bclink.reasm_buf = NULL; } - tipc_bclink_remove_node(n_ptr->addr); + tipc_bclink_remove_node(n_ptr->net, n_ptr->addr); tipc_bclink_acknowledge(n_ptr, INVALID_LINK_SEQ); n_ptr->bclink.recv_permitted = false; @@ -403,126 +375,33 @@ static void node_lost_contact(struct tipc_node *n_ptr) l_ptr->reset_checkpoint = l_ptr->next_in_no; l_ptr->exp_msg_count = 0; tipc_link_reset_fragments(l_ptr); - } - - n_ptr->action_flags &= ~TIPC_WAIT_OWN_LINKS_DOWN; - - /* Notify subscribers and prevent re-contact with node until - * cleanup is done. - */ - n_ptr->action_flags |= TIPC_WAIT_PEER_LINKS_DOWN | - TIPC_NOTIFY_NODE_DOWN; -} -struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space) -{ - u32 domain; - struct sk_buff *buf; - struct tipc_node *n_ptr; - struct tipc_node_info node_info; - u32 payload_size; - - if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_NET_ADDR)) - return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - - domain = ntohl(*(__be32 *)TLV_DATA(req_tlv_area)); - if (!tipc_addr_domain_valid(domain)) - return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE - " (network address)"); - - spin_lock_bh(&node_list_lock); - if (!tipc_num_nodes) { - spin_unlock_bh(&node_list_lock); - return tipc_cfg_reply_none(); + /* Link marked for deletion after failover? 
=> do it now */ + if (l_ptr->flags & LINK_STOPPED) + tipc_link_delete(l_ptr); } - /* For now, get space for all other nodes */ - payload_size = TLV_SPACE(sizeof(node_info)) * tipc_num_nodes; - if (payload_size > 32768u) { - spin_unlock_bh(&node_list_lock); - return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED - " (too many nodes)"); - } - spin_unlock_bh(&node_list_lock); - - buf = tipc_cfg_reply_alloc(payload_size); - if (!buf) - return NULL; - - /* Add TLVs for all nodes in scope */ - rcu_read_lock(); - list_for_each_entry_rcu(n_ptr, &tipc_node_list, list) { - if (!tipc_in_scope(domain, n_ptr->addr)) - continue; - node_info.addr = htonl(n_ptr->addr); - node_info.up = htonl(tipc_node_is_up(n_ptr)); - tipc_cfg_append_tlv(buf, TIPC_TLV_NODE_INFO, - &node_info, sizeof(node_info)); - } - rcu_read_unlock(); - return buf; -} - -struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space) -{ - u32 domain; - struct sk_buff *buf; - struct tipc_node *n_ptr; - struct tipc_link_info link_info; - u32 payload_size; - - if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_NET_ADDR)) - return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - - domain = ntohl(*(__be32 *)TLV_DATA(req_tlv_area)); - if (!tipc_addr_domain_valid(domain)) - return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE - " (network address)"); - - if (!tipc_own_addr) - return tipc_cfg_reply_none(); - - spin_lock_bh(&node_list_lock); - /* Get space for all unicast links + broadcast link */ - payload_size = TLV_SPACE((sizeof(link_info)) * (tipc_num_links + 1)); - if (payload_size > 32768u) { - spin_unlock_bh(&node_list_lock); - return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED - " (too many links)"); - } - spin_unlock_bh(&node_list_lock); - - buf = tipc_cfg_reply_alloc(payload_size); - if (!buf) - return NULL; + n_ptr->action_flags &= ~TIPC_WAIT_OWN_LINKS_DOWN; - /* Add TLV for broadcast link */ - link_info.dest = htonl(tipc_cluster_mask(tipc_own_addr)); - link_info.up = htonl(1); - strlcpy(link_info.str, tipc_bclink_name, TIPC_MAX_LINK_NAME); - tipc_cfg_append_tlv(buf, TIPC_TLV_LINK_INFO, &link_info, sizeof(link_info)); + /* Prevent re-contact with node until cleanup is done */ + n_ptr->action_flags |= TIPC_WAIT_PEER_LINKS_DOWN; - /* Add TLVs for any other links in scope */ - rcu_read_lock(); - list_for_each_entry_rcu(n_ptr, &tipc_node_list, list) { - u32 i; + /* Notify publications from this node */ + n_ptr->action_flags |= TIPC_NOTIFY_NODE_DOWN; - if (!tipc_in_scope(domain, n_ptr->addr)) - continue; - tipc_node_lock(n_ptr); - for (i = 0; i < MAX_BEARERS; i++) { - if (!n_ptr->links[i]) - continue; - link_info.dest = htonl(n_ptr->addr); - link_info.up = htonl(tipc_link_is_up(n_ptr->links[i])); - strcpy(link_info.str, n_ptr->links[i]->name); - tipc_cfg_append_tlv(buf, TIPC_TLV_LINK_INFO, - &link_info, sizeof(link_info)); + /* Notify sockets connected to node */ + list_for_each_entry_safe(conn, safe, conns, list) { + skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, + SHORT_H_SIZE, 0, tn->own_addr, + conn->peer_node, conn->port, + conn->peer_port, TIPC_ERR_NO_NODE); + if (likely(skb)) { + skb_queue_tail(n_ptr->inputq, skb); + n_ptr->action_flags |= TIPC_MSG_EVT; } - tipc_node_unlock(n_ptr); + list_del(&conn->list); + kfree(conn); } - rcu_read_unlock(); - return buf; } /** @@ -534,10 +413,11 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space) * * Returns 0 on success */ -int tipc_node_get_linkname(u32 bearer_id, u32 addr, char *linkname, size_t len) +int 
tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 addr, + char *linkname, size_t len) { struct tipc_link *link; - struct tipc_node *node = tipc_node_find(addr); + struct tipc_node *node = tipc_node_find(net, addr); if ((bearer_id >= MAX_BEARERS) || !node) return -EINVAL; @@ -554,58 +434,60 @@ int tipc_node_get_linkname(u32 bearer_id, u32 addr, char *linkname, size_t len) void tipc_node_unlock(struct tipc_node *node) { - LIST_HEAD(nsub_list); - LIST_HEAD(conn_sks); - struct sk_buff_head waiting_sks; + struct net *net = node->net; u32 addr = 0; - int flags = node->action_flags; + u32 flags = node->action_flags; u32 link_id = 0; + struct list_head *publ_list; + struct sk_buff_head *inputq = node->inputq; + struct sk_buff_head *namedq; - if (likely(!flags)) { + if (likely(!flags || (flags == TIPC_MSG_EVT))) { + node->action_flags = 0; spin_unlock_bh(&node->lock); + if (flags == TIPC_MSG_EVT) + tipc_sk_rcv(net, inputq); return; } addr = node->addr; link_id = node->link_id; - __skb_queue_head_init(&waiting_sks); + namedq = node->namedq; + publ_list = &node->publ_list; - if (flags & TIPC_WAKEUP_USERS) - skb_queue_splice_init(&node->waiting_sks, &waiting_sks); - - if (flags & TIPC_NOTIFY_NODE_DOWN) { - list_replace_init(&node->publ_list, &nsub_list); - list_replace_init(&node->conn_sks, &conn_sks); - } - node->action_flags &= ~(TIPC_WAKEUP_USERS | TIPC_NOTIFY_NODE_DOWN | - TIPC_NOTIFY_NODE_UP | TIPC_NOTIFY_LINK_UP | - TIPC_NOTIFY_LINK_DOWN | - TIPC_WAKEUP_BCAST_USERS); + node->action_flags &= ~(TIPC_MSG_EVT | + TIPC_NOTIFY_NODE_DOWN | TIPC_NOTIFY_NODE_UP | + TIPC_NOTIFY_LINK_DOWN | TIPC_NOTIFY_LINK_UP | + TIPC_WAKEUP_BCAST_USERS | TIPC_BCAST_MSG_EVT | + TIPC_NAMED_MSG_EVT); spin_unlock_bh(&node->lock); - while (!skb_queue_empty(&waiting_sks)) - tipc_sk_rcv(__skb_dequeue(&waiting_sks)); - - if (!list_empty(&conn_sks)) - tipc_node_abort_sock_conns(&conn_sks); - - if (!list_empty(&nsub_list)) - tipc_publ_notify(&nsub_list, addr); + if (flags & TIPC_NOTIFY_NODE_DOWN) + tipc_publ_notify(net, publ_list, addr); if (flags & TIPC_WAKEUP_BCAST_USERS) - tipc_bclink_wakeup_users(); + tipc_bclink_wakeup_users(net); if (flags & TIPC_NOTIFY_NODE_UP) - tipc_named_node_up(addr); + tipc_named_node_up(net, addr); if (flags & TIPC_NOTIFY_LINK_UP) - tipc_nametbl_publish(TIPC_LINK_STATE, addr, addr, + tipc_nametbl_publish(net, TIPC_LINK_STATE, addr, addr, TIPC_NODE_SCOPE, link_id, addr); if (flags & TIPC_NOTIFY_LINK_DOWN) - tipc_nametbl_withdraw(TIPC_LINK_STATE, addr, + tipc_nametbl_withdraw(net, TIPC_LINK_STATE, addr, link_id, addr); + + if (flags & TIPC_MSG_EVT) + tipc_sk_rcv(net, inputq); + + if (flags & TIPC_NAMED_MSG_EVT) + tipc_named_rcv(net, namedq); + + if (flags & TIPC_BCAST_MSG_EVT) + tipc_bclink_input(net); } /* Caller should hold node lock for the passed node */ @@ -614,7 +496,7 @@ static int __tipc_nl_add_node(struct tipc_nl_msg *msg, struct tipc_node *node) void *hdr; struct nlattr *attrs; - hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_v2_family, + hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, NLM_F_MULTI, TIPC_NL_NODE_GET); if (!hdr) return -EMSGSIZE; @@ -645,6 +527,8 @@ msg_full: int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb) { int err; + struct net *net = sock_net(skb->sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); int done = cb->args[0]; int last_addr = cb->args[1]; struct tipc_node *node; @@ -659,7 +543,7 @@ int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb) rcu_read_lock(); - if (last_addr && 
!tipc_node_find(last_addr)) { + if (last_addr && !tipc_node_find(net, last_addr)) { rcu_read_unlock(); /* We never set seq or call nl_dump_check_consistent() this * means that setting prev_seq here will cause the consistence @@ -671,7 +555,7 @@ int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb) return -EPIPE; } - list_for_each_entry_rcu(node, &tipc_node_list, list) { + list_for_each_entry_rcu(node, &tn->node_list, list) { if (last_addr) { if (node->addr == last_addr) last_addr = 0; diff --git a/net/tipc/node.h b/net/tipc/node.h index cbe0e950f1cc..3d18c66b7f78 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -1,7 +1,7 @@ /* * net/tipc/node.h: Include file for TIPC node management routines * - * Copyright (c) 2000-2006, 2014, Ericsson AB + * Copyright (c) 2000-2006, 2014-2015, Ericsson AB * Copyright (c) 2005, 2010-2014, Wind River Systems * All rights reserved. * @@ -42,10 +42,10 @@ #include "bearer.h" #include "msg.h" -/* - * Out-of-range value for node signature - */ -#define INVALID_NODE_SIG 0x10000 +/* Out-of-range value for node signature */ +#define INVALID_NODE_SIG 0x10000 + +#define NODE_HTABLE_SIZE 512 /* Flags used to take different actions according to flag type * TIPC_WAIT_PEER_LINKS_DOWN: wait to see that peer's links are down @@ -55,14 +55,16 @@ * TIPC_DISTRIBUTE_NAME: publish or withdraw link state name type */ enum { + TIPC_MSG_EVT = 1, TIPC_WAIT_PEER_LINKS_DOWN = (1 << 1), TIPC_WAIT_OWN_LINKS_DOWN = (1 << 2), TIPC_NOTIFY_NODE_DOWN = (1 << 3), TIPC_NOTIFY_NODE_UP = (1 << 4), - TIPC_WAKEUP_USERS = (1 << 5), - TIPC_WAKEUP_BCAST_USERS = (1 << 6), - TIPC_NOTIFY_LINK_UP = (1 << 7), - TIPC_NOTIFY_LINK_DOWN = (1 << 8) + TIPC_WAKEUP_BCAST_USERS = (1 << 5), + TIPC_NOTIFY_LINK_UP = (1 << 6), + TIPC_NOTIFY_LINK_DOWN = (1 << 7), + TIPC_NAMED_MSG_EVT = (1 << 8), + TIPC_BCAST_MSG_EVT = (1 << 9) }; /** @@ -73,6 +75,7 @@ enum { * @oos_state: state tracker for handling OOS b'cast messages * @deferred_queue: deferred queue saved OOS b'cast message received from node * @reasm_buf: broadcast reassembly queue head from node + * @inputq_map: bitmap indicating which inqueues should be kicked * @recv_permitted: true if node is allowed to receive b'cast messages */ struct tipc_node_bclink { @@ -83,6 +86,7 @@ struct tipc_node_bclink { u32 deferred_size; struct sk_buff_head deferred_queue; struct sk_buff *reasm_buf; + int inputq_map; bool recv_permitted; }; @@ -90,7 +94,11 @@ struct tipc_node_bclink { * struct tipc_node - TIPC node structure * @addr: network address of node * @lock: spinlock governing access to structure + * @net: the applicable net namespace * @hash: links to adjacent nodes in unsorted hash chain + * @inputq: pointer to input queue containing messages for msg event + * @namedq: pointer to name table input queue with name table messages + * @curr_link: the link holding the node lock, if any * @active_links: pointers to active links to node * @links: pointers to all links to node * @action_flags: bit mask of different types of node actions @@ -106,11 +114,14 @@ struct tipc_node_bclink { struct tipc_node { u32 addr; spinlock_t lock; + struct net *net; struct hlist_node hash; + struct sk_buff_head *inputq; + struct sk_buff_head *namedq; struct tipc_link *active_links[2]; u32 act_mtus[2]; struct tipc_link *links[MAX_BEARERS]; - unsigned int action_flags; + int action_flags; struct tipc_node_bclink bclink; struct list_head list; int link_cnt; @@ -118,28 +129,24 @@ struct tipc_node { u32 signature; u32 link_id; struct list_head publ_list; - struct sk_buff_head 
waiting_sks; struct list_head conn_sks; struct rcu_head rcu; }; -extern struct list_head tipc_node_list; - -struct tipc_node *tipc_node_find(u32 addr); -struct tipc_node *tipc_node_create(u32 addr); -void tipc_node_stop(void); +struct tipc_node *tipc_node_find(struct net *net, u32 addr); +struct tipc_node *tipc_node_create(struct net *net, u32 addr); +void tipc_node_stop(struct net *net); void tipc_node_attach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr); void tipc_node_detach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr); void tipc_node_link_down(struct tipc_node *n_ptr, struct tipc_link *l_ptr); void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link *l_ptr); int tipc_node_active_links(struct tipc_node *n_ptr); int tipc_node_is_up(struct tipc_node *n_ptr); -struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space); -struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space); -int tipc_node_get_linkname(u32 bearer_id, u32 node, char *linkname, size_t len); +int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 node, + char *linkname, size_t len); void tipc_node_unlock(struct tipc_node *node); -int tipc_node_add_conn(u32 dnode, u32 port, u32 peer_port); -void tipc_node_remove_conn(u32 dnode, u32 port); +int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port); +void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port); int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb); @@ -154,12 +161,12 @@ static inline bool tipc_node_blocked(struct tipc_node *node) TIPC_NOTIFY_NODE_DOWN | TIPC_WAIT_OWN_LINKS_DOWN)); } -static inline uint tipc_node_get_mtu(u32 addr, u32 selector) +static inline uint tipc_node_get_mtu(struct net *net, u32 addr, u32 selector) { struct tipc_node *node; u32 mtu; - node = tipc_node_find(addr); + node = tipc_node_find(net, addr); if (likely(node)) mtu = node->act_mtus[selector & 1]; diff --git a/net/tipc/server.c b/net/tipc/server.c index a538a02f869b..eadd4ed45905 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -35,6 +35,7 @@ #include "server.h" #include "core.h" +#include "socket.h" #include <net/sock.h> /* Number of messages to send before rescheduling */ @@ -255,7 +256,8 @@ static int tipc_receive_from_sock(struct tipc_conn *con) goto out_close; } - s->tipc_conn_recvmsg(con->conid, &addr, con->usr_data, buf, ret); + s->tipc_conn_recvmsg(sock_net(con->sock->sk), con->conid, &addr, + con->usr_data, buf, ret); kmem_cache_free(s->rcvbuf_cache, buf); @@ -307,7 +309,7 @@ static struct socket *tipc_create_listen_sock(struct tipc_conn *con) struct socket *sock = NULL; int ret; - ret = tipc_sock_create_local(s->type, &sock); + ret = tipc_sock_create_local(s->net, s->type, &sock); if (ret < 0) return NULL; ret = kernel_setsockopt(sock, SOL_TIPC, TIPC_IMPORTANCE, diff --git a/net/tipc/server.h b/net/tipc/server.h index be817b0b547e..9015faedb1b0 100644 --- a/net/tipc/server.h +++ b/net/tipc/server.h @@ -36,7 +36,9 @@ #ifndef _TIPC_SERVER_H #define _TIPC_SERVER_H -#include "core.h" +#include <linux/idr.h> +#include <linux/tipc.h> +#include <net/net_namespace.h> #define TIPC_SERVER_NAME_LEN 32 @@ -45,6 +47,7 @@ * @conn_idr: identifier set of connection * @idr_lock: protect the connection identifier set * @idr_in_use: amount of allocated identifier entry + * @net: network namspace instance * @rcvbuf_cache: memory cache of server receive buffer * @rcv_wq: receive workqueue * @send_wq: send workqueue @@ -61,16 +64,18 @@ struct tipc_server { struct idr 
conn_idr; spinlock_t idr_lock; int idr_in_use; + struct net *net; struct kmem_cache *rcvbuf_cache; struct workqueue_struct *rcv_wq; struct workqueue_struct *send_wq; int max_rcvbuf_size; - void *(*tipc_conn_new) (int conid); - void (*tipc_conn_shutdown) (int conid, void *usr_data); - void (*tipc_conn_recvmsg) (int conid, struct sockaddr_tipc *addr, - void *usr_data, void *buf, size_t len); + void *(*tipc_conn_new)(int conid); + void (*tipc_conn_shutdown)(int conid, void *usr_data); + void (*tipc_conn_recvmsg)(struct net *net, int conid, + struct sockaddr_tipc *addr, void *usr_data, + void *buf, size_t len); struct sockaddr_tipc *saddr; - const char name[TIPC_SERVER_NAME_LEN]; + char name[TIPC_SERVER_NAME_LEN]; int imp; int type; }; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 4731cad99d1c..f73e975af80b 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1,7 +1,7 @@ /* * net/tipc/socket.c: TIPC socket API * - * Copyright (c) 2001-2007, 2012-2014, Ericsson AB + * Copyright (c) 2001-2007, 2012-2015, Ericsson AB * Copyright (c) 2004-2008, 2010-2013, Wind River Systems * All rights reserved. * @@ -34,22 +34,25 @@ * POSSIBILITY OF SUCH DAMAGE. */ +#include <linux/rhashtable.h> +#include <linux/jhash.h> #include "core.h" #include "name_table.h" #include "node.h" #include "link.h" -#include <linux/export.h> -#include "config.h" +#include "name_distr.h" #include "socket.h" -#define SS_LISTENING -1 /* socket is listening */ -#define SS_READY -2 /* socket is connectionless */ +#define SS_LISTENING -1 /* socket is listening */ +#define SS_READY -2 /* socket is connectionless */ -#define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */ -#define CONN_PROBING_INTERVAL 3600000 /* [ms] => 1 h */ -#define TIPC_FWD_MSG 1 -#define TIPC_CONN_OK 0 -#define TIPC_CONN_PROBING 1 +#define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */ +#define CONN_PROBING_INTERVAL msecs_to_jiffies(3600000) /* [ms] => 1 h */ +#define TIPC_FWD_MSG 1 +#define TIPC_CONN_OK 0 +#define TIPC_CONN_PROBING 1 +#define TIPC_MAX_PORT 0xffffffff +#define TIPC_MIN_PORT 1 /** * struct tipc_sock - TIPC socket structure @@ -59,21 +62,20 @@ * @conn_instance: TIPC instance used when connection was established * @published: non-zero if port has one or more associated names * @max_pkt: maximum packet size "hint" used when building messages sent by port - * @ref: unique reference to port in TIPC object registry + * @portid: unique port identity in TIPC socket hash table * @phdr: preformatted message header used when sending messages * @port_list: adjacent ports in TIPC's global list of ports * @publications: list of publications for port * @pub_count: total # of publications port has made during its lifetime * @probing_state: - * @probing_interval: - * @timer: - * @port: port - interacts with 'sk' and with the rest of the TIPC stack - * @peer_name: the peer of the connection, if any + * @probing_intv: * @conn_timeout: the time we can wait for an unresponded setup request * @dupl_rcvcnt: number of bytes counted twice, in both backlog and rcv queue * @link_cong: non-zero if owner must sleep because of link congestion * @sent_unacked: # messages sent by socket, and not yet acked by peer * @rcv_unacked: # messages read by user, but not yet acked back to peer + * @node: hash table node + * @rcu: rcu struct for tipc_sock */ struct tipc_sock { struct sock sk; @@ -82,19 +84,20 @@ struct tipc_sock { u32 conn_instance; int published; u32 max_pkt; - u32 ref; + u32 portid; struct tipc_msg phdr; struct list_head 
sock_list; struct list_head publications; u32 pub_count; u32 probing_state; - u32 probing_interval; - struct timer_list timer; + unsigned long probing_intv; uint conn_timeout; atomic_t dupl_rcvcnt; bool link_cong; uint sent_unacked; uint rcv_unacked; + struct rhash_head node; + struct rcu_head rcu; }; static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb); @@ -103,16 +106,14 @@ static void tipc_write_space(struct sock *sk); static int tipc_release(struct socket *sock); static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags); static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p); -static void tipc_sk_timeout(unsigned long ref); +static void tipc_sk_timeout(unsigned long data); static int tipc_sk_publish(struct tipc_sock *tsk, uint scope, struct tipc_name_seq const *seq); static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope, struct tipc_name_seq const *seq); -static u32 tipc_sk_ref_acquire(struct tipc_sock *tsk); -static void tipc_sk_ref_discard(u32 ref); -static struct tipc_sock *tipc_sk_get(u32 ref); -static struct tipc_sock *tipc_sk_get_next(u32 *ref); -static void tipc_sk_put(struct tipc_sock *tsk); +static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid); +static int tipc_sk_insert(struct tipc_sock *tsk); +static void tipc_sk_remove(struct tipc_sock *tsk); static const struct proto_ops packet_ops; static const struct proto_ops stream_ops; @@ -174,6 +175,11 @@ static const struct nla_policy tipc_nl_sock_policy[TIPC_NLA_SOCK_MAX + 1] = { * - port reference */ +static u32 tsk_own_node(struct tipc_sock *tsk) +{ + return msg_prevnode(&tsk->phdr); +} + static u32 tsk_peer_node(struct tipc_sock *tsk) { return msg_destnode(&tsk->phdr); @@ -246,10 +252,11 @@ static void tsk_rej_rx_queue(struct sock *sk) { struct sk_buff *skb; u32 dnode; + u32 own_node = tsk_own_node(tipc_sk(sk)); while ((skb = __skb_dequeue(&sk->sk_receive_queue))) { - if (tipc_msg_reverse(skb, &dnode, TIPC_ERR_NO_PORT)) - tipc_link_xmit_skb(skb, dnode, 0); + if (tipc_msg_reverse(own_node, skb, &dnode, TIPC_ERR_NO_PORT)) + tipc_link_xmit_skb(sock_net(sk), skb, dnode, 0); } } @@ -260,6 +267,7 @@ static void tsk_rej_rx_queue(struct sock *sk) */ static bool tsk_peer_msg(struct tipc_sock *tsk, struct tipc_msg *msg) { + struct tipc_net *tn = net_generic(sock_net(&tsk->sk), tipc_net_id); u32 peer_port = tsk_peer_port(tsk); u32 orig_node; u32 peer_node; @@ -276,10 +284,10 @@ static bool tsk_peer_msg(struct tipc_sock *tsk, struct tipc_msg *msg) if (likely(orig_node == peer_node)) return true; - if (!orig_node && (peer_node == tipc_own_addr)) + if (!orig_node && (peer_node == tn->own_addr)) return true; - if (!peer_node && (orig_node == tipc_own_addr)) + if (!peer_node && (orig_node == tn->own_addr)) return true; return false; @@ -300,12 +308,12 @@ static bool tsk_peer_msg(struct tipc_sock *tsk, struct tipc_msg *msg) static int tipc_sk_create(struct net *net, struct socket *sock, int protocol, int kern) { + struct tipc_net *tn; const struct proto_ops *ops; socket_state state; struct sock *sk; struct tipc_sock *tsk; struct tipc_msg *msg; - u32 ref; /* Validate arguments */ if (unlikely(protocol != 0)) @@ -339,24 +347,23 @@ static int tipc_sk_create(struct net *net, struct socket *sock, return -ENOMEM; tsk = tipc_sk(sk); - ref = tipc_sk_ref_acquire(tsk); - if (!ref) { - pr_warn("Socket create failed; reference table exhausted\n"); - return -ENOMEM; - } tsk->max_pkt = MAX_PKT_DEFAULT; - tsk->ref = ref; INIT_LIST_HEAD(&tsk->publications); msg = &tsk->phdr; - 
tipc_msg_init(msg, TIPC_LOW_IMPORTANCE, TIPC_NAMED_MSG, + tn = net_generic(sock_net(sk), tipc_net_id); + tipc_msg_init(tn->own_addr, msg, TIPC_LOW_IMPORTANCE, TIPC_NAMED_MSG, NAMED_H_SIZE, 0); - msg_set_origport(msg, ref); /* Finish initializing socket data structures */ sock->ops = ops; sock->state = state; sock_init_data(sock, sk); - k_init_timer(&tsk->timer, (Handler)tipc_sk_timeout, ref); + if (tipc_sk_insert(tsk)) { + pr_warn("Socket create failed; port numbrer exhausted\n"); + return -EINVAL; + } + msg_set_origport(msg, tsk->portid); + setup_timer(&sk->sk_timer, tipc_sk_timeout, (unsigned long)tsk); sk->sk_backlog_rcv = tipc_backlog_rcv; sk->sk_rcvbuf = sysctl_tipc_rmem[1]; sk->sk_data_ready = tipc_data_ready; @@ -384,7 +391,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock, * * Returns 0 on success, errno otherwise */ -int tipc_sock_create_local(int type, struct socket **res) +int tipc_sock_create_local(struct net *net, int type, struct socket **res) { int rc; @@ -393,7 +400,7 @@ int tipc_sock_create_local(int type, struct socket **res) pr_err("Failed to create kernel socket\n"); return rc; } - tipc_sk_create(&init_net, *res, 0, 1); + tipc_sk_create(net, *res, 0, 1); return 0; } @@ -442,6 +449,13 @@ int tipc_sock_accept_local(struct socket *sock, struct socket **newsock, return ret; } +static void tipc_sk_callback(struct rcu_head *head) +{ + struct tipc_sock *tsk = container_of(head, struct tipc_sock, rcu); + + sock_put(&tsk->sk); +} + /** * tipc_release - destroy a TIPC socket * @sock: socket to destroy @@ -461,9 +475,10 @@ int tipc_sock_accept_local(struct socket *sock, struct socket **newsock, static int tipc_release(struct socket *sock) { struct sock *sk = sock->sk; + struct net *net; struct tipc_sock *tsk; struct sk_buff *skb; - u32 dnode; + u32 dnode, probing_state; /* * Exit if socket isn't fully initialized (occurs when a failed accept() @@ -472,6 +487,7 @@ static int tipc_release(struct socket *sock) if (sk == NULL) return 0; + net = sock_net(sk); tsk = tipc_sk(sk); lock_sock(sk); @@ -491,26 +507,29 @@ static int tipc_release(struct socket *sock) (sock->state == SS_CONNECTED)) { sock->state = SS_DISCONNECTING; tsk->connected = 0; - tipc_node_remove_conn(dnode, tsk->ref); + tipc_node_remove_conn(net, dnode, tsk->portid); } - if (tipc_msg_reverse(skb, &dnode, TIPC_ERR_NO_PORT)) - tipc_link_xmit_skb(skb, dnode, 0); + if (tipc_msg_reverse(tsk_own_node(tsk), skb, &dnode, + TIPC_ERR_NO_PORT)) + tipc_link_xmit_skb(net, skb, dnode, 0); } } tipc_sk_withdraw(tsk, 0, NULL); - tipc_sk_ref_discard(tsk->ref); - k_cancel_timer(&tsk->timer); + probing_state = tsk->probing_state; + if (del_timer_sync(&sk->sk_timer) && + probing_state != TIPC_CONN_PROBING) + sock_put(sk); + tipc_sk_remove(tsk); if (tsk->connected) { - skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, - SHORT_H_SIZE, 0, dnode, tipc_own_addr, - tsk_peer_port(tsk), - tsk->ref, TIPC_ERR_NO_PORT); + skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, + TIPC_CONN_MSG, SHORT_H_SIZE, 0, dnode, + tsk_own_node(tsk), tsk_peer_port(tsk), + tsk->portid, TIPC_ERR_NO_PORT); if (skb) - tipc_link_xmit_skb(skb, dnode, tsk->ref); - tipc_node_remove_conn(dnode, tsk->ref); + tipc_link_xmit_skb(net, skb, dnode, tsk->portid); + tipc_node_remove_conn(net, dnode, tsk->portid); } - k_term_timer(&tsk->timer); /* Discard any remaining (connection-based) messages in receive queue */ __skb_queue_purge(&sk->sk_receive_queue); @@ -518,7 +537,8 @@ static int tipc_release(struct socket *sock) /* Reject any messages that accumulated 
in backlog queue */ sock->state = SS_DISCONNECTING; release_sock(sk); - sock_put(sk); + + call_rcu(&tsk->rcu, tipc_sk_callback); sock->sk = NULL; return 0; @@ -602,6 +622,7 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr, { struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr; struct tipc_sock *tsk = tipc_sk(sock->sk); + struct tipc_net *tn = net_generic(sock_net(sock->sk), tipc_net_id); memset(addr, 0, sizeof(*addr)); if (peer) { @@ -611,8 +632,8 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr, addr->addr.id.ref = tsk_peer_port(tsk); addr->addr.id.node = tsk_peer_node(tsk); } else { - addr->addr.id.ref = tsk->ref; - addr->addr.id.node = tipc_own_addr; + addr->addr.id.ref = tsk->portid; + addr->addr.id.node = tn->own_addr; } *uaddr_len = sizeof(*addr); @@ -711,8 +732,11 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, struct msghdr *msg, size_t dsz, long timeo) { struct sock *sk = sock->sk; - struct tipc_msg *mhdr = &tipc_sk(sk)->phdr; - struct sk_buff_head head; + struct tipc_sock *tsk = tipc_sk(sk); + struct net *net = sock_net(sk); + struct tipc_msg *mhdr = &tsk->phdr; + struct sk_buff_head *pktchain = &sk->sk_write_queue; + struct iov_iter save = msg->msg_iter; uint mtu; int rc; @@ -727,83 +751,97 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, new_mtu: mtu = tipc_bclink_get_mtu(); - __skb_queue_head_init(&head); - rc = tipc_msg_build(mhdr, msg, 0, dsz, mtu, &head); + rc = tipc_msg_build(mhdr, msg, 0, dsz, mtu, pktchain); if (unlikely(rc < 0)) return rc; do { - rc = tipc_bclink_xmit(&head); + rc = tipc_bclink_xmit(net, pktchain); if (likely(rc >= 0)) { rc = dsz; break; } - if (rc == -EMSGSIZE) + if (rc == -EMSGSIZE) { + msg->msg_iter = save; goto new_mtu; + } if (rc != -ELINKCONG) break; tipc_sk(sk)->link_cong = 1; rc = tipc_wait_for_sndmsg(sock, &timeo); if (rc) - __skb_queue_purge(&head); + __skb_queue_purge(pktchain); } while (!rc); return rc; } -/* tipc_sk_mcast_rcv - Deliver multicast message to all destination sockets +/** + * tipc_sk_mcast_rcv - Deliver multicast messages to all destination sockets + * @arrvq: queue with arriving messages, to be cloned after destination lookup + * @inputq: queue with cloned messages, delivered to socket after dest lookup + * + * Multi-threaded: parallel calls with reference to same queues may occur */ -void tipc_sk_mcast_rcv(struct sk_buff *buf) +void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq, + struct sk_buff_head *inputq) { - struct tipc_msg *msg = buf_msg(buf); - struct tipc_port_list dports = {0, NULL, }; - struct tipc_port_list *item; - struct sk_buff *b; - uint i, last, dst = 0; + struct tipc_msg *msg; + struct tipc_plist dports; + u32 portid; u32 scope = TIPC_CLUSTER_SCOPE; - - if (in_own_node(msg_orignode(msg))) - scope = TIPC_NODE_SCOPE; - - /* Create destination port list: */ - tipc_nametbl_mc_translate(msg_nametype(msg), - msg_namelower(msg), - msg_nameupper(msg), - scope, - &dports); - last = dports.count; - if (!last) { - kfree_skb(buf); - return; - } - - for (item = &dports; item; item = item->next) { - for (i = 0; i < PLSIZE && ++dst <= last; i++) { - b = (dst != last) ? 
skb_clone(buf, GFP_ATOMIC) : buf; - if (!b) { - pr_warn("Failed do clone mcast rcv buffer\n"); + struct sk_buff_head tmpq; + uint hsz; + struct sk_buff *skb, *_skb; + + __skb_queue_head_init(&tmpq); + tipc_plist_init(&dports); + + skb = tipc_skb_peek(arrvq, &inputq->lock); + for (; skb; skb = tipc_skb_peek(arrvq, &inputq->lock)) { + msg = buf_msg(skb); + hsz = skb_headroom(skb) + msg_hdr_sz(msg); + + if (in_own_node(net, msg_orignode(msg))) + scope = TIPC_NODE_SCOPE; + + /* Create destination port list and message clones: */ + tipc_nametbl_mc_translate(net, + msg_nametype(msg), msg_namelower(msg), + msg_nameupper(msg), scope, &dports); + portid = tipc_plist_pop(&dports); + for (; portid; portid = tipc_plist_pop(&dports)) { + _skb = __pskb_copy(skb, hsz, GFP_ATOMIC); + if (_skb) { + msg_set_destport(buf_msg(_skb), portid); + __skb_queue_tail(&tmpq, _skb); continue; } - msg_set_destport(msg, item->ports[i]); - tipc_sk_rcv(b); + pr_warn("Failed to clone mcast rcv buffer\n"); } + /* Append to inputq if not already done by other thread */ + spin_lock_bh(&inputq->lock); + if (skb_peek(arrvq) == skb) { + skb_queue_splice_tail_init(&tmpq, inputq); + kfree_skb(__skb_dequeue(arrvq)); + } + spin_unlock_bh(&inputq->lock); + __skb_queue_purge(&tmpq); + kfree_skb(skb); } - tipc_port_list_free(&dports); + tipc_sk_rcv(net, inputq); } /** * tipc_sk_proto_rcv - receive a connection mng protocol message * @tsk: receiving socket - * @dnode: node to send response message to, if any - * @buf: buffer containing protocol message - * Returns 0 (TIPC_OK) if message was consumed, 1 (TIPC_FWD_MSG) if - * (CONN_PROBE_REPLY) message should be forwarded. + * @skb: pointer to message buffer. Set to NULL if buffer is consumed. */ -static int tipc_sk_proto_rcv(struct tipc_sock *tsk, u32 *dnode, - struct sk_buff *buf) +static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff **skb) { - struct tipc_msg *msg = buf_msg(buf); + struct tipc_msg *msg = buf_msg(*skb); int conn_cong; - + u32 dnode; + u32 own_node = tsk_own_node(tsk); /* Ignore if connection cannot be validated: */ if (!tsk_peer_msg(tsk, msg)) goto exit; @@ -816,15 +854,15 @@ static int tipc_sk_proto_rcv(struct tipc_sock *tsk, u32 *dnode, if (conn_cong) tsk->sk.sk_write_space(&tsk->sk); } else if (msg_type(msg) == CONN_PROBE) { - if (!tipc_msg_reverse(buf, dnode, TIPC_OK)) - return TIPC_OK; - msg_set_type(msg, CONN_PROBE_REPLY); - return TIPC_FWD_MSG; + if (tipc_msg_reverse(own_node, *skb, &dnode, TIPC_OK)) { + msg_set_type(msg, CONN_PROBE_REPLY); + return; + } } /* Do nothing if msg_type() == CONN_PROBE_REPLY */ exit: - kfree_skb(buf); - return TIPC_OK; + kfree_skb(*skb); + *skb = NULL; } static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p) @@ -872,11 +910,13 @@ static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock, DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); + struct net *net = sock_net(sk); struct tipc_msg *mhdr = &tsk->phdr; u32 dnode, dport; - struct sk_buff_head head; + struct sk_buff_head *pktchain = &sk->sk_write_queue; struct sk_buff *skb; struct tipc_name_seq *seq = &dest->addr.nameseq; + struct iov_iter save; u32 mtu; long timeo; int rc; @@ -929,7 +969,7 @@ static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock, msg_set_nametype(mhdr, type); msg_set_nameinst(mhdr, inst); msg_set_lookup_scope(mhdr, tipc_addr_scope(domain)); - dport = tipc_nametbl_translate(type, inst, &dnode); + dport = tipc_nametbl_translate(net, type, inst, 
&dnode); msg_set_destnode(mhdr, dnode); msg_set_destport(mhdr, dport); if (unlikely(!dport && !dnode)) { @@ -945,31 +985,33 @@ static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock, msg_set_hdr_sz(mhdr, BASIC_H_SIZE); } + save = m->msg_iter; new_mtu: - mtu = tipc_node_get_mtu(dnode, tsk->ref); - __skb_queue_head_init(&head); - rc = tipc_msg_build(mhdr, m, 0, dsz, mtu, &head); + mtu = tipc_node_get_mtu(net, dnode, tsk->portid); + rc = tipc_msg_build(mhdr, m, 0, dsz, mtu, pktchain); if (rc < 0) goto exit; do { - skb = skb_peek(&head); + skb = skb_peek(pktchain); TIPC_SKB_CB(skb)->wakeup_pending = tsk->link_cong; - rc = tipc_link_xmit(&head, dnode, tsk->ref); + rc = tipc_link_xmit(net, pktchain, dnode, tsk->portid); if (likely(rc >= 0)) { if (sock->state != SS_READY) sock->state = SS_CONNECTING; rc = dsz; break; } - if (rc == -EMSGSIZE) + if (rc == -EMSGSIZE) { + m->msg_iter = save; goto new_mtu; + } if (rc != -ELINKCONG) break; tsk->link_cong = 1; rc = tipc_wait_for_sndmsg(sock, &timeo); if (rc) - __skb_queue_purge(&head); + __skb_queue_purge(pktchain); } while (!rc); exit: if (iocb) @@ -1024,15 +1066,17 @@ static int tipc_send_stream(struct kiocb *iocb, struct socket *sock, struct msghdr *m, size_t dsz) { struct sock *sk = sock->sk; + struct net *net = sock_net(sk); struct tipc_sock *tsk = tipc_sk(sk); struct tipc_msg *mhdr = &tsk->phdr; - struct sk_buff_head head; + struct sk_buff_head *pktchain = &sk->sk_write_queue; DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); - u32 ref = tsk->ref; + u32 portid = tsk->portid; int rc = -EINVAL; long timeo; u32 dnode; uint mtu, send, sent = 0; + struct iov_iter save; /* Handle implied connection establishment */ if (unlikely(dest)) { @@ -1059,15 +1103,15 @@ static int tipc_send_stream(struct kiocb *iocb, struct socket *sock, dnode = tsk_peer_node(tsk); next: + save = m->msg_iter; mtu = tsk->max_pkt; send = min_t(uint, dsz - sent, TIPC_MAX_USER_MSG_SIZE); - __skb_queue_head_init(&head); - rc = tipc_msg_build(mhdr, m, sent, send, mtu, &head); + rc = tipc_msg_build(mhdr, m, sent, send, mtu, pktchain); if (unlikely(rc < 0)) goto exit; do { if (likely(!tsk_conn_cong(tsk))) { - rc = tipc_link_xmit(&head, dnode, ref); + rc = tipc_link_xmit(net, pktchain, dnode, portid); if (likely(!rc)) { tsk->sent_unacked++; sent += send; @@ -1076,7 +1120,9 @@ next: goto next; } if (rc == -EMSGSIZE) { - tsk->max_pkt = tipc_node_get_mtu(dnode, ref); + tsk->max_pkt = tipc_node_get_mtu(net, dnode, + portid); + m->msg_iter = save; goto next; } if (rc != -ELINKCONG) @@ -1085,7 +1131,7 @@ next: } rc = tipc_wait_for_sndpkt(sock, &timeo); if (rc) - __skb_queue_purge(&head); + __skb_queue_purge(pktchain); } while (!rc); exit: if (iocb) @@ -1118,6 +1164,8 @@ static int tipc_send_packet(struct kiocb *iocb, struct socket *sock, static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port, u32 peer_node) { + struct sock *sk = &tsk->sk; + struct net *net = sock_net(sk); struct tipc_msg *msg = &tsk->phdr; msg_set_destnode(msg, peer_node); @@ -1126,12 +1174,12 @@ static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port, msg_set_lookup_scope(msg, 0); msg_set_hdr_sz(msg, SHORT_H_SIZE); - tsk->probing_interval = CONN_PROBING_INTERVAL; + tsk->probing_intv = CONN_PROBING_INTERVAL; tsk->probing_state = TIPC_CONN_OK; tsk->connected = 1; - k_start_timer(&tsk->timer, tsk->probing_interval); - tipc_node_add_conn(peer_node, tsk->ref, peer_port); - tsk->max_pkt = tipc_node_get_mtu(peer_node, tsk->ref); + sk_reset_timer(sk, &sk->sk_timer, jiffies + 
tsk->probing_intv); + tipc_node_add_conn(net, peer_node, tsk->portid, peer_port); + tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid); } /** @@ -1230,6 +1278,7 @@ static int tipc_sk_anc_data_recv(struct msghdr *m, struct tipc_msg *msg, static void tipc_sk_send_ack(struct tipc_sock *tsk, uint ack) { + struct net *net = sock_net(&tsk->sk); struct sk_buff *skb = NULL; struct tipc_msg *msg; u32 peer_port = tsk_peer_port(tsk); @@ -1237,13 +1286,14 @@ static void tipc_sk_send_ack(struct tipc_sock *tsk, uint ack) if (!tsk->connected) return; - skb = tipc_msg_create(CONN_MANAGER, CONN_ACK, INT_H_SIZE, 0, dnode, - tipc_own_addr, peer_port, tsk->ref, TIPC_OK); + skb = tipc_msg_create(CONN_MANAGER, CONN_ACK, INT_H_SIZE, 0, + dnode, tsk_own_node(tsk), peer_port, + tsk->portid, TIPC_OK); if (!skb) return; msg = buf_msg(skb); msg_set_msgcnt(msg, ack); - tipc_link_xmit_skb(skb, dnode, msg_link_selector(msg)); + tipc_link_xmit_skb(net, skb, dnode, msg_link_selector(msg)); } static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop) @@ -1529,15 +1579,16 @@ static void tipc_data_ready(struct sock *sk) /** * filter_connect - Handle all incoming messages for a connection-based socket * @tsk: TIPC socket - * @msg: message + * @skb: pointer to message buffer. Set to NULL if buffer is consumed * * Returns 0 (TIPC_OK) if everything ok, -TIPC_ERR_NO_PORT otherwise */ -static int filter_connect(struct tipc_sock *tsk, struct sk_buff **buf) +static int filter_connect(struct tipc_sock *tsk, struct sk_buff **skb) { struct sock *sk = &tsk->sk; + struct net *net = sock_net(sk); struct socket *sock = sk->sk_socket; - struct tipc_msg *msg = buf_msg(*buf); + struct tipc_msg *msg = buf_msg(*skb); int retval = -TIPC_ERR_NO_PORT; if (msg_mcast(msg)) @@ -1551,8 +1602,8 @@ static int filter_connect(struct tipc_sock *tsk, struct sk_buff **buf) sock->state = SS_DISCONNECTING; tsk->connected = 0; /* let timer expire on it's own */ - tipc_node_remove_conn(tsk_peer_node(tsk), - tsk->ref); + tipc_node_remove_conn(net, tsk_peer_node(tsk), + tsk->portid); } retval = TIPC_OK; } @@ -1587,8 +1638,8 @@ static int filter_connect(struct tipc_sock *tsk, struct sk_buff **buf) * connect() routine if sleeping. */ if (msg_data_sz(msg) == 0) { - kfree_skb(*buf); - *buf = NULL; + kfree_skb(*skb); + *skb = NULL; if (waitqueue_active(sk_sleep(sk))) wake_up_interruptible(sk_sleep(sk)); } @@ -1640,32 +1691,33 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *buf) /** * filter_rcv - validate incoming message * @sk: socket - * @buf: message + * @skb: pointer to message. Set to NULL if buffer is consumed. * * Enqueues message on receive queue if acceptable; optionally handles * disconnect indication for a connected socket. * - * Called with socket lock already taken; port lock may also be taken. 
+ * Called with socket lock already taken * - * Returns 0 (TIPC_OK) if message was consumed, -TIPC error code if message - * to be rejected, 1 (TIPC_FWD_MSG) if (CONN_MANAGER) message to be forwarded + * Returns 0 (TIPC_OK) if message was ok, -TIPC error code if rejected */ -static int filter_rcv(struct sock *sk, struct sk_buff *buf) +static int filter_rcv(struct sock *sk, struct sk_buff **skb) { struct socket *sock = sk->sk_socket; struct tipc_sock *tsk = tipc_sk(sk); - struct tipc_msg *msg = buf_msg(buf); - unsigned int limit = rcvbuf_limit(sk, buf); - u32 onode; + struct tipc_msg *msg = buf_msg(*skb); + unsigned int limit = rcvbuf_limit(sk, *skb); int rc = TIPC_OK; - if (unlikely(msg_user(msg) == CONN_MANAGER)) - return tipc_sk_proto_rcv(tsk, &onode, buf); + if (unlikely(msg_user(msg) == CONN_MANAGER)) { + tipc_sk_proto_rcv(tsk, skb); + return TIPC_OK; + } if (unlikely(msg_user(msg) == SOCK_WAKEUP)) { - kfree_skb(buf); + kfree_skb(*skb); tsk->link_cong = 0; sk->sk_write_space(sk); + *skb = NULL; return TIPC_OK; } @@ -1677,21 +1729,22 @@ static int filter_rcv(struct sock *sk, struct sk_buff *buf) if (msg_connected(msg)) return -TIPC_ERR_NO_PORT; } else { - rc = filter_connect(tsk, &buf); - if (rc != TIPC_OK || buf == NULL) + rc = filter_connect(tsk, skb); + if (rc != TIPC_OK || !*skb) return rc; } /* Reject message if there isn't room to queue it */ - if (sk_rmem_alloc_get(sk) + buf->truesize >= limit) + if (sk_rmem_alloc_get(sk) + (*skb)->truesize >= limit) return -TIPC_ERR_OVERLOAD; /* Enqueue message */ - TIPC_SKB_CB(buf)->handle = NULL; - __skb_queue_tail(&sk->sk_receive_queue, buf); - skb_set_owner_r(buf, sk); + TIPC_SKB_CB(*skb)->handle = NULL; + __skb_queue_tail(&sk->sk_receive_queue, *skb); + skb_set_owner_r(*skb, sk); sk->sk_data_ready(sk); + *skb = NULL; return TIPC_OK; } @@ -1700,78 +1753,125 @@ static int filter_rcv(struct sock *sk, struct sk_buff *buf) * @sk: socket * @skb: message * - * Caller must hold socket lock, but not port lock. 
+ * Caller must hold socket lock * * Returns 0 */ static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb) { - int rc; - u32 onode; + int err; + atomic_t *dcnt; + u32 dnode; struct tipc_sock *tsk = tipc_sk(sk); + struct net *net = sock_net(sk); uint truesize = skb->truesize; - rc = filter_rcv(sk, skb); - - if (likely(!rc)) { - if (atomic_read(&tsk->dupl_rcvcnt) < TIPC_CONN_OVERLOAD_LIMIT) - atomic_add(truesize, &tsk->dupl_rcvcnt); + err = filter_rcv(sk, &skb); + if (likely(!skb)) { + dcnt = &tsk->dupl_rcvcnt; + if (atomic_read(dcnt) < TIPC_CONN_OVERLOAD_LIMIT) + atomic_add(truesize, dcnt); return 0; } + if (!err || tipc_msg_reverse(tsk_own_node(tsk), skb, &dnode, -err)) + tipc_link_xmit_skb(net, skb, dnode, tsk->portid); + return 0; +} - if ((rc < 0) && !tipc_msg_reverse(skb, &onode, -rc)) - return 0; - - tipc_link_xmit_skb(skb, onode, 0); +/** + * tipc_sk_enqueue - extract all buffers with destination 'dport' from + * inputq and try adding them to socket or backlog queue + * @inputq: list of incoming buffers with potentially different destinations + * @sk: socket where the buffers should be enqueued + * @dport: port number for the socket + * @_skb: returned buffer to be forwarded or rejected, if applicable + * + * Caller must hold socket lock + * + * Returns TIPC_OK if all buffers enqueued, otherwise -TIPC_ERR_OVERLOAD + * or -TIPC_ERR_NO_PORT + */ +static int tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk, + u32 dport, struct sk_buff **_skb) +{ + unsigned int lim; + atomic_t *dcnt; + int err; + struct sk_buff *skb; + unsigned long time_limit = jiffies + 2; - return 0; + while (skb_queue_len(inputq)) { + if (unlikely(time_after_eq(jiffies, time_limit))) + return TIPC_OK; + skb = tipc_skb_dequeue(inputq, dport); + if (unlikely(!skb)) + return TIPC_OK; + if (!sock_owned_by_user(sk)) { + err = filter_rcv(sk, &skb); + if (likely(!skb)) + continue; + *_skb = skb; + return err; + } + dcnt = &tipc_sk(sk)->dupl_rcvcnt; + if (sk->sk_backlog.len) + atomic_set(dcnt, 0); + lim = rcvbuf_limit(sk, skb) + atomic_read(dcnt); + if (likely(!sk_add_backlog(sk, skb, lim))) + continue; + *_skb = skb; + return -TIPC_ERR_OVERLOAD; + } + return TIPC_OK; } /** - * tipc_sk_rcv - handle incoming message - * @skb: buffer containing arriving message - * Consumes buffer - * Returns 0 if success, or errno: -EHOSTUNREACH + * tipc_sk_rcv - handle a chain of incoming buffers + * @inputq: buffer list containing the buffers + * Consumes all buffers in list until inputq is empty + * Note: may be called in multiple threads referring to the same queue + * Returns 0 if last buffer was accepted, otherwise -EHOSTUNREACH + * Only node local calls check the return value, sending single-buffer queues */ -int tipc_sk_rcv(struct sk_buff *skb) +int tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq) { + u32 dnode, dport = 0; + int err = -TIPC_ERR_NO_PORT; + struct sk_buff *skb; struct tipc_sock *tsk; + struct tipc_net *tn; struct sock *sk; - u32 dport = msg_destport(buf_msg(skb)); - int rc = TIPC_OK; - uint limit; - u32 dnode; - /* Validate destination and message */ - tsk = tipc_sk_get(dport); - if (unlikely(!tsk)) { - rc = tipc_msg_eval(skb, &dnode); - goto exit; + while (skb_queue_len(inputq)) { + skb = NULL; + dport = tipc_skb_peek_port(inputq, dport); + tsk = tipc_sk_lookup(net, dport); + if (likely(tsk)) { + sk = &tsk->sk; + if (likely(spin_trylock_bh(&sk->sk_lock.slock))) { + err = tipc_sk_enqueue(inputq, sk, dport, &skb); + spin_unlock_bh(&sk->sk_lock.slock); + dport = 0; + } + sock_put(sk); + } 
else { + skb = tipc_skb_dequeue(inputq, dport); + } + if (likely(!skb)) + continue; + if (tipc_msg_lookup_dest(net, skb, &dnode, &err)) + goto xmit; + if (!err) { + dnode = msg_destnode(buf_msg(skb)); + goto xmit; + } + tn = net_generic(net, tipc_net_id); + if (!tipc_msg_reverse(tn->own_addr, skb, &dnode, -err)) + continue; +xmit: + tipc_link_xmit_skb(net, skb, dnode, dport); } - sk = &tsk->sk; - - /* Queue message */ - spin_lock_bh(&sk->sk_lock.slock); - - if (!sock_owned_by_user(sk)) { - rc = filter_rcv(sk, skb); - } else { - if (sk->sk_backlog.len == 0) - atomic_set(&tsk->dupl_rcvcnt, 0); - limit = rcvbuf_limit(sk, skb) + atomic_read(&tsk->dupl_rcvcnt); - if (sk_add_backlog(sk, skb, limit)) - rc = -TIPC_ERR_OVERLOAD; - } - spin_unlock_bh(&sk->sk_lock.slock); - tipc_sk_put(tsk); - if (likely(!rc)) - return 0; -exit: - if ((rc < 0) && !tipc_msg_reverse(skb, &dnode, -rc)) - return -EHOSTUNREACH; - - tipc_link_xmit_skb(skb, dnode, 0); - return (rc < 0) ? -EHOSTUNREACH : 0; + return err ? -EHOSTUNREACH : 0; } static int tipc_wait_for_connect(struct socket *sock, long *timeo_p) @@ -2027,6 +2127,7 @@ exit: static int tipc_shutdown(struct socket *sock, int how) { struct sock *sk = sock->sk; + struct net *net = sock_net(sk); struct tipc_sock *tsk = tipc_sk(sk); struct sk_buff *skb; u32 dnode; @@ -2049,21 +2150,24 @@ restart: kfree_skb(skb); goto restart; } - if (tipc_msg_reverse(skb, &dnode, TIPC_CONN_SHUTDOWN)) - tipc_link_xmit_skb(skb, dnode, tsk->ref); - tipc_node_remove_conn(dnode, tsk->ref); + if (tipc_msg_reverse(tsk_own_node(tsk), skb, &dnode, + TIPC_CONN_SHUTDOWN)) + tipc_link_xmit_skb(net, skb, dnode, + tsk->portid); + tipc_node_remove_conn(net, dnode, tsk->portid); } else { dnode = tsk_peer_node(tsk); + skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, SHORT_H_SIZE, - 0, dnode, tipc_own_addr, + 0, dnode, tsk_own_node(tsk), tsk_peer_port(tsk), - tsk->ref, TIPC_CONN_SHUTDOWN); - tipc_link_xmit_skb(skb, dnode, tsk->ref); + tsk->portid, TIPC_CONN_SHUTDOWN); + tipc_link_xmit_skb(net, skb, dnode, tsk->portid); } tsk->connected = 0; sock->state = SS_DISCONNECTING; - tipc_node_remove_conn(dnode, tsk->ref); + tipc_node_remove_conn(net, dnode, tsk->portid); /* fall through */ case SS_DISCONNECTING: @@ -2084,18 +2188,14 @@ restart: return res; } -static void tipc_sk_timeout(unsigned long ref) +static void tipc_sk_timeout(unsigned long data) { - struct tipc_sock *tsk; - struct sock *sk; + struct tipc_sock *tsk = (struct tipc_sock *)data; + struct sock *sk = &tsk->sk; struct sk_buff *skb = NULL; u32 peer_port, peer_node; + u32 own_node = tsk_own_node(tsk); - tsk = tipc_sk_get(ref); - if (!tsk) - return; - - sk = &tsk->sk; bh_lock_sock(sk); if (!tsk->connected) { bh_unlock_sock(sk); @@ -2106,38 +2206,39 @@ static void tipc_sk_timeout(unsigned long ref) if (tsk->probing_state == TIPC_CONN_PROBING) { /* Previous probe not answered -> self abort */ - skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, - SHORT_H_SIZE, 0, tipc_own_addr, - peer_node, ref, peer_port, - TIPC_ERR_NO_PORT); + skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, + TIPC_CONN_MSG, SHORT_H_SIZE, 0, + own_node, peer_node, tsk->portid, + peer_port, TIPC_ERR_NO_PORT); } else { - skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE, INT_H_SIZE, - 0, peer_node, tipc_own_addr, - peer_port, ref, TIPC_OK); + skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE, + INT_H_SIZE, 0, peer_node, own_node, + peer_port, tsk->portid, TIPC_OK); tsk->probing_state = TIPC_CONN_PROBING; - k_start_timer(&tsk->timer, tsk->probing_interval); + 
sk_reset_timer(sk, &sk->sk_timer, jiffies + tsk->probing_intv); } bh_unlock_sock(sk); if (skb) - tipc_link_xmit_skb(skb, peer_node, ref); + tipc_link_xmit_skb(sock_net(sk), skb, peer_node, tsk->portid); exit: - tipc_sk_put(tsk); + sock_put(sk); } static int tipc_sk_publish(struct tipc_sock *tsk, uint scope, struct tipc_name_seq const *seq) { + struct net *net = sock_net(&tsk->sk); struct publication *publ; u32 key; if (tsk->connected) return -EINVAL; - key = tsk->ref + tsk->pub_count + 1; - if (key == tsk->ref) + key = tsk->portid + tsk->pub_count + 1; + if (key == tsk->portid) return -EADDRINUSE; - publ = tipc_nametbl_publish(seq->type, seq->lower, seq->upper, - scope, tsk->ref, key); + publ = tipc_nametbl_publish(net, seq->type, seq->lower, seq->upper, + scope, tsk->portid, key); if (unlikely(!publ)) return -EINVAL; @@ -2150,6 +2251,7 @@ static int tipc_sk_publish(struct tipc_sock *tsk, uint scope, static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope, struct tipc_name_seq const *seq) { + struct net *net = sock_net(&tsk->sk); struct publication *publ; struct publication *safe; int rc = -EINVAL; @@ -2164,12 +2266,12 @@ static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope, continue; if (publ->upper != seq->upper) break; - tipc_nametbl_withdraw(publ->type, publ->lower, + tipc_nametbl_withdraw(net, publ->type, publ->lower, publ->ref, publ->key); rc = 0; break; } - tipc_nametbl_withdraw(publ->type, publ->lower, + tipc_nametbl_withdraw(net, publ->type, publ->lower, publ->ref, publ->key); rc = 0; } @@ -2178,336 +2280,105 @@ static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope, return rc; } -static int tipc_sk_show(struct tipc_sock *tsk, char *buf, - int len, int full_id) -{ - struct publication *publ; - int ret; - - if (full_id) - ret = tipc_snprintf(buf, len, "<%u.%u.%u:%u>:", - tipc_zone(tipc_own_addr), - tipc_cluster(tipc_own_addr), - tipc_node(tipc_own_addr), tsk->ref); - else - ret = tipc_snprintf(buf, len, "%-10u:", tsk->ref); - - if (tsk->connected) { - u32 dport = tsk_peer_port(tsk); - u32 destnode = tsk_peer_node(tsk); - - ret += tipc_snprintf(buf + ret, len - ret, - " connected to <%u.%u.%u:%u>", - tipc_zone(destnode), - tipc_cluster(destnode), - tipc_node(destnode), dport); - if (tsk->conn_type != 0) - ret += tipc_snprintf(buf + ret, len - ret, - " via {%u,%u}", tsk->conn_type, - tsk->conn_instance); - } else if (tsk->published) { - ret += tipc_snprintf(buf + ret, len - ret, " bound to"); - list_for_each_entry(publ, &tsk->publications, pport_list) { - if (publ->lower == publ->upper) - ret += tipc_snprintf(buf + ret, len - ret, - " {%u,%u}", publ->type, - publ->lower); - else - ret += tipc_snprintf(buf + ret, len - ret, - " {%u,%u,%u}", publ->type, - publ->lower, publ->upper); - } - } - ret += tipc_snprintf(buf + ret, len - ret, "\n"); - return ret; -} - -struct sk_buff *tipc_sk_socks_show(void) -{ - struct sk_buff *buf; - struct tlv_desc *rep_tlv; - char *pb; - int pb_len; - struct tipc_sock *tsk; - int str_len = 0; - u32 ref = 0; - - buf = tipc_cfg_reply_alloc(TLV_SPACE(ULTRA_STRING_MAX_LEN)); - if (!buf) - return NULL; - rep_tlv = (struct tlv_desc *)buf->data; - pb = TLV_DATA(rep_tlv); - pb_len = ULTRA_STRING_MAX_LEN; - - tsk = tipc_sk_get_next(&ref); - for (; tsk; tsk = tipc_sk_get_next(&ref)) { - lock_sock(&tsk->sk); - str_len += tipc_sk_show(tsk, pb + str_len, - pb_len - str_len, 0); - release_sock(&tsk->sk); - tipc_sk_put(tsk); - } - str_len += 1; /* for "\0" */ - skb_put(buf, TLV_SPACE(str_len)); - TLV_SET(rep_tlv, TIPC_TLV_ULTRA_STRING, NULL, 
str_len); - - return buf; -} - /* tipc_sk_reinit: set non-zero address in all existing sockets * when we go from standalone to network mode. */ -void tipc_sk_reinit(void) +void tipc_sk_reinit(struct net *net) { + struct tipc_net *tn = net_generic(net, tipc_net_id); + const struct bucket_table *tbl; + struct rhash_head *pos; + struct tipc_sock *tsk; struct tipc_msg *msg; - u32 ref = 0; - struct tipc_sock *tsk = tipc_sk_get_next(&ref); + int i; - for (; tsk; tsk = tipc_sk_get_next(&ref)) { - lock_sock(&tsk->sk); - msg = &tsk->phdr; - msg_set_prevnode(msg, tipc_own_addr); - msg_set_orignode(msg, tipc_own_addr); - release_sock(&tsk->sk); - tipc_sk_put(tsk); + rcu_read_lock(); + tbl = rht_dereference_rcu((&tn->sk_rht)->tbl, &tn->sk_rht); + for (i = 0; i < tbl->size; i++) { + rht_for_each_entry_rcu(tsk, pos, tbl, i, node) { + spin_lock_bh(&tsk->sk.sk_lock.slock); + msg = &tsk->phdr; + msg_set_prevnode(msg, tn->own_addr); + msg_set_orignode(msg, tn->own_addr); + spin_unlock_bh(&tsk->sk.sk_lock.slock); + } } + rcu_read_unlock(); } -/** - * struct reference - TIPC socket reference entry - * @tsk: pointer to socket associated with reference entry - * @ref: reference value for socket (combines instance & array index info) - */ -struct reference { - struct tipc_sock *tsk; - u32 ref; -}; - -/** - * struct tipc_ref_table - table of TIPC socket reference entries - * @entries: pointer to array of reference entries - * @capacity: array index of first unusable entry - * @init_point: array index of first uninitialized entry - * @first_free: array index of first unused socket reference entry - * @last_free: array index of last unused socket reference entry - * @index_mask: bitmask for array index portion of reference values - * @start_mask: initial value for instance value portion of reference values - */ -struct ref_table { - struct reference *entries; - u32 capacity; - u32 init_point; - u32 first_free; - u32 last_free; - u32 index_mask; - u32 start_mask; -}; - -/* Socket reference table consists of 2**N entries. - * - * State Socket ptr Reference - * ----- ---------- --------- - * In use non-NULL XXXX|own index - * (XXXX changes each time entry is acquired) - * Free NULL YYYY|next free index - * (YYYY is one more than last used XXXX) - * Uninitialized NULL 0 - * - * Entry 0 is not used; this allows index 0 to denote the end of the free list. - * - * Note that a reference value of 0 does not necessarily indicate that an - * entry is uninitialized, since the last entry in the free list could also - * have a reference value of 0 (although this is unlikely). 
- */ - -static struct ref_table tipc_ref_table; - -static DEFINE_RWLOCK(ref_table_lock); - -/** - * tipc_ref_table_init - create reference table for sockets - */ -int tipc_sk_ref_table_init(u32 req_sz, u32 start) +static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid) { - struct reference *table; - u32 actual_sz; - - /* account for unused entry, then round up size to a power of 2 */ - - req_sz++; - for (actual_sz = 16; actual_sz < req_sz; actual_sz <<= 1) { - /* do nothing */ - }; - - /* allocate table & mark all entries as uninitialized */ - table = vzalloc(actual_sz * sizeof(struct reference)); - if (table == NULL) - return -ENOMEM; - - tipc_ref_table.entries = table; - tipc_ref_table.capacity = req_sz; - tipc_ref_table.init_point = 1; - tipc_ref_table.first_free = 0; - tipc_ref_table.last_free = 0; - tipc_ref_table.index_mask = actual_sz - 1; - tipc_ref_table.start_mask = start & ~tipc_ref_table.index_mask; + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_sock *tsk; - return 0; -} + rcu_read_lock(); + tsk = rhashtable_lookup(&tn->sk_rht, &portid); + if (tsk) + sock_hold(&tsk->sk); + rcu_read_unlock(); -/** - * tipc_ref_table_stop - destroy reference table for sockets - */ -void tipc_sk_ref_table_stop(void) -{ - if (!tipc_ref_table.entries) - return; - vfree(tipc_ref_table.entries); - tipc_ref_table.entries = NULL; + return tsk; } -/* tipc_ref_acquire - create reference to a socket - * - * Register an socket pointer in the reference table. - * Returns a unique reference value that is used from then on to retrieve the - * socket pointer, or to determine if the socket has been deregistered. - */ -u32 tipc_sk_ref_acquire(struct tipc_sock *tsk) +static int tipc_sk_insert(struct tipc_sock *tsk) { - u32 index; - u32 index_mask; - u32 next_plus_upper; - u32 ref = 0; - struct reference *entry; - - if (unlikely(!tsk)) { - pr_err("Attempt to acquire ref. to non-existent obj\n"); - return 0; - } - if (unlikely(!tipc_ref_table.entries)) { - pr_err("Ref. table not found in acquisition attempt\n"); - return 0; - } - - /* Take a free entry, if available; otherwise initialize a new one */ - write_lock_bh(&ref_table_lock); - index = tipc_ref_table.first_free; - entry = &tipc_ref_table.entries[index]; - - if (likely(index)) { - index = tipc_ref_table.first_free; - entry = &tipc_ref_table.entries[index]; - index_mask = tipc_ref_table.index_mask; - next_plus_upper = entry->ref; - tipc_ref_table.first_free = next_plus_upper & index_mask; - ref = (next_plus_upper & ~index_mask) + index; - entry->tsk = tsk; - } else if (tipc_ref_table.init_point < tipc_ref_table.capacity) { - index = tipc_ref_table.init_point++; - entry = &tipc_ref_table.entries[index]; - ref = tipc_ref_table.start_mask + index; + struct sock *sk = &tsk->sk; + struct net *net = sock_net(sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); + u32 remaining = (TIPC_MAX_PORT - TIPC_MIN_PORT) + 1; + u32 portid = prandom_u32() % remaining + TIPC_MIN_PORT; + + while (remaining--) { + portid++; + if ((portid < TIPC_MIN_PORT) || (portid > TIPC_MAX_PORT)) + portid = TIPC_MIN_PORT; + tsk->portid = portid; + sock_hold(&tsk->sk); + if (rhashtable_lookup_insert(&tn->sk_rht, &tsk->node)) + return 0; + sock_put(&tsk->sk); } - if (ref) { - entry->ref = ref; - entry->tsk = tsk; - } - write_unlock_bh(&ref_table_lock); - return ref; + return -1; } -/* tipc_sk_ref_discard - invalidate reference to an socket - * - * Disallow future references to an socket and free up the entry for re-use. 
- */ -void tipc_sk_ref_discard(u32 ref) +static void tipc_sk_remove(struct tipc_sock *tsk) { - struct reference *entry; - u32 index; - u32 index_mask; - - if (unlikely(!tipc_ref_table.entries)) { - pr_err("Ref. table not found during discard attempt\n"); - return; - } - - index_mask = tipc_ref_table.index_mask; - index = ref & index_mask; - entry = &tipc_ref_table.entries[index]; - - write_lock_bh(&ref_table_lock); + struct sock *sk = &tsk->sk; + struct tipc_net *tn = net_generic(sock_net(sk), tipc_net_id); - if (unlikely(!entry->tsk)) { - pr_err("Attempt to discard ref. to non-existent socket\n"); - goto exit; + if (rhashtable_remove(&tn->sk_rht, &tsk->node)) { + WARN_ON(atomic_read(&sk->sk_refcnt) == 1); + __sock_put(sk); } - if (unlikely(entry->ref != ref)) { - pr_err("Attempt to discard non-existent reference\n"); - goto exit; - } - - /* Mark entry as unused; increment instance part of entry's - * reference to invalidate any subsequent references - */ - - entry->tsk = NULL; - entry->ref = (ref & ~index_mask) + (index_mask + 1); - - /* Append entry to free entry list */ - if (unlikely(tipc_ref_table.first_free == 0)) - tipc_ref_table.first_free = index; - else - tipc_ref_table.entries[tipc_ref_table.last_free].ref |= index; - tipc_ref_table.last_free = index; -exit: - write_unlock_bh(&ref_table_lock); } -/* tipc_sk_get - find referenced socket and return pointer to it - */ -struct tipc_sock *tipc_sk_get(u32 ref) +int tipc_sk_rht_init(struct net *net) { - struct reference *entry; - struct tipc_sock *tsk; + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct rhashtable_params rht_params = { + .nelem_hint = 192, + .head_offset = offsetof(struct tipc_sock, node), + .key_offset = offsetof(struct tipc_sock, portid), + .key_len = sizeof(u32), /* portid */ + .hashfn = jhash, + .max_shift = 20, /* 1M */ + .min_shift = 8, /* 256 */ + .grow_decision = rht_grow_above_75, + .shrink_decision = rht_shrink_below_30, + }; - if (unlikely(!tipc_ref_table.entries)) - return NULL; - read_lock_bh(&ref_table_lock); - entry = &tipc_ref_table.entries[ref & tipc_ref_table.index_mask]; - tsk = entry->tsk; - if (likely(tsk && (entry->ref == ref))) - sock_hold(&tsk->sk); - else - tsk = NULL; - read_unlock_bh(&ref_table_lock); - return tsk; + return rhashtable_init(&tn->sk_rht, &rht_params); } -/* tipc_sk_get_next - lock & return next socket after referenced one -*/ -struct tipc_sock *tipc_sk_get_next(u32 *ref) +void tipc_sk_rht_destroy(struct net *net) { - struct reference *entry; - struct tipc_sock *tsk = NULL; - uint index = *ref & tipc_ref_table.index_mask; + struct tipc_net *tn = net_generic(net, tipc_net_id); - read_lock_bh(&ref_table_lock); - while (++index < tipc_ref_table.capacity) { - entry = &tipc_ref_table.entries[index]; - if (!entry->tsk) - continue; - tsk = entry->tsk; - sock_hold(&tsk->sk); - *ref = entry->ref; - break; - } - read_unlock_bh(&ref_table_lock); - return tsk; -} + /* Wait for socket readers to complete */ + synchronize_net(); -static void tipc_sk_put(struct tipc_sock *tsk) -{ - sock_put(&tsk->sk); + rhashtable_destroy(&tn->sk_rht); } /** @@ -2639,8 +2510,9 @@ static int tipc_getsockopt(struct socket *sock, int lvl, int opt, return put_user(sizeof(value), ol); } -static int tipc_ioctl(struct socket *sk, unsigned int cmd, unsigned long arg) +static int tipc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { + struct sock *sk = sock->sk; struct tipc_sioc_ln_req lnr; void __user *argp = (void __user *)arg; @@ -2648,7 +2520,8 @@ static int tipc_ioctl(struct socket 
*sk, unsigned int cmd, unsigned long arg) case SIOCGETLINKNAME: if (copy_from_user(&lnr, argp, sizeof(lnr))) return -EFAULT; - if (!tipc_node_get_linkname(lnr.bearer_id & 0xffff, lnr.peer, + if (!tipc_node_get_linkname(sock_net(sk), + lnr.bearer_id & 0xffff, lnr.peer, lnr.linkname, TIPC_MAX_LINK_NAME)) { if (copy_to_user(argp, &lnr, sizeof(lnr))) return -EFAULT; @@ -2820,18 +2693,20 @@ static int __tipc_nl_add_sk(struct sk_buff *skb, struct netlink_callback *cb, int err; void *hdr; struct nlattr *attrs; + struct net *net = sock_net(skb->sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, - &tipc_genl_v2_family, NLM_F_MULTI, TIPC_NL_SOCK_GET); + &tipc_genl_family, NLM_F_MULTI, TIPC_NL_SOCK_GET); if (!hdr) goto msg_cancel; attrs = nla_nest_start(skb, TIPC_NLA_SOCK); if (!attrs) goto genlmsg_cancel; - if (nla_put_u32(skb, TIPC_NLA_SOCK_REF, tsk->ref)) + if (nla_put_u32(skb, TIPC_NLA_SOCK_REF, tsk->portid)) goto attr_msg_cancel; - if (nla_put_u32(skb, TIPC_NLA_SOCK_ADDR, tipc_own_addr)) + if (nla_put_u32(skb, TIPC_NLA_SOCK_ADDR, tn->own_addr)) goto attr_msg_cancel; if (tsk->connected) { @@ -2859,22 +2734,37 @@ int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb) { int err; struct tipc_sock *tsk; - u32 prev_ref = cb->args[0]; - u32 ref = prev_ref; - - tsk = tipc_sk_get_next(&ref); - for (; tsk; tsk = tipc_sk_get_next(&ref)) { - lock_sock(&tsk->sk); - err = __tipc_nl_add_sk(skb, cb, tsk); - release_sock(&tsk->sk); - tipc_sk_put(tsk); - if (err) - break; + const struct bucket_table *tbl; + struct rhash_head *pos; + struct net *net = sock_net(skb->sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); + u32 tbl_id = cb->args[0]; + u32 prev_portid = cb->args[1]; - prev_ref = ref; - } + rcu_read_lock(); + tbl = rht_dereference_rcu((&tn->sk_rht)->tbl, &tn->sk_rht); + for (; tbl_id < tbl->size; tbl_id++) { + rht_for_each_entry_rcu(tsk, pos, tbl, tbl_id, node) { + spin_lock_bh(&tsk->sk.sk_lock.slock); + if (prev_portid && prev_portid != tsk->portid) { + spin_unlock_bh(&tsk->sk.sk_lock.slock); + continue; + } - cb->args[0] = prev_ref; + err = __tipc_nl_add_sk(skb, cb, tsk); + if (err) { + prev_portid = tsk->portid; + spin_unlock_bh(&tsk->sk.sk_lock.slock); + goto out; + } + prev_portid = 0; + spin_unlock_bh(&tsk->sk.sk_lock.slock); + } + } +out: + rcu_read_unlock(); + cb->args[0] = tbl_id; + cb->args[1] = prev_portid; return skb->len; } @@ -2888,7 +2778,7 @@ static int __tipc_nl_add_sk_publ(struct sk_buff *skb, struct nlattr *attrs; hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, - &tipc_genl_v2_family, NLM_F_MULTI, TIPC_NL_PUBL_GET); + &tipc_genl_family, NLM_F_MULTI, TIPC_NL_PUBL_GET); if (!hdr) goto msg_cancel; @@ -2962,12 +2852,13 @@ static int __tipc_nl_list_sk_publ(struct sk_buff *skb, int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb) { int err; - u32 tsk_ref = cb->args[0]; + u32 tsk_portid = cb->args[0]; u32 last_publ = cb->args[1]; u32 done = cb->args[2]; + struct net *net = sock_net(skb->sk); struct tipc_sock *tsk; - if (!tsk_ref) { + if (!tsk_portid) { struct nlattr **attrs; struct nlattr *sock[TIPC_NLA_SOCK_MAX + 1]; @@ -2984,13 +2875,13 @@ int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb) if (!sock[TIPC_NLA_SOCK_REF]) return -EINVAL; - tsk_ref = nla_get_u32(sock[TIPC_NLA_SOCK_REF]); + tsk_portid = nla_get_u32(sock[TIPC_NLA_SOCK_REF]); } if (done) return 0; - tsk = tipc_sk_get(tsk_ref); + tsk = tipc_sk_lookup(net, tsk_portid); 
if (!tsk) return -EINVAL; @@ -2999,9 +2890,9 @@ int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb) if (!err) done = 1; release_sock(&tsk->sk); - tipc_sk_put(tsk); + sock_put(&tsk->sk); - cb->args[0] = tsk_ref; + cb->args[0] = tsk_portid; cb->args[1] = last_publ; cb->args[2] = done; diff --git a/net/tipc/socket.h b/net/tipc/socket.h index d34089387006..238f1b7bd9bd 100644 --- a/net/tipc/socket.h +++ b/net/tipc/socket.h @@ -1,6 +1,6 @@ /* net/tipc/socket.h: Include file for TIPC socket code * - * Copyright (c) 2014, Ericsson AB + * Copyright (c) 2014-2015, Ericsson AB * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -42,12 +42,18 @@ #define TIPC_FLOWCTRL_WIN (TIPC_CONNACK_INTV * 2) #define TIPC_CONN_OVERLOAD_LIMIT ((TIPC_FLOWCTRL_WIN * 2 + 1) * \ SKB_TRUESIZE(TIPC_MAX_USER_MSG_SIZE)) -int tipc_sk_rcv(struct sk_buff *buf); -struct sk_buff *tipc_sk_socks_show(void); -void tipc_sk_mcast_rcv(struct sk_buff *buf); -void tipc_sk_reinit(void); -int tipc_sk_ref_table_init(u32 requested_size, u32 start); -void tipc_sk_ref_table_stop(void); +int tipc_socket_init(void); +void tipc_socket_stop(void); +int tipc_sock_create_local(struct net *net, int type, struct socket **res); +void tipc_sock_release_local(struct socket *sock); +int tipc_sock_accept_local(struct socket *sock, struct socket **newsock, + int flags); +int tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq); +void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq, + struct sk_buff_head *inputq); +void tipc_sk_reinit(struct net *net); +int tipc_sk_rht_init(struct net *net); +void tipc_sk_rht_destroy(struct net *net); int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb); int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb); diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 0344206b984f..72c339e432aa 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -50,33 +50,6 @@ struct tipc_subscriber { struct list_head subscription_list; }; -static void subscr_conn_msg_event(int conid, struct sockaddr_tipc *addr, - void *usr_data, void *buf, size_t len); -static void *subscr_named_msg_event(int conid); -static void subscr_conn_shutdown_event(int conid, void *usr_data); - -static atomic_t subscription_count = ATOMIC_INIT(0); - -static struct sockaddr_tipc topsrv_addr __read_mostly = { - .family = AF_TIPC, - .addrtype = TIPC_ADDR_NAMESEQ, - .addr.nameseq.type = TIPC_TOP_SRV, - .addr.nameseq.lower = TIPC_TOP_SRV, - .addr.nameseq.upper = TIPC_TOP_SRV, - .scope = TIPC_NODE_SCOPE -}; - -static struct tipc_server topsrv __read_mostly = { - .saddr = &topsrv_addr, - .imp = TIPC_CRITICAL_IMPORTANCE, - .type = SOCK_SEQPACKET, - .max_rcvbuf_size = sizeof(struct tipc_subscr), - .name = "topology_server", - .tipc_conn_recvmsg = subscr_conn_msg_event, - .tipc_conn_new = subscr_named_msg_event, - .tipc_conn_shutdown = subscr_conn_shutdown_event, -}; - /** * htohl - convert value to endianness used by destination * @in: value to convert @@ -93,6 +66,7 @@ static void subscr_send_event(struct tipc_subscription *sub, u32 found_lower, u32 found_upper, u32 event, u32 port_ref, u32 node) { + struct tipc_net *tn = net_generic(sub->net, tipc_net_id); struct tipc_subscriber *subscriber = sub->subscriber; struct kvec msg_sect; @@ -103,8 +77,8 @@ static void subscr_send_event(struct tipc_subscription *sub, u32 found_lower, sub->evt.found_upper = htohl(found_upper, sub->swap); sub->evt.port.ref = htohl(port_ref, sub->swap); sub->evt.port.node = 
htohl(node, sub->swap); - tipc_conn_sendmsg(&topsrv, subscriber->conid, NULL, msg_sect.iov_base, - msg_sect.iov_len); + tipc_conn_sendmsg(tn->topsrv, subscriber->conid, NULL, + msg_sect.iov_base, msg_sect.iov_len); } /** @@ -141,9 +115,11 @@ void tipc_subscr_report_overlap(struct tipc_subscription *sub, u32 found_lower, subscr_send_event(sub, found_lower, found_upper, event, port_ref, node); } -static void subscr_timeout(struct tipc_subscription *sub) +static void subscr_timeout(unsigned long data) { + struct tipc_subscription *sub = (struct tipc_subscription *)data; struct tipc_subscriber *subscriber = sub->subscriber; + struct tipc_net *tn = net_generic(sub->net, tipc_net_id); /* The spin lock per subscriber is used to protect its members */ spin_lock_bh(&subscriber->lock); @@ -167,9 +143,8 @@ static void subscr_timeout(struct tipc_subscription *sub) TIPC_SUBSCR_TIMEOUT, 0, 0); /* Now destroy subscription */ - k_term_timer(&sub->timer); kfree(sub); - atomic_dec(&subscription_count); + atomic_dec(&tn->subscription_count); } /** @@ -179,10 +154,12 @@ static void subscr_timeout(struct tipc_subscription *sub) */ static void subscr_del(struct tipc_subscription *sub) { + struct tipc_net *tn = net_generic(sub->net, tipc_net_id); + tipc_nametbl_unsubscribe(sub); list_del(&sub->subscription_list); kfree(sub); - atomic_dec(&subscription_count); + atomic_dec(&tn->subscription_count); } /** @@ -190,9 +167,12 @@ static void subscr_del(struct tipc_subscription *sub) * * Note: Must call it in process context since it might sleep. */ -static void subscr_terminate(struct tipc_subscriber *subscriber) +static void subscr_terminate(struct tipc_subscription *sub) { - tipc_conn_terminate(&topsrv, subscriber->conid); + struct tipc_subscriber *subscriber = sub->subscriber; + struct tipc_net *tn = net_generic(sub->net, tipc_net_id); + + tipc_conn_terminate(tn->topsrv, subscriber->conid); } static void subscr_release(struct tipc_subscriber *subscriber) @@ -207,8 +187,7 @@ static void subscr_release(struct tipc_subscriber *subscriber) subscription_list) { if (sub->timeout != TIPC_WAIT_FOREVER) { spin_unlock_bh(&subscriber->lock); - k_cancel_timer(&sub->timer); - k_term_timer(&sub->timer); + del_timer_sync(&sub->timer); spin_lock_bh(&subscriber->lock); } subscr_del(sub); @@ -250,8 +229,7 @@ static void subscr_cancel(struct tipc_subscr *s, if (sub->timeout != TIPC_WAIT_FOREVER) { sub->timeout = TIPC_WAIT_FOREVER; spin_unlock_bh(&subscriber->lock); - k_cancel_timer(&sub->timer); - k_term_timer(&sub->timer); + del_timer_sync(&sub->timer); spin_lock_bh(&subscriber->lock); } subscr_del(sub); @@ -262,9 +240,11 @@ static void subscr_cancel(struct tipc_subscr *s, * * Called with subscriber lock held. 
*/ -static int subscr_subscribe(struct tipc_subscr *s, +static int subscr_subscribe(struct net *net, struct tipc_subscr *s, struct tipc_subscriber *subscriber, - struct tipc_subscription **sub_p) { + struct tipc_subscription **sub_p) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_subscription *sub; int swap; @@ -279,7 +259,7 @@ static int subscr_subscribe(struct tipc_subscr *s, } /* Refuse subscription if global limit exceeded */ - if (atomic_read(&subscription_count) >= TIPC_MAX_SUBSCRIPTIONS) { + if (atomic_read(&tn->subscription_count) >= TIPC_MAX_SUBSCRIPTIONS) { pr_warn("Subscription rejected, limit reached (%u)\n", TIPC_MAX_SUBSCRIPTIONS); return -EINVAL; @@ -293,10 +273,11 @@ static int subscr_subscribe(struct tipc_subscr *s, } /* Initialize subscription object */ + sub->net = net; sub->seq.type = htohl(s->seq.type, swap); sub->seq.lower = htohl(s->seq.lower, swap); sub->seq.upper = htohl(s->seq.upper, swap); - sub->timeout = htohl(s->timeout, swap); + sub->timeout = msecs_to_jiffies(htohl(s->timeout, swap)); sub->filter = htohl(s->filter, swap); if ((!(sub->filter & TIPC_SUB_PORTS) == !(sub->filter & TIPC_SUB_SERVICE)) || @@ -309,11 +290,10 @@ static int subscr_subscribe(struct tipc_subscr *s, sub->subscriber = subscriber; sub->swap = swap; memcpy(&sub->evt.s, s, sizeof(struct tipc_subscr)); - atomic_inc(&subscription_count); + atomic_inc(&tn->subscription_count); if (sub->timeout != TIPC_WAIT_FOREVER) { - k_init_timer(&sub->timer, - (Handler)subscr_timeout, (unsigned long)sub); - k_start_timer(&sub->timer, sub->timeout); + setup_timer(&sub->timer, subscr_timeout, (unsigned long)sub); + mod_timer(&sub->timer, jiffies + sub->timeout); } *sub_p = sub; return 0; @@ -326,16 +306,18 @@ static void subscr_conn_shutdown_event(int conid, void *usr_data) } /* Handle one request to create a new subscription for the subscriber */ -static void subscr_conn_msg_event(int conid, struct sockaddr_tipc *addr, - void *usr_data, void *buf, size_t len) +static void subscr_conn_msg_event(struct net *net, int conid, + struct sockaddr_tipc *addr, void *usr_data, + void *buf, size_t len) { struct tipc_subscriber *subscriber = usr_data; struct tipc_subscription *sub = NULL; spin_lock_bh(&subscriber->lock); - if (subscr_subscribe((struct tipc_subscr *)buf, subscriber, &sub) < 0) { + if (subscr_subscribe(net, (struct tipc_subscr *)buf, subscriber, + &sub) < 0) { spin_unlock_bh(&subscriber->lock); - subscr_terminate(subscriber); + subscr_terminate(sub); return; } if (sub) @@ -343,7 +325,6 @@ static void subscr_conn_msg_event(int conid, struct sockaddr_tipc *addr, spin_unlock_bh(&subscriber->lock); } - /* Handle one request to establish a new subscriber */ static void *subscr_named_msg_event(int conid) { @@ -362,12 +343,50 @@ static void *subscr_named_msg_event(int conid) return (void *)subscriber; } -int tipc_subscr_start(void) +int tipc_subscr_start(struct net *net) { - return tipc_server_start(&topsrv); + struct tipc_net *tn = net_generic(net, tipc_net_id); + const char name[] = "topology_server"; + struct tipc_server *topsrv; + struct sockaddr_tipc *saddr; + + saddr = kzalloc(sizeof(*saddr), GFP_ATOMIC); + if (!saddr) + return -ENOMEM; + saddr->family = AF_TIPC; + saddr->addrtype = TIPC_ADDR_NAMESEQ; + saddr->addr.nameseq.type = TIPC_TOP_SRV; + saddr->addr.nameseq.lower = TIPC_TOP_SRV; + saddr->addr.nameseq.upper = TIPC_TOP_SRV; + saddr->scope = TIPC_NODE_SCOPE; + + topsrv = kzalloc(sizeof(*topsrv), GFP_ATOMIC); + if (!topsrv) { + kfree(saddr); + return -ENOMEM; + } + topsrv->net = 
net; + topsrv->saddr = saddr; + topsrv->imp = TIPC_CRITICAL_IMPORTANCE; + topsrv->type = SOCK_SEQPACKET; + topsrv->max_rcvbuf_size = sizeof(struct tipc_subscr); + topsrv->tipc_conn_recvmsg = subscr_conn_msg_event; + topsrv->tipc_conn_new = subscr_named_msg_event; + topsrv->tipc_conn_shutdown = subscr_conn_shutdown_event; + + strncpy(topsrv->name, name, strlen(name) + 1); + tn->topsrv = topsrv; + atomic_set(&tn->subscription_count, 0); + + return tipc_server_start(topsrv); } -void tipc_subscr_stop(void) +void tipc_subscr_stop(struct net *net) { - tipc_server_stop(&topsrv); + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_server *topsrv = tn->topsrv; + + tipc_server_stop(topsrv); + kfree(topsrv->saddr); + kfree(topsrv); } diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h index 393e417bee3f..33488bd9fe3c 100644 --- a/net/tipc/subscr.h +++ b/net/tipc/subscr.h @@ -39,6 +39,9 @@ #include "server.h" +#define TIPC_MAX_SUBSCRIPTIONS 65535 +#define TIPC_MAX_PUBLICATIONS 65535 + struct tipc_subscription; struct tipc_subscriber; @@ -46,6 +49,7 @@ struct tipc_subscriber; * struct tipc_subscription - TIPC network topology subscription object * @subscriber: pointer to its subscriber * @seq: name sequence associated with subscription + * @net: point to network namespace * @timeout: duration of subscription (in ms) * @filter: event filtering to be done for subscription * @timer: timer governing subscription duration (optional) @@ -58,7 +62,8 @@ struct tipc_subscriber; struct tipc_subscription { struct tipc_subscriber *subscriber; struct tipc_name_seq seq; - u32 timeout; + struct net *net; + unsigned long timeout; u32 filter; struct timer_list timer; struct list_head nameseq_list; @@ -69,13 +74,10 @@ struct tipc_subscription { int tipc_subscr_overlap(struct tipc_subscription *sub, u32 found_lower, u32 found_upper); - void tipc_subscr_report_overlap(struct tipc_subscription *sub, u32 found_lower, u32 found_upper, u32 event, u32 port_ref, u32 node, int must); - -int tipc_subscr_start(void); - -void tipc_subscr_stop(void); +int tipc_subscr_start(struct net *net); +void tipc_subscr_stop(struct net *net); #endif diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 8e1b10274b02..526b6edab018 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1445,7 +1445,6 @@ static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock, static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, struct msghdr *msg, size_t len) { - struct sock_iocb *siocb = kiocb_to_siocb(kiocb); struct sock *sk = sock->sk; struct net *net = sock_net(sk); struct unix_sock *u = unix_sk(sk); @@ -1456,14 +1455,12 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, unsigned int hash; struct sk_buff *skb; long timeo; - struct scm_cookie tmp_scm; + struct scm_cookie scm; int max_level; int data_len = 0; - if (NULL == siocb->scm) - siocb->scm = &tmp_scm; wait_for_unix_gc(); - err = scm_send(sock, msg, siocb->scm, false); + err = scm_send(sock, msg, &scm, false); if (err < 0) return err; @@ -1507,11 +1504,11 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, if (skb == NULL) goto out; - err = unix_scm_to_skb(siocb->scm, skb, true); + err = unix_scm_to_skb(&scm, skb, true); if (err < 0) goto out_free; max_level = err + 1; - unix_get_secdata(siocb->scm, skb); + unix_get_secdata(&scm, skb); skb_put(skb, len - data_len); skb->data_len = data_len; @@ -1606,7 +1603,7 @@ restart: unix_state_unlock(other); other->sk_data_ready(other); sock_put(other); - 
scm_destroy(siocb->scm); + scm_destroy(&scm); return len; out_unlock: @@ -1616,7 +1613,7 @@ out_free: out: if (other) sock_put(other); - scm_destroy(siocb->scm); + scm_destroy(&scm); return err; } @@ -1628,21 +1625,18 @@ out: static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, struct msghdr *msg, size_t len) { - struct sock_iocb *siocb = kiocb_to_siocb(kiocb); struct sock *sk = sock->sk; struct sock *other = NULL; int err, size; struct sk_buff *skb; int sent = 0; - struct scm_cookie tmp_scm; + struct scm_cookie scm; bool fds_sent = false; int max_level; int data_len; - if (NULL == siocb->scm) - siocb->scm = &tmp_scm; wait_for_unix_gc(); - err = scm_send(sock, msg, siocb->scm, false); + err = scm_send(sock, msg, &scm, false); if (err < 0) return err; @@ -1683,7 +1677,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, goto out_err; /* Only send the fds in the first buffer */ - err = unix_scm_to_skb(siocb->scm, skb, !fds_sent); + err = unix_scm_to_skb(&scm, skb, !fds_sent); if (err < 0) { kfree_skb(skb); goto out_err; @@ -1715,8 +1709,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, sent += size; } - scm_destroy(siocb->scm); - siocb->scm = NULL; + scm_destroy(&scm); return sent; @@ -1728,8 +1721,7 @@ pipe_err: send_sig(SIGPIPE, current, 0); err = -EPIPE; out_err: - scm_destroy(siocb->scm); - siocb->scm = NULL; + scm_destroy(&scm); return sent ? : err; } @@ -1778,8 +1770,7 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t size, int flags) { - struct sock_iocb *siocb = kiocb_to_siocb(iocb); - struct scm_cookie tmp_scm; + struct scm_cookie scm; struct sock *sk = sock->sk; struct unix_sock *u = unix_sk(sk); int noblock = flags & MSG_DONTWAIT; @@ -1831,16 +1822,14 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, if (sock_flag(sk, SOCK_RCVTSTAMP)) __sock_recv_timestamp(msg, sk, skb); - if (!siocb->scm) { - siocb->scm = &tmp_scm; - memset(&tmp_scm, 0, sizeof(tmp_scm)); - } - scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); - unix_set_secdata(siocb->scm, skb); + memset(&scm, 0, sizeof(scm)); + + scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); + unix_set_secdata(&scm, skb); if (!(flags & MSG_PEEK)) { if (UNIXCB(skb).fp) - unix_detach_fds(siocb->scm, skb); + unix_detach_fds(&scm, skb); sk_peek_offset_bwd(sk, skb->len); } else { @@ -1860,11 +1849,11 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, sk_peek_offset_fwd(sk, size); if (UNIXCB(skb).fp) - siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp); + scm.fp = scm_fp_dup(UNIXCB(skb).fp); } err = (flags & MSG_TRUNC) ? 
skb->len - skip : size; - scm_recv(sock, msg, siocb->scm, flags); + scm_recv(sock, msg, &scm, flags); out_free: skb_free_datagram(sk, skb); @@ -1915,8 +1904,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t size, int flags) { - struct sock_iocb *siocb = kiocb_to_siocb(iocb); - struct scm_cookie tmp_scm; + struct scm_cookie scm; struct sock *sk = sock->sk; struct unix_sock *u = unix_sk(sk); DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name); @@ -1943,10 +1931,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, * while sleeps in memcpy_tomsg */ - if (!siocb->scm) { - siocb->scm = &tmp_scm; - memset(&tmp_scm, 0, sizeof(tmp_scm)); - } + memset(&scm, 0, sizeof(scm)); err = mutex_lock_interruptible(&u->readlock); if (unlikely(err)) { @@ -2012,13 +1997,13 @@ again: if (check_creds) { /* Never glue messages from different writers */ - if ((UNIXCB(skb).pid != siocb->scm->pid) || - !uid_eq(UNIXCB(skb).uid, siocb->scm->creds.uid) || - !gid_eq(UNIXCB(skb).gid, siocb->scm->creds.gid)) + if ((UNIXCB(skb).pid != scm.pid) || + !uid_eq(UNIXCB(skb).uid, scm.creds.uid) || + !gid_eq(UNIXCB(skb).gid, scm.creds.gid)) break; } else if (test_bit(SOCK_PASSCRED, &sock->flags)) { /* Copy credentials */ - scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); + scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); check_creds = 1; } @@ -2045,7 +2030,7 @@ again: sk_peek_offset_bwd(sk, chunk); if (UNIXCB(skb).fp) - unix_detach_fds(siocb->scm, skb); + unix_detach_fds(&scm, skb); if (unix_skb_len(skb)) break; @@ -2053,13 +2038,13 @@ again: skb_unlink(skb, &sk->sk_receive_queue); consume_skb(skb); - if (siocb->scm->fp) + if (scm.fp) break; } else { /* It is questionable, see note in unix_dgram_recvmsg. */ if (UNIXCB(skb).fp) - siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp); + scm.fp = scm_fp_dup(UNIXCB(skb).fp); sk_peek_offset_fwd(sk, chunk); @@ -2068,7 +2053,7 @@ again: } while (size); mutex_unlock(&u->readlock); - scm_recv(sock, msg, siocb->scm, flags); + scm_recv(sock, msg, &scm, flags); out: return copied ? : err; } diff --git a/net/unix/diag.c b/net/unix/diag.c index 86fa0f3b2caf..ef542fbca9fe 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -155,7 +155,8 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r if (nla_put_u8(skb, UNIX_DIAG_SHUTDOWN, sk->sk_shutdown)) goto out_nlmsg_trim; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; out_nlmsg_trim: nlmsg_cancel(skb, nlh); diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index 02d2e5229240..7f3255084a6c 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -1850,8 +1850,7 @@ static ssize_t vmci_transport_stream_enqueue( struct msghdr *msg, size_t len) { - /* XXX: stripping const */ - return vmci_qpair_enquev(vmci_trans(vsk)->qpair, (struct iovec *)msg->msg_iter.iov, len, 0); + return vmci_qpair_enquev(vmci_trans(vsk)->qpair, msg, len, 0); } static s64 vmci_transport_stream_has_data(struct vsock_sock *vsk) diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig index 22ba971741e5..29c8675f9a11 100644 --- a/net/wireless/Kconfig +++ b/net/wireless/Kconfig @@ -175,7 +175,7 @@ config CFG80211_INTERNAL_REGDB Most distributions have a CRDA package. So if unsure, say N. 
config CFG80211_WEXT - bool + bool "cfg80211 wireless extensions compatibility" depends on CFG80211 select WEXT_CORE help diff --git a/net/wireless/core.c b/net/wireless/core.c index 53dda7728f86..3af0ecf1cc16 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -320,6 +320,20 @@ static void cfg80211_destroy_iface_wk(struct work_struct *work) rtnl_unlock(); } +static void cfg80211_sched_scan_stop_wk(struct work_struct *work) +{ + struct cfg80211_registered_device *rdev; + + rdev = container_of(work, struct cfg80211_registered_device, + sched_scan_stop_wk); + + rtnl_lock(); + + __cfg80211_stop_sched_scan(rdev, false); + + rtnl_unlock(); +} + /* exported functions */ struct wiphy *wiphy_new_nm(const struct cfg80211_ops *ops, int sizeof_priv, @@ -406,6 +420,7 @@ use_default_name: INIT_LIST_HEAD(&rdev->destroy_list); spin_lock_init(&rdev->destroy_list_lock); INIT_WORK(&rdev->destroy_work, cfg80211_destroy_iface_wk); + INIT_WORK(&rdev->sched_scan_stop_wk, cfg80211_sched_scan_stop_wk); #ifdef CONFIG_CFG80211_DEFAULT_PS rdev->wiphy.flags |= WIPHY_FLAG_PS_ON_BY_DEFAULT; @@ -560,6 +575,14 @@ int wiphy_register(struct wiphy *wiphy) BIT(NL80211_IFTYPE_MONITOR))) wiphy->regulatory_flags |= REGULATORY_IGNORE_STALE_KICKOFF; + if (WARN_ON((wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED) && + (wiphy->regulatory_flags & + (REGULATORY_CUSTOM_REG | + REGULATORY_STRICT_REG | + REGULATORY_COUNTRY_IE_FOLLOW_POWER | + REGULATORY_COUNTRY_IE_IGNORE)))) + return -EINVAL; + if (WARN_ON(wiphy->coalesce && (!wiphy->coalesce->n_rules || !wiphy->coalesce->n_patterns) && @@ -778,6 +801,7 @@ void wiphy_unregister(struct wiphy *wiphy) flush_work(&rdev->event_work); cancel_delayed_work_sync(&rdev->dfs_update_channels_wk); flush_work(&rdev->destroy_work); + flush_work(&rdev->sched_scan_stop_wk); #ifdef CONFIG_PM if (rdev->wiphy.wowlan_config && rdev->ops->set_wakeup) @@ -858,6 +882,7 @@ void __cfg80211_leave(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev) { struct net_device *dev = wdev->netdev; + struct cfg80211_sched_scan_request *sched_scan_req; ASSERT_RTNL(); ASSERT_WDEV_LOCK(wdev); @@ -868,7 +893,8 @@ void __cfg80211_leave(struct cfg80211_registered_device *rdev, break; case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_STATION: - if (rdev->sched_scan_req && dev == rdev->sched_scan_req->dev) + sched_scan_req = rtnl_dereference(rdev->sched_scan_req); + if (sched_scan_req && dev == sched_scan_req->dev) __cfg80211_stop_sched_scan(rdev, false); #ifdef CONFIG_CFG80211_WEXT @@ -943,6 +969,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev; + struct cfg80211_sched_scan_request *sched_scan_req; if (!wdev) return NOTIFY_DONE; @@ -1007,8 +1034,9 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, ___cfg80211_scan_done(rdev, false); } - if (WARN_ON(rdev->sched_scan_req && - rdev->sched_scan_req->dev == wdev->netdev)) { + sched_scan_req = rtnl_dereference(rdev->sched_scan_req); + if (WARN_ON(sched_scan_req && + sched_scan_req->dev == wdev->netdev)) { __cfg80211_stop_sched_scan(rdev, false); } diff --git a/net/wireless/core.h b/net/wireless/core.h index faa5b1609aae..801cd49c5a0c 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -36,6 +36,13 @@ struct cfg80211_registered_device { * the country on the country IE changed. 
*/ char country_ie_alpha2[2]; + /* + * the driver requests the regulatory core to set this regulatory + * domain as the wiphy's. Only used for %REGULATORY_WIPHY_SELF_MANAGED + * devices using the regulatory_set_wiphy_regd() API + */ + const struct ieee80211_regdomain *requested_regd; + /* If a Country IE has been received this tells us the environment * which its telling us its in. This defaults to ENVIRON_ANY */ enum environment_cap env; @@ -63,7 +70,7 @@ struct cfg80211_registered_device { u32 bss_generation; struct cfg80211_scan_request *scan_req; /* protected by RTNL */ struct sk_buff *scan_msg; - struct cfg80211_sched_scan_request *sched_scan_req; + struct cfg80211_sched_scan_request __rcu *sched_scan_req; unsigned long suspend_at; struct work_struct scan_done_wk; struct work_struct sched_scan_results_wk; @@ -84,6 +91,8 @@ struct cfg80211_registered_device { struct list_head destroy_list; struct work_struct destroy_work; + struct work_struct sched_scan_stop_wk; + /* must be last because of the way we do wiphy_priv(), * and it should at least be aligned to NETDEV_ALIGN */ struct wiphy wiphy __aligned(NETDEV_ALIGN); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 7ca4b5133123..d78fd8b54515 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -59,13 +59,13 @@ enum nl80211_multicast_groups { }; static const struct genl_multicast_group nl80211_mcgrps[] = { - [NL80211_MCGRP_CONFIG] = { .name = "config", }, - [NL80211_MCGRP_SCAN] = { .name = "scan", }, - [NL80211_MCGRP_REGULATORY] = { .name = "regulatory", }, - [NL80211_MCGRP_MLME] = { .name = "mlme", }, - [NL80211_MCGRP_VENDOR] = { .name = "vendor", }, + [NL80211_MCGRP_CONFIG] = { .name = NL80211_MULTICAST_GROUP_CONFIG }, + [NL80211_MCGRP_SCAN] = { .name = NL80211_MULTICAST_GROUP_SCAN }, + [NL80211_MCGRP_REGULATORY] = { .name = NL80211_MULTICAST_GROUP_REG }, + [NL80211_MCGRP_MLME] = { .name = NL80211_MULTICAST_GROUP_MLME }, + [NL80211_MCGRP_VENDOR] = { .name = NL80211_MULTICAST_GROUP_VENDOR }, #ifdef CONFIG_NL80211_TESTMODE - [NL80211_MCGRP_TESTMODE] = { .name = "testmode", } + [NL80211_MCGRP_TESTMODE] = { .name = NL80211_MULTICAST_GROUP_TESTMODE } #endif }; @@ -396,6 +396,9 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_ADMITTED_TIME] = { .type = NLA_U16 }, [NL80211_ATTR_SMPS_MODE] = { .type = NLA_U8 }, [NL80211_ATTR_MAC_MASK] = { .len = ETH_ALEN }, + [NL80211_ATTR_WIPHY_SELF_MANAGED_REG] = { .type = NLA_FLAG }, + [NL80211_ATTR_NETNS_FD] = { .type = NLA_U32 }, + [NL80211_ATTR_SCHED_SCAN_DELAY] = { .type = NLA_U32 }, }; /* policy for the key attributes */ @@ -1087,6 +1090,11 @@ static int nl80211_send_wowlan(struct sk_buff *msg, return -ENOBUFS; } + if ((rdev->wiphy.wowlan->flags & WIPHY_WOWLAN_NET_DETECT) && + nla_put_u32(msg, NL80211_WOWLAN_TRIG_NET_DETECT, + rdev->wiphy.wowlan->max_nd_match_sets)) + return -ENOBUFS; + if (large && nl80211_send_wowlan_tcp_caps(rdev, msg)) return -ENOBUFS; @@ -1701,12 +1709,22 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, rdev->wiphy.max_num_csa_counters)) goto nla_put_failure; + if (rdev->wiphy.regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED && + nla_put_flag(msg, NL80211_ATTR_WIPHY_SELF_MANAGED_REG)) + goto nla_put_failure; + + if (nla_put(msg, NL80211_ATTR_EXT_FEATURES, + sizeof(rdev->wiphy.ext_features), + rdev->wiphy.ext_features)) + goto nla_put_failure; + /* done */ state->split_start = 0; break; } finish: - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: 
genlmsg_cancel(msg, hdr); @@ -2389,7 +2407,8 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag goto nla_put_failure; } - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: genlmsg_cancel(msg, hdr); @@ -2854,6 +2873,9 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info) if (!rdev->ops->get_key) return -EOPNOTSUPP; + if (!pairwise && mac_addr && !(rdev->wiphy.flags & WIPHY_FLAG_IBSS_RSN)) + return -ENOENT; + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; @@ -2873,10 +2895,6 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info) nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, mac_addr)) goto nla_put_failure; - if (pairwise && mac_addr && - !(rdev->wiphy.flags & WIPHY_FLAG_IBSS_RSN)) - return -ENOENT; - err = rdev_get_key(rdev, dev, key_idx, pairwise, mac_addr, &cookie, get_key_callback); @@ -3047,7 +3065,7 @@ static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info) wdev_lock(dev->ieee80211_ptr); err = nl80211_key_allowed(dev->ieee80211_ptr); - if (key.type == NL80211_KEYTYPE_PAIRWISE && mac_addr && + if (key.type == NL80211_KEYTYPE_GROUP && mac_addr && !(rdev->wiphy.flags & WIPHY_FLAG_IBSS_RSN)) err = -ENOENT; @@ -3563,6 +3581,7 @@ static bool nl80211_put_sta_rate(struct sk_buff *msg, struct rate_info *info, struct nlattr *rate; u32 bitrate; u16 bitrate_compat; + enum nl80211_attrs rate_flg; rate = nla_nest_start(msg, attr); if (!rate) @@ -3579,12 +3598,36 @@ static bool nl80211_put_sta_rate(struct sk_buff *msg, struct rate_info *info, nla_put_u16(msg, NL80211_RATE_INFO_BITRATE, bitrate_compat)) return false; + switch (info->bw) { + case RATE_INFO_BW_5: + rate_flg = NL80211_RATE_INFO_5_MHZ_WIDTH; + break; + case RATE_INFO_BW_10: + rate_flg = NL80211_RATE_INFO_10_MHZ_WIDTH; + break; + default: + WARN_ON(1); + /* fall through */ + case RATE_INFO_BW_20: + rate_flg = 0; + break; + case RATE_INFO_BW_40: + rate_flg = NL80211_RATE_INFO_40_MHZ_WIDTH; + break; + case RATE_INFO_BW_80: + rate_flg = NL80211_RATE_INFO_80_MHZ_WIDTH; + break; + case RATE_INFO_BW_160: + rate_flg = NL80211_RATE_INFO_160_MHZ_WIDTH; + break; + } + + if (rate_flg && nla_put_flag(msg, rate_flg)) + return false; + if (info->flags & RATE_INFO_FLAGS_MCS) { if (nla_put_u8(msg, NL80211_RATE_INFO_MCS, info->mcs)) return false; - if (info->flags & RATE_INFO_FLAGS_40_MHZ_WIDTH && - nla_put_flag(msg, NL80211_RATE_INFO_40_MHZ_WIDTH)) - return false; if (info->flags & RATE_INFO_FLAGS_SHORT_GI && nla_put_flag(msg, NL80211_RATE_INFO_SHORT_GI)) return false; @@ -3593,18 +3636,6 @@ static bool nl80211_put_sta_rate(struct sk_buff *msg, struct rate_info *info, return false; if (nla_put_u8(msg, NL80211_RATE_INFO_VHT_NSS, info->nss)) return false; - if (info->flags & RATE_INFO_FLAGS_40_MHZ_WIDTH && - nla_put_flag(msg, NL80211_RATE_INFO_40_MHZ_WIDTH)) - return false; - if (info->flags & RATE_INFO_FLAGS_80_MHZ_WIDTH && - nla_put_flag(msg, NL80211_RATE_INFO_80_MHZ_WIDTH)) - return false; - if (info->flags & RATE_INFO_FLAGS_80P80_MHZ_WIDTH && - nla_put_flag(msg, NL80211_RATE_INFO_80P80_MHZ_WIDTH)) - return false; - if (info->flags & RATE_INFO_FLAGS_160_MHZ_WIDTH && - nla_put_flag(msg, NL80211_RATE_INFO_160_MHZ_WIDTH)) - return false; if (info->flags & RATE_INFO_FLAGS_SHORT_GI && nla_put_flag(msg, NL80211_RATE_INFO_SHORT_GI)) return false; @@ -3640,8 +3671,8 @@ static bool nl80211_put_signal(struct sk_buff *msg, u8 mask, s8 *signal, return true; } -static int nl80211_send_station(struct sk_buff *msg, u32 
portid, u32 seq, - int flags, +static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid, + u32 seq, int flags, struct cfg80211_registered_device *rdev, struct net_device *dev, const u8 *mac_addr, struct station_info *sinfo) @@ -3649,7 +3680,7 @@ static int nl80211_send_station(struct sk_buff *msg, u32 portid, u32 seq, void *hdr; struct nlattr *sinfoattr, *bss_param; - hdr = nl80211hdr_put(msg, portid, seq, flags, NL80211_CMD_NEW_STATION); + hdr = nl80211hdr_put(msg, portid, seq, flags, cmd); if (!hdr) return -1; @@ -3661,115 +3692,77 @@ static int nl80211_send_station(struct sk_buff *msg, u32 portid, u32 seq, sinfoattr = nla_nest_start(msg, NL80211_ATTR_STA_INFO); if (!sinfoattr) goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_CONNECTED_TIME) && - nla_put_u32(msg, NL80211_STA_INFO_CONNECTED_TIME, - sinfo->connected_time)) - goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_INACTIVE_TIME) && - nla_put_u32(msg, NL80211_STA_INFO_INACTIVE_TIME, - sinfo->inactive_time)) - goto nla_put_failure; - if ((sinfo->filled & (STATION_INFO_RX_BYTES | - STATION_INFO_RX_BYTES64)) && + +#define PUT_SINFO(attr, memb, type) do { \ + if (sinfo->filled & BIT(NL80211_STA_INFO_ ## attr) && \ + nla_put_ ## type(msg, NL80211_STA_INFO_ ## attr, \ + sinfo->memb)) \ + goto nla_put_failure; \ + } while (0) + + PUT_SINFO(CONNECTED_TIME, connected_time, u32); + PUT_SINFO(INACTIVE_TIME, inactive_time, u32); + + if (sinfo->filled & (BIT(NL80211_STA_INFO_RX_BYTES) | + BIT(NL80211_STA_INFO_RX_BYTES64)) && nla_put_u32(msg, NL80211_STA_INFO_RX_BYTES, (u32)sinfo->rx_bytes)) goto nla_put_failure; - if ((sinfo->filled & (STATION_INFO_TX_BYTES | - STATION_INFO_TX_BYTES64)) && + + if (sinfo->filled & (BIT(NL80211_STA_INFO_TX_BYTES) | + BIT(NL80211_STA_INFO_TX_BYTES64)) && nla_put_u32(msg, NL80211_STA_INFO_TX_BYTES, (u32)sinfo->tx_bytes)) goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_RX_BYTES64) && - nla_put_u64(msg, NL80211_STA_INFO_RX_BYTES64, - sinfo->rx_bytes)) - goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_TX_BYTES64) && - nla_put_u64(msg, NL80211_STA_INFO_TX_BYTES64, - sinfo->tx_bytes)) - goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_LLID) && - nla_put_u16(msg, NL80211_STA_INFO_LLID, sinfo->llid)) - goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_PLID) && - nla_put_u16(msg, NL80211_STA_INFO_PLID, sinfo->plid)) - goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_PLINK_STATE) && - nla_put_u8(msg, NL80211_STA_INFO_PLINK_STATE, - sinfo->plink_state)) - goto nla_put_failure; + + PUT_SINFO(RX_BYTES64, rx_bytes, u64); + PUT_SINFO(TX_BYTES64, tx_bytes, u64); + PUT_SINFO(LLID, llid, u16); + PUT_SINFO(PLID, plid, u16); + PUT_SINFO(PLINK_STATE, plink_state, u8); + switch (rdev->wiphy.signal_type) { case CFG80211_SIGNAL_TYPE_MBM: - if ((sinfo->filled & STATION_INFO_SIGNAL) && - nla_put_u8(msg, NL80211_STA_INFO_SIGNAL, - sinfo->signal)) - goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_SIGNAL_AVG) && - nla_put_u8(msg, NL80211_STA_INFO_SIGNAL_AVG, - sinfo->signal_avg)) - goto nla_put_failure; + PUT_SINFO(SIGNAL, signal, u8); + PUT_SINFO(SIGNAL_AVG, signal_avg, u8); break; default: break; } - if (sinfo->filled & STATION_INFO_CHAIN_SIGNAL) { + if (sinfo->filled & BIT(NL80211_STA_INFO_CHAIN_SIGNAL)) { if (!nl80211_put_signal(msg, sinfo->chains, sinfo->chain_signal, NL80211_STA_INFO_CHAIN_SIGNAL)) goto nla_put_failure; } - if (sinfo->filled & STATION_INFO_CHAIN_SIGNAL_AVG) { + if (sinfo->filled & BIT(NL80211_STA_INFO_CHAIN_SIGNAL_AVG)) { if 
(!nl80211_put_signal(msg, sinfo->chains, sinfo->chain_signal_avg, NL80211_STA_INFO_CHAIN_SIGNAL_AVG)) goto nla_put_failure; } - if (sinfo->filled & STATION_INFO_TX_BITRATE) { + if (sinfo->filled & BIT(NL80211_STA_INFO_TX_BITRATE)) { if (!nl80211_put_sta_rate(msg, &sinfo->txrate, NL80211_STA_INFO_TX_BITRATE)) goto nla_put_failure; } - if (sinfo->filled & STATION_INFO_RX_BITRATE) { + if (sinfo->filled & BIT(NL80211_STA_INFO_RX_BITRATE)) { if (!nl80211_put_sta_rate(msg, &sinfo->rxrate, NL80211_STA_INFO_RX_BITRATE)) goto nla_put_failure; } - if ((sinfo->filled & STATION_INFO_RX_PACKETS) && - nla_put_u32(msg, NL80211_STA_INFO_RX_PACKETS, - sinfo->rx_packets)) - goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_TX_PACKETS) && - nla_put_u32(msg, NL80211_STA_INFO_TX_PACKETS, - sinfo->tx_packets)) - goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_TX_RETRIES) && - nla_put_u32(msg, NL80211_STA_INFO_TX_RETRIES, - sinfo->tx_retries)) - goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_TX_FAILED) && - nla_put_u32(msg, NL80211_STA_INFO_TX_FAILED, - sinfo->tx_failed)) - goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_EXPECTED_THROUGHPUT) && - nla_put_u32(msg, NL80211_STA_INFO_EXPECTED_THROUGHPUT, - sinfo->expected_throughput)) - goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_BEACON_LOSS_COUNT) && - nla_put_u32(msg, NL80211_STA_INFO_BEACON_LOSS, - sinfo->beacon_loss_count)) - goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_LOCAL_PM) && - nla_put_u32(msg, NL80211_STA_INFO_LOCAL_PM, - sinfo->local_pm)) - goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_PEER_PM) && - nla_put_u32(msg, NL80211_STA_INFO_PEER_PM, - sinfo->peer_pm)) - goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_NONPEER_PM) && - nla_put_u32(msg, NL80211_STA_INFO_NONPEER_PM, - sinfo->nonpeer_pm)) - goto nla_put_failure; - if (sinfo->filled & STATION_INFO_BSS_PARAM) { + + PUT_SINFO(RX_PACKETS, rx_packets, u32); + PUT_SINFO(TX_PACKETS, tx_packets, u32); + PUT_SINFO(TX_RETRIES, tx_retries, u32); + PUT_SINFO(TX_FAILED, tx_failed, u32); + PUT_SINFO(EXPECTED_THROUGHPUT, expected_throughput, u32); + PUT_SINFO(BEACON_LOSS, beacon_loss_count, u32); + PUT_SINFO(LOCAL_PM, local_pm, u32); + PUT_SINFO(PEER_PM, peer_pm, u32); + PUT_SINFO(NONPEER_PM, nonpeer_pm, u32); + + if (sinfo->filled & BIT(NL80211_STA_INFO_BSS_PARAM)) { bss_param = nla_nest_start(msg, NL80211_STA_INFO_BSS_PARAM); if (!bss_param) goto nla_put_failure; @@ -3788,23 +3781,68 @@ static int nl80211_send_station(struct sk_buff *msg, u32 portid, u32 seq, nla_nest_end(msg, bss_param); } - if ((sinfo->filled & STATION_INFO_STA_FLAGS) && + if ((sinfo->filled & BIT(NL80211_STA_INFO_STA_FLAGS)) && nla_put(msg, NL80211_STA_INFO_STA_FLAGS, sizeof(struct nl80211_sta_flag_update), &sinfo->sta_flags)) goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_T_OFFSET) && - nla_put_u64(msg, NL80211_STA_INFO_T_OFFSET, - sinfo->t_offset)) - goto nla_put_failure; + + PUT_SINFO(T_OFFSET, t_offset, u64); + PUT_SINFO(RX_DROP_MISC, rx_dropped_misc, u64); + PUT_SINFO(BEACON_RX, rx_beacon, u64); + PUT_SINFO(BEACON_SIGNAL_AVG, rx_beacon_signal_avg, u8); + +#undef PUT_SINFO + + if (sinfo->filled & BIT(NL80211_STA_INFO_TID_STATS)) { + struct nlattr *tidsattr; + int tid; + + tidsattr = nla_nest_start(msg, NL80211_STA_INFO_TID_STATS); + if (!tidsattr) + goto nla_put_failure; + + for (tid = 0; tid < IEEE80211_NUM_TIDS + 1; tid++) { + struct cfg80211_tid_stats *tidstats; + struct nlattr *tidattr; + + tidstats = &sinfo->pertid[tid]; + + if 
(!tidstats->filled) + continue; + + tidattr = nla_nest_start(msg, tid + 1); + if (!tidattr) + goto nla_put_failure; + +#define PUT_TIDVAL(attr, memb, type) do { \ + if (tidstats->filled & BIT(NL80211_TID_STATS_ ## attr) && \ + nla_put_ ## type(msg, NL80211_TID_STATS_ ## attr, \ + tidstats->memb)) \ + goto nla_put_failure; \ + } while (0) + + PUT_TIDVAL(RX_MSDU, rx_msdu, u64); + PUT_TIDVAL(TX_MSDU, tx_msdu, u64); + PUT_TIDVAL(TX_MSDU_RETRIES, tx_msdu_retries, u64); + PUT_TIDVAL(TX_MSDU_FAILED, tx_msdu_failed, u64); + +#undef PUT_TIDVAL + nla_nest_end(msg, tidattr); + } + + nla_nest_end(msg, tidsattr); + } + nla_nest_end(msg, sinfoattr); - if ((sinfo->filled & STATION_INFO_ASSOC_REQ_IES) && + if (sinfo->assoc_req_ies_len && nla_put(msg, NL80211_ATTR_IE, sinfo->assoc_req_ies_len, sinfo->assoc_req_ies)) goto nla_put_failure; - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: genlmsg_cancel(msg, hdr); @@ -3844,7 +3882,7 @@ static int nl80211_dump_station(struct sk_buff *skb, if (err) goto out_err; - if (nl80211_send_station(skb, + if (nl80211_send_station(skb, NL80211_CMD_NEW_STATION, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, rdev, wdev->netdev, mac_addr, @@ -3891,7 +3929,8 @@ static int nl80211_get_station(struct sk_buff *skb, struct genl_info *info) if (!msg) return -ENOMEM; - if (nl80211_send_station(msg, info->snd_portid, info->snd_seq, 0, + if (nl80211_send_station(msg, NL80211_CMD_NEW_STATION, + info->snd_portid, info->snd_seq, 0, rdev, dev, mac_addr, &sinfo) < 0) { nlmsg_free(msg); return -ENOBUFS; @@ -4533,7 +4572,8 @@ static int nl80211_send_mpath(struct sk_buff *msg, u32 portid, u32 seq, nla_nest_end(msg, pinfoattr); - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: genlmsg_cancel(msg, hdr); @@ -5327,42 +5367,20 @@ static int nl80211_update_mesh_config(struct sk_buff *skb, return err; } -static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info) +static int nl80211_put_regdom(const struct ieee80211_regdomain *regdom, + struct sk_buff *msg) { - const struct ieee80211_regdomain *regdom; - struct sk_buff *msg; - void *hdr = NULL; struct nlattr *nl_reg_rules; unsigned int i; - if (!cfg80211_regdomain) - return -EINVAL; - - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!msg) - return -ENOBUFS; - - hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, - NL80211_CMD_GET_REG); - if (!hdr) - goto put_failure; - - if (reg_last_request_cell_base() && - nla_put_u32(msg, NL80211_ATTR_USER_REG_HINT_TYPE, - NL80211_USER_REG_HINT_CELL_BASE)) - goto nla_put_failure; - - rcu_read_lock(); - regdom = rcu_dereference(cfg80211_regdomain); - if (nla_put_string(msg, NL80211_ATTR_REG_ALPHA2, regdom->alpha2) || (regdom->dfs_region && nla_put_u8(msg, NL80211_ATTR_DFS_REGION, regdom->dfs_region))) - goto nla_put_failure_rcu; + goto nla_put_failure; nl_reg_rules = nla_nest_start(msg, NL80211_ATTR_REG_RULES); if (!nl_reg_rules) - goto nla_put_failure_rcu; + goto nla_put_failure; for (i = 0; i < regdom->n_reg_rules; i++) { struct nlattr *nl_reg_rule; @@ -5377,7 +5395,7 @@ static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info) nl_reg_rule = nla_nest_start(msg, i); if (!nl_reg_rule) - goto nla_put_failure_rcu; + goto nla_put_failure; max_bandwidth_khz = freq_range->max_bandwidth_khz; if (!max_bandwidth_khz) @@ -5398,13 +5416,74 @@ static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info) power_rule->max_eirp) || nla_put_u32(msg, NL80211_ATTR_DFS_CAC_TIME, 
reg_rule->dfs_cac_ms)) - goto nla_put_failure_rcu; + goto nla_put_failure; nla_nest_end(msg, nl_reg_rule); } - rcu_read_unlock(); nla_nest_end(msg, nl_reg_rules); + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +static int nl80211_get_reg_do(struct sk_buff *skb, struct genl_info *info) +{ + const struct ieee80211_regdomain *regdom = NULL; + struct cfg80211_registered_device *rdev; + struct wiphy *wiphy = NULL; + struct sk_buff *msg; + void *hdr; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOBUFS; + + hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, + NL80211_CMD_GET_REG); + if (!hdr) + goto put_failure; + + if (info->attrs[NL80211_ATTR_WIPHY]) { + bool self_managed; + + rdev = cfg80211_get_dev_from_info(genl_info_net(info), info); + if (IS_ERR(rdev)) { + nlmsg_free(msg); + return PTR_ERR(rdev); + } + + wiphy = &rdev->wiphy; + self_managed = wiphy->regulatory_flags & + REGULATORY_WIPHY_SELF_MANAGED; + regdom = get_wiphy_regdom(wiphy); + + /* a self-managed-reg device must have a private regdom */ + if (WARN_ON(!regdom && self_managed)) { + nlmsg_free(msg); + return -EINVAL; + } + + if (regdom && + nla_put_u32(msg, NL80211_ATTR_WIPHY, get_wiphy_idx(wiphy))) + goto nla_put_failure; + } + + if (!wiphy && reg_last_request_cell_base() && + nla_put_u32(msg, NL80211_ATTR_USER_REG_HINT_TYPE, + NL80211_USER_REG_HINT_CELL_BASE)) + goto nla_put_failure; + + rcu_read_lock(); + + if (!regdom) + regdom = rcu_dereference(cfg80211_regdomain); + + if (nl80211_put_regdom(regdom, msg)) + goto nla_put_failure_rcu; + + rcu_read_unlock(); genlmsg_end(msg, hdr); return genlmsg_reply(msg, info); @@ -5418,6 +5497,84 @@ put_failure: return -EMSGSIZE; } +static int nl80211_send_regdom(struct sk_buff *msg, struct netlink_callback *cb, + u32 seq, int flags, struct wiphy *wiphy, + const struct ieee80211_regdomain *regdom) +{ + void *hdr = nl80211hdr_put(msg, NETLINK_CB(cb->skb).portid, seq, flags, + NL80211_CMD_GET_REG); + + if (!hdr) + return -1; + + genl_dump_check_consistent(cb, hdr, &nl80211_fam); + + if (nl80211_put_regdom(regdom, msg)) + goto nla_put_failure; + + if (!wiphy && reg_last_request_cell_base() && + nla_put_u32(msg, NL80211_ATTR_USER_REG_HINT_TYPE, + NL80211_USER_REG_HINT_CELL_BASE)) + goto nla_put_failure; + + if (wiphy && + nla_put_u32(msg, NL80211_ATTR_WIPHY, get_wiphy_idx(wiphy))) + goto nla_put_failure; + + if (wiphy && wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED && + nla_put_flag(msg, NL80211_ATTR_WIPHY_SELF_MANAGED_REG)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +static int nl80211_get_reg_dump(struct sk_buff *skb, + struct netlink_callback *cb) +{ + const struct ieee80211_regdomain *regdom = NULL; + struct cfg80211_registered_device *rdev; + int err, reg_idx, start = cb->args[2]; + + rtnl_lock(); + + if (cfg80211_regdomain && start == 0) { + err = nl80211_send_regdom(skb, cb, cb->nlh->nlmsg_seq, + NLM_F_MULTI, NULL, + rtnl_dereference(cfg80211_regdomain)); + if (err < 0) + goto out_err; + } + + /* the global regdom is idx 0 */ + reg_idx = 1; + list_for_each_entry(rdev, &cfg80211_rdev_list, list) { + regdom = get_wiphy_regdom(&rdev->wiphy); + if (!regdom) + continue; + + if (++reg_idx <= start) + continue; + + err = nl80211_send_regdom(skb, cb, cb->nlh->nlmsg_seq, + NLM_F_MULTI, &rdev->wiphy, regdom); + if (err < 0) { + reg_idx--; + break; + } + } + + cb->args[2] = reg_idx; + err = skb->len; +out_err: + rtnl_unlock(); + return err; +} + 
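[Illustrative note, not part of the patch above] The new nl80211_get_reg_dump() callback lets userspace dump the global regulatory domain (tracked as index 0 in cb->args[2]) followed by each wiphy that carries a private regdomain. Below is a minimal userspace sketch of how such a dump might be consumed; it assumes libnl-genl-3 and uapi headers that already define NL80211_ATTR_WIPHY_SELF_MANAGED_REG (added by this series), and error handling is trimmed for brevity.

/* sketch only: dump NL80211_CMD_GET_REG and print one line per regdomain */
#include <stdio.h>
#include <netlink/netlink.h>
#include <netlink/genl/genl.h>
#include <netlink/genl/ctrl.h>
#include <linux/nl80211.h>

static int dump_cb(struct nl_msg *msg, void *arg)
{
	struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg));
	struct nlattr *tb[NL80211_ATTR_MAX + 1];

	nla_parse(tb, NL80211_ATTR_MAX, genlmsg_attrdata(gnlh, 0),
		  genlmsg_attrlen(gnlh, 0), NULL);

	/* entries without NL80211_ATTR_WIPHY describe the global regdomain */
	if (tb[NL80211_ATTR_WIPHY])
		printf("wiphy %u: ", (unsigned)nla_get_u32(tb[NL80211_ATTR_WIPHY]));
	else
		printf("global: ");

	if (tb[NL80211_ATTR_REG_ALPHA2])
		printf("alpha2 %s", nla_get_string(tb[NL80211_ATTR_REG_ALPHA2]));
	if (tb[NL80211_ATTR_WIPHY_SELF_MANAGED_REG])
		printf(" (self-managed)");
	printf("\n");
	return NL_OK;
}

int main(void)
{
	struct nl_sock *sk = nl_socket_alloc();
	struct nl_msg *msg;
	int family;

	if (!sk || genl_connect(sk))
		return 1;
	family = genl_ctrl_resolve(sk, "nl80211");
	if (family < 0)
		return 1;

	msg = nlmsg_alloc();
	genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, family, 0,
		    NLM_F_DUMP, NL80211_CMD_GET_REG, 0);
	nl_socket_modify_cb(sk, NL_CB_VALID, NL_CB_CUSTOM, dump_cb, NULL);
	nl_send_auto(sk, msg);
	nl_recvmsgs_default(sk);

	nlmsg_free(msg);
	nl_socket_free(sk);
	return 0;
}

Per the dump code above, only wiphys with a private regdomain are emitted after the global entry; self-managed devices additionally carry the NL80211_ATTR_WIPHY_SELF_MANAGED_REG flag.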
static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info) { struct nlattr *tb[NL80211_REG_RULE_ATTR_MAX + 1]; @@ -5623,7 +5780,7 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) request->ssids = (void *)&request->channels[n_channels]; request->n_ssids = n_ssids; if (ie_len) { - if (request->ssids) + if (n_ssids) request->ie = (void *)(request->ssids + n_ssids); else request->ie = (void *)(request->channels + n_channels); @@ -5679,7 +5836,7 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) request->n_channels = i; i = 0; - if (info->attrs[NL80211_ATTR_SCAN_SSIDS]) { + if (n_ssids) { nla_for_each_nested(attr, info->attrs[NL80211_ATTR_SCAN_SSIDS], tmp) { if (nla_len(attr) > IEEE80211_MAX_SSID_LEN) { err = -EINVAL; @@ -5877,7 +6034,7 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev, request->ssids = (void *)&request->channels[n_channels]; request->n_ssids = n_ssids; if (ie_len) { - if (request->ssids) + if (n_ssids) request->ie = (void *)(request->ssids + n_ssids); else request->ie = (void *)(request->channels + n_channels); @@ -5886,7 +6043,7 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev, if (n_match_sets) { if (request->ie) request->match_sets = (void *)(request->ie + ie_len); - else if (request->ssids) + else if (n_ssids) request->match_sets = (void *)(request->ssids + n_ssids); else @@ -5945,7 +6102,7 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev, request->n_channels = i; i = 0; - if (attrs[NL80211_ATTR_SCAN_SSIDS]) { + if (n_ssids) { nla_for_each_nested(attr, attrs[NL80211_ATTR_SCAN_SSIDS], tmp) { if (nla_len(attr) > IEEE80211_MAX_SSID_LEN) { @@ -6053,6 +6210,10 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev, } } + if (attrs[NL80211_ATTR_SCHED_SCAN_DELAY]) + request->delay = + nla_get_u32(attrs[NL80211_ATTR_SCHED_SCAN_DELAY]); + request->interval = interval; request->scan_start = jiffies; @@ -6069,6 +6230,7 @@ static int nl80211_start_sched_scan(struct sk_buff *skb, struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_sched_scan_request *sched_scan_req; int err; if (!(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN) || @@ -6078,27 +6240,32 @@ static int nl80211_start_sched_scan(struct sk_buff *skb, if (rdev->sched_scan_req) return -EINPROGRESS; - rdev->sched_scan_req = nl80211_parse_sched_scan(&rdev->wiphy, wdev, - info->attrs); - err = PTR_ERR_OR_ZERO(rdev->sched_scan_req); + sched_scan_req = nl80211_parse_sched_scan(&rdev->wiphy, wdev, + info->attrs); + + err = PTR_ERR_OR_ZERO(sched_scan_req); if (err) goto out_err; - err = rdev_sched_scan_start(rdev, dev, rdev->sched_scan_req); + err = rdev_sched_scan_start(rdev, dev, sched_scan_req); if (err) goto out_free; - rdev->sched_scan_req->dev = dev; - rdev->sched_scan_req->wiphy = &rdev->wiphy; + sched_scan_req->dev = dev; + sched_scan_req->wiphy = &rdev->wiphy; + + if (info->attrs[NL80211_ATTR_SOCKET_OWNER]) + sched_scan_req->owner_nlportid = info->snd_portid; + + rcu_assign_pointer(rdev->sched_scan_req, sched_scan_req); nl80211_send_sched_scan(rdev, dev, NL80211_CMD_START_SCHED_SCAN); return 0; out_free: - kfree(rdev->sched_scan_req); + kfree(sched_scan_req); out_err: - rdev->sched_scan_req = NULL; return err; } @@ -6433,7 +6600,8 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb, nla_nest_end(msg, bss); - 
return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; fail_unlock_rcu: rcu_read_unlock(); @@ -6481,12 +6649,17 @@ static int nl80211_dump_scan(struct sk_buff *skb, struct netlink_callback *cb) } static int nl80211_send_survey(struct sk_buff *msg, u32 portid, u32 seq, - int flags, struct net_device *dev, - struct survey_info *survey) + int flags, struct net_device *dev, + bool allow_radio_stats, + struct survey_info *survey) { void *hdr; struct nlattr *infoattr; + /* skip radio stats if userspace didn't request them */ + if (!survey->channel && !allow_radio_stats) + return 0; + hdr = nl80211hdr_put(msg, portid, seq, flags, NL80211_CMD_NEW_SURVEY_RESULTS); if (!hdr) @@ -6499,7 +6672,8 @@ static int nl80211_send_survey(struct sk_buff *msg, u32 portid, u32 seq, if (!infoattr) goto nla_put_failure; - if (nla_put_u32(msg, NL80211_SURVEY_INFO_FREQUENCY, + if (survey->channel && + nla_put_u32(msg, NL80211_SURVEY_INFO_FREQUENCY, survey->channel->center_freq)) goto nla_put_failure; @@ -6509,49 +6683,57 @@ static int nl80211_send_survey(struct sk_buff *msg, u32 portid, u32 seq, if ((survey->filled & SURVEY_INFO_IN_USE) && nla_put_flag(msg, NL80211_SURVEY_INFO_IN_USE)) goto nla_put_failure; - if ((survey->filled & SURVEY_INFO_CHANNEL_TIME) && - nla_put_u64(msg, NL80211_SURVEY_INFO_CHANNEL_TIME, - survey->channel_time)) + if ((survey->filled & SURVEY_INFO_TIME) && + nla_put_u64(msg, NL80211_SURVEY_INFO_TIME, + survey->time)) + goto nla_put_failure; + if ((survey->filled & SURVEY_INFO_TIME_BUSY) && + nla_put_u64(msg, NL80211_SURVEY_INFO_TIME_BUSY, + survey->time_busy)) goto nla_put_failure; - if ((survey->filled & SURVEY_INFO_CHANNEL_TIME_BUSY) && - nla_put_u64(msg, NL80211_SURVEY_INFO_CHANNEL_TIME_BUSY, - survey->channel_time_busy)) + if ((survey->filled & SURVEY_INFO_TIME_EXT_BUSY) && + nla_put_u64(msg, NL80211_SURVEY_INFO_TIME_EXT_BUSY, + survey->time_ext_busy)) goto nla_put_failure; - if ((survey->filled & SURVEY_INFO_CHANNEL_TIME_EXT_BUSY) && - nla_put_u64(msg, NL80211_SURVEY_INFO_CHANNEL_TIME_EXT_BUSY, - survey->channel_time_ext_busy)) + if ((survey->filled & SURVEY_INFO_TIME_RX) && + nla_put_u64(msg, NL80211_SURVEY_INFO_TIME_RX, + survey->time_rx)) goto nla_put_failure; - if ((survey->filled & SURVEY_INFO_CHANNEL_TIME_RX) && - nla_put_u64(msg, NL80211_SURVEY_INFO_CHANNEL_TIME_RX, - survey->channel_time_rx)) + if ((survey->filled & SURVEY_INFO_TIME_TX) && + nla_put_u64(msg, NL80211_SURVEY_INFO_TIME_TX, + survey->time_tx)) goto nla_put_failure; - if ((survey->filled & SURVEY_INFO_CHANNEL_TIME_TX) && - nla_put_u64(msg, NL80211_SURVEY_INFO_CHANNEL_TIME_TX, - survey->channel_time_tx)) + if ((survey->filled & SURVEY_INFO_TIME_SCAN) && + nla_put_u64(msg, NL80211_SURVEY_INFO_TIME_SCAN, + survey->time_scan)) goto nla_put_failure; nla_nest_end(msg, infoattr); - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; } -static int nl80211_dump_survey(struct sk_buff *skb, - struct netlink_callback *cb) +static int nl80211_dump_survey(struct sk_buff *skb, struct netlink_callback *cb) { struct survey_info survey; struct cfg80211_registered_device *rdev; struct wireless_dev *wdev; int survey_idx = cb->args[2]; int res; + bool radio_stats; res = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev); if (res) return res; + /* prepare_wdev_dump parsed the attributes */ + radio_stats = nl80211_fam.attrbuf[NL80211_ATTR_SURVEY_RADIO_STATS]; + if (!wdev->netdev) { res = -EINVAL; goto out_err; @@ -6569,13 +6751,9 @@ static int 
nl80211_dump_survey(struct sk_buff *skb, if (res) goto out_err; - /* Survey without a channel doesn't make sense */ - if (!survey.channel) { - res = -EINVAL; - goto out; - } - - if (survey.channel->flags & IEEE80211_CHAN_DISABLED) { + /* don't send disabled channels, but do send non-channel data */ + if (survey.channel && + survey.channel->flags & IEEE80211_CHAN_DISABLED) { survey_idx++; continue; } @@ -6583,7 +6761,7 @@ static int nl80211_dump_survey(struct sk_buff *skb, if (nl80211_send_survey(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - wdev->netdev, &survey) < 0) + wdev->netdev, radio_stats, &survey) < 0) goto out; survey_idx++; } @@ -7596,14 +7774,19 @@ static int nl80211_wiphy_netns(struct sk_buff *skb, struct genl_info *info) struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net *net; int err; - u32 pid; - if (!info->attrs[NL80211_ATTR_PID]) - return -EINVAL; + if (info->attrs[NL80211_ATTR_PID]) { + u32 pid = nla_get_u32(info->attrs[NL80211_ATTR_PID]); - pid = nla_get_u32(info->attrs[NL80211_ATTR_PID]); + net = get_net_ns_by_pid(pid); + } else if (info->attrs[NL80211_ATTR_NETNS_FD]) { + u32 fd = nla_get_u32(info->attrs[NL80211_ATTR_NETNS_FD]); + + net = get_net_ns_by_fd(fd); + } else { + return -EINVAL; + } - net = get_net_ns_by_pid(pid); if (IS_ERR(net)) return PTR_ERR(net); @@ -8599,6 +8782,48 @@ static int nl80211_send_wowlan_tcp(struct sk_buff *msg, return 0; } +static int nl80211_send_wowlan_nd(struct sk_buff *msg, + struct cfg80211_sched_scan_request *req) +{ + struct nlattr *nd, *freqs, *matches, *match; + int i; + + if (!req) + return 0; + + nd = nla_nest_start(msg, NL80211_WOWLAN_TRIG_NET_DETECT); + if (!nd) + return -ENOBUFS; + + if (nla_put_u32(msg, NL80211_ATTR_SCHED_SCAN_INTERVAL, req->interval)) + return -ENOBUFS; + + freqs = nla_nest_start(msg, NL80211_ATTR_SCAN_FREQUENCIES); + if (!freqs) + return -ENOBUFS; + + for (i = 0; i < req->n_channels; i++) + nla_put_u32(msg, i, req->channels[i]->center_freq); + + nla_nest_end(msg, freqs); + + if (req->n_match_sets) { + matches = nla_nest_start(msg, NL80211_ATTR_SCHED_SCAN_MATCH); + for (i = 0; i < req->n_match_sets; i++) { + match = nla_nest_start(msg, i); + nla_put(msg, NL80211_SCHED_SCAN_MATCH_ATTR_SSID, + req->match_sets[i].ssid.ssid_len, + req->match_sets[i].ssid.ssid); + nla_nest_end(msg, match); + } + nla_nest_end(msg, matches); + } + + nla_nest_end(msg, nd); + + return 0; +} + static int nl80211_get_wowlan(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; @@ -8656,6 +8881,11 @@ static int nl80211_get_wowlan(struct sk_buff *skb, struct genl_info *info) rdev->wiphy.wowlan_config->tcp)) goto nla_put_failure; + if (nl80211_send_wowlan_nd( + msg, + rdev->wiphy.wowlan_config->nd_config)) + goto nla_put_failure; + nla_nest_end(msg, nl_wowlan); } @@ -10225,7 +10455,8 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_GET_REG, - .doit = nl80211_get_reg, + .doit = nl80211_get_reg_do, + .dumpit = nl80211_get_reg_dump, .policy = nl80211_policy, .internal_flags = NL80211_FLAG_NEED_RTNL, /* can be retrieved by unprivileged users */ @@ -10824,7 +11055,8 @@ static int nl80211_send_scan_msg(struct sk_buff *msg, /* ignore errors and send incomplete event anyway */ nl80211_add_scan_req(msg, rdev); - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: genlmsg_cancel(msg, hdr); @@ -10847,7 +11079,8 @@ nl80211_send_sched_scan_msg(struct sk_buff *msg, nla_put_u32(msg, NL80211_ATTR_IFINDEX, 
netdev->ifindex)) goto nla_put_failure; - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: genlmsg_cancel(msg, hdr); @@ -10939,25 +11172,9 @@ void nl80211_send_sched_scan(struct cfg80211_registered_device *rdev, NL80211_MCGRP_SCAN, GFP_KERNEL); } -/* - * This can happen on global regulatory changes or device specific settings - * based on custom world regulatory domains. - */ -void nl80211_send_reg_change_event(struct regulatory_request *request) +static bool nl80211_reg_change_event_fill(struct sk_buff *msg, + struct regulatory_request *request) { - struct sk_buff *msg; - void *hdr; - - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!msg) - return; - - hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_REG_CHANGE); - if (!hdr) { - nlmsg_free(msg); - return; - } - /* Userspace can always count this one always being set */ if (nla_put_u8(msg, NL80211_ATTR_REG_INITIATOR, request->initiator)) goto nla_put_failure; @@ -10983,8 +11200,46 @@ void nl80211_send_reg_change_event(struct regulatory_request *request) goto nla_put_failure; } - if (request->wiphy_idx != WIPHY_IDX_INVALID && - nla_put_u32(msg, NL80211_ATTR_WIPHY, request->wiphy_idx)) + if (request->wiphy_idx != WIPHY_IDX_INVALID) { + struct wiphy *wiphy = wiphy_idx_to_wiphy(request->wiphy_idx); + + if (wiphy && + nla_put_u32(msg, NL80211_ATTR_WIPHY, request->wiphy_idx)) + goto nla_put_failure; + + if (wiphy && + wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED && + nla_put_flag(msg, NL80211_ATTR_WIPHY_SELF_MANAGED_REG)) + goto nla_put_failure; + } + + return true; + +nla_put_failure: + return false; +} + +/* + * This can happen on global regulatory changes or device specific settings + * based on custom regulatory domains. + */ +void nl80211_common_reg_change_event(enum nl80211_commands cmd_id, + struct regulatory_request *request) +{ + struct sk_buff *msg; + void *hdr; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, cmd_id); + if (!hdr) { + nlmsg_free(msg); + return; + } + + if (nl80211_reg_change_event_fill(msg, request) == false) goto nla_put_failure; genlmsg_end(msg, hdr); @@ -11523,7 +11778,7 @@ void cfg80211_new_sta(struct net_device *dev, const u8 *mac_addr, if (!msg) return; - if (nl80211_send_station(msg, 0, 0, 0, + if (nl80211_send_station(msg, NL80211_CMD_NEW_STATION, 0, 0, 0, rdev, dev, mac_addr, sinfo) < 0) { nlmsg_free(msg); return; @@ -11534,12 +11789,16 @@ void cfg80211_new_sta(struct net_device *dev, const u8 *mac_addr, } EXPORT_SYMBOL(cfg80211_new_sta); -void cfg80211_del_sta(struct net_device *dev, const u8 *mac_addr, gfp_t gfp) +void cfg80211_del_sta_sinfo(struct net_device *dev, const u8 *mac_addr, + struct station_info *sinfo, gfp_t gfp) { struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct sk_buff *msg; - void *hdr; + struct station_info empty_sinfo = {}; + + if (!sinfo) + sinfo = &empty_sinfo; trace_cfg80211_del_sta(dev, mac_addr); @@ -11547,27 +11806,16 @@ void cfg80211_del_sta(struct net_device *dev, const u8 *mac_addr, gfp_t gfp) if (!msg) return; - hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_DEL_STATION); - if (!hdr) { + if (nl80211_send_station(msg, NL80211_CMD_DEL_STATION, 0, 0, 0, + rdev, dev, mac_addr, sinfo) < 0) { nlmsg_free(msg); return; } - if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) || - nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, mac_addr)) - goto nla_put_failure; - - genlmsg_end(msg, hdr); - 
genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0, NL80211_MCGRP_MLME, gfp); - return; - - nla_put_failure: - genlmsg_cancel(msg, hdr); - nlmsg_free(msg); } -EXPORT_SYMBOL(cfg80211_del_sta); +EXPORT_SYMBOL(cfg80211_del_sta_sinfo); void cfg80211_conn_failed(struct net_device *dev, const u8 *mac_addr, enum nl80211_connect_failed_reason reason, @@ -12471,6 +12719,13 @@ static int nl80211_netlink_notify(struct notifier_block * nb, list_for_each_entry_rcu(rdev, &cfg80211_rdev_list, list) { bool schedule_destroy_work = false; + bool schedule_scan_stop = false; + struct cfg80211_sched_scan_request *sched_scan_req = + rcu_dereference(rdev->sched_scan_req); + + if (sched_scan_req && notify->portid && + sched_scan_req->owner_nlportid == notify->portid) + schedule_scan_stop = true; list_for_each_entry_rcu(wdev, &rdev->wdev_list, list) { cfg80211_mlme_unregister_socket(wdev, notify->portid); @@ -12501,6 +12756,12 @@ static int nl80211_netlink_notify(struct notifier_block * nb, spin_unlock(&rdev->destroy_list_lock); schedule_work(&rdev->destroy_work); } + } else if (schedule_scan_stop) { + sched_scan_req->owner_nlportid = 0; + + if (rdev->ops->sched_scan_stop && + rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN) + schedule_work(&rdev->sched_scan_stop_wk); } } diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index 7ad70d6f0cc6..84d4edf1d545 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -17,7 +17,21 @@ void nl80211_send_sched_scan(struct cfg80211_registered_device *rdev, struct net_device *netdev, u32 cmd); void nl80211_send_sched_scan_results(struct cfg80211_registered_device *rdev, struct net_device *netdev); -void nl80211_send_reg_change_event(struct regulatory_request *request); +void nl80211_common_reg_change_event(enum nl80211_commands cmd_id, + struct regulatory_request *request); + +static inline void +nl80211_send_reg_change_event(struct regulatory_request *request) +{ + nl80211_common_reg_change_event(NL80211_CMD_REG_CHANGE, request); +} + +static inline void +nl80211_send_wiphy_reg_change_event(struct regulatory_request *request) +{ + nl80211_common_reg_change_event(NL80211_CMD_WIPHY_REG_CHANGE, request); +} + void nl80211_send_rx_auth(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *buf, size_t len, gfp_t gfp); diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 7b8309840d4e..b586d0dcb09e 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -109,7 +109,7 @@ static struct regulatory_request core_request_world = { * protected by RTNL (and can be accessed with RCU protection) */ static struct regulatory_request __rcu *last_request = - (void __rcu *)&core_request_world; + (void __force __rcu *)&core_request_world; /* To trigger userspace events */ static struct platform_device *reg_pdev; @@ -142,7 +142,7 @@ static const struct ieee80211_regdomain *get_cfg80211_regdom(void) return rtnl_dereference(cfg80211_regdomain); } -static const struct ieee80211_regdomain *get_wiphy_regdom(struct wiphy *wiphy) +const struct ieee80211_regdomain *get_wiphy_regdom(struct wiphy *wiphy) { return rtnl_dereference(wiphy->regd); } @@ -1307,6 +1307,9 @@ static bool ignore_reg_update(struct wiphy *wiphy, { struct regulatory_request *lr = get_last_request(); + if (wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED) + return true; + if (!lr) { REG_DBG_PRINT("Ignoring regulatory request set by %s " "since last_request is not set\n", @@ -1530,45 +1533,40 @@ static void reg_call_notifier(struct wiphy *wiphy, static bool 
reg_wdev_chan_valid(struct wiphy *wiphy, struct wireless_dev *wdev) { - struct ieee80211_channel *ch; struct cfg80211_chan_def chandef; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); - bool ret = true; + enum nl80211_iftype iftype; wdev_lock(wdev); + iftype = wdev->iftype; + /* make sure the interface is active */ if (!wdev->netdev || !netif_running(wdev->netdev)) - goto out; + goto wdev_inactive_unlock; - switch (wdev->iftype) { + switch (iftype) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: if (!wdev->beacon_interval) - goto out; - - ret = cfg80211_reg_can_beacon(wiphy, - &wdev->chandef, wdev->iftype); + goto wdev_inactive_unlock; + chandef = wdev->chandef; break; case NL80211_IFTYPE_ADHOC: if (!wdev->ssid_len) - goto out; - - ret = cfg80211_reg_can_beacon(wiphy, - &wdev->chandef, wdev->iftype); + goto wdev_inactive_unlock; + chandef = wdev->chandef; break; case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_P2P_CLIENT: if (!wdev->current_bss || !wdev->current_bss->pub.channel) - goto out; + goto wdev_inactive_unlock; - ch = wdev->current_bss->pub.channel; - if (rdev->ops->get_channel && - !rdev_get_channel(rdev, wdev, &chandef)) - ret = cfg80211_chandef_usable(wiphy, &chandef, - IEEE80211_CHAN_DISABLED); - else - ret = !(ch->flags & IEEE80211_CHAN_DISABLED); + if (!rdev->ops->get_channel || + rdev_get_channel(rdev, wdev, &chandef)) + cfg80211_chandef_create(&chandef, + wdev->current_bss->pub.channel, + NL80211_CHAN_NO_HT); break; case NL80211_IFTYPE_MONITOR: case NL80211_IFTYPE_AP_VLAN: @@ -1581,9 +1579,26 @@ static bool reg_wdev_chan_valid(struct wiphy *wiphy, struct wireless_dev *wdev) break; } -out: wdev_unlock(wdev); - return ret; + + switch (iftype) { + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_P2P_GO: + case NL80211_IFTYPE_ADHOC: + return cfg80211_reg_can_beacon(wiphy, &chandef, iftype); + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_P2P_CLIENT: + return cfg80211_chandef_usable(wiphy, &chandef, + IEEE80211_CHAN_DISABLED); + default: + break; + } + + return true; + +wdev_inactive_unlock: + wdev_unlock(wdev); + return true; } static void reg_leave_invalid_chans(struct wiphy *wiphy) @@ -1683,8 +1698,12 @@ static void handle_channel_custom(struct wiphy *wiphy, if (IS_ERR(reg_rule)) { REG_DBG_PRINT("Disabling freq %d MHz as custom regd has no rule that fits it\n", chan->center_freq); - chan->orig_flags |= IEEE80211_CHAN_DISABLED; - chan->flags = chan->orig_flags; + if (wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED) { + chan->flags |= IEEE80211_CHAN_DISABLED; + } else { + chan->orig_flags |= IEEE80211_CHAN_DISABLED; + chan->flags = chan->orig_flags; + } return; } @@ -1709,7 +1728,13 @@ static void handle_channel_custom(struct wiphy *wiphy, chan->dfs_state = NL80211_DFS_USABLE; chan->beacon_found = false; - chan->flags |= map_regdom_flags(reg_rule->flags) | bw_flags; + + if (wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED) + chan->flags = chan->orig_flags | bw_flags | + map_regdom_flags(reg_rule->flags); + else + chan->flags |= map_regdom_flags(reg_rule->flags) | bw_flags; + chan->max_antenna_gain = (int) MBI_TO_DBI(power_rule->max_antenna_gain); chan->max_reg_power = chan->max_power = (int) MBM_TO_DBM(power_rule->max_eirp); @@ -2095,6 +2120,26 @@ out_free: reg_free_request(reg_request); } +static bool reg_only_self_managed_wiphys(void) +{ + struct cfg80211_registered_device *rdev; + struct wiphy *wiphy; + bool self_managed_found = false; + + ASSERT_RTNL(); + + list_for_each_entry(rdev, &cfg80211_rdev_list, list) { + wiphy = &rdev->wiphy; 
+ if (wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED) + self_managed_found = true; + else + return false; + } + + /* make sure at least one self-managed wiphy exists */ + return self_managed_found; +} + /* * Processes regulatory hints, this is all the NL80211_REGDOM_SET_BY_* * Regulatory hints come on a first come first serve basis and we @@ -2126,6 +2171,11 @@ static void reg_process_pending_hints(void) spin_unlock(&reg_requests_lock); + if (reg_only_self_managed_wiphys()) { + reg_free_request(reg_request); + return; + } + reg_process_hint(reg_request); } @@ -2153,11 +2203,52 @@ static void reg_process_pending_beacon_hints(void) spin_unlock_bh(&reg_pending_beacons_lock); } +static void reg_process_self_managed_hints(void) +{ + struct cfg80211_registered_device *rdev; + struct wiphy *wiphy; + const struct ieee80211_regdomain *tmp; + const struct ieee80211_regdomain *regd; + enum ieee80211_band band; + struct regulatory_request request = {}; + + list_for_each_entry(rdev, &cfg80211_rdev_list, list) { + wiphy = &rdev->wiphy; + + spin_lock(&reg_requests_lock); + regd = rdev->requested_regd; + rdev->requested_regd = NULL; + spin_unlock(&reg_requests_lock); + + if (regd == NULL) + continue; + + tmp = get_wiphy_regdom(wiphy); + rcu_assign_pointer(wiphy->regd, regd); + rcu_free_regdom(tmp); + + for (band = 0; band < IEEE80211_NUM_BANDS; band++) + handle_band_custom(wiphy, wiphy->bands[band], regd); + + reg_process_ht_flags(wiphy); + + request.wiphy_idx = get_wiphy_idx(wiphy); + request.alpha2[0] = regd->alpha2[0]; + request.alpha2[1] = regd->alpha2[1]; + request.initiator = NL80211_REGDOM_SET_BY_DRIVER; + + nl80211_send_wiphy_reg_change_event(&request); + } + + reg_check_channels(); +} + static void reg_todo(struct work_struct *work) { rtnl_lock(); reg_process_pending_hints(); reg_process_pending_beacon_hints(); + reg_process_self_managed_hints(); rtnl_unlock(); } @@ -2438,6 +2529,8 @@ static void restore_regulatory_settings(bool reset_user) world_alpha2[1] = cfg80211_world_regdom->alpha2[1]; list_for_each_entry(rdev, &cfg80211_rdev_list, list) { + if (rdev->wiphy.regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED) + continue; if (rdev->wiphy.regulatory_flags & REGULATORY_CUSTOM_REG) restore_custom_reg_settings(&rdev->wiphy); } @@ -2841,10 +2934,79 @@ int set_regdom(const struct ieee80211_regdomain *rd) return 0; } +static int __regulatory_set_wiphy_regd(struct wiphy *wiphy, + struct ieee80211_regdomain *rd) +{ + const struct ieee80211_regdomain *regd; + const struct ieee80211_regdomain *prev_regd; + struct cfg80211_registered_device *rdev; + + if (WARN_ON(!wiphy || !rd)) + return -EINVAL; + + if (WARN(!(wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED), + "wiphy should have REGULATORY_WIPHY_SELF_MANAGED\n")) + return -EPERM; + + if (WARN(!is_valid_rd(rd), "Invalid regulatory domain detected\n")) { + print_regdomain_info(rd); + return -EINVAL; + } + + regd = reg_copy_regd(rd); + if (IS_ERR(regd)) + return PTR_ERR(regd); + + rdev = wiphy_to_rdev(wiphy); + + spin_lock(&reg_requests_lock); + prev_regd = rdev->requested_regd; + rdev->requested_regd = regd; + spin_unlock(&reg_requests_lock); + + kfree(prev_regd); + return 0; +} + +int regulatory_set_wiphy_regd(struct wiphy *wiphy, + struct ieee80211_regdomain *rd) +{ + int ret = __regulatory_set_wiphy_regd(wiphy, rd); + + if (ret) + return ret; + + schedule_work(&reg_work); + return 0; +} +EXPORT_SYMBOL(regulatory_set_wiphy_regd); + +int regulatory_set_wiphy_regd_sync_rtnl(struct wiphy *wiphy, + struct ieee80211_regdomain *rd) +{ + int ret; + + ASSERT_RTNL();
+ + ret = __regulatory_set_wiphy_regd(wiphy, rd); + if (ret) + return ret; + + /* process the request immediately */ + reg_process_self_managed_hints(); + return 0; +} +EXPORT_SYMBOL(regulatory_set_wiphy_regd_sync_rtnl); + void wiphy_regulatory_register(struct wiphy *wiphy) { struct regulatory_request *lr; + /* self-managed devices ignore external hints */ + if (wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED) + wiphy->regulatory_flags |= REGULATORY_DISABLE_BEACON_HINTS | + REGULATORY_COUNTRY_IE_IGNORE; + if (!reg_dev_ignore_cell_hint(wiphy)) reg_num_devs_support_basehint++; diff --git a/net/wireless/reg.h b/net/wireless/reg.h index 5e48031ccb9a..4b45d6e61d24 100644 --- a/net/wireless/reg.h +++ b/net/wireless/reg.h @@ -38,6 +38,7 @@ unsigned int reg_get_max_bandwidth(const struct ieee80211_regdomain *rd, const struct ieee80211_reg_rule *rule); bool reg_last_request_cell_base(void); +const struct ieee80211_regdomain *get_wiphy_regdom(struct wiphy *wiphy); /** * regulatory_hint_found_beacon - hints a beacon was found on a channel diff --git a/net/wireless/scan.c b/net/wireless/scan.c index bda39f149810..c705c3e2b751 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -257,7 +257,7 @@ void __cfg80211_sched_scan_results(struct work_struct *wk) rtnl_lock(); - request = rdev->sched_scan_req; + request = rtnl_dereference(rdev->sched_scan_req); /* we don't have sched_scan_req anymore if the scan is stopping */ if (request) { @@ -279,7 +279,8 @@ void cfg80211_sched_scan_results(struct wiphy *wiphy) { trace_cfg80211_sched_scan_results(wiphy); /* ignore if we're not scanning */ - if (wiphy_to_rdev(wiphy)->sched_scan_req) + + if (rcu_access_pointer(wiphy_to_rdev(wiphy)->sched_scan_req)) queue_work(cfg80211_wq, &wiphy_to_rdev(wiphy)->sched_scan_results_wk); } @@ -308,6 +309,7 @@ EXPORT_SYMBOL(cfg80211_sched_scan_stopped); int __cfg80211_stop_sched_scan(struct cfg80211_registered_device *rdev, bool driver_initiated) { + struct cfg80211_sched_scan_request *sched_scan_req; struct net_device *dev; ASSERT_RTNL(); @@ -315,7 +317,8 @@ int __cfg80211_stop_sched_scan(struct cfg80211_registered_device *rdev, if (!rdev->sched_scan_req) return -ENOENT; - dev = rdev->sched_scan_req->dev; + sched_scan_req = rtnl_dereference(rdev->sched_scan_req); + dev = sched_scan_req->dev; if (!driver_initiated) { int err = rdev_sched_scan_stop(rdev, dev); @@ -325,8 +328,8 @@ int __cfg80211_stop_sched_scan(struct cfg80211_registered_device *rdev, nl80211_send_sched_scan(rdev, dev, NL80211_CMD_SCHED_SCAN_STOPPED); - kfree(rdev->sched_scan_req); - rdev->sched_scan_req = NULL; + RCU_INIT_POINTER(rdev->sched_scan_req, NULL); + kfree_rcu(sched_scan_req, rcu_head); return 0; } diff --git a/net/wireless/trace.h b/net/wireless/trace.h index ad38910f7036..b17b3692f8c2 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -1604,11 +1604,12 @@ TRACE_EVENT(rdev_return_int_survey_info, WIPHY_ENTRY CHAN_ENTRY __field(int, ret) - __field(u64, channel_time) - __field(u64, channel_time_busy) - __field(u64, channel_time_ext_busy) - __field(u64, channel_time_rx) - __field(u64, channel_time_tx) + __field(u64, time) + __field(u64, time_busy) + __field(u64, time_ext_busy) + __field(u64, time_rx) + __field(u64, time_tx) + __field(u64, time_scan) __field(u32, filled) __field(s8, noise) ), @@ -1616,22 +1617,24 @@ TRACE_EVENT(rdev_return_int_survey_info, WIPHY_ASSIGN; CHAN_ASSIGN(info->channel); __entry->ret = ret; - __entry->channel_time = info->channel_time; - __entry->channel_time_busy = info->channel_time_busy; - 
__entry->channel_time_ext_busy = info->channel_time_ext_busy; - __entry->channel_time_rx = info->channel_time_rx; - __entry->channel_time_tx = info->channel_time_tx; + __entry->time = info->time; + __entry->time_busy = info->time_busy; + __entry->time_ext_busy = info->time_ext_busy; + __entry->time_rx = info->time_rx; + __entry->time_tx = info->time_tx; + __entry->time_scan = info->time_scan; __entry->filled = info->filled; __entry->noise = info->noise; ), TP_printk(WIPHY_PR_FMT ", returned: %d, " CHAN_PR_FMT ", channel time: %llu, channel time busy: %llu, " "channel time extension busy: %llu, channel time rx: %llu, " - "channel time tx: %llu, filled: %u, noise: %d", + "channel time tx: %llu, scan time: %llu, filled: %u, noise: %d", WIPHY_PR_ARG, __entry->ret, CHAN_PR_ARG, - __entry->channel_time, __entry->channel_time_busy, - __entry->channel_time_ext_busy, __entry->channel_time_rx, - __entry->channel_time_tx, __entry->filled, __entry->noise) + __entry->time, __entry->time_busy, + __entry->time_ext_busy, __entry->time_rx, + __entry->time_tx, __entry->time_scan, + __entry->filled, __entry->noise) ); TRACE_EVENT(rdev_tdls_oper, diff --git a/net/wireless/util.c b/net/wireless/util.c index d0ac795445b7..6903dbdcb8c1 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -227,18 +227,32 @@ int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev, if (pairwise && !mac_addr) return -EINVAL; - /* - * Disallow pairwise keys with non-zero index unless it's WEP - * or a vendor specific cipher (because current deployments use - * pairwise WEP keys with non-zero indices and for vendor specific - * ciphers this should be validated in the driver or hardware level - * - but 802.11i clearly specifies to use zero) - */ - if (pairwise && key_idx && - ((params->cipher == WLAN_CIPHER_SUITE_TKIP) || - (params->cipher == WLAN_CIPHER_SUITE_CCMP) || - (params->cipher == WLAN_CIPHER_SUITE_AES_CMAC))) - return -EINVAL; + switch (params->cipher) { + case WLAN_CIPHER_SUITE_TKIP: + case WLAN_CIPHER_SUITE_CCMP: + case WLAN_CIPHER_SUITE_CCMP_256: + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: + /* Disallow pairwise keys with non-zero index unless it's WEP + * or a vendor specific cipher (because current deployments use + * pairwise WEP keys with non-zero indices and for vendor + * specific ciphers this should be validated in the driver or + * hardware level - but 802.11i clearly specifies to use zero) + */ + if (pairwise && key_idx) + return -EINVAL; + break; + case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_CMAC_256: + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + /* Disallow BIP (group-only) cipher as pairwise cipher */ + if (pairwise) + return -EINVAL; + break; + default: + break; + } switch (params->cipher) { case WLAN_CIPHER_SUITE_WEP40: @@ -253,6 +267,18 @@ int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev, if (params->key_len != WLAN_KEY_LEN_CCMP) return -EINVAL; break; + case WLAN_CIPHER_SUITE_CCMP_256: + if (params->key_len != WLAN_KEY_LEN_CCMP_256) + return -EINVAL; + break; + case WLAN_CIPHER_SUITE_GCMP: + if (params->key_len != WLAN_KEY_LEN_GCMP) + return -EINVAL; + break; + case WLAN_CIPHER_SUITE_GCMP_256: + if (params->key_len != WLAN_KEY_LEN_GCMP_256) + return -EINVAL; + break; case WLAN_CIPHER_SUITE_WEP104: if (params->key_len != WLAN_KEY_LEN_WEP104) return -EINVAL; @@ -261,6 +287,18 @@ int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev, if (params->key_len != 
WLAN_KEY_LEN_AES_CMAC) return -EINVAL; break; + case WLAN_CIPHER_SUITE_BIP_CMAC_256: + if (params->key_len != WLAN_KEY_LEN_BIP_CMAC_256) + return -EINVAL; + break; + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + if (params->key_len != WLAN_KEY_LEN_BIP_GMAC_128) + return -EINVAL; + break; + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + if (params->key_len != WLAN_KEY_LEN_BIP_GMAC_256) + return -EINVAL; + break; default: /* * We don't know anything about this algorithm, @@ -280,7 +318,13 @@ int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev, return -EINVAL; case WLAN_CIPHER_SUITE_TKIP: case WLAN_CIPHER_SUITE_CCMP: + case WLAN_CIPHER_SUITE_CCMP_256: + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_CMAC_256: + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: if (params->seq_len != 6) return -EINVAL; break; @@ -308,6 +352,12 @@ unsigned int __attribute_const__ ieee80211_hdrlen(__le16 fc) goto out; } + if (ieee80211_is_mgmt(fc)) { + if (ieee80211_has_order(fc)) + hdrlen += IEEE80211_HT_CTL_LEN; + goto out; + } + if (ieee80211_is_ctl(fc)) { /* * ACK and CTS are 10 bytes, all others 16. To see how @@ -708,8 +758,8 @@ unsigned int cfg80211_classify8021d(struct sk_buff *skb, if (skb->priority >= 256 && skb->priority <= 263) return skb->priority - 256; - if (vlan_tx_tag_present(skb)) { - vlan_priority = (vlan_tx_tag_get(skb) & VLAN_PRIO_MASK) + if (skb_vlan_tag_present(skb)) { + vlan_priority = (skb_vlan_tag_get(skb) & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; if (vlan_priority > 0) return vlan_priority; @@ -1073,10 +1123,24 @@ static u32 cfg80211_calculate_bitrate_vht(struct rate_info *rate) if (WARN_ON_ONCE(rate->mcs > 9)) return 0; - idx = rate->flags & (RATE_INFO_FLAGS_160_MHZ_WIDTH | - RATE_INFO_FLAGS_80P80_MHZ_WIDTH) ? 3 : - rate->flags & RATE_INFO_FLAGS_80_MHZ_WIDTH ? 2 : - rate->flags & RATE_INFO_FLAGS_40_MHZ_WIDTH ? 1 : 0; + switch (rate->bw) { + case RATE_INFO_BW_160: + idx = 3; + break; + case RATE_INFO_BW_80: + idx = 2; + break; + case RATE_INFO_BW_40: + idx = 1; + break; + case RATE_INFO_BW_5: + case RATE_INFO_BW_10: + default: + WARN_ON(1); + /* fall through */ + case RATE_INFO_BW_20: + idx = 0; + } bitrate = base[idx][rate->mcs]; bitrate *= rate->nss; @@ -1107,8 +1171,7 @@ u32 cfg80211_calculate_bitrate(struct rate_info *rate) modulation = rate->mcs & 7; streams = (rate->mcs >> 3) + 1; - bitrate = (rate->flags & RATE_INFO_FLAGS_40_MHZ_WIDTH) ? - 13500000 : 6500000; + bitrate = (rate->bw == RATE_INFO_BW_40) ? 
13500000 : 6500000; if (modulation < 4) bitrate *= (modulation + 1); diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 0f47948c572f..5b24d39d7903 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -1300,7 +1300,7 @@ static int cfg80211_wext_giwrate(struct net_device *dev, if (err) return err; - if (!(sinfo.filled & STATION_INFO_TX_BITRATE)) + if (!(sinfo.filled & BIT(NL80211_STA_INFO_TX_BITRATE))) return -EOPNOTSUPP; rate->value = 100000 * cfg80211_calculate_bitrate(&sinfo.txrate); @@ -1340,7 +1340,7 @@ static struct iw_statistics *cfg80211_wireless_stats(struct net_device *dev) switch (rdev->wiphy.signal_type) { case CFG80211_SIGNAL_TYPE_MBM: - if (sinfo.filled & STATION_INFO_SIGNAL) { + if (sinfo.filled & BIT(NL80211_STA_INFO_SIGNAL)) { int sig = sinfo.signal; wstats.qual.updated |= IW_QUAL_LEVEL_UPDATED; wstats.qual.updated |= IW_QUAL_QUAL_UPDATED; @@ -1354,7 +1354,7 @@ static struct iw_statistics *cfg80211_wireless_stats(struct net_device *dev) break; } case CFG80211_SIGNAL_TYPE_UNSPEC: - if (sinfo.filled & STATION_INFO_SIGNAL) { + if (sinfo.filled & BIT(NL80211_STA_INFO_SIGNAL)) { wstats.qual.updated |= IW_QUAL_LEVEL_UPDATED; wstats.qual.updated |= IW_QUAL_QUAL_UPDATED; wstats.qual.level = sinfo.signal; @@ -1367,9 +1367,9 @@ static struct iw_statistics *cfg80211_wireless_stats(struct net_device *dev) } wstats.qual.updated |= IW_QUAL_NOISE_INVALID; - if (sinfo.filled & STATION_INFO_RX_DROP_MISC) + if (sinfo.filled & BIT(NL80211_STA_INFO_RX_DROP_MISC)) wstats.discard.misc = sinfo.rx_dropped_misc; - if (sinfo.filled & STATION_INFO_TX_FAILED) + if (sinfo.filled & BIT(NL80211_STA_INFO_TX_FAILED)) wstats.discard.retries = sinfo.tx_failed; return &wstats; diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c index debe733386f8..12e82a5e4ad5 100644 --- a/net/xfrm/xfrm_algo.c +++ b/net/xfrm/xfrm_algo.c @@ -561,11 +561,6 @@ static struct xfrm_algo_desc calg_list[] = { }, }; -static inline int aead_entries(void) -{ - return ARRAY_SIZE(aead_list); -} - static inline int aalg_entries(void) { return ARRAY_SIZE(aalg_list); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 8128594ab379..7de2ed9ec46d 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1019,7 +1019,8 @@ static int build_spdinfo(struct sk_buff *skb, struct net *net, return err; } - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; } static int xfrm_set_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -1121,7 +1122,8 @@ static int build_sadinfo(struct sk_buff *skb, struct net *net, return err; } - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; } static int xfrm_get_sadinfo(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -1842,7 +1844,8 @@ static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, const struct if (err) goto out_cancel; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; out_cancel: nlmsg_cancel(skb, nlh); @@ -2282,7 +2285,8 @@ static int build_migrate(struct sk_buff *skb, const struct xfrm_migrate *m, goto out_cancel; } - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; out_cancel: nlmsg_cancel(skb, nlh); @@ -2490,7 +2494,8 @@ static int build_expire(struct sk_buff *skb, struct xfrm_state *x, const struct if (err) return err; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; } static int xfrm_exp_state_notify(struct xfrm_state *x, const struct km_event *c) @@ -2712,7 +2717,8 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x, return err; } - 
return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; } static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt, @@ -2827,7 +2833,8 @@ static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp, } upe->hard = !!hard; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; } static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c) @@ -2986,7 +2993,8 @@ static int build_report(struct sk_buff *skb, u8 proto, return err; } } - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; } static int xfrm_send_report(struct net *net, u8 proto, @@ -3031,7 +3039,8 @@ static int build_mapping(struct sk_buff *skb, struct xfrm_state *x, um->old_sport = x->encap->encap_sport; um->reqid = x->props.reqid; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; } static int xfrm_send_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, |