summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/8021q/vlan.c126
-rw-r--r--net/8021q/vlan.h43
-rw-r--r--net/8021q/vlan_core.c270
-rw-r--r--net/8021q/vlan_dev.c140
-rw-r--r--net/8021q/vlan_gvrp.c4
-rw-r--r--net/8021q/vlan_netlink.c10
-rw-r--r--net/8021q/vlanproc.c42
-rw-r--r--net/Kconfig14
-rw-r--r--net/Makefile1
-rw-r--r--net/atm/atm_misc.c2
-rw-r--r--net/atm/br2684.c38
-rw-r--r--net/atm/clip.c159
-rw-r--r--net/atm/common.c34
-rw-r--r--net/atm/common.h1
-rw-r--r--net/atm/pppoatm.c4
-rw-r--r--net/ax25/af_ax25.c26
-rw-r--r--net/batman-adv/bat_sysfs.c4
-rw-r--r--net/batman-adv/bitarray.c2
-rw-r--r--net/batman-adv/gateway_client.c153
-rw-r--r--net/batman-adv/gateway_client.h5
-rw-r--r--net/batman-adv/gateway_common.c4
-rw-r--r--net/batman-adv/hash.c4
-rw-r--r--net/batman-adv/hash.h13
-rw-r--r--net/batman-adv/icmp_socket.c14
-rw-r--r--net/batman-adv/main.h2
-rw-r--r--net/batman-adv/originator.c13
-rw-r--r--net/batman-adv/originator.h2
-rw-r--r--net/batman-adv/routing.c21
-rw-r--r--net/batman-adv/soft-interface.c45
-rw-r--r--net/batman-adv/translation-table.c456
-rw-r--r--net/batman-adv/types.h14
-rw-r--r--net/batman-adv/vis.c23
-rw-r--r--net/bluetooth/bnep/core.c4
-rw-r--r--net/bluetooth/hci_event.c2
-rw-r--r--net/bluetooth/hci_sock.c2
-rw-r--r--net/bluetooth/l2cap_core.c2
-rw-r--r--net/bluetooth/rfcomm/core.c4
-rw-r--r--net/bluetooth/sco.c2
-rw-r--r--net/bridge/br.c4
-rw-r--r--net/bridge/br_device.c14
-rw-r--r--net/bridge/br_fdb.c102
-rw-r--r--net/bridge/br_forward.c2
-rw-r--r--net/bridge/br_if.c5
-rw-r--r--net/bridge/br_multicast.c51
-rw-r--r--net/bridge/br_netfilter.c12
-rw-r--r--net/bridge/br_netlink.c6
-rw-r--r--net/bridge/br_private.h8
-rw-r--r--net/bridge/br_stp.c29
-rw-r--r--net/bridge/netfilter/ebt_ip6.c3
-rw-r--r--net/bridge/netfilter/ebt_log.c5
-rw-r--r--net/caif/Kconfig11
-rw-r--r--net/caif/Makefile1
-rw-r--r--net/caif/caif_dev.c273
-rw-r--r--net/caif/caif_usb.c208
-rw-r--r--net/caif/cfcnfg.c47
-rw-r--r--net/caif/cffrml.c11
-rw-r--r--net/caif/cfpkt_skbuff.c15
-rw-r--r--net/caif/cfrfml.c2
-rw-r--r--net/caif/cfserl.c3
-rw-r--r--net/ceph/crush/mapper.c35
-rw-r--r--net/ceph/osd_client.c2
-rw-r--r--net/core/Makefile6
-rw-r--r--net/core/dev.c316
-rw-r--r--net/core/dev_addr_lists.c3
-rw-r--r--net/core/dst.c2
-rw-r--r--net/core/ethtool.c646
-rw-r--r--net/core/flow.c12
-rw-r--r--net/core/flow_dissector.c143
-rw-r--r--net/core/neighbour.c227
-rw-r--r--net/core/net-sysfs.c324
-rw-r--r--net/core/netpoll.c10
-rw-r--r--net/core/netprio_cgroup.c344
-rw-r--r--net/core/pktgen.c17
-rw-r--r--net/core/request_sock.c7
-rw-r--r--net/core/rtnetlink.c25
-rw-r--r--net/core/secure_seq.c8
-rw-r--r--net/core/skbuff.c71
-rw-r--r--net/core/sock.c194
-rw-r--r--net/core/sock_diag.c192
-rw-r--r--net/core/sysctl_net_core.c9
-rw-r--r--net/dccp/ccids/ccid2.c4
-rw-r--r--net/dccp/ccids/ccid3.c2
-rw-r--r--net/dccp/ccids/lib/tfrc.c2
-rw-r--r--net/dccp/ccids/lib/tfrc.h2
-rw-r--r--net/dccp/dccp.h4
-rw-r--r--net/dccp/diag.c20
-rw-r--r--net/dccp/feat.c16
-rw-r--r--net/dccp/ipv4.c6
-rw-r--r--net/dccp/ipv6.c42
-rw-r--r--net/dccp/minisocks.c8
-rw-r--r--net/dccp/options.c2
-rw-r--r--net/dccp/probe.c14
-rw-r--r--net/dccp/proto.c2
-rw-r--r--net/decnet/dn_neigh.c8
-rw-r--r--net/decnet/dn_route.c18
-rw-r--r--net/decnet/dn_timer.c17
-rw-r--r--net/dsa/Kconfig38
-rw-r--r--net/dsa/Makefile19
-rw-r--r--net/dsa/dsa.c51
-rw-r--r--net/dsa/dsa_priv.h127
-rw-r--r--net/dsa/mv88e6060.c288
-rw-r--r--net/dsa/mv88e6123_61_65.c447
-rw-r--r--net/dsa/mv88e6131.c443
-rw-r--r--net/dsa/mv88e6xxx.c522
-rw-r--r--net/dsa/mv88e6xxx.h95
-rw-r--r--net/dsa/tag_dsa.c15
-rw-r--r--net/dsa/tag_edsa.c15
-rw-r--r--net/dsa/tag_trailer.c15
-rw-r--r--net/econet/af_econet.c7
-rw-r--r--net/ieee802154/6lowpan.c374
-rw-r--r--net/ieee802154/6lowpan.h23
-rw-r--r--net/ieee802154/dgram.c7
-rw-r--r--net/ieee802154/raw.c7
-rw-r--r--net/ipv4/Kconfig4
-rw-r--r--net/ipv4/Makefile2
-rw-r--r--net/ipv4/af_inet.c13
-rw-r--r--net/ipv4/ah4.c8
-rw-r--r--net/ipv4/arp.c40
-rw-r--r--net/ipv4/devinet.c5
-rw-r--r--net/ipv4/fib_rules.c1
-rw-r--r--net/ipv4/fib_trie.c1
-rw-r--r--net/ipv4/igmp.c20
-rw-r--r--net/ipv4/inet_connection_sock.c19
-rw-r--r--net/ipv4/inet_diag.c482
-rw-r--r--net/ipv4/ip_forward.c2
-rw-r--r--net/ipv4/ip_gre.c14
-rw-r--r--net/ipv4/ip_options.c4
-rw-r--r--net/ipv4/ip_output.c23
-rw-r--r--net/ipv4/ip_sockglue.c41
-rw-r--r--net/ipv4/ipconfig.c19
-rw-r--r--net/ipv4/ipip.c9
-rw-r--r--net/ipv4/ipmr.c4
-rw-r--r--net/ipv4/netfilter.c3
-rw-r--r--net/ipv4/netfilter/Kconfig19
-rw-r--r--net/ipv4/netfilter/Makefile2
-rw-r--r--net/ipv4/netfilter/ip_queue.c6
-rw-r--r--net/ipv4/netfilter/ipt_MASQUERADE.c16
-rw-r--r--net/ipv4/netfilter/ipt_NETMAP.c14
-rw-r--r--net/ipv4/netfilter/ipt_REDIRECT.c16
-rw-r--r--net/ipv4/netfilter/ipt_ULOG.c2
-rw-r--r--net/ipv4/netfilter/ipt_ecn.c127
-rw-r--r--net/ipv4/netfilter/ipt_rpfilter.c141
-rw-r--r--net/ipv4/netfilter/iptable_filter.c2
-rw-r--r--net/ipv4/netfilter/nf_nat_core.c96
-rw-r--r--net/ipv4/netfilter/nf_nat_h323.c20
-rw-r--r--net/ipv4/netfilter/nf_nat_helper.c16
-rw-r--r--net/ipv4/netfilter/nf_nat_pptp.c14
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_common.c36
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_dccp.c6
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_gre.c10
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_icmp.c6
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_sctp.c6
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_tcp.c6
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_udp.c6
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_udplite.c6
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_unknown.c3
-rw-r--r--net/ipv4/netfilter/nf_nat_rule.c22
-rw-r--r--net/ipv4/netfilter/nf_nat_sip.c10
-rw-r--r--net/ipv4/netfilter/nf_nat_standalone.c2
-rw-r--r--net/ipv4/ping.c2
-rw-r--r--net/ipv4/proc.c15
-rw-r--r--net/ipv4/raw.c10
-rw-r--r--net/ipv4/route.c320
-rw-r--r--net/ipv4/syncookies.c2
-rw-r--r--net/ipv4/sysctl_net_ipv4.c69
-rw-r--r--net/ipv4/tcp.c57
-rw-r--r--net/ipv4/tcp_cong.c2
-rw-r--r--net/ipv4/tcp_diag.c20
-rw-r--r--net/ipv4/tcp_input.c66
-rw-r--r--net/ipv4/tcp_ipv4.c16
-rw-r--r--net/ipv4/tcp_memcontrol.c272
-rw-r--r--net/ipv4/tcp_minisocks.c12
-rw-r--r--net/ipv4/tcp_output.c29
-rw-r--r--net/ipv4/tcp_timer.c8
-rw-r--r--net/ipv4/tunnel4.c10
-rw-r--r--net/ipv4/udp.c24
-rw-r--r--net/ipv4/udp_diag.c201
-rw-r--r--net/ipv4/xfrm4_tunnel.c6
-rw-r--r--net/ipv6/addrconf.c31
-rw-r--r--net/ipv6/af_inet6.c29
-rw-r--r--net/ipv6/ah6.c20
-rw-r--r--net/ipv6/anycast.c8
-rw-r--r--net/ipv6/datagram.c36
-rw-r--r--net/ipv6/exthdrs.c18
-rw-r--r--net/ipv6/exthdrs_core.c11
-rw-r--r--net/ipv6/fib6_rules.c2
-rw-r--r--net/ipv6/icmp.c25
-rw-r--r--net/ipv6/inet6_connection_sock.c14
-rw-r--r--net/ipv6/ip6_fib.c234
-rw-r--r--net/ipv6/ip6_flowlabel.c2
-rw-r--r--net/ipv6/ip6_input.c11
-rw-r--r--net/ipv6/ip6_output.c37
-rw-r--r--net/ipv6/ip6_tunnel.c32
-rw-r--r--net/ipv6/ip6mr.c12
-rw-r--r--net/ipv6/ipv6_sockglue.c10
-rw-r--r--net/ipv6/mcast.c44
-rw-r--r--net/ipv6/mip6.c4
-rw-r--r--net/ipv6/ndisc.c52
-rw-r--r--net/ipv6/netfilter/Kconfig11
-rw-r--r--net/ipv6/netfilter/Makefile1
-rw-r--r--net/ipv6/netfilter/ip6_queue.c5
-rw-r--r--net/ipv6/netfilter/ip6t_REJECT.c11
-rw-r--r--net/ipv6/netfilter/ip6t_rpfilter.c133
-rw-r--r--net/ipv6/netfilter/ip6table_filter.c2
-rw-r--r--net/ipv6/proc.c15
-rw-r--r--net/ipv6/raw.c19
-rw-r--r--net/ipv6/reassembly.c4
-rw-r--r--net/ipv6/route.c334
-rw-r--r--net/ipv6/sit.c17
-rw-r--r--net/ipv6/syncookies.c8
-rw-r--r--net/ipv6/tcp_ipv6.c67
-rw-r--r--net/ipv6/udp.c32
-rw-r--r--net/ipv6/xfrm6_mode_beet.c8
-rw-r--r--net/ipv6/xfrm6_mode_tunnel.c4
-rw-r--r--net/ipv6/xfrm6_output.c4
-rw-r--r--net/ipv6/xfrm6_policy.c4
-rw-r--r--net/ipv6/xfrm6_state.c4
-rw-r--r--net/irda/af_irda.c4
-rw-r--r--net/irda/irlan/irlan_common.c2
-rw-r--r--net/irda/irttp.c4
-rw-r--r--net/iucv/af_iucv.c103
-rw-r--r--net/key/af_key.c20
-rw-r--r--net/l2tp/l2tp_core.c10
-rw-r--r--net/llc/af_llc.c14
-rw-r--r--net/mac80211/rc80211_pid_algo.c4
-rw-r--r--net/mac80211/scan.c2
-rw-r--r--net/netfilter/Kconfig39
-rw-r--r--net/netfilter/Makefile3
-rw-r--r--net/netfilter/core.c15
-rw-r--r--net/netfilter/ipset/ip_set_getport.c6
-rw-r--r--net/netfilter/ipset/ip_set_hash_ip.c2
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipport.c2
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipportip.c2
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipportnet.c2
-rw-r--r--net/netfilter/ipset/ip_set_hash_net.c2
-rw-r--r--net/netfilter/ipvs/Kconfig15
-rw-r--r--net/netfilter/ipvs/ip_vs_conn.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c12
-rw-r--r--net/netfilter/ipvs/ip_vs_pe_sip.c4
-rw-r--r--net/netfilter/ipvs/ip_vs_sh.c18
-rw-r--r--net/netfilter/ipvs/ip_vs_sync.c8
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c20
-rw-r--r--net/netfilter/nf_conntrack_acct.c6
-rw-r--r--net/netfilter/nf_conntrack_core.c20
-rw-r--r--net/netfilter/nf_conntrack_ecache.c37
-rw-r--r--net/netfilter/nf_conntrack_expect.c75
-rw-r--r--net/netfilter/nf_conntrack_ftp.c2
-rw-r--r--net/netfilter/nf_conntrack_h323_main.c9
-rw-r--r--net/netfilter/nf_conntrack_helper.c12
-rw-r--r--net/netfilter/nf_conntrack_netlink.c177
-rw-r--r--net/netfilter/nf_conntrack_proto_dccp.c6
-rw-r--r--net/netfilter/nf_conntrack_proto_gre.c2
-rw-r--r--net/netfilter/nf_conntrack_proto_sctp.c6
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c6
-rw-r--r--net/netfilter/nf_conntrack_proto_udp.c4
-rw-r--r--net/netfilter/nf_conntrack_proto_udplite.c4
-rw-r--r--net/netfilter/nf_conntrack_standalone.c4
-rw-r--r--net/netfilter/nf_conntrack_timestamp.c2
-rw-r--r--net/netfilter/nfnetlink_acct.c361
-rw-r--r--net/netfilter/xt_AUDIT.c3
-rw-r--r--net/netfilter/xt_CT.c8
-rw-r--r--net/netfilter/xt_NFQUEUE.c4
-rw-r--r--net/netfilter/xt_TCPMSS.c11
-rw-r--r--net/netfilter/xt_TCPOPTSTRIP.c7
-rw-r--r--net/netfilter/xt_TEE.c11
-rw-r--r--net/netfilter/xt_TPROXY.c2
-rw-r--r--net/netfilter/xt_addrtype.c10
-rw-r--r--net/netfilter/xt_connbytes.c38
-rw-r--r--net/netfilter/xt_ecn.c179
-rw-r--r--net/netfilter/xt_hashlimit.c19
-rw-r--r--net/netfilter/xt_nfacct.c76
-rw-r--r--net/netfilter/xt_socket.c8
-rw-r--r--net/netlabel/netlabel_addrlist.c8
-rw-r--r--net/netlabel/netlabel_addrlist.h2
-rw-r--r--net/netlabel/netlabel_domainhash.c20
-rw-r--r--net/netlabel/netlabel_domainhash.h2
-rw-r--r--net/netlabel/netlabel_kapi.c42
-rw-r--r--net/netlabel/netlabel_mgmt.c10
-rw-r--r--net/netlabel/netlabel_unlabeled.c30
-rw-r--r--net/netlink/af_netlink.c30
-rw-r--r--net/netlink/genetlink.c32
-rw-r--r--net/netrom/af_netrom.c17
-rw-r--r--net/netrom/nr_route.c11
-rw-r--r--net/openvswitch/Kconfig28
-rw-r--r--net/openvswitch/Makefile14
-rw-r--r--net/openvswitch/actions.c415
-rw-r--r--net/openvswitch/datapath.c1912
-rw-r--r--net/openvswitch/datapath.h125
-rw-r--r--net/openvswitch/dp_notify.c66
-rw-r--r--net/openvswitch/flow.c1346
-rw-r--r--net/openvswitch/flow.h199
-rw-r--r--net/openvswitch/vport-internal_dev.c241
-rw-r--r--net/openvswitch/vport-internal_dev.h28
-rw-r--r--net/openvswitch/vport-netdev.c198
-rw-r--r--net/openvswitch/vport-netdev.h42
-rw-r--r--net/openvswitch/vport.c398
-rw-r--r--net/openvswitch/vport.h205
-rw-r--r--net/packet/af_packet.c33
-rw-r--r--net/phonet/pep.c106
-rw-r--r--net/rds/Kconfig1
-rw-r--r--net/rfkill/rfkill-regulator.c6
-rw-r--r--net/rxrpc/ar-ack.c14
-rw-r--r--net/rxrpc/ar-key.c6
-rw-r--r--net/rxrpc/ar-output.c4
-rw-r--r--net/sched/cls_flow.c182
-rw-r--r--net/sched/sch_api.c14
-rw-r--r--net/sched/sch_choke.c121
-rw-r--r--net/sched/sch_generic.c9
-rw-r--r--net/sched/sch_gred.c36
-rw-r--r--net/sched/sch_hfsc.c10
-rw-r--r--net/sched/sch_mqprio.c2
-rw-r--r--net/sched/sch_multiq.c6
-rw-r--r--net/sched/sch_netem.c281
-rw-r--r--net/sched/sch_qfq.c17
-rw-r--r--net/sched/sch_red.c34
-rw-r--r--net/sched/sch_sfb.c17
-rw-r--r--net/sched/sch_sfq.c138
-rw-r--r--net/sched/sch_tbf.c1
-rw-r--r--net/sched/sch_teql.c37
-rw-r--r--net/sctp/associola.c2
-rw-r--r--net/sctp/auth.c2
-rw-r--r--net/sctp/input.c2
-rw-r--r--net/sctp/ipv6.c40
-rw-r--r--net/sctp/output.c8
-rw-r--r--net/sctp/outqueue.c6
-rw-r--r--net/sctp/protocol.c5
-rw-r--r--net/sctp/sm_make_chunk.c4
-rw-r--r--net/sctp/sm_sideeffect.c8
-rw-r--r--net/sctp/socket.c8
-rw-r--r--net/sctp/sysctl.c13
-rw-r--r--net/sctp/transport.c16
-rw-r--r--net/socket.c2
-rw-r--r--net/sunrpc/addr.c8
-rw-r--r--net/sunrpc/sched.c30
-rw-r--r--net/sunrpc/svc.c8
-rw-r--r--net/sunrpc/svc_xprt.c8
-rw-r--r--net/sunrpc/svcauth_unix.c8
-rw-r--r--net/sunrpc/svcsock.c4
-rw-r--r--net/sunrpc/xprt.c10
-rw-r--r--net/sunrpc/xprtsock.c7
-rw-r--r--net/tipc/bcast.c161
-rw-r--r--net/tipc/bcast.h16
-rw-r--r--net/tipc/bearer.c190
-rw-r--r--net/tipc/bearer.h77
-rw-r--r--net/tipc/config.c13
-rw-r--r--net/tipc/core.c2
-rw-r--r--net/tipc/discover.c27
-rw-r--r--net/tipc/discover.h8
-rw-r--r--net/tipc/eth_media.c159
-rw-r--r--net/tipc/link.c346
-rw-r--r--net/tipc/link.h63
-rw-r--r--net/tipc/msg.c9
-rw-r--r--net/tipc/msg.h16
-rw-r--r--net/tipc/name_distr.c14
-rw-r--r--net/tipc/name_table.c15
-rw-r--r--net/tipc/name_table.h10
-rw-r--r--net/tipc/net.c7
-rw-r--r--net/tipc/node.c25
-rw-r--r--net/tipc/node.h12
-rw-r--r--net/tipc/port.c8
-rw-r--r--net/tipc/port.h4
-rw-r--r--net/tipc/ref.c3
-rw-r--r--net/tipc/socket.c3
-rw-r--r--net/tipc/subscr.c46
-rw-r--r--net/tipc/subscr.h10
-rw-r--r--net/unix/Kconfig7
-rw-r--r--net/unix/Makefile3
-rw-r--r--net/unix/af_unix.c72
-rw-r--r--net/unix/diag.c329
-rw-r--r--net/x25/af_x25.c2
-rw-r--r--net/x25/x25_dev.c6
-rw-r--r--net/x25/x25_route.c2
-rw-r--r--net/xfrm/xfrm_policy.c40
-rw-r--r--net/xfrm/xfrm_state.c12
-rw-r--r--net/xfrm/xfrm_user.c12
376 files changed, 14478 insertions, 6922 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 5471628d3ffe..efea35b02e7f 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -51,27 +51,6 @@ const char vlan_version[] = DRV_VERSION;
/* End of global variables definitions. */
-static void vlan_group_free(struct vlan_group *grp)
-{
- int i;
-
- for (i = 0; i < VLAN_GROUP_ARRAY_SPLIT_PARTS; i++)
- kfree(grp->vlan_devices_arrays[i]);
- kfree(grp);
-}
-
-static struct vlan_group *vlan_group_alloc(struct net_device *real_dev)
-{
- struct vlan_group *grp;
-
- grp = kzalloc(sizeof(struct vlan_group), GFP_KERNEL);
- if (!grp)
- return NULL;
-
- grp->real_dev = real_dev;
- return grp;
-}
-
static int vlan_group_prealloc_vid(struct vlan_group *vg, u16 vlan_id)
{
struct net_device **array;
@@ -92,32 +71,29 @@ static int vlan_group_prealloc_vid(struct vlan_group *vg, u16 vlan_id)
return 0;
}
-static void vlan_rcu_free(struct rcu_head *rcu)
-{
- vlan_group_free(container_of(rcu, struct vlan_group, rcu));
-}
-
void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
{
- struct vlan_dev_info *vlan = vlan_dev_info(dev);
+ struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
struct net_device *real_dev = vlan->real_dev;
- const struct net_device_ops *ops = real_dev->netdev_ops;
+ struct vlan_info *vlan_info;
struct vlan_group *grp;
u16 vlan_id = vlan->vlan_id;
ASSERT_RTNL();
- grp = rtnl_dereference(real_dev->vlgrp);
- BUG_ON(!grp);
+ vlan_info = rtnl_dereference(real_dev->vlan_info);
+ BUG_ON(!vlan_info);
+
+ grp = &vlan_info->grp;
/* Take it out of our own structures, but be sure to interlock with
* HW accelerating devices or SW vlan input packet processing if
* VLAN is not 0 (leave it there for 802.1p).
*/
- if (vlan_id && (real_dev->features & NETIF_F_HW_VLAN_FILTER))
- ops->ndo_vlan_rx_kill_vid(real_dev, vlan_id);
+ if (vlan_id)
+ vlan_vid_del(real_dev, vlan_id);
- grp->nr_vlans--;
+ grp->nr_vlan_devs--;
if (vlan->flags & VLAN_FLAG_GVRP)
vlan_gvrp_request_leave(dev);
@@ -129,16 +105,9 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
*/
unregister_netdevice_queue(dev, head);
- /* If the group is now empty, kill off the group. */
- if (grp->nr_vlans == 0) {
+ if (grp->nr_vlan_devs == 0)
vlan_gvrp_uninit_applicant(real_dev);
- RCU_INIT_POINTER(real_dev->vlgrp, NULL);
-
- /* Free the group, after all cpu's are done. */
- call_rcu(&grp->rcu, vlan_rcu_free);
- }
-
/* Get rid of the vlan's reference to real_dev */
dev_put(real_dev);
}
@@ -167,21 +136,26 @@ int vlan_check_real_dev(struct net_device *real_dev, u16 vlan_id)
int register_vlan_dev(struct net_device *dev)
{
- struct vlan_dev_info *vlan = vlan_dev_info(dev);
+ struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
struct net_device *real_dev = vlan->real_dev;
- const struct net_device_ops *ops = real_dev->netdev_ops;
u16 vlan_id = vlan->vlan_id;
- struct vlan_group *grp, *ngrp = NULL;
+ struct vlan_info *vlan_info;
+ struct vlan_group *grp;
int err;
- grp = rtnl_dereference(real_dev->vlgrp);
- if (!grp) {
- ngrp = grp = vlan_group_alloc(real_dev);
- if (!grp)
- return -ENOBUFS;
+ err = vlan_vid_add(real_dev, vlan_id);
+ if (err)
+ return err;
+
+ vlan_info = rtnl_dereference(real_dev->vlan_info);
+ /* vlan_info should be there now. vlan_vid_add took care of it */
+ BUG_ON(!vlan_info);
+
+ grp = &vlan_info->grp;
+ if (grp->nr_vlan_devs == 0) {
err = vlan_gvrp_init_applicant(real_dev);
if (err < 0)
- goto out_free_group;
+ goto out_vid_del;
}
err = vlan_group_prealloc_vid(grp, vlan_id);
@@ -192,7 +166,7 @@ int register_vlan_dev(struct net_device *dev)
if (err < 0)
goto out_uninit_applicant;
- /* Account for reference in struct vlan_dev_info */
+ /* Account for reference in struct vlan_dev_priv */
dev_hold(real_dev);
netif_stacked_transfer_operstate(real_dev, dev);
@@ -202,24 +176,15 @@ int register_vlan_dev(struct net_device *dev)
* it into our local structure.
*/
vlan_group_set_device(grp, vlan_id, dev);
- grp->nr_vlans++;
-
- if (ngrp) {
- rcu_assign_pointer(real_dev->vlgrp, ngrp);
- }
- if (real_dev->features & NETIF_F_HW_VLAN_FILTER)
- ops->ndo_vlan_rx_add_vid(real_dev, vlan_id);
+ grp->nr_vlan_devs++;
return 0;
out_uninit_applicant:
- if (ngrp)
+ if (grp->nr_vlan_devs == 0)
vlan_gvrp_uninit_applicant(real_dev);
-out_free_group:
- if (ngrp) {
- /* Free the group, after all cpu's are done. */
- call_rcu(&ngrp->rcu, vlan_rcu_free);
- }
+out_vid_del:
+ vlan_vid_del(real_dev, vlan_id);
return err;
}
@@ -267,7 +232,7 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id)
snprintf(name, IFNAMSIZ, "vlan%.4i", vlan_id);
}
- new_dev = alloc_netdev(sizeof(struct vlan_dev_info), name, vlan_setup);
+ new_dev = alloc_netdev(sizeof(struct vlan_dev_priv), name, vlan_setup);
if (new_dev == NULL)
return -ENOBUFS;
@@ -278,10 +243,10 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id)
*/
new_dev->mtu = real_dev->mtu;
- vlan_dev_info(new_dev)->vlan_id = vlan_id;
- vlan_dev_info(new_dev)->real_dev = real_dev;
- vlan_dev_info(new_dev)->dent = NULL;
- vlan_dev_info(new_dev)->flags = VLAN_FLAG_REORDER_HDR;
+ vlan_dev_priv(new_dev)->vlan_id = vlan_id;
+ vlan_dev_priv(new_dev)->real_dev = real_dev;
+ vlan_dev_priv(new_dev)->dent = NULL;
+ vlan_dev_priv(new_dev)->flags = VLAN_FLAG_REORDER_HDR;
new_dev->rtnl_link_ops = &vlan_link_ops;
err = register_vlan_dev(new_dev);
@@ -298,7 +263,7 @@ out_free_newdev:
static void vlan_sync_address(struct net_device *dev,
struct net_device *vlandev)
{
- struct vlan_dev_info *vlan = vlan_dev_info(vlandev);
+ struct vlan_dev_priv *vlan = vlan_dev_priv(vlandev);
/* May be called without an actual change */
if (!compare_ether_addr(vlan->real_dev_addr, dev->dev_addr))
@@ -360,25 +325,26 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
{
struct net_device *dev = ptr;
struct vlan_group *grp;
+ struct vlan_info *vlan_info;
int i, flgs;
struct net_device *vlandev;
- struct vlan_dev_info *vlan;
+ struct vlan_dev_priv *vlan;
LIST_HEAD(list);
if (is_vlan_dev(dev))
__vlan_device_event(dev, event);
if ((event == NETDEV_UP) &&
- (dev->features & NETIF_F_HW_VLAN_FILTER) &&
- dev->netdev_ops->ndo_vlan_rx_add_vid) {
+ (dev->features & NETIF_F_HW_VLAN_FILTER)) {
pr_info("adding VLAN 0 to HW filter on device %s\n",
dev->name);
- dev->netdev_ops->ndo_vlan_rx_add_vid(dev, 0);
+ vlan_vid_add(dev, 0);
}
- grp = rtnl_dereference(dev->vlgrp);
- if (!grp)
+ vlan_info = rtnl_dereference(dev->vlan_info);
+ if (!vlan_info)
goto out;
+ grp = &vlan_info->grp;
/* It is OK that we do not hold the group lock right now,
* as we run under the RTNL lock.
@@ -447,7 +413,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
if (!(flgs & IFF_UP))
continue;
- vlan = vlan_dev_info(vlandev);
+ vlan = vlan_dev_priv(vlandev);
if (!(vlan->flags & VLAN_FLAG_LOOSE_BINDING))
dev_change_flags(vlandev, flgs & ~IFF_UP);
netif_stacked_transfer_operstate(dev, vlandev);
@@ -465,7 +431,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
if (flgs & IFF_UP)
continue;
- vlan = vlan_dev_info(vlandev);
+ vlan = vlan_dev_priv(vlandev);
if (!(vlan->flags & VLAN_FLAG_LOOSE_BINDING))
dev_change_flags(vlandev, flgs | IFF_UP);
netif_stacked_transfer_operstate(dev, vlandev);
@@ -482,9 +448,9 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
if (!vlandev)
continue;
- /* unregistration of last vlan destroys group, abort
+ /* removal of last vid destroys vlan_info, abort
* afterwards */
- if (grp->nr_vlans == 1)
+ if (vlan_info->nr_vids == 1)
i = VLAN_N_VID;
unregister_vlan_dev(vlandev, &list);
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index 9fd45f3571f9..a4886d94c40c 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -3,6 +3,7 @@
#include <linux/if_vlan.h>
#include <linux/u64_stats_sync.h>
+#include <linux/list.h>
/**
@@ -40,8 +41,10 @@ struct vlan_pcpu_stats {
u32 tx_dropped;
};
+struct netpoll;
+
/**
- * struct vlan_dev_info - VLAN private device data
+ * struct vlan_dev_priv - VLAN private device data
* @nr_ingress_mappings: number of ingress priority mappings
* @ingress_priority_map: ingress priority mappings
* @nr_egress_mappings: number of egress priority mappings
@@ -53,7 +56,7 @@ struct vlan_pcpu_stats {
* @dent: proc dir entry
* @vlan_pcpu_stats: ptr to percpu rx stats
*/
-struct vlan_dev_info {
+struct vlan_dev_priv {
unsigned int nr_ingress_mappings;
u32 ingress_priority_map[8];
unsigned int nr_egress_mappings;
@@ -67,13 +70,39 @@ struct vlan_dev_info {
struct proc_dir_entry *dent;
struct vlan_pcpu_stats __percpu *vlan_pcpu_stats;
+#ifdef CONFIG_NET_POLL_CONTROLLER
+ struct netpoll *netpoll;
+#endif
};
-static inline struct vlan_dev_info *vlan_dev_info(const struct net_device *dev)
+static inline struct vlan_dev_priv *vlan_dev_priv(const struct net_device *dev)
{
return netdev_priv(dev);
}
+/* if this changes, algorithm will have to be reworked because this
+ * depends on completely exhausting the VLAN identifier space. Thus
+ * it gives constant time look-up, but in many cases it wastes memory.
+ */
+#define VLAN_GROUP_ARRAY_SPLIT_PARTS 8
+#define VLAN_GROUP_ARRAY_PART_LEN (VLAN_N_VID/VLAN_GROUP_ARRAY_SPLIT_PARTS)
+
+struct vlan_group {
+ unsigned int nr_vlan_devs;
+ struct hlist_node hlist; /* linked list */
+ struct net_device **vlan_devices_arrays[VLAN_GROUP_ARRAY_SPLIT_PARTS];
+};
+
+struct vlan_info {
+ struct net_device *real_dev; /* The ethernet(like) device
+ * the vlan is attached to.
+ */
+ struct vlan_group grp;
+ struct list_head vid_list;
+ unsigned int nr_vids;
+ struct rcu_head rcu;
+};
+
static inline struct net_device *vlan_group_get_device(struct vlan_group *vg,
u16 vlan_id)
{
@@ -97,10 +126,10 @@ static inline void vlan_group_set_device(struct vlan_group *vg,
static inline struct net_device *vlan_find_dev(struct net_device *real_dev,
u16 vlan_id)
{
- struct vlan_group *grp = rcu_dereference_rtnl(real_dev->vlgrp);
+ struct vlan_info *vlan_info = rcu_dereference_rtnl(real_dev->vlan_info);
- if (grp)
- return vlan_group_get_device(grp, vlan_id);
+ if (vlan_info)
+ return vlan_group_get_device(&vlan_info->grp, vlan_id);
return NULL;
}
@@ -121,7 +150,7 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head);
static inline u32 vlan_get_ingress_priority(struct net_device *dev,
u16 vlan_tci)
{
- struct vlan_dev_info *vip = vlan_dev_info(dev);
+ struct vlan_dev_priv *vip = vlan_dev_priv(dev);
return vip->ingress_priority_map[(vlan_tci >> VLAN_PRIO_SHIFT) & 0x7];
}
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index f5ffc02729d6..4d39d802be2c 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -36,7 +36,7 @@ bool vlan_do_receive(struct sk_buff **skbp, bool last_handler)
skb->pkt_type = PACKET_HOST;
}
- if (!(vlan_dev_info(vlan_dev)->flags & VLAN_FLAG_REORDER_HDR)) {
+ if (!(vlan_dev_priv(vlan_dev)->flags & VLAN_FLAG_REORDER_HDR)) {
unsigned int offset = skb->data - skb_mac_header(skb);
/*
@@ -55,7 +55,7 @@ bool vlan_do_receive(struct sk_buff **skbp, bool last_handler)
skb->priority = vlan_get_ingress_priority(vlan_dev, skb->vlan_tci);
skb->vlan_tci = 0;
- rx_stats = this_cpu_ptr(vlan_dev_info(vlan_dev)->vlan_pcpu_stats);
+ rx_stats = this_cpu_ptr(vlan_dev_priv(vlan_dev)->vlan_pcpu_stats);
u64_stats_update_begin(&rx_stats->syncp);
rx_stats->rx_packets++;
@@ -71,10 +71,10 @@ bool vlan_do_receive(struct sk_buff **skbp, bool last_handler)
struct net_device *__vlan_find_dev_deep(struct net_device *real_dev,
u16 vlan_id)
{
- struct vlan_group *grp = rcu_dereference_rtnl(real_dev->vlgrp);
+ struct vlan_info *vlan_info = rcu_dereference_rtnl(real_dev->vlan_info);
- if (grp) {
- return vlan_group_get_device(grp, vlan_id);
+ if (vlan_info) {
+ return vlan_group_get_device(&vlan_info->grp, vlan_id);
} else {
/*
* Bonding slaves do not have grp assigned to themselves.
@@ -90,13 +90,13 @@ EXPORT_SYMBOL(__vlan_find_dev_deep);
struct net_device *vlan_dev_real_dev(const struct net_device *dev)
{
- return vlan_dev_info(dev)->real_dev;
+ return vlan_dev_priv(dev)->real_dev;
}
EXPORT_SYMBOL(vlan_dev_real_dev);
u16 vlan_dev_vlan_id(const struct net_device *dev)
{
- return vlan_dev_info(dev)->vlan_id;
+ return vlan_dev_priv(dev)->vlan_id;
}
EXPORT_SYMBOL(vlan_dev_vlan_id);
@@ -110,39 +110,6 @@ static struct sk_buff *vlan_reorder_header(struct sk_buff *skb)
return skb;
}
-static void vlan_set_encap_proto(struct sk_buff *skb, struct vlan_hdr *vhdr)
-{
- __be16 proto;
- unsigned char *rawp;
-
- /*
- * Was a VLAN packet, grab the encapsulated protocol, which the layer
- * three protocols care about.
- */
-
- proto = vhdr->h_vlan_encapsulated_proto;
- if (ntohs(proto) >= 1536) {
- skb->protocol = proto;
- return;
- }
-
- rawp = skb->data;
- if (*(unsigned short *) rawp == 0xFFFF)
- /*
- * This is a magic hack to spot IPX packets. Older Novell
- * breaks the protocol design and runs IPX over 802.3 without
- * an 802.2 LLC layer. We look for FFFF which isn't a used
- * 802.2 SSAP/DSAP. This won't work for fault tolerant netware
- * but does for the rest.
- */
- skb->protocol = htons(ETH_P_802_3);
- else
- /*
- * Real 802.2 LLC
- */
- skb->protocol = htons(ETH_P_802_2);
-}
-
struct sk_buff *vlan_untag(struct sk_buff *skb)
{
struct vlan_hdr *vhdr;
@@ -179,3 +146,226 @@ err_free:
kfree_skb(skb);
return NULL;
}
+
+
+/*
+ * vlan info and vid list
+ */
+
+static void vlan_group_free(struct vlan_group *grp)
+{
+ int i;
+
+ for (i = 0; i < VLAN_GROUP_ARRAY_SPLIT_PARTS; i++)
+ kfree(grp->vlan_devices_arrays[i]);
+}
+
+static void vlan_info_free(struct vlan_info *vlan_info)
+{
+ vlan_group_free(&vlan_info->grp);
+ kfree(vlan_info);
+}
+
+static void vlan_info_rcu_free(struct rcu_head *rcu)
+{
+ vlan_info_free(container_of(rcu, struct vlan_info, rcu));
+}
+
+static struct vlan_info *vlan_info_alloc(struct net_device *dev)
+{
+ struct vlan_info *vlan_info;
+
+ vlan_info = kzalloc(sizeof(struct vlan_info), GFP_KERNEL);
+ if (!vlan_info)
+ return NULL;
+
+ vlan_info->real_dev = dev;
+ INIT_LIST_HEAD(&vlan_info->vid_list);
+ return vlan_info;
+}
+
+struct vlan_vid_info {
+ struct list_head list;
+ unsigned short vid;
+ int refcount;
+};
+
+static struct vlan_vid_info *vlan_vid_info_get(struct vlan_info *vlan_info,
+ unsigned short vid)
+{
+ struct vlan_vid_info *vid_info;
+
+ list_for_each_entry(vid_info, &vlan_info->vid_list, list) {
+ if (vid_info->vid == vid)
+ return vid_info;
+ }
+ return NULL;
+}
+
+static struct vlan_vid_info *vlan_vid_info_alloc(unsigned short vid)
+{
+ struct vlan_vid_info *vid_info;
+
+ vid_info = kzalloc(sizeof(struct vlan_vid_info), GFP_KERNEL);
+ if (!vid_info)
+ return NULL;
+ vid_info->vid = vid;
+
+ return vid_info;
+}
+
+static int __vlan_vid_add(struct vlan_info *vlan_info, unsigned short vid,
+ struct vlan_vid_info **pvid_info)
+{
+ struct net_device *dev = vlan_info->real_dev;
+ const struct net_device_ops *ops = dev->netdev_ops;
+ struct vlan_vid_info *vid_info;
+ int err;
+
+ vid_info = vlan_vid_info_alloc(vid);
+ if (!vid_info)
+ return -ENOMEM;
+
+ if ((dev->features & NETIF_F_HW_VLAN_FILTER) &&
+ ops->ndo_vlan_rx_add_vid) {
+ err = ops->ndo_vlan_rx_add_vid(dev, vid);
+ if (err) {
+ kfree(vid_info);
+ return err;
+ }
+ }
+ list_add(&vid_info->list, &vlan_info->vid_list);
+ vlan_info->nr_vids++;
+ *pvid_info = vid_info;
+ return 0;
+}
+
+int vlan_vid_add(struct net_device *dev, unsigned short vid)
+{
+ struct vlan_info *vlan_info;
+ struct vlan_vid_info *vid_info;
+ bool vlan_info_created = false;
+ int err;
+
+ ASSERT_RTNL();
+
+ vlan_info = rtnl_dereference(dev->vlan_info);
+ if (!vlan_info) {
+ vlan_info = vlan_info_alloc(dev);
+ if (!vlan_info)
+ return -ENOMEM;
+ vlan_info_created = true;
+ }
+ vid_info = vlan_vid_info_get(vlan_info, vid);
+ if (!vid_info) {
+ err = __vlan_vid_add(vlan_info, vid, &vid_info);
+ if (err)
+ goto out_free_vlan_info;
+ }
+ vid_info->refcount++;
+
+ if (vlan_info_created)
+ rcu_assign_pointer(dev->vlan_info, vlan_info);
+
+ return 0;
+
+out_free_vlan_info:
+ if (vlan_info_created)
+ kfree(vlan_info);
+ return err;
+}
+EXPORT_SYMBOL(vlan_vid_add);
+
+static void __vlan_vid_del(struct vlan_info *vlan_info,
+ struct vlan_vid_info *vid_info)
+{
+ struct net_device *dev = vlan_info->real_dev;
+ const struct net_device_ops *ops = dev->netdev_ops;
+ unsigned short vid = vid_info->vid;
+ int err;
+
+ if ((dev->features & NETIF_F_HW_VLAN_FILTER) &&
+ ops->ndo_vlan_rx_kill_vid) {
+ err = ops->ndo_vlan_rx_kill_vid(dev, vid);
+ if (err) {
+ pr_warn("failed to kill vid %d for device %s\n",
+ vid, dev->name);
+ }
+ }
+ list_del(&vid_info->list);
+ kfree(vid_info);
+ vlan_info->nr_vids--;
+}
+
+void vlan_vid_del(struct net_device *dev, unsigned short vid)
+{
+ struct vlan_info *vlan_info;
+ struct vlan_vid_info *vid_info;
+
+ ASSERT_RTNL();
+
+ vlan_info = rtnl_dereference(dev->vlan_info);
+ if (!vlan_info)
+ return;
+
+ vid_info = vlan_vid_info_get(vlan_info, vid);
+ if (!vid_info)
+ return;
+ vid_info->refcount--;
+ if (vid_info->refcount == 0) {
+ __vlan_vid_del(vlan_info, vid_info);
+ if (vlan_info->nr_vids == 0) {
+ RCU_INIT_POINTER(dev->vlan_info, NULL);
+ call_rcu(&vlan_info->rcu, vlan_info_rcu_free);
+ }
+ }
+}
+EXPORT_SYMBOL(vlan_vid_del);
+
+int vlan_vids_add_by_dev(struct net_device *dev,
+ const struct net_device *by_dev)
+{
+ struct vlan_vid_info *vid_info;
+ struct vlan_info *vlan_info;
+ int err;
+
+ ASSERT_RTNL();
+
+ vlan_info = rtnl_dereference(by_dev->vlan_info);
+ if (!vlan_info)
+ return 0;
+
+ list_for_each_entry(vid_info, &vlan_info->vid_list, list) {
+ err = vlan_vid_add(dev, vid_info->vid);
+ if (err)
+ goto unwind;
+ }
+ return 0;
+
+unwind:
+ list_for_each_entry_continue_reverse(vid_info,
+ &vlan_info->vid_list,
+ list) {
+ vlan_vid_del(dev, vid_info->vid);
+ }
+
+ return err;
+}
+EXPORT_SYMBOL(vlan_vids_add_by_dev);
+
+void vlan_vids_del_by_dev(struct net_device *dev,
+ const struct net_device *by_dev)
+{
+ struct vlan_vid_info *vid_info;
+ struct vlan_info *vlan_info;
+
+ ASSERT_RTNL();
+
+ vlan_info = rtnl_dereference(by_dev->vlan_info);
+ if (!vlan_info)
+ return;
+
+ list_for_each_entry(vid_info, &vlan_info->vid_list, list)
+ vlan_vid_del(dev, vid_info->vid);
+}
+EXPORT_SYMBOL(vlan_vids_del_by_dev);
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index bc2528624583..9988d4abb372 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -33,6 +33,7 @@
#include "vlan.h"
#include "vlanproc.h"
#include <linux/if_vlan.h>
+#include <linux/netpoll.h>
/*
* Rebuild the Ethernet MAC header. This is called after an ARP
@@ -72,7 +73,7 @@ vlan_dev_get_egress_qos_mask(struct net_device *dev, struct sk_buff *skb)
{
struct vlan_priority_tci_mapping *mp;
- mp = vlan_dev_info(dev)->egress_priority_map[(skb->priority & 0xF)];
+ mp = vlan_dev_priv(dev)->egress_priority_map[(skb->priority & 0xF)];
while (mp) {
if (mp->priority == skb->priority) {
return mp->vlan_qos; /* This should already be shifted
@@ -103,10 +104,10 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
u16 vlan_tci = 0;
int rc;
- if (!(vlan_dev_info(dev)->flags & VLAN_FLAG_REORDER_HDR)) {
+ if (!(vlan_dev_priv(dev)->flags & VLAN_FLAG_REORDER_HDR)) {
vhdr = (struct vlan_hdr *) skb_push(skb, VLAN_HLEN);
- vlan_tci = vlan_dev_info(dev)->vlan_id;
+ vlan_tci = vlan_dev_priv(dev)->vlan_id;
vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb);
vhdr->h_vlan_TCI = htons(vlan_tci);
@@ -129,7 +130,7 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
saddr = dev->dev_addr;
/* Now make the underlying real hard header */
- dev = vlan_dev_info(dev)->real_dev;
+ dev = vlan_dev_priv(dev)->real_dev;
rc = dev_hard_header(skb, dev, type, daddr, saddr, len + vhdrlen);
if (rc > 0)
rc += vhdrlen;
@@ -149,27 +150,29 @@ static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb,
* OTHER THINGS LIKE FDDI/TokenRing/802.3 SNAPs...
*/
if (veth->h_vlan_proto != htons(ETH_P_8021Q) ||
- vlan_dev_info(dev)->flags & VLAN_FLAG_REORDER_HDR) {
+ vlan_dev_priv(dev)->flags & VLAN_FLAG_REORDER_HDR) {
u16 vlan_tci;
- vlan_tci = vlan_dev_info(dev)->vlan_id;
+ vlan_tci = vlan_dev_priv(dev)->vlan_id;
vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb);
skb = __vlan_hwaccel_put_tag(skb, vlan_tci);
}
- skb_set_dev(skb, vlan_dev_info(dev)->real_dev);
+ skb_set_dev(skb, vlan_dev_priv(dev)->real_dev);
len = skb->len;
+ if (netpoll_tx_running(dev))
+ return skb->dev->netdev_ops->ndo_start_xmit(skb, skb->dev);
ret = dev_queue_xmit(skb);
if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) {
struct vlan_pcpu_stats *stats;
- stats = this_cpu_ptr(vlan_dev_info(dev)->vlan_pcpu_stats);
+ stats = this_cpu_ptr(vlan_dev_priv(dev)->vlan_pcpu_stats);
u64_stats_update_begin(&stats->syncp);
stats->tx_packets++;
stats->tx_bytes += len;
u64_stats_update_end(&stats->syncp);
} else {
- this_cpu_inc(vlan_dev_info(dev)->vlan_pcpu_stats->tx_dropped);
+ this_cpu_inc(vlan_dev_priv(dev)->vlan_pcpu_stats->tx_dropped);
}
return ret;
@@ -180,7 +183,7 @@ static int vlan_dev_change_mtu(struct net_device *dev, int new_mtu)
/* TODO: gotta make sure the underlying layer can handle it,
* maybe an IFF_VLAN_CAPABLE flag for devices?
*/
- if (vlan_dev_info(dev)->real_dev->mtu < new_mtu)
+ if (vlan_dev_priv(dev)->real_dev->mtu < new_mtu)
return -ERANGE;
dev->mtu = new_mtu;
@@ -191,7 +194,7 @@ static int vlan_dev_change_mtu(struct net_device *dev, int new_mtu)
void vlan_dev_set_ingress_priority(const struct net_device *dev,
u32 skb_prio, u16 vlan_prio)
{
- struct vlan_dev_info *vlan = vlan_dev_info(dev);
+ struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
if (vlan->ingress_priority_map[vlan_prio & 0x7] && !skb_prio)
vlan->nr_ingress_mappings--;
@@ -204,7 +207,7 @@ void vlan_dev_set_ingress_priority(const struct net_device *dev,
int vlan_dev_set_egress_priority(const struct net_device *dev,
u32 skb_prio, u16 vlan_prio)
{
- struct vlan_dev_info *vlan = vlan_dev_info(dev);
+ struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
struct vlan_priority_tci_mapping *mp = NULL;
struct vlan_priority_tci_mapping *np;
u32 vlan_qos = (vlan_prio << VLAN_PRIO_SHIFT) & VLAN_PRIO_MASK;
@@ -241,7 +244,7 @@ int vlan_dev_set_egress_priority(const struct net_device *dev,
/* Flags are defined in the vlan_flags enum in include/linux/if_vlan.h file. */
int vlan_dev_change_flags(const struct net_device *dev, u32 flags, u32 mask)
{
- struct vlan_dev_info *vlan = vlan_dev_info(dev);
+ struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
u32 old_flags = vlan->flags;
if (mask & ~(VLAN_FLAG_REORDER_HDR | VLAN_FLAG_GVRP |
@@ -261,12 +264,12 @@ int vlan_dev_change_flags(const struct net_device *dev, u32 flags, u32 mask)
void vlan_dev_get_realdev_name(const struct net_device *dev, char *result)
{
- strncpy(result, vlan_dev_info(dev)->real_dev->name, 23);
+ strncpy(result, vlan_dev_priv(dev)->real_dev->name, 23);
}
static int vlan_dev_open(struct net_device *dev)
{
- struct vlan_dev_info *vlan = vlan_dev_info(dev);
+ struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
struct net_device *real_dev = vlan->real_dev;
int err;
@@ -313,7 +316,7 @@ out:
static int vlan_dev_stop(struct net_device *dev)
{
- struct vlan_dev_info *vlan = vlan_dev_info(dev);
+ struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
struct net_device *real_dev = vlan->real_dev;
dev_mc_unsync(real_dev, dev);
@@ -332,7 +335,7 @@ static int vlan_dev_stop(struct net_device *dev)
static int vlan_dev_set_mac_address(struct net_device *dev, void *p)
{
- struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
+ struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
struct sockaddr *addr = p;
int err;
@@ -358,7 +361,7 @@ out:
static int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
- struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
+ struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
const struct net_device_ops *ops = real_dev->netdev_ops;
struct ifreq ifrr;
int err = -EOPNOTSUPP;
@@ -383,7 +386,7 @@ static int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
static int vlan_dev_neigh_setup(struct net_device *dev, struct neigh_parms *pa)
{
- struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
+ struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
const struct net_device_ops *ops = real_dev->netdev_ops;
int err = 0;
@@ -397,7 +400,7 @@ static int vlan_dev_neigh_setup(struct net_device *dev, struct neigh_parms *pa)
static int vlan_dev_fcoe_ddp_setup(struct net_device *dev, u16 xid,
struct scatterlist *sgl, unsigned int sgc)
{
- struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
+ struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
const struct net_device_ops *ops = real_dev->netdev_ops;
int rc = 0;
@@ -409,7 +412,7 @@ static int vlan_dev_fcoe_ddp_setup(struct net_device *dev, u16 xid,
static int vlan_dev_fcoe_ddp_done(struct net_device *dev, u16 xid)
{
- struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
+ struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
const struct net_device_ops *ops = real_dev->netdev_ops;
int len = 0;
@@ -421,7 +424,7 @@ static int vlan_dev_fcoe_ddp_done(struct net_device *dev, u16 xid)
static int vlan_dev_fcoe_enable(struct net_device *dev)
{
- struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
+ struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
const struct net_device_ops *ops = real_dev->netdev_ops;
int rc = -EINVAL;
@@ -432,7 +435,7 @@ static int vlan_dev_fcoe_enable(struct net_device *dev)
static int vlan_dev_fcoe_disable(struct net_device *dev)
{
- struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
+ struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
const struct net_device_ops *ops = real_dev->netdev_ops;
int rc = -EINVAL;
@@ -443,7 +446,7 @@ static int vlan_dev_fcoe_disable(struct net_device *dev)
static int vlan_dev_fcoe_get_wwn(struct net_device *dev, u64 *wwn, int type)
{
- struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
+ struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
const struct net_device_ops *ops = real_dev->netdev_ops;
int rc = -EINVAL;
@@ -455,7 +458,7 @@ static int vlan_dev_fcoe_get_wwn(struct net_device *dev, u64 *wwn, int type)
static int vlan_dev_fcoe_ddp_target(struct net_device *dev, u16 xid,
struct scatterlist *sgl, unsigned int sgc)
{
- struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
+ struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
const struct net_device_ops *ops = real_dev->netdev_ops;
int rc = 0;
@@ -468,7 +471,7 @@ static int vlan_dev_fcoe_ddp_target(struct net_device *dev, u16 xid,
static void vlan_dev_change_rx_flags(struct net_device *dev, int change)
{
- struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
+ struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
if (dev->flags & IFF_UP) {
if (change & IFF_ALLMULTI)
@@ -480,8 +483,8 @@ static void vlan_dev_change_rx_flags(struct net_device *dev, int change)
static void vlan_dev_set_rx_mode(struct net_device *vlan_dev)
{
- dev_mc_sync(vlan_dev_info(vlan_dev)->real_dev, vlan_dev);
- dev_uc_sync(vlan_dev_info(vlan_dev)->real_dev, vlan_dev);
+ dev_mc_sync(vlan_dev_priv(vlan_dev)->real_dev, vlan_dev);
+ dev_uc_sync(vlan_dev_priv(vlan_dev)->real_dev, vlan_dev);
}
/*
@@ -519,7 +522,7 @@ static const struct net_device_ops vlan_netdev_ops;
static int vlan_dev_init(struct net_device *dev)
{
- struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
+ struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
int subclass = 0;
netif_carrier_off(dev);
@@ -568,8 +571,8 @@ static int vlan_dev_init(struct net_device *dev)
vlan_dev_set_lockdep_class(dev, subclass);
- vlan_dev_info(dev)->vlan_pcpu_stats = alloc_percpu(struct vlan_pcpu_stats);
- if (!vlan_dev_info(dev)->vlan_pcpu_stats)
+ vlan_dev_priv(dev)->vlan_pcpu_stats = alloc_percpu(struct vlan_pcpu_stats);
+ if (!vlan_dev_priv(dev)->vlan_pcpu_stats)
return -ENOMEM;
return 0;
@@ -578,7 +581,7 @@ static int vlan_dev_init(struct net_device *dev)
static void vlan_dev_uninit(struct net_device *dev)
{
struct vlan_priority_tci_mapping *pm;
- struct vlan_dev_info *vlan = vlan_dev_info(dev);
+ struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
int i;
free_percpu(vlan->vlan_pcpu_stats);
@@ -591,18 +594,17 @@ static void vlan_dev_uninit(struct net_device *dev)
}
}
-static u32 vlan_dev_fix_features(struct net_device *dev, u32 features)
+static netdev_features_t vlan_dev_fix_features(struct net_device *dev,
+ netdev_features_t features)
{
- struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
+ struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
u32 old_features = features;
- features &= real_dev->features;
features &= real_dev->vlan_features;
+ features |= NETIF_F_RXCSUM;
+ features &= real_dev->features;
features |= old_features & NETIF_F_SOFT_FEATURES;
-
- if (dev_ethtool_get_rx_csum(real_dev))
- features |= NETIF_F_RXCSUM;
features |= NETIF_F_LLTX;
return features;
@@ -611,7 +613,7 @@ static u32 vlan_dev_fix_features(struct net_device *dev, u32 features)
static int vlan_ethtool_get_settings(struct net_device *dev,
struct ethtool_cmd *cmd)
{
- const struct vlan_dev_info *vlan = vlan_dev_info(dev);
+ const struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
return __ethtool_get_settings(vlan->real_dev, cmd);
}
@@ -627,7 +629,7 @@ static void vlan_ethtool_get_drvinfo(struct net_device *dev,
static struct rtnl_link_stats64 *vlan_dev_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
{
- if (vlan_dev_info(dev)->vlan_pcpu_stats) {
+ if (vlan_dev_priv(dev)->vlan_pcpu_stats) {
struct vlan_pcpu_stats *p;
u32 rx_errors = 0, tx_dropped = 0;
int i;
@@ -636,7 +638,7 @@ static struct rtnl_link_stats64 *vlan_dev_get_stats64(struct net_device *dev, st
u64 rxpackets, rxbytes, rxmulticast, txpackets, txbytes;
unsigned int start;
- p = per_cpu_ptr(vlan_dev_info(dev)->vlan_pcpu_stats, i);
+ p = per_cpu_ptr(vlan_dev_priv(dev)->vlan_pcpu_stats, i);
do {
start = u64_stats_fetch_begin_bh(&p->syncp);
rxpackets = p->rx_packets;
@@ -661,6 +663,57 @@ static struct rtnl_link_stats64 *vlan_dev_get_stats64(struct net_device *dev, st
return stats;
}
+#ifdef CONFIG_NET_POLL_CONTROLLER
+static void vlan_dev_poll_controller(struct net_device *dev)
+{
+ return;
+}
+
+static int vlan_dev_netpoll_setup(struct net_device *dev, struct netpoll_info *npinfo)
+{
+ struct vlan_dev_priv *info = vlan_dev_priv(dev);
+ struct net_device *real_dev = info->real_dev;
+ struct netpoll *netpoll;
+ int err = 0;
+
+ netpoll = kzalloc(sizeof(*netpoll), GFP_KERNEL);
+ err = -ENOMEM;
+ if (!netpoll)
+ goto out;
+
+ netpoll->dev = real_dev;
+ strlcpy(netpoll->dev_name, real_dev->name, IFNAMSIZ);
+
+ err = __netpoll_setup(netpoll);
+ if (err) {
+ kfree(netpoll);
+ goto out;
+ }
+
+ info->netpoll = netpoll;
+
+out:
+ return err;
+}
+
+static void vlan_dev_netpoll_cleanup(struct net_device *dev)
+{
+ struct vlan_dev_priv *info = vlan_dev_priv(dev);
+ struct netpoll *netpoll = info->netpoll;
+
+ if (!netpoll)
+ return;
+
+ info->netpoll = NULL;
+
+ /* Wait for transmitting packets to finish before freeing. */
+ synchronize_rcu_bh();
+
+ __netpoll_cleanup(netpoll);
+ kfree(netpoll);
+}
+#endif /* CONFIG_NET_POLL_CONTROLLER */
+
static const struct ethtool_ops vlan_ethtool_ops = {
.get_settings = vlan_ethtool_get_settings,
.get_drvinfo = vlan_ethtool_get_drvinfo,
@@ -689,6 +742,11 @@ static const struct net_device_ops vlan_netdev_ops = {
.ndo_fcoe_get_wwn = vlan_dev_fcoe_get_wwn,
.ndo_fcoe_ddp_target = vlan_dev_fcoe_ddp_target,
#endif
+#ifdef CONFIG_NET_POLL_CONTROLLER
+ .ndo_poll_controller = vlan_dev_poll_controller,
+ .ndo_netpoll_setup = vlan_dev_netpoll_setup,
+ .ndo_netpoll_cleanup = vlan_dev_netpoll_cleanup,
+#endif
.ndo_fix_features = vlan_dev_fix_features,
};
diff --git a/net/8021q/vlan_gvrp.c b/net/8021q/vlan_gvrp.c
index 061ceceeef12..6f9755352760 100644
--- a/net/8021q/vlan_gvrp.c
+++ b/net/8021q/vlan_gvrp.c
@@ -29,7 +29,7 @@ static struct garp_application vlan_gvrp_app __read_mostly = {
int vlan_gvrp_request_join(const struct net_device *dev)
{
- const struct vlan_dev_info *vlan = vlan_dev_info(dev);
+ const struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
__be16 vlan_id = htons(vlan->vlan_id);
return garp_request_join(vlan->real_dev, &vlan_gvrp_app,
@@ -38,7 +38,7 @@ int vlan_gvrp_request_join(const struct net_device *dev)
void vlan_gvrp_request_leave(const struct net_device *dev)
{
- const struct vlan_dev_info *vlan = vlan_dev_info(dev);
+ const struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
__be16 vlan_id = htons(vlan->vlan_id);
garp_request_leave(vlan->real_dev, &vlan_gvrp_app,
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
index 235c2197dbb6..50711368ad6a 100644
--- a/net/8021q/vlan_netlink.c
+++ b/net/8021q/vlan_netlink.c
@@ -105,7 +105,7 @@ static int vlan_changelink(struct net_device *dev,
static int vlan_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[])
{
- struct vlan_dev_info *vlan = vlan_dev_info(dev);
+ struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
struct net_device *real_dev;
int err;
@@ -149,7 +149,7 @@ static inline size_t vlan_qos_map_size(unsigned int n)
static size_t vlan_get_size(const struct net_device *dev)
{
- struct vlan_dev_info *vlan = vlan_dev_info(dev);
+ struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
return nla_total_size(2) + /* IFLA_VLAN_ID */
sizeof(struct ifla_vlan_flags) + /* IFLA_VLAN_FLAGS */
@@ -159,14 +159,14 @@ static size_t vlan_get_size(const struct net_device *dev)
static int vlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
- struct vlan_dev_info *vlan = vlan_dev_info(dev);
+ struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
struct vlan_priority_tci_mapping *pm;
struct ifla_vlan_flags f;
struct ifla_vlan_qos_mapping m;
struct nlattr *nest;
unsigned int i;
- NLA_PUT_U16(skb, IFLA_VLAN_ID, vlan_dev_info(dev)->vlan_id);
+ NLA_PUT_U16(skb, IFLA_VLAN_ID, vlan_dev_priv(dev)->vlan_id);
if (vlan->flags) {
f.flags = vlan->flags;
f.mask = ~0;
@@ -218,7 +218,7 @@ struct rtnl_link_ops vlan_link_ops __read_mostly = {
.kind = "vlan",
.maxtype = IFLA_VLAN_MAX,
.policy = vlan_policy,
- .priv_size = sizeof(struct vlan_dev_info),
+ .priv_size = sizeof(struct vlan_dev_priv),
.setup = vlan_setup,
.validate = vlan_validate,
.newlink = vlan_newlink,
diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c
index d34b6daf8930..c718fd3664b6 100644
--- a/net/8021q/vlanproc.c
+++ b/net/8021q/vlanproc.c
@@ -168,13 +168,13 @@ err:
int vlan_proc_add_dev(struct net_device *vlandev)
{
- struct vlan_dev_info *dev_info = vlan_dev_info(vlandev);
+ struct vlan_dev_priv *vlan = vlan_dev_priv(vlandev);
struct vlan_net *vn = net_generic(dev_net(vlandev), vlan_net_id);
- dev_info->dent =
+ vlan->dent =
proc_create_data(vlandev->name, S_IFREG|S_IRUSR|S_IWUSR,
vn->proc_vlan_dir, &vlandev_fops, vlandev);
- if (!dev_info->dent)
+ if (!vlan->dent)
return -ENOBUFS;
return 0;
}
@@ -187,10 +187,10 @@ int vlan_proc_rem_dev(struct net_device *vlandev)
struct vlan_net *vn = net_generic(dev_net(vlandev), vlan_net_id);
/** NOTE: This will consume the memory pointed to by dent, it seems. */
- if (vlan_dev_info(vlandev)->dent) {
- remove_proc_entry(vlan_dev_info(vlandev)->dent->name,
+ if (vlan_dev_priv(vlandev)->dent) {
+ remove_proc_entry(vlan_dev_priv(vlandev)->dent->name,
vn->proc_vlan_dir);
- vlan_dev_info(vlandev)->dent = NULL;
+ vlan_dev_priv(vlandev)->dent = NULL;
}
return 0;
}
@@ -268,10 +268,10 @@ static int vlan_seq_show(struct seq_file *seq, void *v)
nmtype ? nmtype : "UNKNOWN");
} else {
const struct net_device *vlandev = v;
- const struct vlan_dev_info *dev_info = vlan_dev_info(vlandev);
+ const struct vlan_dev_priv *vlan = vlan_dev_priv(vlandev);
seq_printf(seq, "%-15s| %d | %s\n", vlandev->name,
- dev_info->vlan_id, dev_info->real_dev->name);
+ vlan->vlan_id, vlan->real_dev->name);
}
return 0;
}
@@ -279,7 +279,7 @@ static int vlan_seq_show(struct seq_file *seq, void *v)
static int vlandev_seq_show(struct seq_file *seq, void *offset)
{
struct net_device *vlandev = (struct net_device *) seq->private;
- const struct vlan_dev_info *dev_info = vlan_dev_info(vlandev);
+ const struct vlan_dev_priv *vlan = vlan_dev_priv(vlandev);
struct rtnl_link_stats64 temp;
const struct rtnl_link_stats64 *stats;
static const char fmt64[] = "%30s %12llu\n";
@@ -291,8 +291,8 @@ static int vlandev_seq_show(struct seq_file *seq, void *offset)
stats = dev_get_stats(vlandev, &temp);
seq_printf(seq,
"%s VID: %d REORDER_HDR: %i dev->priv_flags: %hx\n",
- vlandev->name, dev_info->vlan_id,
- (int)(dev_info->flags & 1), vlandev->priv_flags);
+ vlandev->name, vlan->vlan_id,
+ (int)(vlan->flags & 1), vlandev->priv_flags);
seq_printf(seq, fmt64, "total frames received", stats->rx_packets);
seq_printf(seq, fmt64, "total bytes received", stats->rx_bytes);
@@ -300,23 +300,23 @@ static int vlandev_seq_show(struct seq_file *seq, void *offset)
seq_puts(seq, "\n");
seq_printf(seq, fmt64, "total frames transmitted", stats->tx_packets);
seq_printf(seq, fmt64, "total bytes transmitted", stats->tx_bytes);
- seq_printf(seq, "Device: %s", dev_info->real_dev->name);
+ seq_printf(seq, "Device: %s", vlan->real_dev->name);
/* now show all PRIORITY mappings relating to this VLAN */
seq_printf(seq, "\nINGRESS priority mappings: "
"0:%u 1:%u 2:%u 3:%u 4:%u 5:%u 6:%u 7:%u\n",
- dev_info->ingress_priority_map[0],
- dev_info->ingress_priority_map[1],
- dev_info->ingress_priority_map[2],
- dev_info->ingress_priority_map[3],
- dev_info->ingress_priority_map[4],
- dev_info->ingress_priority_map[5],
- dev_info->ingress_priority_map[6],
- dev_info->ingress_priority_map[7]);
+ vlan->ingress_priority_map[0],
+ vlan->ingress_priority_map[1],
+ vlan->ingress_priority_map[2],
+ vlan->ingress_priority_map[3],
+ vlan->ingress_priority_map[4],
+ vlan->ingress_priority_map[5],
+ vlan->ingress_priority_map[6],
+ vlan->ingress_priority_map[7]);
seq_printf(seq, " EGRESS priority mappings: ");
for (i = 0; i < 16; i++) {
const struct vlan_priority_tci_mapping *mp
- = dev_info->egress_priority_map[i];
+ = vlan->egress_priority_map[i];
while (mp) {
seq_printf(seq, "%u:%hu ",
mp->priority, ((mp->vlan_qos >> 13) & 0x7));
diff --git a/net/Kconfig b/net/Kconfig
index a07314844238..e07272d0bb2d 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -215,6 +215,7 @@ source "net/sched/Kconfig"
source "net/dcb/Kconfig"
source "net/dns_resolver/Kconfig"
source "net/batman-adv/Kconfig"
+source "net/openvswitch/Kconfig"
config RPS
boolean
@@ -232,6 +233,19 @@ config XPS
depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS
default y
+config NETPRIO_CGROUP
+ tristate "Network priority cgroup"
+ depends on CGROUPS
+ ---help---
+ Cgroup subsystem for use in assigning processes to network priorities on
+ a per-interface basis
+
+config BQL
+ boolean
+ depends on SYSFS
+ select DQL
+ default y
+
config HAVE_BPF_JIT
bool
diff --git a/net/Makefile b/net/Makefile
index acdde4950de4..ad432fa4d934 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -69,3 +69,4 @@ obj-$(CONFIG_DNS_RESOLVER) += dns_resolver/
obj-$(CONFIG_CEPH_LIB) += ceph/
obj-$(CONFIG_BATMAN_ADV) += batman-adv/
obj-$(CONFIG_NFC) += nfc/
+obj-$(CONFIG_OPENVSWITCH) += openvswitch/
diff --git a/net/atm/atm_misc.c b/net/atm/atm_misc.c
index f41f02656ff4..876fbe83e2e4 100644
--- a/net/atm/atm_misc.c
+++ b/net/atm/atm_misc.c
@@ -26,7 +26,7 @@ struct sk_buff *atm_alloc_charge(struct atm_vcc *vcc, int pdu_size,
gfp_t gfp_flags)
{
struct sock *sk = sk_atm(vcc);
- int guess = atm_guess_pdu2truesize(pdu_size);
+ int guess = SKB_TRUESIZE(pdu_size);
atm_force_charge(vcc, guess);
if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) {
diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index d07223c834af..353fccf1cde3 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -53,6 +53,7 @@ static const unsigned char ethertype_ipv4[] = { ETHERTYPE_IPV4 };
static const unsigned char ethertype_ipv6[] = { ETHERTYPE_IPV6 };
static const unsigned char llc_oui_pid_pad[] =
{ LLC, SNAP_BRIDGED, PID_ETHERNET, PAD_BRIDGED };
+static const unsigned char pad[] = { PAD_BRIDGED };
static const unsigned char llc_oui_ipv4[] = { LLC, SNAP_ROUTED, ETHERTYPE_IPV4 };
static const unsigned char llc_oui_ipv6[] = { LLC, SNAP_ROUTED, ETHERTYPE_IPV6 };
@@ -202,7 +203,10 @@ static int br2684_xmit_vcc(struct sk_buff *skb, struct net_device *dev,
{
struct br2684_dev *brdev = BRPRIV(dev);
struct atm_vcc *atmvcc;
- int minheadroom = (brvcc->encaps == e_llc) ? 10 : 2;
+ int minheadroom = (brvcc->encaps == e_llc) ?
+ ((brdev->payload == p_bridged) ?
+ sizeof(llc_oui_pid_pad) : sizeof(llc_oui_ipv4)) :
+ ((brdev->payload == p_bridged) ? BR2684_PAD_LEN : 0);
if (skb_headroom(skb) < minheadroom) {
struct sk_buff *skb2 = skb_realloc_headroom(skb, minheadroom);
@@ -450,7 +454,7 @@ static void br2684_push(struct atm_vcc *atmvcc, struct sk_buff *skb)
skb->pkt_type = PACKET_HOST;
} else { /* p_bridged */
/* first 2 chars should be 0 */
- if (*((u16 *) (skb->data)) != 0)
+ if (memcmp(skb->data, pad, BR2684_PAD_LEN) != 0)
goto error;
skb_pull(skb, BR2684_PAD_LEN);
skb->protocol = eth_type_trans(skb, net_dev);
@@ -489,15 +493,11 @@ free_skb:
*/
static int br2684_regvcc(struct atm_vcc *atmvcc, void __user * arg)
{
- struct sk_buff_head queue;
- int err;
struct br2684_vcc *brvcc;
- struct sk_buff *skb, *tmp;
- struct sk_buff_head *rq;
struct br2684_dev *brdev;
struct net_device *net_dev;
struct atm_backend_br2684 be;
- unsigned long flags;
+ int err;
if (copy_from_user(&be, arg, sizeof be))
return -EFAULT;
@@ -550,23 +550,6 @@ static int br2684_regvcc(struct atm_vcc *atmvcc, void __user * arg)
atmvcc->push = br2684_push;
atmvcc->pop = br2684_pop;
- __skb_queue_head_init(&queue);
- rq = &sk_atm(atmvcc)->sk_receive_queue;
-
- spin_lock_irqsave(&rq->lock, flags);
- skb_queue_splice_init(rq, &queue);
- spin_unlock_irqrestore(&rq->lock, flags);
-
- skb_queue_walk_safe(&queue, skb, tmp) {
- struct net_device *dev;
-
- br2684_push(atmvcc, skb);
- dev = skb->dev;
-
- dev->stats.rx_bytes -= skb->len;
- dev->stats.rx_packets--;
- }
-
/* initialize netdev carrier state */
if (atmvcc->dev->signal == ATM_PHY_SIG_LOST)
netif_carrier_off(net_dev);
@@ -574,6 +557,10 @@ static int br2684_regvcc(struct atm_vcc *atmvcc, void __user * arg)
netif_carrier_on(net_dev);
__module_get(THIS_MODULE);
+
+ /* re-process everything received between connection setup and
+ backend setup */
+ vcc_process_recv_queue(atmvcc);
return 0;
error:
@@ -600,6 +587,7 @@ static void br2684_setup(struct net_device *netdev)
struct br2684_dev *brdev = BRPRIV(netdev);
ether_setup(netdev);
+ netdev->hard_header_len += sizeof(llc_oui_pid_pad); /* worst case */
brdev->net_dev = netdev;
netdev->netdev_ops = &br2684_netdev_ops;
@@ -612,7 +600,7 @@ static void br2684_setup_routed(struct net_device *netdev)
struct br2684_dev *brdev = BRPRIV(netdev);
brdev->net_dev = netdev;
- netdev->hard_header_len = 0;
+ netdev->hard_header_len = sizeof(llc_oui_ipv4); /* worst case */
netdev->netdev_ops = &br2684_netdev_ops_routed;
netdev->addr_len = 0;
netdev->mtu = 1500;
diff --git a/net/atm/clip.c b/net/atm/clip.c
index 852394072fa1..c12c2582457c 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -33,6 +33,7 @@
#include <linux/slab.h>
#include <net/route.h> /* for struct rtable and routing */
#include <net/icmp.h> /* icmp_send */
+#include <net/arp.h>
#include <linux/param.h> /* for HZ */
#include <linux/uaccess.h>
#include <asm/byteorder.h> /* for htons etc. */
@@ -119,7 +120,7 @@ out:
/* The neighbour entry n->lock is held. */
static int neigh_check_cb(struct neighbour *n)
{
- struct atmarp_entry *entry = NEIGH2ENTRY(n);
+ struct atmarp_entry *entry = neighbour_priv(n);
struct clip_vcc *cv;
for (cv = entry->vccs; cv; cv = cv->next) {
@@ -189,6 +190,13 @@ static void clip_push(struct atm_vcc *vcc, struct sk_buff *skb)
struct clip_vcc *clip_vcc = CLIP_VCC(vcc);
pr_debug("\n");
+
+ if (!clip_devs) {
+ atm_return(vcc, skb->truesize);
+ kfree_skb(skb);
+ return;
+ }
+
if (!skb) {
pr_debug("removing VCC %p\n", clip_vcc);
if (clip_vcc->entry)
@@ -255,8 +263,10 @@ static void clip_pop(struct atm_vcc *vcc, struct sk_buff *skb)
static void clip_neigh_solicit(struct neighbour *neigh, struct sk_buff *skb)
{
+ __be32 *ip = (__be32 *) neigh->primary_key;
+
pr_debug("(neigh %p, skb %p)\n", neigh, skb);
- to_atmarpd(act_need, PRIV(neigh->dev)->number, NEIGH2ENTRY(neigh)->ip);
+ to_atmarpd(act_need, PRIV(neigh->dev)->number, *ip);
}
static void clip_neigh_error(struct neighbour *neigh, struct sk_buff *skb)
@@ -277,72 +287,24 @@ static const struct neigh_ops clip_neigh_ops = {
static int clip_constructor(struct neighbour *neigh)
{
- struct atmarp_entry *entry = NEIGH2ENTRY(neigh);
- struct net_device *dev = neigh->dev;
- struct in_device *in_dev;
- struct neigh_parms *parms;
+ struct atmarp_entry *entry = neighbour_priv(neigh);
- pr_debug("(neigh %p, entry %p)\n", neigh, entry);
- neigh->type = inet_addr_type(&init_net, entry->ip);
- if (neigh->type != RTN_UNICAST)
+ if (neigh->tbl->family != AF_INET)
return -EINVAL;
- rcu_read_lock();
- in_dev = __in_dev_get_rcu(dev);
- if (!in_dev) {
- rcu_read_unlock();
+ if (neigh->type != RTN_UNICAST)
return -EINVAL;
- }
-
- parms = in_dev->arp_parms;
- __neigh_parms_put(neigh->parms);
- neigh->parms = neigh_parms_clone(parms);
- rcu_read_unlock();
+ neigh->nud_state = NUD_NONE;
neigh->ops = &clip_neigh_ops;
- neigh->output = neigh->nud_state & NUD_VALID ?
- neigh->ops->connected_output : neigh->ops->output;
+ neigh->output = neigh->ops->output;
entry->neigh = neigh;
entry->vccs = NULL;
entry->expires = jiffies - 1;
+
return 0;
}
-static u32 clip_hash(const void *pkey, const struct net_device *dev, __u32 rnd)
-{
- return jhash_2words(*(u32 *) pkey, dev->ifindex, rnd);
-}
-
-static struct neigh_table clip_tbl = {
- .family = AF_INET,
- .entry_size = sizeof(struct neighbour)+sizeof(struct atmarp_entry),
- .key_len = 4,
- .hash = clip_hash,
- .constructor = clip_constructor,
- .id = "clip_arp_cache",
-
- /* parameters are copied from ARP ... */
- .parms = {
- .tbl = &clip_tbl,
- .base_reachable_time = 30 * HZ,
- .retrans_time = 1 * HZ,
- .gc_staletime = 60 * HZ,
- .reachable_time = 30 * HZ,
- .delay_probe_time = 5 * HZ,
- .queue_len = 3,
- .ucast_probes = 3,
- .mcast_probes = 3,
- .anycast_delay = 1 * HZ,
- .proxy_delay = (8 * HZ) / 10,
- .proxy_qlen = 64,
- .locktime = 1 * HZ,
- },
- .gc_interval = 30 * HZ,
- .gc_thresh1 = 128,
- .gc_thresh2 = 512,
- .gc_thresh3 = 1024,
-};
-
/* @@@ copy bh locking from arp.c -- need to bh-enable atm code before */
/*
@@ -376,28 +338,19 @@ static netdev_tx_t clip_start_xmit(struct sk_buff *skb,
dev->stats.tx_dropped++;
return NETDEV_TX_OK;
}
- n = dst_get_neighbour(dst);
+ n = dst_get_neighbour_noref(dst);
if (!n) {
-#if 0
- n = clip_find_neighbour(skb_dst(skb), 1);
- if (!n) {
- dev_kfree_skb(skb); /* lost that one */
- dev->stats.tx_dropped++;
- return 0;
- }
- dst_set_neighbour(dst, n);
-#endif
pr_err("NO NEIGHBOUR !\n");
dev_kfree_skb(skb);
dev->stats.tx_dropped++;
return NETDEV_TX_OK;
}
- entry = NEIGH2ENTRY(n);
+ entry = neighbour_priv(n);
if (!entry->vccs) {
if (time_after(jiffies, entry->expires)) {
/* should be resolved */
entry->expires = jiffies + ATMARP_RETRY_DELAY * HZ;
- to_atmarpd(act_need, PRIV(dev)->number, entry->ip);
+ to_atmarpd(act_need, PRIV(dev)->number, *((__be32 *)n->primary_key));
}
if (entry->neigh->arp_queue.qlen < ATMARP_MAX_UNRES_PACKETS)
skb_queue_tail(&entry->neigh->arp_queue, skb);
@@ -448,10 +401,7 @@ static netdev_tx_t clip_start_xmit(struct sk_buff *skb,
static int clip_mkip(struct atm_vcc *vcc, int timeout)
{
- struct sk_buff_head *rq, queue;
struct clip_vcc *clip_vcc;
- struct sk_buff *skb, *tmp;
- unsigned long flags;
if (!vcc->push)
return -EBADFD;
@@ -472,29 +422,9 @@ static int clip_mkip(struct atm_vcc *vcc, int timeout)
vcc->push = clip_push;
vcc->pop = clip_pop;
- __skb_queue_head_init(&queue);
- rq = &sk_atm(vcc)->sk_receive_queue;
-
- spin_lock_irqsave(&rq->lock, flags);
- skb_queue_splice_init(rq, &queue);
- spin_unlock_irqrestore(&rq->lock, flags);
-
/* re-process everything received between connection setup and MKIP */
- skb_queue_walk_safe(&queue, skb, tmp) {
- if (!clip_devs) {
- atm_return(vcc, skb->truesize);
- kfree_skb(skb);
- } else {
- struct net_device *dev = skb->dev;
- unsigned int len = skb->len;
-
- skb_get(skb);
- clip_push(vcc, skb);
- dev->stats.rx_packets--;
- dev->stats.rx_bytes -= len;
- kfree_skb(skb);
- }
- }
+ vcc_process_recv_queue(vcc);
+
return 0;
}
@@ -523,11 +453,11 @@ static int clip_setentry(struct atm_vcc *vcc, __be32 ip)
rt = ip_route_output(&init_net, ip, 0, 1, 0);
if (IS_ERR(rt))
return PTR_ERR(rt);
- neigh = __neigh_lookup(&clip_tbl, &ip, rt->dst.dev, 1);
+ neigh = __neigh_lookup(&arp_tbl, &ip, rt->dst.dev, 1);
ip_rt_put(rt);
if (!neigh)
return -ENOMEM;
- entry = NEIGH2ENTRY(neigh);
+ entry = neighbour_priv(neigh);
if (entry != clip_vcc->entry) {
if (!clip_vcc->entry)
pr_debug("add\n");
@@ -544,13 +474,15 @@ static int clip_setentry(struct atm_vcc *vcc, __be32 ip)
}
static const struct net_device_ops clip_netdev_ops = {
- .ndo_start_xmit = clip_start_xmit,
+ .ndo_start_xmit = clip_start_xmit,
+ .ndo_neigh_construct = clip_constructor,
};
static void clip_setup(struct net_device *dev)
{
dev->netdev_ops = &clip_netdev_ops;
dev->type = ARPHRD_ATM;
+ dev->neigh_priv_len = sizeof(struct atmarp_entry);
dev->hard_header_len = RFC1483LLC_LEN;
dev->mtu = RFC1626_MTU;
dev->tx_queue_len = 100; /* "normal" queue (packets) */
@@ -604,10 +536,8 @@ static int clip_device_event(struct notifier_block *this, unsigned long event,
if (!net_eq(dev_net(dev), &init_net))
return NOTIFY_DONE;
- if (event == NETDEV_UNREGISTER) {
- neigh_ifdown(&clip_tbl, dev);
+ if (event == NETDEV_UNREGISTER)
return NOTIFY_DONE;
- }
/* ignore non-CLIP devices */
if (dev->type != ARPHRD_ATM || dev->netdev_ops != &clip_netdev_ops)
@@ -787,9 +717,10 @@ static void svc_addr(struct seq_file *seq, struct sockaddr_atmsvc *addr)
/* This means the neighbour entry has no attached VCC objects. */
#define SEQ_NO_VCC_TOKEN ((void *) 2)
-static void atmarp_info(struct seq_file *seq, struct net_device *dev,
+static void atmarp_info(struct seq_file *seq, struct neighbour *n,
struct atmarp_entry *entry, struct clip_vcc *clip_vcc)
{
+ struct net_device *dev = n->dev;
unsigned long exp;
char buf[17];
int svc, llc, off;
@@ -809,8 +740,7 @@ static void atmarp_info(struct seq_file *seq, struct net_device *dev,
seq_printf(seq, "%-6s%-4s%-4s%5ld ",
dev->name, svc ? "SVC" : "PVC", llc ? "LLC" : "NULL", exp);
- off = scnprintf(buf, sizeof(buf) - 1, "%pI4",
- &entry->ip);
+ off = scnprintf(buf, sizeof(buf) - 1, "%pI4", n->primary_key);
while (off < 16)
buf[off++] = ' ';
buf[off] = '\0';
@@ -881,14 +811,17 @@ static void *clip_seq_sub_iter(struct neigh_seq_state *_state,
{
struct clip_seq_state *state = (struct clip_seq_state *)_state;
- return clip_seq_vcc_walk(state, NEIGH2ENTRY(n), pos);
+ if (n->dev->type != ARPHRD_ATM)
+ return NULL;
+
+ return clip_seq_vcc_walk(state, neighbour_priv(n), pos);
}
static void *clip_seq_start(struct seq_file *seq, loff_t * pos)
{
struct clip_seq_state *state = seq->private;
state->ns.neigh_sub_iter = clip_seq_sub_iter;
- return neigh_seq_start(seq, pos, &clip_tbl, NEIGH_SEQ_NEIGH_ONLY);
+ return neigh_seq_start(seq, pos, &arp_tbl, NEIGH_SEQ_NEIGH_ONLY);
}
static int clip_seq_show(struct seq_file *seq, void *v)
@@ -900,10 +833,10 @@ static int clip_seq_show(struct seq_file *seq, void *v)
seq_puts(seq, atm_arp_banner);
} else {
struct clip_seq_state *state = seq->private;
- struct neighbour *n = v;
struct clip_vcc *vcc = state->vcc;
+ struct neighbour *n = v;
- atmarp_info(seq, n->dev, NEIGH2ENTRY(n), vcc);
+ atmarp_info(seq, n, neighbour_priv(n), vcc);
}
return 0;
}
@@ -934,9 +867,6 @@ static void atm_clip_exit_noproc(void);
static int __init atm_clip_init(void)
{
- neigh_table_init_no_netlink(&clip_tbl);
-
- clip_tbl_hook = &clip_tbl;
register_atm_ioctl(&clip_ioctl_ops);
register_netdevice_notifier(&clip_dev_notifier);
register_inetaddr_notifier(&clip_inet_notifier);
@@ -973,12 +903,6 @@ static void atm_clip_exit_noproc(void)
*/
del_timer_sync(&idle_timer);
- /* Next, purge the table, so that the device
- * unregister loop below does not hang due to
- * device references remaining in the table.
- */
- neigh_ifdown(&clip_tbl, NULL);
-
dev = clip_devs;
while (dev) {
next = PRIV(dev)->next;
@@ -986,11 +910,6 @@ static void atm_clip_exit_noproc(void)
free_netdev(dev);
dev = next;
}
-
- /* Now it is safe to fully shutdown whole table. */
- neigh_table_clear(&clip_tbl);
-
- clip_tbl_hook = NULL;
}
static void __exit atm_clip_exit(void)
diff --git a/net/atm/common.c b/net/atm/common.c
index 14ff9fe39989..b4b44dbed645 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -214,6 +214,26 @@ void vcc_release_async(struct atm_vcc *vcc, int reply)
}
EXPORT_SYMBOL(vcc_release_async);
+void vcc_process_recv_queue(struct atm_vcc *vcc)
+{
+ struct sk_buff_head queue, *rq;
+ struct sk_buff *skb, *tmp;
+ unsigned long flags;
+
+ __skb_queue_head_init(&queue);
+ rq = &sk_atm(vcc)->sk_receive_queue;
+
+ spin_lock_irqsave(&rq->lock, flags);
+ skb_queue_splice_init(rq, &queue);
+ spin_unlock_irqrestore(&rq->lock, flags);
+
+ skb_queue_walk_safe(&queue, skb, tmp) {
+ __skb_unlink(skb, &queue);
+ vcc->push(vcc, skb);
+ }
+}
+EXPORT_SYMBOL(vcc_process_recv_queue);
+
void atm_dev_signal_change(struct atm_dev *dev, char signal)
{
pr_debug("%s signal=%d dev=%p number=%d dev->signal=%d\n",
@@ -502,8 +522,11 @@ int vcc_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
if (sock->state != SS_CONNECTED)
return -ENOTCONN;
- if (flags & ~MSG_DONTWAIT) /* only handle MSG_DONTWAIT */
+
+ /* only handle MSG_DONTWAIT and MSG_PEEK */
+ if (flags & ~(MSG_DONTWAIT | MSG_PEEK))
return -EOPNOTSUPP;
+
vcc = ATM_SD(sock);
if (test_bit(ATM_VF_RELEASED, &vcc->flags) ||
test_bit(ATM_VF_CLOSE, &vcc->flags) ||
@@ -524,8 +547,13 @@ int vcc_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
if (error)
return error;
sock_recv_ts_and_drops(msg, sk, skb);
- pr_debug("%d -= %d\n", atomic_read(&sk->sk_rmem_alloc), skb->truesize);
- atm_return(vcc, skb->truesize);
+
+ if (!(flags & MSG_PEEK)) {
+ pr_debug("%d -= %d\n", atomic_read(&sk->sk_rmem_alloc),
+ skb->truesize);
+ atm_return(vcc, skb->truesize);
+ }
+
skb_free_datagram(sk, skb);
return copied;
}
diff --git a/net/atm/common.h b/net/atm/common.h
index f48a76b6cdf4..cc3c2dae4d79 100644
--- a/net/atm/common.h
+++ b/net/atm/common.h
@@ -24,6 +24,7 @@ int vcc_setsockopt(struct socket *sock, int level, int optname,
char __user *optval, unsigned int optlen);
int vcc_getsockopt(struct socket *sock, int level, int optname,
char __user *optval, int __user *optlen);
+void vcc_process_recv_queue(struct atm_vcc *vcc);
int atmpvc_init(void);
void atmpvc_exit(void);
diff --git a/net/atm/pppoatm.c b/net/atm/pppoatm.c
index db4a11c61d15..df35d9a3b5fe 100644
--- a/net/atm/pppoatm.c
+++ b/net/atm/pppoatm.c
@@ -303,6 +303,10 @@ static int pppoatm_assign_vcc(struct atm_vcc *atmvcc, void __user *arg)
atmvcc->push = pppoatm_push;
atmvcc->pop = pppoatm_pop;
__module_get(THIS_MODULE);
+
+ /* re-process everything received between connection setup and
+ backend setup */
+ vcc_process_recv_queue(atmvcc);
return 0;
}
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index e7c69f4619ec..3cd0a0dc91cb 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -402,14 +402,14 @@ static int ax25_ctl_ioctl(const unsigned int cmd, void __user *arg)
break;
case AX25_T1:
- if (ax25_ctl.arg < 1)
+ if (ax25_ctl.arg < 1 || ax25_ctl.arg > ULONG_MAX / HZ)
goto einval_put;
ax25->rtt = (ax25_ctl.arg * HZ) / 2;
ax25->t1 = ax25_ctl.arg * HZ;
break;
case AX25_T2:
- if (ax25_ctl.arg < 1)
+ if (ax25_ctl.arg < 1 || ax25_ctl.arg > ULONG_MAX / HZ)
goto einval_put;
ax25->t2 = ax25_ctl.arg * HZ;
break;
@@ -422,10 +422,15 @@ static int ax25_ctl_ioctl(const unsigned int cmd, void __user *arg)
break;
case AX25_T3:
+ if (ax25_ctl.arg > ULONG_MAX / HZ)
+ goto einval_put;
ax25->t3 = ax25_ctl.arg * HZ;
break;
case AX25_IDLE:
+ if (ax25_ctl.arg > ULONG_MAX / (60 * HZ))
+ goto einval_put;
+
ax25->idle = ax25_ctl.arg * 60 * HZ;
break;
@@ -540,15 +545,16 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname,
ax25_cb *ax25;
struct net_device *dev;
char devname[IFNAMSIZ];
- int opt, res = 0;
+ unsigned long opt;
+ int res = 0;
if (level != SOL_AX25)
return -ENOPROTOOPT;
- if (optlen < sizeof(int))
+ if (optlen < sizeof(unsigned int))
return -EINVAL;
- if (get_user(opt, (int __user *)optval))
+ if (get_user(opt, (unsigned int __user *)optval))
return -EFAULT;
lock_sock(sk);
@@ -571,7 +577,7 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname,
break;
case AX25_T1:
- if (opt < 1) {
+ if (opt < 1 || opt > ULONG_MAX / HZ) {
res = -EINVAL;
break;
}
@@ -580,7 +586,7 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname,
break;
case AX25_T2:
- if (opt < 1) {
+ if (opt < 1 || opt > ULONG_MAX / HZ) {
res = -EINVAL;
break;
}
@@ -596,7 +602,7 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname,
break;
case AX25_T3:
- if (opt < 1) {
+ if (opt < 1 || opt > ULONG_MAX / HZ) {
res = -EINVAL;
break;
}
@@ -604,7 +610,7 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname,
break;
case AX25_IDLE:
- if (opt < 0) {
+ if (opt > ULONG_MAX / (60 * HZ)) {
res = -EINVAL;
break;
}
@@ -612,7 +618,7 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname,
break;
case AX25_BACKOFF:
- if (opt < 0 || opt > 2) {
+ if (opt > 2) {
res = -EINVAL;
break;
}
diff --git a/net/batman-adv/bat_sysfs.c b/net/batman-adv/bat_sysfs.c
index b8a7414c3571..c25492f7d665 100644
--- a/net/batman-adv/bat_sysfs.c
+++ b/net/batman-adv/bat_sysfs.c
@@ -174,7 +174,7 @@ static int store_uint_attr(const char *buff, size_t count,
unsigned long uint_val;
int ret;
- ret = strict_strtoul(buff, 10, &uint_val);
+ ret = kstrtoul(buff, 10, &uint_val);
if (ret) {
bat_info(net_dev,
"%s: Invalid parameter received: %s\n",
@@ -239,7 +239,7 @@ static ssize_t store_vis_mode(struct kobject *kobj, struct attribute *attr,
unsigned long val;
int ret, vis_mode_tmp = -1;
- ret = strict_strtoul(buff, 10, &val);
+ ret = kstrtoul(buff, 10, &val);
if (((count == 2) && (!ret) && (val == VIS_TYPE_CLIENT_UPDATE)) ||
(strncmp(buff, "client", 6) == 0) ||
diff --git a/net/batman-adv/bitarray.c b/net/batman-adv/bitarray.c
index 0be9ff346fa0..9bc63b209b3f 100644
--- a/net/batman-adv/bitarray.c
+++ b/net/batman-adv/bitarray.c
@@ -155,7 +155,7 @@ int bit_get_packet(void *priv, unsigned long *seq_bits,
/* sequence number is much newer, probably missed a lot of packets */
if ((seq_num_diff >= TQ_LOCAL_WINDOW_SIZE)
- || (seq_num_diff < EXPECTED_SEQNO_RANGE)) {
+ && (seq_num_diff < EXPECTED_SEQNO_RANGE)) {
bat_dbg(DBG_BATMAN, bat_priv,
"We missed a lot of packets (%i) !\n",
seq_num_diff - 1);
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index 619fb73b3b76..24403a7350f7 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -25,6 +25,7 @@
#include "gateway_common.h"
#include "hard-interface.h"
#include "originator.h"
+#include "translation-table.h"
#include "routing.h"
#include <linux/ip.h>
#include <linux/ipv6.h>
@@ -572,108 +573,142 @@ out:
return ret;
}
-int gw_is_target(struct bat_priv *bat_priv, struct sk_buff *skb,
- struct orig_node *old_gw)
+bool gw_is_dhcp_target(struct sk_buff *skb, unsigned int *header_len)
{
struct ethhdr *ethhdr;
struct iphdr *iphdr;
struct ipv6hdr *ipv6hdr;
struct udphdr *udphdr;
- struct gw_node *curr_gw;
- struct neigh_node *neigh_curr = NULL, *neigh_old = NULL;
- unsigned int header_len = 0;
- int ret = 1;
-
- if (atomic_read(&bat_priv->gw_mode) == GW_MODE_OFF)
- return 0;
/* check for ethernet header */
- if (!pskb_may_pull(skb, header_len + ETH_HLEN))
- return 0;
+ if (!pskb_may_pull(skb, *header_len + ETH_HLEN))
+ return false;
ethhdr = (struct ethhdr *)skb->data;
- header_len += ETH_HLEN;
+ *header_len += ETH_HLEN;
/* check for initial vlan header */
if (ntohs(ethhdr->h_proto) == ETH_P_8021Q) {
- if (!pskb_may_pull(skb, header_len + VLAN_HLEN))
- return 0;
+ if (!pskb_may_pull(skb, *header_len + VLAN_HLEN))
+ return false;
ethhdr = (struct ethhdr *)(skb->data + VLAN_HLEN);
- header_len += VLAN_HLEN;
+ *header_len += VLAN_HLEN;
}
/* check for ip header */
switch (ntohs(ethhdr->h_proto)) {
case ETH_P_IP:
- if (!pskb_may_pull(skb, header_len + sizeof(*iphdr)))
- return 0;
- iphdr = (struct iphdr *)(skb->data + header_len);
- header_len += iphdr->ihl * 4;
+ if (!pskb_may_pull(skb, *header_len + sizeof(*iphdr)))
+ return false;
+ iphdr = (struct iphdr *)(skb->data + *header_len);
+ *header_len += iphdr->ihl * 4;
/* check for udp header */
if (iphdr->protocol != IPPROTO_UDP)
- return 0;
+ return false;
break;
case ETH_P_IPV6:
- if (!pskb_may_pull(skb, header_len + sizeof(*ipv6hdr)))
- return 0;
- ipv6hdr = (struct ipv6hdr *)(skb->data + header_len);
- header_len += sizeof(*ipv6hdr);
+ if (!pskb_may_pull(skb, *header_len + sizeof(*ipv6hdr)))
+ return false;
+ ipv6hdr = (struct ipv6hdr *)(skb->data + *header_len);
+ *header_len += sizeof(*ipv6hdr);
/* check for udp header */
if (ipv6hdr->nexthdr != IPPROTO_UDP)
- return 0;
+ return false;
break;
default:
- return 0;
+ return false;
}
- if (!pskb_may_pull(skb, header_len + sizeof(*udphdr)))
- return 0;
- udphdr = (struct udphdr *)(skb->data + header_len);
- header_len += sizeof(*udphdr);
+ if (!pskb_may_pull(skb, *header_len + sizeof(*udphdr)))
+ return false;
+ udphdr = (struct udphdr *)(skb->data + *header_len);
+ *header_len += sizeof(*udphdr);
/* check for bootp port */
if ((ntohs(ethhdr->h_proto) == ETH_P_IP) &&
(ntohs(udphdr->dest) != 67))
- return 0;
+ return false;
if ((ntohs(ethhdr->h_proto) == ETH_P_IPV6) &&
(ntohs(udphdr->dest) != 547))
- return 0;
+ return false;
- if (atomic_read(&bat_priv->gw_mode) == GW_MODE_SERVER)
- return -1;
+ return true;
+}
- curr_gw = gw_get_selected_gw_node(bat_priv);
- if (!curr_gw)
- return 0;
-
- /* If old_gw != NULL then this packet is unicast.
- * So, at this point we have to check the message type: if it is a
- * DHCPREQUEST we have to decide whether to drop it or not */
- if (old_gw && curr_gw->orig_node != old_gw) {
- if (is_type_dhcprequest(skb, header_len)) {
- /* If the dhcp packet has been sent to a different gw,
- * we have to evaluate whether the old gw is still
- * reliable enough */
- neigh_curr = find_router(bat_priv, curr_gw->orig_node,
- NULL);
- neigh_old = find_router(bat_priv, old_gw, NULL);
- if (!neigh_curr || !neigh_old)
- goto free_neigh;
- if (neigh_curr->tq_avg - neigh_old->tq_avg <
- GW_THRESHOLD)
- ret = -1;
- }
+bool gw_out_of_range(struct bat_priv *bat_priv,
+ struct sk_buff *skb, struct ethhdr *ethhdr)
+{
+ struct neigh_node *neigh_curr = NULL, *neigh_old = NULL;
+ struct orig_node *orig_dst_node = NULL;
+ struct gw_node *curr_gw = NULL;
+ bool ret, out_of_range = false;
+ unsigned int header_len = 0;
+ uint8_t curr_tq_avg;
+
+ ret = gw_is_dhcp_target(skb, &header_len);
+ if (!ret)
+ goto out;
+
+ orig_dst_node = transtable_search(bat_priv, ethhdr->h_source,
+ ethhdr->h_dest);
+ if (!orig_dst_node)
+ goto out;
+
+ if (!orig_dst_node->gw_flags)
+ goto out;
+
+ ret = is_type_dhcprequest(skb, header_len);
+ if (!ret)
+ goto out;
+
+ switch (atomic_read(&bat_priv->gw_mode)) {
+ case GW_MODE_SERVER:
+ /* If we are a GW then we are our best GW. We can artificially
+ * set the tq towards ourself as the maximum value */
+ curr_tq_avg = TQ_MAX_VALUE;
+ break;
+ case GW_MODE_CLIENT:
+ curr_gw = gw_get_selected_gw_node(bat_priv);
+ if (!curr_gw)
+ goto out;
+
+ /* packet is going to our gateway */
+ if (curr_gw->orig_node == orig_dst_node)
+ goto out;
+
+ /* If the dhcp packet has been sent to a different gw,
+ * we have to evaluate whether the old gw is still
+ * reliable enough */
+ neigh_curr = find_router(bat_priv, curr_gw->orig_node, NULL);
+ if (!neigh_curr)
+ goto out;
+
+ curr_tq_avg = neigh_curr->tq_avg;
+ break;
+ case GW_MODE_OFF:
+ default:
+ goto out;
}
-free_neigh:
+
+ neigh_old = find_router(bat_priv, orig_dst_node, NULL);
+ if (!neigh_old)
+ goto out;
+
+ if (curr_tq_avg - neigh_old->tq_avg > GW_THRESHOLD)
+ out_of_range = true;
+
+out:
+ if (orig_dst_node)
+ orig_node_free_ref(orig_dst_node);
+ if (curr_gw)
+ gw_node_free_ref(curr_gw);
if (neigh_old)
neigh_node_free_ref(neigh_old);
if (neigh_curr)
neigh_node_free_ref(neigh_curr);
- if (curr_gw)
- gw_node_free_ref(curr_gw);
- return ret;
+ return out_of_range;
}
diff --git a/net/batman-adv/gateway_client.h b/net/batman-adv/gateway_client.h
index b9b983c07feb..e1edba08eb1d 100644
--- a/net/batman-adv/gateway_client.h
+++ b/net/batman-adv/gateway_client.h
@@ -31,7 +31,8 @@ void gw_node_update(struct bat_priv *bat_priv,
void gw_node_delete(struct bat_priv *bat_priv, struct orig_node *orig_node);
void gw_node_purge(struct bat_priv *bat_priv);
int gw_client_seq_print_text(struct seq_file *seq, void *offset);
-int gw_is_target(struct bat_priv *bat_priv, struct sk_buff *skb,
- struct orig_node *old_gw);
+bool gw_is_dhcp_target(struct sk_buff *skb, unsigned int *header_len);
+bool gw_out_of_range(struct bat_priv *bat_priv,
+ struct sk_buff *skb, struct ethhdr *ethhdr);
#endif /* _NET_BATMAN_ADV_GATEWAY_CLIENT_H_ */
diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c
index 18661af0bc3b..c4ac7b0a2a63 100644
--- a/net/batman-adv/gateway_common.c
+++ b/net/batman-adv/gateway_common.c
@@ -97,7 +97,7 @@ static bool parse_gw_bandwidth(struct net_device *net_dev, char *buff,
*tmp_ptr = '\0';
}
- ret = strict_strtol(buff, 10, &ldown);
+ ret = kstrtol(buff, 10, &ldown);
if (ret) {
bat_err(net_dev,
"Download speed of gateway mode invalid: %s\n",
@@ -122,7 +122,7 @@ static bool parse_gw_bandwidth(struct net_device *net_dev, char *buff,
*tmp_ptr = '\0';
}
- ret = strict_strtol(slash_ptr + 1, 10, &lup);
+ ret = kstrtol(slash_ptr + 1, 10, &lup);
if (ret) {
bat_err(net_dev,
"Upload speed of gateway mode invalid: "
diff --git a/net/batman-adv/hash.c b/net/batman-adv/hash.c
index 2a172505f513..d1da29da333b 100644
--- a/net/batman-adv/hash.c
+++ b/net/batman-adv/hash.c
@@ -25,7 +25,7 @@
/* clears the hash */
static void hash_init(struct hashtable_t *hash)
{
- int i;
+ uint32_t i;
for (i = 0 ; i < hash->size; i++) {
INIT_HLIST_HEAD(&hash->table[i]);
@@ -42,7 +42,7 @@ void hash_destroy(struct hashtable_t *hash)
}
/* allocates and clears the hash */
-struct hashtable_t *hash_new(int size)
+struct hashtable_t *hash_new(uint32_t size)
{
struct hashtable_t *hash;
diff --git a/net/batman-adv/hash.h b/net/batman-adv/hash.h
index d20aa71ba1e8..4768717f07f9 100644
--- a/net/batman-adv/hash.h
+++ b/net/batman-adv/hash.h
@@ -33,17 +33,17 @@ typedef int (*hashdata_compare_cb)(const struct hlist_node *, const void *);
/* the hashfunction, should return an index
* based on the key in the data of the first
* argument and the size the second */
-typedef int (*hashdata_choose_cb)(const void *, int);
+typedef uint32_t (*hashdata_choose_cb)(const void *, uint32_t);
typedef void (*hashdata_free_cb)(struct hlist_node *, void *);
struct hashtable_t {
struct hlist_head *table; /* the hashtable itself with the buckets */
spinlock_t *list_locks; /* spinlock for each hash list entry */
- int size; /* size of hashtable */
+ uint32_t size; /* size of hashtable */
};
/* allocates and clears the hash */
-struct hashtable_t *hash_new(int size);
+struct hashtable_t *hash_new(uint32_t size);
/* free only the hashtable and the hash itself. */
void hash_destroy(struct hashtable_t *hash);
@@ -57,7 +57,7 @@ static inline void hash_delete(struct hashtable_t *hash,
struct hlist_head *head;
struct hlist_node *node, *node_tmp;
spinlock_t *list_lock; /* spinlock to protect write access */
- int i;
+ uint32_t i;
for (i = 0; i < hash->size; i++) {
head = &hash->table[i];
@@ -93,7 +93,8 @@ static inline int hash_add(struct hashtable_t *hash,
hashdata_choose_cb choose,
const void *data, struct hlist_node *data_node)
{
- int index, ret = -1;
+ uint32_t index;
+ int ret = -1;
struct hlist_head *head;
struct hlist_node *node;
spinlock_t *list_lock; /* spinlock to protect write access */
@@ -137,7 +138,7 @@ static inline void *hash_remove(struct hashtable_t *hash,
hashdata_compare_cb compare,
hashdata_choose_cb choose, void *data)
{
- size_t index;
+ uint32_t index;
struct hlist_node *node;
struct hlist_head *head;
void *data_save = NULL;
diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c
index ac3520e057c0..d9c1e7bb7fbf 100644
--- a/net/batman-adv/icmp_socket.c
+++ b/net/batman-adv/icmp_socket.c
@@ -136,10 +136,9 @@ static ssize_t bat_socket_read(struct file *file, char __user *buf,
spin_unlock_bh(&socket_client->lock);
- error = __copy_to_user(buf, &socket_packet->icmp_packet,
- socket_packet->icmp_len);
+ packet_len = min(count, socket_packet->icmp_len);
+ error = copy_to_user(buf, &socket_packet->icmp_packet, packet_len);
- packet_len = socket_packet->icmp_len;
kfree(socket_packet);
if (error)
@@ -187,12 +186,7 @@ static ssize_t bat_socket_write(struct file *file, const char __user *buff,
skb_reserve(skb, sizeof(struct ethhdr));
icmp_packet = (struct icmp_packet_rr *)skb_put(skb, packet_len);
- if (!access_ok(VERIFY_READ, buff, packet_len)) {
- len = -EFAULT;
- goto free_skb;
- }
-
- if (__copy_from_user(icmp_packet, buff, packet_len)) {
+ if (copy_from_user(icmp_packet, buff, packet_len)) {
len = -EFAULT;
goto free_skb;
}
@@ -217,7 +211,7 @@ static ssize_t bat_socket_write(struct file *file, const char __user *buff,
if (icmp_packet->version != COMPAT_VERSION) {
icmp_packet->msg_type = PARAMETER_PROBLEM;
- icmp_packet->ttl = COMPAT_VERSION;
+ icmp_packet->version = COMPAT_VERSION;
bat_socket_add_packet(socket_client, icmp_packet, packet_len);
goto free_skb;
}
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 964ad4d8ba33..86354e06eb48 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -28,7 +28,7 @@
#define DRIVER_DEVICE "batman-adv"
#ifndef SOURCE_VERSION
-#define SOURCE_VERSION "2011.4.0"
+#define SOURCE_VERSION "2012.0.0"
#endif
/* B.A.T.M.A.N. parameters */
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 0e5b77255d99..0bc2045a2f2e 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -164,7 +164,7 @@ void originator_free(struct bat_priv *bat_priv)
struct hlist_head *head;
spinlock_t *list_lock; /* spinlock to protect write access */
struct orig_node *orig_node;
- int i;
+ uint32_t i;
if (!hash)
return;
@@ -350,7 +350,7 @@ static void _purge_orig(struct bat_priv *bat_priv)
struct hlist_head *head;
spinlock_t *list_lock; /* spinlock to protect write access */
struct orig_node *orig_node;
- int i;
+ uint32_t i;
if (!hash)
return;
@@ -413,7 +413,8 @@ int orig_seq_print_text(struct seq_file *seq, void *offset)
int batman_count = 0;
int last_seen_secs;
int last_seen_msecs;
- int i, ret = 0;
+ uint32_t i;
+ int ret = 0;
primary_if = primary_if_get_selected(bat_priv);
@@ -519,7 +520,8 @@ int orig_hash_add_if(struct hard_iface *hard_iface, int max_if_num)
struct hlist_node *node;
struct hlist_head *head;
struct orig_node *orig_node;
- int i, ret;
+ uint32_t i;
+ int ret;
/* resize all orig nodes because orig_node->bcast_own(_sum) depend on
* if_num */
@@ -601,7 +603,8 @@ int orig_hash_del_if(struct hard_iface *hard_iface, int max_if_num)
struct hlist_head *head;
struct hard_iface *hard_iface_tmp;
struct orig_node *orig_node;
- int i, ret;
+ uint32_t i;
+ int ret;
/* resize all orig nodes because orig_node->bcast_own(_sum) depend on
* if_num */
diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h
index cfc1f60a96a1..67765ffef731 100644
--- a/net/batman-adv/originator.h
+++ b/net/batman-adv/originator.h
@@ -42,7 +42,7 @@ int orig_hash_del_if(struct hard_iface *hard_iface, int max_if_num);
/* hashfunction to choose an entry in a hash table of given size */
/* hash algorithm from http://en.wikipedia.org/wiki/Hash_table */
-static inline int choose_orig(const void *data, int32_t size)
+static inline uint32_t choose_orig(const void *data, uint32_t size)
{
const unsigned char *key = data;
uint32_t hash = 0;
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index f961cc5eade5..773e606f9702 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -39,7 +39,7 @@ void slide_own_bcast_window(struct hard_iface *hard_iface)
struct hlist_head *head;
struct orig_node *orig_node;
unsigned long *word;
- int i;
+ uint32_t i;
size_t word_index;
for (i = 0; i < hash->size; i++) {
@@ -578,6 +578,7 @@ int recv_tt_query(struct sk_buff *skb, struct hard_iface *recv_if)
{
struct bat_priv *bat_priv = netdev_priv(recv_if->soft_iface);
struct tt_query_packet *tt_query;
+ uint16_t tt_len;
struct ethhdr *ethhdr;
/* drop packet if it has not necessary minimum size */
@@ -616,13 +617,21 @@ int recv_tt_query(struct sk_buff *skb, struct hard_iface *recv_if)
}
break;
case TT_RESPONSE:
- /* packet needs to be linearized to access the TT changes */
- if (skb_linearize(skb) < 0)
- goto out;
+ if (is_my_mac(tt_query->dst)) {
+ /* packet needs to be linearized to access the TT
+ * changes */
+ if (skb_linearize(skb) < 0)
+ goto out;
+
+ tt_len = tt_query->tt_data * sizeof(struct tt_change);
+
+ /* Ensure we have all the claimed data */
+ if (unlikely(skb_headlen(skb) <
+ sizeof(struct tt_query_packet) + tt_len))
+ goto out;
- if (is_my_mac(tt_query->dst))
handle_tt_response(bat_priv, tt_query);
- else {
+ } else {
bat_dbg(DBG_TT, bat_priv,
"Routing TT_RESPONSE to %pM [%c]\n",
tt_query->dst,
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index f9cc95728989..987c75a775f9 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -563,10 +563,10 @@ static int interface_tx(struct sk_buff *skb, struct net_device *soft_iface)
struct bcast_packet *bcast_packet;
struct vlan_ethhdr *vhdr;
struct softif_neigh *curr_softif_neigh = NULL;
- struct orig_node *orig_node = NULL;
+ unsigned int header_len = 0;
int data_len = skb->len, ret;
short vid = -1;
- bool do_bcast;
+ bool do_bcast = false;
if (atomic_read(&bat_priv->mesh_state) != MESH_ACTIVE)
goto dropped;
@@ -598,17 +598,28 @@ static int interface_tx(struct sk_buff *skb, struct net_device *soft_iface)
/* Register the client MAC in the transtable */
tt_local_add(soft_iface, ethhdr->h_source, skb->skb_iif);
- orig_node = transtable_search(bat_priv, ethhdr->h_source,
- ethhdr->h_dest);
- do_bcast = is_multicast_ether_addr(ethhdr->h_dest);
- if (do_bcast || (orig_node && orig_node->gw_flags)) {
- ret = gw_is_target(bat_priv, skb, orig_node);
+ if (is_multicast_ether_addr(ethhdr->h_dest)) {
+ do_bcast = true;
- if (ret < 0)
- goto dropped;
-
- if (ret)
- do_bcast = false;
+ switch (atomic_read(&bat_priv->gw_mode)) {
+ case GW_MODE_SERVER:
+ /* gateway servers should not send dhcp
+ * requests into the mesh */
+ ret = gw_is_dhcp_target(skb, &header_len);
+ if (ret)
+ goto dropped;
+ break;
+ case GW_MODE_CLIENT:
+ /* gateway clients should send dhcp requests
+ * via unicast to their gateway */
+ ret = gw_is_dhcp_target(skb, &header_len);
+ if (ret)
+ do_bcast = false;
+ break;
+ case GW_MODE_OFF:
+ default:
+ break;
+ }
}
/* ethernet packet should be broadcasted */
@@ -644,6 +655,12 @@ static int interface_tx(struct sk_buff *skb, struct net_device *soft_iface)
/* unicast packet */
} else {
+ if (atomic_read(&bat_priv->gw_mode) != GW_MODE_OFF) {
+ ret = gw_out_of_range(bat_priv, skb, ethhdr);
+ if (ret)
+ goto dropped;
+ }
+
ret = unicast_send_skb(skb, bat_priv);
if (ret != 0)
goto dropped_freed;
@@ -662,8 +679,6 @@ end:
softif_neigh_free_ref(curr_softif_neigh);
if (primary_if)
hardif_free_ref(primary_if);
- if (orig_node)
- orig_node_free_ref(orig_node);
return NETDEV_TX_OK;
}
@@ -859,7 +874,7 @@ unreg_debugfs:
unreg_sysfs:
sysfs_del_meshif(soft_iface);
unreg_soft_iface:
- unregister_netdev(soft_iface);
+ unregister_netdevice(soft_iface);
return NULL;
free_soft_iface:
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index c7aafc7c5ed4..ab8dea8b0b2e 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -36,18 +36,9 @@ static void _tt_global_del(struct bat_priv *bat_priv,
static void tt_purge(struct work_struct *work);
/* returns 1 if they are the same mac addr */
-static int compare_ltt(const struct hlist_node *node, const void *data2)
+static int compare_tt(const struct hlist_node *node, const void *data2)
{
- const void *data1 = container_of(node, struct tt_local_entry,
- hash_entry);
-
- return (memcmp(data1, data2, ETH_ALEN) == 0 ? 1 : 0);
-}
-
-/* returns 1 if they are the same mac addr */
-static int compare_gtt(const struct hlist_node *node, const void *data2)
-{
- const void *data1 = container_of(node, struct tt_global_entry,
+ const void *data1 = container_of(node, struct tt_common_entry,
hash_entry);
return (memcmp(data1, data2, ETH_ALEN) == 0 ? 1 : 0);
@@ -60,14 +51,13 @@ static void tt_start_timer(struct bat_priv *bat_priv)
msecs_to_jiffies(5000));
}
-static struct tt_local_entry *tt_local_hash_find(struct bat_priv *bat_priv,
- const void *data)
+static struct tt_common_entry *tt_hash_find(struct hashtable_t *hash,
+ const void *data)
{
- struct hashtable_t *hash = bat_priv->tt_local_hash;
struct hlist_head *head;
struct hlist_node *node;
- struct tt_local_entry *tt_local_entry, *tt_local_entry_tmp = NULL;
- int index;
+ struct tt_common_entry *tt_common_entry, *tt_common_entry_tmp = NULL;
+ uint32_t index;
if (!hash)
return NULL;
@@ -76,51 +66,46 @@ static struct tt_local_entry *tt_local_hash_find(struct bat_priv *bat_priv,
head = &hash->table[index];
rcu_read_lock();
- hlist_for_each_entry_rcu(tt_local_entry, node, head, hash_entry) {
- if (!compare_eth(tt_local_entry, data))
+ hlist_for_each_entry_rcu(tt_common_entry, node, head, hash_entry) {
+ if (!compare_eth(tt_common_entry, data))
continue;
- if (!atomic_inc_not_zero(&tt_local_entry->refcount))
+ if (!atomic_inc_not_zero(&tt_common_entry->refcount))
continue;
- tt_local_entry_tmp = tt_local_entry;
+ tt_common_entry_tmp = tt_common_entry;
break;
}
rcu_read_unlock();
- return tt_local_entry_tmp;
+ return tt_common_entry_tmp;
}
-static struct tt_global_entry *tt_global_hash_find(struct bat_priv *bat_priv,
- const void *data)
+static struct tt_local_entry *tt_local_hash_find(struct bat_priv *bat_priv,
+ const void *data)
{
- struct hashtable_t *hash = bat_priv->tt_global_hash;
- struct hlist_head *head;
- struct hlist_node *node;
- struct tt_global_entry *tt_global_entry;
- struct tt_global_entry *tt_global_entry_tmp = NULL;
- int index;
-
- if (!hash)
- return NULL;
-
- index = choose_orig(data, hash->size);
- head = &hash->table[index];
+ struct tt_common_entry *tt_common_entry;
+ struct tt_local_entry *tt_local_entry = NULL;
- rcu_read_lock();
- hlist_for_each_entry_rcu(tt_global_entry, node, head, hash_entry) {
- if (!compare_eth(tt_global_entry, data))
- continue;
+ tt_common_entry = tt_hash_find(bat_priv->tt_local_hash, data);
+ if (tt_common_entry)
+ tt_local_entry = container_of(tt_common_entry,
+ struct tt_local_entry, common);
+ return tt_local_entry;
+}
- if (!atomic_inc_not_zero(&tt_global_entry->refcount))
- continue;
+static struct tt_global_entry *tt_global_hash_find(struct bat_priv *bat_priv,
+ const void *data)
+{
+ struct tt_common_entry *tt_common_entry;
+ struct tt_global_entry *tt_global_entry = NULL;
- tt_global_entry_tmp = tt_global_entry;
- break;
- }
- rcu_read_unlock();
+ tt_common_entry = tt_hash_find(bat_priv->tt_global_hash, data);
+ if (tt_common_entry)
+ tt_global_entry = container_of(tt_common_entry,
+ struct tt_global_entry, common);
+ return tt_global_entry;
- return tt_global_entry_tmp;
}
static bool is_out_of_time(unsigned long starting_time, unsigned long timeout)
@@ -133,15 +118,18 @@ static bool is_out_of_time(unsigned long starting_time, unsigned long timeout)
static void tt_local_entry_free_ref(struct tt_local_entry *tt_local_entry)
{
- if (atomic_dec_and_test(&tt_local_entry->refcount))
- kfree_rcu(tt_local_entry, rcu);
+ if (atomic_dec_and_test(&tt_local_entry->common.refcount))
+ kfree_rcu(tt_local_entry, common.rcu);
}
static void tt_global_entry_free_rcu(struct rcu_head *rcu)
{
+ struct tt_common_entry *tt_common_entry;
struct tt_global_entry *tt_global_entry;
- tt_global_entry = container_of(rcu, struct tt_global_entry, rcu);
+ tt_common_entry = container_of(rcu, struct tt_common_entry, rcu);
+ tt_global_entry = container_of(tt_common_entry, struct tt_global_entry,
+ common);
if (tt_global_entry->orig_node)
orig_node_free_ref(tt_global_entry->orig_node);
@@ -151,8 +139,9 @@ static void tt_global_entry_free_rcu(struct rcu_head *rcu)
static void tt_global_entry_free_ref(struct tt_global_entry *tt_global_entry)
{
- if (atomic_dec_and_test(&tt_global_entry->refcount))
- call_rcu(&tt_global_entry->rcu, tt_global_entry_free_rcu);
+ if (atomic_dec_and_test(&tt_global_entry->common.refcount))
+ call_rcu(&tt_global_entry->common.rcu,
+ tt_global_entry_free_rcu);
}
static void tt_local_event(struct bat_priv *bat_priv, const uint8_t *addr,
@@ -201,6 +190,7 @@ void tt_local_add(struct net_device *soft_iface, const uint8_t *addr,
struct bat_priv *bat_priv = netdev_priv(soft_iface);
struct tt_local_entry *tt_local_entry = NULL;
struct tt_global_entry *tt_global_entry = NULL;
+ int hash_added;
tt_local_entry = tt_local_hash_find(bat_priv, addr);
@@ -217,26 +207,33 @@ void tt_local_add(struct net_device *soft_iface, const uint8_t *addr,
"Creating new local tt entry: %pM (ttvn: %d)\n", addr,
(uint8_t)atomic_read(&bat_priv->ttvn));
- memcpy(tt_local_entry->addr, addr, ETH_ALEN);
- tt_local_entry->last_seen = jiffies;
- tt_local_entry->flags = NO_FLAGS;
+ memcpy(tt_local_entry->common.addr, addr, ETH_ALEN);
+ tt_local_entry->common.flags = NO_FLAGS;
if (is_wifi_iface(ifindex))
- tt_local_entry->flags |= TT_CLIENT_WIFI;
- atomic_set(&tt_local_entry->refcount, 2);
+ tt_local_entry->common.flags |= TT_CLIENT_WIFI;
+ atomic_set(&tt_local_entry->common.refcount, 2);
+ tt_local_entry->last_seen = jiffies;
/* the batman interface mac address should never be purged */
if (compare_eth(addr, soft_iface->dev_addr))
- tt_local_entry->flags |= TT_CLIENT_NOPURGE;
+ tt_local_entry->common.flags |= TT_CLIENT_NOPURGE;
- tt_local_event(bat_priv, addr, tt_local_entry->flags);
+ hash_added = hash_add(bat_priv->tt_local_hash, compare_tt, choose_orig,
+ &tt_local_entry->common,
+ &tt_local_entry->common.hash_entry);
+
+ if (unlikely(hash_added != 0)) {
+ /* remove the reference for the hash */
+ tt_local_entry_free_ref(tt_local_entry);
+ goto out;
+ }
+
+ tt_local_event(bat_priv, addr, tt_local_entry->common.flags);
/* The local entry has to be marked as NEW to avoid to send it in
* a full table response going out before the next ttvn increment
* (consistency check) */
- tt_local_entry->flags |= TT_CLIENT_NEW;
-
- hash_add(bat_priv->tt_local_hash, compare_ltt, choose_orig,
- tt_local_entry, &tt_local_entry->hash_entry);
+ tt_local_entry->common.flags |= TT_CLIENT_NEW;
/* remove address from global hash if present */
tt_global_entry = tt_global_hash_find(bat_priv, addr);
@@ -245,10 +242,11 @@ void tt_local_add(struct net_device *soft_iface, const uint8_t *addr,
if (tt_global_entry) {
/* This node is probably going to update its tt table */
tt_global_entry->orig_node->tt_poss_change = true;
- /* The global entry has to be marked as PENDING and has to be
+ /* The global entry has to be marked as ROAMING and has to be
* kept for consistency purpose */
- tt_global_entry->flags |= TT_CLIENT_PENDING;
- send_roam_adv(bat_priv, tt_global_entry->addr,
+ tt_global_entry->common.flags |= TT_CLIENT_ROAM;
+ tt_global_entry->roam_at = jiffies;
+ send_roam_adv(bat_priv, tt_global_entry->common.addr,
tt_global_entry->orig_node);
}
out:
@@ -310,13 +308,12 @@ int tt_local_seq_print_text(struct seq_file *seq, void *offset)
struct net_device *net_dev = (struct net_device *)seq->private;
struct bat_priv *bat_priv = netdev_priv(net_dev);
struct hashtable_t *hash = bat_priv->tt_local_hash;
- struct tt_local_entry *tt_local_entry;
+ struct tt_common_entry *tt_common_entry;
struct hard_iface *primary_if;
struct hlist_node *node;
struct hlist_head *head;
- size_t buf_size, pos;
- char *buff;
- int i, ret = 0;
+ uint32_t i;
+ int ret = 0;
primary_if = primary_if_get_selected(bat_priv);
if (!primary_if) {
@@ -337,51 +334,27 @@ int tt_local_seq_print_text(struct seq_file *seq, void *offset)
"announced via TT (TTVN: %u):\n",
net_dev->name, (uint8_t)atomic_read(&bat_priv->ttvn));
- buf_size = 1;
- /* Estimate length for: " * xx:xx:xx:xx:xx:xx\n" */
for (i = 0; i < hash->size; i++) {
head = &hash->table[i];
rcu_read_lock();
- __hlist_for_each_rcu(node, head)
- buf_size += 29;
- rcu_read_unlock();
- }
-
- buff = kmalloc(buf_size, GFP_ATOMIC);
- if (!buff) {
- ret = -ENOMEM;
- goto out;
- }
-
- buff[0] = '\0';
- pos = 0;
-
- for (i = 0; i < hash->size; i++) {
- head = &hash->table[i];
-
- rcu_read_lock();
- hlist_for_each_entry_rcu(tt_local_entry, node,
+ hlist_for_each_entry_rcu(tt_common_entry, node,
head, hash_entry) {
- pos += snprintf(buff + pos, 30, " * %pM "
- "[%c%c%c%c%c]\n",
- tt_local_entry->addr,
- (tt_local_entry->flags &
+ seq_printf(seq, " * %pM [%c%c%c%c%c]\n",
+ tt_common_entry->addr,
+ (tt_common_entry->flags &
TT_CLIENT_ROAM ? 'R' : '.'),
- (tt_local_entry->flags &
+ (tt_common_entry->flags &
TT_CLIENT_NOPURGE ? 'P' : '.'),
- (tt_local_entry->flags &
+ (tt_common_entry->flags &
TT_CLIENT_NEW ? 'N' : '.'),
- (tt_local_entry->flags &
+ (tt_common_entry->flags &
TT_CLIENT_PENDING ? 'X' : '.'),
- (tt_local_entry->flags &
+ (tt_common_entry->flags &
TT_CLIENT_WIFI ? 'W' : '.'));
}
rcu_read_unlock();
}
-
- seq_printf(seq, "%s", buff);
- kfree(buff);
out:
if (primary_if)
hardif_free_ref(primary_if);
@@ -392,13 +365,13 @@ static void tt_local_set_pending(struct bat_priv *bat_priv,
struct tt_local_entry *tt_local_entry,
uint16_t flags)
{
- tt_local_event(bat_priv, tt_local_entry->addr,
- tt_local_entry->flags | flags);
+ tt_local_event(bat_priv, tt_local_entry->common.addr,
+ tt_local_entry->common.flags | flags);
/* The local client has to be marked as "pending to be removed" but has
* to be kept in the table in order to send it in a full table
* response issued before the net ttvn increment (consistency check) */
- tt_local_entry->flags |= TT_CLIENT_PENDING;
+ tt_local_entry->common.flags |= TT_CLIENT_PENDING;
}
void tt_local_remove(struct bat_priv *bat_priv, const uint8_t *addr,
@@ -414,7 +387,7 @@ void tt_local_remove(struct bat_priv *bat_priv, const uint8_t *addr,
(roaming ? TT_CLIENT_ROAM : NO_FLAGS));
bat_dbg(DBG_TT, bat_priv, "Local tt entry (%pM) pending to be removed: "
- "%s\n", tt_local_entry->addr, message);
+ "%s\n", tt_local_entry->common.addr, message);
out:
if (tt_local_entry)
tt_local_entry_free_ref(tt_local_entry);
@@ -424,23 +397,27 @@ static void tt_local_purge(struct bat_priv *bat_priv)
{
struct hashtable_t *hash = bat_priv->tt_local_hash;
struct tt_local_entry *tt_local_entry;
+ struct tt_common_entry *tt_common_entry;
struct hlist_node *node, *node_tmp;
struct hlist_head *head;
spinlock_t *list_lock; /* protects write access to the hash lists */
- int i;
+ uint32_t i;
for (i = 0; i < hash->size; i++) {
head = &hash->table[i];
list_lock = &hash->list_locks[i];
spin_lock_bh(list_lock);
- hlist_for_each_entry_safe(tt_local_entry, node, node_tmp,
+ hlist_for_each_entry_safe(tt_common_entry, node, node_tmp,
head, hash_entry) {
- if (tt_local_entry->flags & TT_CLIENT_NOPURGE)
+ tt_local_entry = container_of(tt_common_entry,
+ struct tt_local_entry,
+ common);
+ if (tt_local_entry->common.flags & TT_CLIENT_NOPURGE)
continue;
/* entry already marked for deletion */
- if (tt_local_entry->flags & TT_CLIENT_PENDING)
+ if (tt_local_entry->common.flags & TT_CLIENT_PENDING)
continue;
if (!is_out_of_time(tt_local_entry->last_seen,
@@ -451,7 +428,7 @@ static void tt_local_purge(struct bat_priv *bat_priv)
TT_CLIENT_DEL);
bat_dbg(DBG_TT, bat_priv, "Local tt entry (%pM) "
"pending to be removed: timed out\n",
- tt_local_entry->addr);
+ tt_local_entry->common.addr);
}
spin_unlock_bh(list_lock);
}
@@ -462,10 +439,11 @@ static void tt_local_table_free(struct bat_priv *bat_priv)
{
struct hashtable_t *hash;
spinlock_t *list_lock; /* protects write access to the hash lists */
+ struct tt_common_entry *tt_common_entry;
struct tt_local_entry *tt_local_entry;
struct hlist_node *node, *node_tmp;
struct hlist_head *head;
- int i;
+ uint32_t i;
if (!bat_priv->tt_local_hash)
return;
@@ -477,9 +455,12 @@ static void tt_local_table_free(struct bat_priv *bat_priv)
list_lock = &hash->list_locks[i];
spin_lock_bh(list_lock);
- hlist_for_each_entry_safe(tt_local_entry, node, node_tmp,
+ hlist_for_each_entry_safe(tt_common_entry, node, node_tmp,
head, hash_entry) {
hlist_del_rcu(node);
+ tt_local_entry = container_of(tt_common_entry,
+ struct tt_local_entry,
+ common);
tt_local_entry_free_ref(tt_local_entry);
}
spin_unlock_bh(list_lock);
@@ -527,6 +508,7 @@ int tt_global_add(struct bat_priv *bat_priv, struct orig_node *orig_node,
struct tt_global_entry *tt_global_entry;
struct orig_node *orig_node_tmp;
int ret = 0;
+ int hash_added;
tt_global_entry = tt_global_hash_find(bat_priv, tt_addr);
@@ -537,18 +519,24 @@ int tt_global_add(struct bat_priv *bat_priv, struct orig_node *orig_node,
if (!tt_global_entry)
goto out;
- memcpy(tt_global_entry->addr, tt_addr, ETH_ALEN);
+ memcpy(tt_global_entry->common.addr, tt_addr, ETH_ALEN);
+ tt_global_entry->common.flags = NO_FLAGS;
+ atomic_set(&tt_global_entry->common.refcount, 2);
/* Assign the new orig_node */
atomic_inc(&orig_node->refcount);
tt_global_entry->orig_node = orig_node;
tt_global_entry->ttvn = ttvn;
- tt_global_entry->flags = NO_FLAGS;
tt_global_entry->roam_at = 0;
- atomic_set(&tt_global_entry->refcount, 2);
- hash_add(bat_priv->tt_global_hash, compare_gtt,
- choose_orig, tt_global_entry,
- &tt_global_entry->hash_entry);
+ hash_added = hash_add(bat_priv->tt_global_hash, compare_tt,
+ choose_orig, &tt_global_entry->common,
+ &tt_global_entry->common.hash_entry);
+
+ if (unlikely(hash_added != 0)) {
+ /* remove the reference for the hash */
+ tt_global_entry_free_ref(tt_global_entry);
+ goto out_remove;
+ }
atomic_inc(&orig_node->tt_size);
} else {
if (tt_global_entry->orig_node != orig_node) {
@@ -559,20 +547,21 @@ int tt_global_add(struct bat_priv *bat_priv, struct orig_node *orig_node,
orig_node_free_ref(orig_node_tmp);
atomic_inc(&orig_node->tt_size);
}
+ tt_global_entry->common.flags = NO_FLAGS;
tt_global_entry->ttvn = ttvn;
- tt_global_entry->flags = NO_FLAGS;
tt_global_entry->roam_at = 0;
}
if (wifi)
- tt_global_entry->flags |= TT_CLIENT_WIFI;
+ tt_global_entry->common.flags |= TT_CLIENT_WIFI;
bat_dbg(DBG_TT, bat_priv,
"Creating new global tt entry: %pM (via %pM)\n",
- tt_global_entry->addr, orig_node->orig);
+ tt_global_entry->common.addr, orig_node->orig);
+out_remove:
/* remove address from local hash if present */
- tt_local_remove(bat_priv, tt_global_entry->addr,
+ tt_local_remove(bat_priv, tt_global_entry->common.addr,
"global tt received", roaming);
ret = 1;
out:
@@ -586,13 +575,13 @@ int tt_global_seq_print_text(struct seq_file *seq, void *offset)
struct net_device *net_dev = (struct net_device *)seq->private;
struct bat_priv *bat_priv = netdev_priv(net_dev);
struct hashtable_t *hash = bat_priv->tt_global_hash;
+ struct tt_common_entry *tt_common_entry;
struct tt_global_entry *tt_global_entry;
struct hard_iface *primary_if;
struct hlist_node *node;
struct hlist_head *head;
- size_t buf_size, pos;
- char *buff;
- int i, ret = 0;
+ uint32_t i;
+ int ret = 0;
primary_if = primary_if_get_selected(bat_priv);
if (!primary_if) {
@@ -615,53 +604,32 @@ int tt_global_seq_print_text(struct seq_file *seq, void *offset)
seq_printf(seq, " %-13s %s %-15s %s %s\n",
"Client", "(TTVN)", "Originator", "(Curr TTVN)", "Flags");
- buf_size = 1;
- /* Estimate length for: " * xx:xx:xx:xx:xx:xx (ttvn) via
- * xx:xx:xx:xx:xx:xx (cur_ttvn)\n"*/
for (i = 0; i < hash->size; i++) {
head = &hash->table[i];
rcu_read_lock();
- __hlist_for_each_rcu(node, head)
- buf_size += 67;
- rcu_read_unlock();
- }
-
- buff = kmalloc(buf_size, GFP_ATOMIC);
- if (!buff) {
- ret = -ENOMEM;
- goto out;
- }
-
- buff[0] = '\0';
- pos = 0;
-
- for (i = 0; i < hash->size; i++) {
- head = &hash->table[i];
-
- rcu_read_lock();
- hlist_for_each_entry_rcu(tt_global_entry, node,
+ hlist_for_each_entry_rcu(tt_common_entry, node,
head, hash_entry) {
- pos += snprintf(buff + pos, 69,
- " * %pM (%3u) via %pM (%3u) "
- "[%c%c%c]\n", tt_global_entry->addr,
+ tt_global_entry = container_of(tt_common_entry,
+ struct tt_global_entry,
+ common);
+ seq_printf(seq, " * %pM (%3u) via %pM (%3u) "
+ "[%c%c%c]\n",
+ tt_global_entry->common.addr,
tt_global_entry->ttvn,
tt_global_entry->orig_node->orig,
(uint8_t) atomic_read(
&tt_global_entry->orig_node->
last_ttvn),
- (tt_global_entry->flags &
+ (tt_global_entry->common.flags &
TT_CLIENT_ROAM ? 'R' : '.'),
- (tt_global_entry->flags &
+ (tt_global_entry->common.flags &
TT_CLIENT_PENDING ? 'X' : '.'),
- (tt_global_entry->flags &
+ (tt_global_entry->common.flags &
TT_CLIENT_WIFI ? 'W' : '.'));
}
rcu_read_unlock();
}
-
- seq_printf(seq, "%s", buff);
- kfree(buff);
out:
if (primary_if)
hardif_free_ref(primary_if);
@@ -677,13 +645,13 @@ static void _tt_global_del(struct bat_priv *bat_priv,
bat_dbg(DBG_TT, bat_priv,
"Deleting global tt entry %pM (via %pM): %s\n",
- tt_global_entry->addr, tt_global_entry->orig_node->orig,
+ tt_global_entry->common.addr, tt_global_entry->orig_node->orig,
message);
atomic_dec(&tt_global_entry->orig_node->tt_size);
- hash_remove(bat_priv->tt_global_hash, compare_gtt, choose_orig,
- tt_global_entry->addr);
+ hash_remove(bat_priv->tt_global_hash, compare_tt, choose_orig,
+ tt_global_entry->common.addr);
out:
if (tt_global_entry)
tt_global_entry_free_ref(tt_global_entry);
@@ -694,6 +662,7 @@ void tt_global_del(struct bat_priv *bat_priv,
const char *message, bool roaming)
{
struct tt_global_entry *tt_global_entry = NULL;
+ struct tt_local_entry *tt_local_entry = NULL;
tt_global_entry = tt_global_hash_find(bat_priv, addr);
if (!tt_global_entry)
@@ -701,22 +670,37 @@ void tt_global_del(struct bat_priv *bat_priv,
if (tt_global_entry->orig_node == orig_node) {
if (roaming) {
- tt_global_entry->flags |= TT_CLIENT_ROAM;
- tt_global_entry->roam_at = jiffies;
- goto out;
+ /* if we are deleting a global entry due to a roam
+ * event, there are two possibilities:
+ * 1) the client roamed from node A to node B => we mark
+ * it with TT_CLIENT_ROAM, we start a timer and we
+ * wait for node B to claim it. In case of timeout
+ * the entry is purged.
+ * 2) the client roamed to us => we can directly delete
+ * the global entry, since it is useless now. */
+ tt_local_entry = tt_local_hash_find(bat_priv,
+ tt_global_entry->common.addr);
+ if (!tt_local_entry) {
+ tt_global_entry->common.flags |= TT_CLIENT_ROAM;
+ tt_global_entry->roam_at = jiffies;
+ goto out;
+ }
}
_tt_global_del(bat_priv, tt_global_entry, message);
}
out:
if (tt_global_entry)
tt_global_entry_free_ref(tt_global_entry);
+ if (tt_local_entry)
+ tt_local_entry_free_ref(tt_local_entry);
}
void tt_global_del_orig(struct bat_priv *bat_priv,
struct orig_node *orig_node, const char *message)
{
struct tt_global_entry *tt_global_entry;
- int i;
+ struct tt_common_entry *tt_common_entry;
+ uint32_t i;
struct hashtable_t *hash = bat_priv->tt_global_hash;
struct hlist_node *node, *safe;
struct hlist_head *head;
@@ -730,14 +714,18 @@ void tt_global_del_orig(struct bat_priv *bat_priv,
list_lock = &hash->list_locks[i];
spin_lock_bh(list_lock);
- hlist_for_each_entry_safe(tt_global_entry, node, safe,
+ hlist_for_each_entry_safe(tt_common_entry, node, safe,
head, hash_entry) {
+ tt_global_entry = container_of(tt_common_entry,
+ struct tt_global_entry,
+ common);
if (tt_global_entry->orig_node == orig_node) {
bat_dbg(DBG_TT, bat_priv,
"Deleting global tt entry %pM "
- "(via %pM): originator time out\n",
- tt_global_entry->addr,
- tt_global_entry->orig_node->orig);
+ "(via %pM): %s\n",
+ tt_global_entry->common.addr,
+ tt_global_entry->orig_node->orig,
+ message);
hlist_del_rcu(node);
tt_global_entry_free_ref(tt_global_entry);
}
@@ -750,20 +738,24 @@ void tt_global_del_orig(struct bat_priv *bat_priv,
static void tt_global_roam_purge(struct bat_priv *bat_priv)
{
struct hashtable_t *hash = bat_priv->tt_global_hash;
+ struct tt_common_entry *tt_common_entry;
struct tt_global_entry *tt_global_entry;
struct hlist_node *node, *node_tmp;
struct hlist_head *head;
spinlock_t *list_lock; /* protects write access to the hash lists */
- int i;
+ uint32_t i;
for (i = 0; i < hash->size; i++) {
head = &hash->table[i];
list_lock = &hash->list_locks[i];
spin_lock_bh(list_lock);
- hlist_for_each_entry_safe(tt_global_entry, node, node_tmp,
+ hlist_for_each_entry_safe(tt_common_entry, node, node_tmp,
head, hash_entry) {
- if (!(tt_global_entry->flags & TT_CLIENT_ROAM))
+ tt_global_entry = container_of(tt_common_entry,
+ struct tt_global_entry,
+ common);
+ if (!(tt_global_entry->common.flags & TT_CLIENT_ROAM))
continue;
if (!is_out_of_time(tt_global_entry->roam_at,
TT_CLIENT_ROAM_TIMEOUT * 1000))
@@ -771,7 +763,7 @@ static void tt_global_roam_purge(struct bat_priv *bat_priv)
bat_dbg(DBG_TT, bat_priv, "Deleting global "
"tt entry (%pM): Roaming timeout\n",
- tt_global_entry->addr);
+ tt_global_entry->common.addr);
atomic_dec(&tt_global_entry->orig_node->tt_size);
hlist_del_rcu(node);
tt_global_entry_free_ref(tt_global_entry);
@@ -785,10 +777,11 @@ static void tt_global_table_free(struct bat_priv *bat_priv)
{
struct hashtable_t *hash;
spinlock_t *list_lock; /* protects write access to the hash lists */
+ struct tt_common_entry *tt_common_entry;
struct tt_global_entry *tt_global_entry;
struct hlist_node *node, *node_tmp;
struct hlist_head *head;
- int i;
+ uint32_t i;
if (!bat_priv->tt_global_hash)
return;
@@ -800,9 +793,12 @@ static void tt_global_table_free(struct bat_priv *bat_priv)
list_lock = &hash->list_locks[i];
spin_lock_bh(list_lock);
- hlist_for_each_entry_safe(tt_global_entry, node, node_tmp,
+ hlist_for_each_entry_safe(tt_common_entry, node, node_tmp,
head, hash_entry) {
hlist_del_rcu(node);
+ tt_global_entry = container_of(tt_common_entry,
+ struct tt_global_entry,
+ common);
tt_global_entry_free_ref(tt_global_entry);
}
spin_unlock_bh(list_lock);
@@ -818,8 +814,8 @@ static bool _is_ap_isolated(struct tt_local_entry *tt_local_entry,
{
bool ret = false;
- if (tt_local_entry->flags & TT_CLIENT_WIFI &&
- tt_global_entry->flags & TT_CLIENT_WIFI)
+ if (tt_local_entry->common.flags & TT_CLIENT_WIFI &&
+ tt_global_entry->common.flags & TT_CLIENT_WIFI)
ret = true;
return ret;
@@ -852,7 +848,7 @@ struct orig_node *transtable_search(struct bat_priv *bat_priv,
/* A global client marked as PENDING has already moved from that
* originator */
- if (tt_global_entry->flags & TT_CLIENT_PENDING)
+ if (tt_global_entry->common.flags & TT_CLIENT_PENDING)
goto out;
orig_node = tt_global_entry->orig_node;
@@ -871,29 +867,34 @@ uint16_t tt_global_crc(struct bat_priv *bat_priv, struct orig_node *orig_node)
{
uint16_t total = 0, total_one;
struct hashtable_t *hash = bat_priv->tt_global_hash;
+ struct tt_common_entry *tt_common_entry;
struct tt_global_entry *tt_global_entry;
struct hlist_node *node;
struct hlist_head *head;
- int i, j;
+ uint32_t i;
+ int j;
for (i = 0; i < hash->size; i++) {
head = &hash->table[i];
rcu_read_lock();
- hlist_for_each_entry_rcu(tt_global_entry, node,
+ hlist_for_each_entry_rcu(tt_common_entry, node,
head, hash_entry) {
+ tt_global_entry = container_of(tt_common_entry,
+ struct tt_global_entry,
+ common);
if (compare_eth(tt_global_entry->orig_node,
orig_node)) {
/* Roaming clients are in the global table for
* consistency only. They don't have to be
* taken into account while computing the
* global crc */
- if (tt_global_entry->flags & TT_CLIENT_ROAM)
+ if (tt_common_entry->flags & TT_CLIENT_ROAM)
continue;
total_one = 0;
for (j = 0; j < ETH_ALEN; j++)
total_one = crc16_byte(total_one,
- tt_global_entry->addr[j]);
+ tt_common_entry->addr[j]);
total ^= total_one;
}
}
@@ -908,25 +909,26 @@ uint16_t tt_local_crc(struct bat_priv *bat_priv)
{
uint16_t total = 0, total_one;
struct hashtable_t *hash = bat_priv->tt_local_hash;
- struct tt_local_entry *tt_local_entry;
+ struct tt_common_entry *tt_common_entry;
struct hlist_node *node;
struct hlist_head *head;
- int i, j;
+ uint32_t i;
+ int j;
for (i = 0; i < hash->size; i++) {
head = &hash->table[i];
rcu_read_lock();
- hlist_for_each_entry_rcu(tt_local_entry, node,
+ hlist_for_each_entry_rcu(tt_common_entry, node,
head, hash_entry) {
/* not yet committed clients have not to be taken into
* account while computing the CRC */
- if (tt_local_entry->flags & TT_CLIENT_NEW)
+ if (tt_common_entry->flags & TT_CLIENT_NEW)
continue;
total_one = 0;
for (j = 0; j < ETH_ALEN; j++)
total_one = crc16_byte(total_one,
- tt_local_entry->addr[j]);
+ tt_common_entry->addr[j]);
total ^= total_one;
}
rcu_read_unlock();
@@ -1015,21 +1017,25 @@ unlock:
/* data_ptr is useless here, but has to be kept to respect the prototype */
static int tt_local_valid_entry(const void *entry_ptr, const void *data_ptr)
{
- const struct tt_local_entry *tt_local_entry = entry_ptr;
+ const struct tt_common_entry *tt_common_entry = entry_ptr;
- if (tt_local_entry->flags & TT_CLIENT_NEW)
+ if (tt_common_entry->flags & TT_CLIENT_NEW)
return 0;
return 1;
}
static int tt_global_valid_entry(const void *entry_ptr, const void *data_ptr)
{
- const struct tt_global_entry *tt_global_entry = entry_ptr;
+ const struct tt_common_entry *tt_common_entry = entry_ptr;
+ const struct tt_global_entry *tt_global_entry;
const struct orig_node *orig_node = data_ptr;
- if (tt_global_entry->flags & TT_CLIENT_ROAM)
+ if (tt_common_entry->flags & TT_CLIENT_ROAM)
return 0;
+ tt_global_entry = container_of(tt_common_entry, struct tt_global_entry,
+ common);
+
return (tt_global_entry->orig_node == orig_node);
}
@@ -1040,7 +1046,7 @@ static struct sk_buff *tt_response_fill_table(uint16_t tt_len, uint8_t ttvn,
const void *),
void *cb_data)
{
- struct tt_local_entry *tt_local_entry;
+ struct tt_common_entry *tt_common_entry;
struct tt_query_packet *tt_response;
struct tt_change *tt_change;
struct hlist_node *node;
@@ -1048,7 +1054,7 @@ static struct sk_buff *tt_response_fill_table(uint16_t tt_len, uint8_t ttvn,
struct sk_buff *skb = NULL;
uint16_t tt_tot, tt_count;
ssize_t tt_query_size = sizeof(struct tt_query_packet);
- int i;
+ uint32_t i;
if (tt_query_size + tt_len > primary_if->soft_iface->mtu) {
tt_len = primary_if->soft_iface->mtu - tt_query_size;
@@ -1072,15 +1078,16 @@ static struct sk_buff *tt_response_fill_table(uint16_t tt_len, uint8_t ttvn,
for (i = 0; i < hash->size; i++) {
head = &hash->table[i];
- hlist_for_each_entry_rcu(tt_local_entry, node,
+ hlist_for_each_entry_rcu(tt_common_entry, node,
head, hash_entry) {
if (tt_count == tt_tot)
break;
- if ((valid_cb) && (!valid_cb(tt_local_entry, cb_data)))
+ if ((valid_cb) && (!valid_cb(tt_common_entry, cb_data)))
continue;
- memcpy(tt_change->addr, tt_local_entry->addr, ETH_ALEN);
+ memcpy(tt_change->addr, tt_common_entry->addr,
+ ETH_ALEN);
tt_change->flags = NO_FLAGS;
tt_count++;
@@ -1187,11 +1194,11 @@ static bool send_other_tt_response(struct bat_priv *bat_priv,
(tt_request->flags & TT_FULL_TABLE ? 'F' : '.'));
/* Let's get the orig node of the REAL destination */
- req_dst_orig_node = get_orig_node(bat_priv, tt_request->dst);
+ req_dst_orig_node = orig_hash_find(bat_priv, tt_request->dst);
if (!req_dst_orig_node)
goto out;
- res_dst_orig_node = get_orig_node(bat_priv, tt_request->src);
+ res_dst_orig_node = orig_hash_find(bat_priv, tt_request->src);
if (!res_dst_orig_node)
goto out;
@@ -1317,7 +1324,7 @@ static bool send_my_tt_response(struct bat_priv *bat_priv,
my_ttvn = (uint8_t)atomic_read(&bat_priv->ttvn);
req_ttvn = tt_request->ttvn;
- orig_node = get_orig_node(bat_priv, tt_request->src);
+ orig_node = orig_hash_find(bat_priv, tt_request->src);
if (!orig_node)
goto out;
@@ -1497,7 +1504,7 @@ bool is_my_client(struct bat_priv *bat_priv, const uint8_t *addr)
goto out;
/* Check if the client has been logically deleted (but is kept for
* consistency purpose) */
- if (tt_local_entry->flags & TT_CLIENT_PENDING)
+ if (tt_local_entry->common.flags & TT_CLIENT_PENDING)
goto out;
ret = true;
out:
@@ -1720,45 +1727,53 @@ void tt_free(struct bat_priv *bat_priv)
kfree(bat_priv->tt_buff);
}
-/* This function will reset the specified flags from all the entries in
- * the given hash table and will increment num_local_tt for each involved
- * entry */
-static void tt_local_reset_flags(struct bat_priv *bat_priv, uint16_t flags)
+/* This function will enable or disable the specified flags for all the entries
+ * in the given hash table and returns the number of modified entries */
+static uint16_t tt_set_flags(struct hashtable_t *hash, uint16_t flags,
+ bool enable)
{
- int i;
- struct hashtable_t *hash = bat_priv->tt_local_hash;
+ uint32_t i;
+ uint16_t changed_num = 0;
struct hlist_head *head;
struct hlist_node *node;
- struct tt_local_entry *tt_local_entry;
+ struct tt_common_entry *tt_common_entry;
if (!hash)
- return;
+ goto out;
for (i = 0; i < hash->size; i++) {
head = &hash->table[i];
rcu_read_lock();
- hlist_for_each_entry_rcu(tt_local_entry, node,
+ hlist_for_each_entry_rcu(tt_common_entry, node,
head, hash_entry) {
- if (!(tt_local_entry->flags & flags))
- continue;
- tt_local_entry->flags &= ~flags;
- atomic_inc(&bat_priv->num_local_tt);
+ if (enable) {
+ if ((tt_common_entry->flags & flags) == flags)
+ continue;
+ tt_common_entry->flags |= flags;
+ } else {
+ if (!(tt_common_entry->flags & flags))
+ continue;
+ tt_common_entry->flags &= ~flags;
+ }
+ changed_num++;
}
rcu_read_unlock();
}
-
+out:
+ return changed_num;
}
/* Purge out all the tt local entries marked with TT_CLIENT_PENDING */
static void tt_local_purge_pending_clients(struct bat_priv *bat_priv)
{
struct hashtable_t *hash = bat_priv->tt_local_hash;
+ struct tt_common_entry *tt_common_entry;
struct tt_local_entry *tt_local_entry;
struct hlist_node *node, *node_tmp;
struct hlist_head *head;
spinlock_t *list_lock; /* protects write access to the hash lists */
- int i;
+ uint32_t i;
if (!hash)
return;
@@ -1768,16 +1783,19 @@ static void tt_local_purge_pending_clients(struct bat_priv *bat_priv)
list_lock = &hash->list_locks[i];
spin_lock_bh(list_lock);
- hlist_for_each_entry_safe(tt_local_entry, node, node_tmp,
+ hlist_for_each_entry_safe(tt_common_entry, node, node_tmp,
head, hash_entry) {
- if (!(tt_local_entry->flags & TT_CLIENT_PENDING))
+ if (!(tt_common_entry->flags & TT_CLIENT_PENDING))
continue;
bat_dbg(DBG_TT, bat_priv, "Deleting local tt entry "
- "(%pM): pending\n", tt_local_entry->addr);
+ "(%pM): pending\n", tt_common_entry->addr);
atomic_dec(&bat_priv->num_local_tt);
hlist_del_rcu(node);
+ tt_local_entry = container_of(tt_common_entry,
+ struct tt_local_entry,
+ common);
tt_local_entry_free_ref(tt_local_entry);
}
spin_unlock_bh(list_lock);
@@ -1787,7 +1805,11 @@ static void tt_local_purge_pending_clients(struct bat_priv *bat_priv)
void tt_commit_changes(struct bat_priv *bat_priv)
{
- tt_local_reset_flags(bat_priv, TT_CLIENT_NEW);
+ uint16_t changed_num = tt_set_flags(bat_priv->tt_local_hash,
+ TT_CLIENT_NEW, false);
+ /* all the reset entries have now to be effectively counted as local
+ * entries */
+ atomic_add(changed_num, &bat_priv->num_local_tt);
tt_local_purge_pending_clients(bat_priv);
/* Increment the TTVN only once per OGM interval */
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index ab8d0fe6df5a..e9eb043719ac 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -222,24 +222,24 @@ struct socket_packet {
struct icmp_packet_rr icmp_packet;
};
-struct tt_local_entry {
+struct tt_common_entry {
uint8_t addr[ETH_ALEN];
struct hlist_node hash_entry;
- unsigned long last_seen;
uint16_t flags;
atomic_t refcount;
struct rcu_head rcu;
};
+struct tt_local_entry {
+ struct tt_common_entry common;
+ unsigned long last_seen;
+};
+
struct tt_global_entry {
- uint8_t addr[ETH_ALEN];
- struct hlist_node hash_entry; /* entry in the global table */
+ struct tt_common_entry common;
struct orig_node *orig_node;
uint8_t ttvn;
- uint16_t flags; /* only TT_GLOBAL_ROAM is used */
unsigned long roam_at; /* time at which TT_GLOBAL_ROAM was set */
- atomic_t refcount;
- struct rcu_head rcu;
};
struct tt_change_node {
diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c
index f81a6b668b0c..cc3b9f2f3b5d 100644
--- a/net/batman-adv/vis.c
+++ b/net/batman-adv/vis.c
@@ -66,7 +66,7 @@ static int vis_info_cmp(const struct hlist_node *node, const void *data2)
/* hash function to choose an entry in a hash table of given size */
/* hash algorithm from http://en.wikipedia.org/wiki/Hash_table */
-static int vis_info_choose(const void *data, int size)
+static uint32_t vis_info_choose(const void *data, uint32_t size)
{
const struct vis_info *vis_info = data;
const struct vis_packet *packet;
@@ -96,7 +96,7 @@ static struct vis_info *vis_hash_find(struct bat_priv *bat_priv,
struct hlist_head *head;
struct hlist_node *node;
struct vis_info *vis_info, *vis_info_tmp = NULL;
- int index;
+ uint32_t index;
if (!hash)
return NULL;
@@ -202,7 +202,8 @@ int vis_seq_print_text(struct seq_file *seq, void *offset)
HLIST_HEAD(vis_if_list);
struct if_list_entry *entry;
struct hlist_node *pos, *n;
- int i, j, ret = 0;
+ uint32_t i;
+ int j, ret = 0;
int vis_server = atomic_read(&bat_priv->vis_mode);
size_t buff_pos, buf_size;
char *buff;
@@ -556,7 +557,8 @@ static int find_best_vis_server(struct bat_priv *bat_priv,
struct hlist_head *head;
struct orig_node *orig_node;
struct vis_packet *packet;
- int best_tq = -1, i;
+ int best_tq = -1;
+ uint32_t i;
packet = (struct vis_packet *)info->skb_packet->data;
@@ -607,8 +609,9 @@ static int generate_vis_packet(struct bat_priv *bat_priv)
struct vis_info *info = bat_priv->my_vis_info;
struct vis_packet *packet = (struct vis_packet *)info->skb_packet->data;
struct vis_info_entry *entry;
- struct tt_local_entry *tt_local_entry;
- int best_tq = -1, i;
+ struct tt_common_entry *tt_common_entry;
+ int best_tq = -1;
+ uint32_t i;
info->first_seen = jiffies;
packet->vis_type = atomic_read(&bat_priv->vis_mode);
@@ -669,13 +672,13 @@ next:
head = &hash->table[i];
rcu_read_lock();
- hlist_for_each_entry_rcu(tt_local_entry, node, head,
+ hlist_for_each_entry_rcu(tt_common_entry, node, head,
hash_entry) {
entry = (struct vis_info_entry *)
skb_put(info->skb_packet,
sizeof(*entry));
memset(entry->src, 0, ETH_ALEN);
- memcpy(entry->dest, tt_local_entry->addr, ETH_ALEN);
+ memcpy(entry->dest, tt_common_entry->addr, ETH_ALEN);
entry->quality = 0; /* 0 means TT */
packet->entries++;
@@ -696,7 +699,7 @@ unlock:
* held */
static void purge_vis_packets(struct bat_priv *bat_priv)
{
- int i;
+ uint32_t i;
struct hashtable_t *hash = bat_priv->vis_hash;
struct hlist_node *node, *node_tmp;
struct hlist_head *head;
@@ -733,7 +736,7 @@ static void broadcast_vis_packet(struct bat_priv *bat_priv,
struct sk_buff *skb;
struct hard_iface *hard_iface;
uint8_t dstaddr[ETH_ALEN];
- int i;
+ uint32_t i;
packet = (struct vis_packet *)info->skb_packet->data;
diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c
index 42d53b85a808..a779ec703323 100644
--- a/net/bluetooth/bnep/core.c
+++ b/net/bluetooth/bnep/core.c
@@ -56,8 +56,8 @@
#define VERSION "1.3"
-static int compress_src = 1;
-static int compress_dst = 1;
+static bool compress_src = true;
+static bool compress_dst = true;
static LIST_HEAD(bnep_session_list);
static DECLARE_RWSEM(bnep_session_sem);
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 919e3c0e74aa..4221bd256bdd 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -45,7 +45,7 @@
#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/hci_core.h>
-static int enable_le;
+static bool enable_le;
/* Handle HCI Event packets */
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 189a667c293b..6d94616af312 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -49,7 +49,7 @@
#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/hci_core.h>
-static int enable_mgmt;
+static bool enable_mgmt;
/* ----- HCI socket interface ----- */
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index cd7bb3d7f2b4..aa78d8c4b93b 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -57,7 +57,7 @@
#include <net/bluetooth/l2cap.h>
#include <net/bluetooth/smp.h>
-int disable_ertm;
+bool disable_ertm;
static u32 l2cap_feat_mask = L2CAP_FEAT_FIXED_CHAN;
static u8 l2cap_fixed_chan[8] = { L2CAP_FC_L2CAP, };
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index 09a3cbcf794e..501649bf5596 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -51,8 +51,8 @@
#define VERSION "1.11"
-static int disable_cfc;
-static int l2cap_ertm;
+static bool disable_cfc;
+static bool l2cap_ertm;
static int channel_mtu = -1;
static unsigned int l2cap_mtu = RFCOMM_MAX_L2CAP_MTU;
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 0d59e61d7822..5dc2f2126fac 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -51,7 +51,7 @@
#include <net/bluetooth/hci_core.h>
#include <net/bluetooth/sco.h>
-static int disable_esco;
+static bool disable_esco;
static const struct proto_ops sco_sock_ops;
diff --git a/net/bridge/br.c b/net/bridge/br.c
index f20c4fd915a8..ba780cc8e515 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -62,7 +62,7 @@ static int __init br_init(void)
brioctl_set(br_ioctl_deviceless_stub);
-#if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)
+#if IS_ENABLED(CONFIG_ATM_LANE)
br_fdb_test_addr_hook = br_fdb_test_addr;
#endif
@@ -93,7 +93,7 @@ static void __exit br_deinit(void)
rcu_barrier(); /* Wait for completion of call_rcu()'s */
br_netfilter_fini();
-#if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)
+#if IS_ENABLED(CONFIG_ATM_LANE)
br_fdb_test_addr_hook = NULL;
#endif
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index feb77ea7b58e..71773b014e0c 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -170,8 +170,11 @@ static int br_set_mac_address(struct net_device *dev, void *p)
return -EINVAL;
spin_lock_bh(&br->lock);
- memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN);
- br_stp_change_bridge_id(br, addr->sa_data);
+ if (compare_ether_addr(dev->dev_addr, addr->sa_data)) {
+ memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN);
+ br_fdb_change_mac_address(br, addr->sa_data);
+ br_stp_change_bridge_id(br, addr->sa_data);
+ }
br->flags |= BR_SET_MAC_ADDR;
spin_unlock_bh(&br->lock);
@@ -186,7 +189,8 @@ static void br_getinfo(struct net_device *dev, struct ethtool_drvinfo *info)
strcpy(info->bus_info, "N/A");
}
-static u32 br_fix_features(struct net_device *dev, u32 features)
+static netdev_features_t br_fix_features(struct net_device *dev,
+ netdev_features_t features)
{
struct net_bridge *br = netdev_priv(dev);
@@ -341,10 +345,10 @@ void br_dev_setup(struct net_device *dev)
dev->priv_flags = IFF_EBRIDGE;
dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA |
- NETIF_F_GSO_MASK | NETIF_F_NO_CSUM | NETIF_F_LLTX |
+ NETIF_F_GSO_MASK | NETIF_F_HW_CSUM | NETIF_F_LLTX |
NETIF_F_NETNS_LOCAL | NETIF_F_HW_VLAN_TX;
dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA |
- NETIF_F_GSO_MASK | NETIF_F_NO_CSUM |
+ NETIF_F_GSO_MASK | NETIF_F_HW_CSUM |
NETIF_F_HW_VLAN_TX;
br->dev = dev;
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index c8e7861b88b0..f963f6b1884f 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -28,7 +28,8 @@
static struct kmem_cache *br_fdb_cache __read_mostly;
static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
const unsigned char *addr);
-static void fdb_notify(const struct net_bridge_fdb_entry *, int);
+static void fdb_notify(struct net_bridge *br,
+ const struct net_bridge_fdb_entry *, int);
static u32 fdb_salt __read_mostly;
@@ -80,10 +81,10 @@ static void fdb_rcu_free(struct rcu_head *head)
kmem_cache_free(br_fdb_cache, ent);
}
-static inline void fdb_delete(struct net_bridge_fdb_entry *f)
+static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f)
{
- fdb_notify(f, RTM_DELNEIGH);
hlist_del_rcu(&f->hlist);
+ fdb_notify(br, f, RTM_DELNEIGH);
call_rcu(&f->rcu, fdb_rcu_free);
}
@@ -114,7 +115,7 @@ void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr)
}
/* delete old one */
- fdb_delete(f);
+ fdb_delete(br, f);
goto insert;
}
}
@@ -126,6 +127,18 @@ void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr)
spin_unlock_bh(&br->hash_lock);
}
+void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr)
+{
+ struct net_bridge_fdb_entry *f;
+
+ /* If old entry was unassociated with any port, then delete it. */
+ f = __br_fdb_get(br, br->dev->dev_addr);
+ if (f && f->is_local && !f->dst)
+ fdb_delete(br, f);
+
+ fdb_insert(br, NULL, newaddr);
+}
+
void br_fdb_cleanup(unsigned long _data)
{
struct net_bridge *br = (struct net_bridge *)_data;
@@ -144,7 +157,7 @@ void br_fdb_cleanup(unsigned long _data)
continue;
this_timer = f->updated + delay;
if (time_before_eq(this_timer, jiffies))
- fdb_delete(f);
+ fdb_delete(br, f);
else if (time_before(this_timer, next_timer))
next_timer = this_timer;
}
@@ -165,7 +178,7 @@ void br_fdb_flush(struct net_bridge *br)
struct hlist_node *h, *n;
hlist_for_each_entry_safe(f, h, n, &br->hash[i], hlist) {
if (!f->is_static)
- fdb_delete(f);
+ fdb_delete(br, f);
}
}
spin_unlock_bh(&br->hash_lock);
@@ -209,7 +222,7 @@ void br_fdb_delete_by_port(struct net_bridge *br,
}
}
- fdb_delete(f);
+ fdb_delete(br, f);
skip_delete: ;
}
}
@@ -234,7 +247,7 @@ struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br,
return NULL;
}
-#if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)
+#if IS_ENABLED(CONFIG_ATM_LANE)
/* Interface used by ATM LANE hook to test
* if an addr is on some other bridge port */
int br_fdb_test_addr(struct net_device *dev, unsigned char *addr)
@@ -249,7 +262,7 @@ int br_fdb_test_addr(struct net_device *dev, unsigned char *addr)
ret = 0;
else {
fdb = __br_fdb_get(port->br, addr);
- ret = fdb && fdb->dst->dev != dev &&
+ ret = fdb && fdb->dst && fdb->dst->dev != dev &&
fdb->dst->state == BR_STATE_FORWARDING;
}
rcu_read_unlock();
@@ -281,6 +294,10 @@ int br_fdb_fillbuf(struct net_bridge *br, void *buf,
if (has_expired(br, f))
continue;
+ /* ignore pseudo entry for local MAC address */
+ if (!f->dst)
+ continue;
+
if (skip) {
--skip;
continue;
@@ -347,7 +364,6 @@ static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head,
fdb->is_static = 0;
fdb->updated = fdb->used = jiffies;
hlist_add_head_rcu(&fdb->hlist, head);
- fdb_notify(fdb, RTM_NEWNEIGH);
}
return fdb;
}
@@ -371,7 +387,7 @@ static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
br_warn(br, "adding interface %s with same address "
"as a received packet\n",
source->dev->name);
- fdb_delete(fdb);
+ fdb_delete(br, fdb);
}
fdb = fdb_create(head, source, addr);
@@ -379,6 +395,7 @@ static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
return -ENOMEM;
fdb->is_local = fdb->is_static = 1;
+ fdb_notify(br, fdb, RTM_NEWNEIGH);
return 0;
}
@@ -424,9 +441,11 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
}
} else {
spin_lock(&br->hash_lock);
- if (likely(!fdb_find(head, addr)))
- fdb_create(head, source, addr);
-
+ if (likely(!fdb_find(head, addr))) {
+ fdb = fdb_create(head, source, addr);
+ if (fdb)
+ fdb_notify(br, fdb, RTM_NEWNEIGH);
+ }
/* else we lose race and someone else inserts
* it first, don't bother updating
*/
@@ -446,7 +465,7 @@ static int fdb_to_nud(const struct net_bridge_fdb_entry *fdb)
return NUD_REACHABLE;
}
-static int fdb_fill_info(struct sk_buff *skb,
+static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br,
const struct net_bridge_fdb_entry *fdb,
u32 pid, u32 seq, int type, unsigned int flags)
{
@@ -459,14 +478,13 @@ static int fdb_fill_info(struct sk_buff *skb,
if (nlh == NULL)
return -EMSGSIZE;
-
ndm = nlmsg_data(nlh);
ndm->ndm_family = AF_BRIDGE;
ndm->ndm_pad1 = 0;
ndm->ndm_pad2 = 0;
ndm->ndm_flags = 0;
ndm->ndm_type = 0;
- ndm->ndm_ifindex = fdb->dst->dev->ifindex;
+ ndm->ndm_ifindex = fdb->dst ? fdb->dst->dev->ifindex : br->dev->ifindex;
ndm->ndm_state = fdb_to_nud(fdb);
NLA_PUT(skb, NDA_LLADDR, ETH_ALEN, &fdb->addr);
@@ -491,9 +509,10 @@ static inline size_t fdb_nlmsg_size(void)
+ nla_total_size(sizeof(struct nda_cacheinfo));
}
-static void fdb_notify(const struct net_bridge_fdb_entry *fdb, int type)
+static void fdb_notify(struct net_bridge *br,
+ const struct net_bridge_fdb_entry *fdb, int type)
{
- struct net *net = dev_net(fdb->dst->dev);
+ struct net *net = dev_net(br->dev);
struct sk_buff *skb;
int err = -ENOBUFS;
@@ -501,7 +520,7 @@ static void fdb_notify(const struct net_bridge_fdb_entry *fdb, int type)
if (skb == NULL)
goto errout;
- err = fdb_fill_info(skb, fdb, 0, 0, type, 0);
+ err = fdb_fill_info(skb, br, fdb, 0, 0, type, 0);
if (err < 0) {
/* -EMSGSIZE implies BUG in fdb_nlmsg_size() */
WARN_ON(err == -EMSGSIZE);
@@ -538,7 +557,7 @@ int br_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
if (idx < cb->args[0])
goto skip;
- if (fdb_fill_info(skb, f,
+ if (fdb_fill_info(skb, br, f,
NETLINK_CB(cb->skb).pid,
cb->nlh->nlmsg_seq,
RTM_NEWNEIGH,
@@ -556,7 +575,7 @@ skip:
return skb->len;
}
-/* Create new static fdb entry */
+/* Update (create or replace) forwarding database entry */
static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr,
__u16 state, __u16 flags)
{
@@ -572,19 +591,25 @@ static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr,
fdb = fdb_create(head, source, addr);
if (!fdb)
return -ENOMEM;
+ fdb_notify(br, fdb, RTM_NEWNEIGH);
} else {
if (flags & NLM_F_EXCL)
return -EEXIST;
+ }
+
+ if (fdb_to_nud(fdb) != state) {
+ if (state & NUD_PERMANENT)
+ fdb->is_local = fdb->is_static = 1;
+ else if (state & NUD_NOARP) {
+ fdb->is_local = 0;
+ fdb->is_static = 1;
+ } else
+ fdb->is_local = fdb->is_static = 0;
- if (flags & NLM_F_REPLACE)
- fdb->updated = fdb->used = jiffies;
- fdb->is_local = fdb->is_static = 0;
+ fdb->updated = fdb->used = jiffies;
+ fdb_notify(br, fdb, RTM_NEWNEIGH);
}
- if (state & NUD_PERMANENT)
- fdb->is_local = fdb->is_static = 1;
- else if (state & NUD_NOARP)
- fdb->is_static = 1;
return 0;
}
@@ -627,6 +652,11 @@ int br_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
return -EINVAL;
}
+ if (!(ndm->ndm_state & (NUD_PERMANENT|NUD_NOARP|NUD_REACHABLE))) {
+ pr_info("bridge: RTM_NEWNEIGH with invalid state %#x\n", ndm->ndm_state);
+ return -EINVAL;
+ }
+
p = br_port_get_rtnl(dev);
if (p == NULL) {
pr_info("bridge: RTM_NEWNEIGH %s not a bridge port\n",
@@ -634,9 +664,15 @@ int br_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
return -EINVAL;
}
- spin_lock_bh(&p->br->hash_lock);
- err = fdb_add_entry(p, addr, ndm->ndm_state, nlh->nlmsg_flags);
- spin_unlock_bh(&p->br->hash_lock);
+ if (ndm->ndm_flags & NTF_USE) {
+ rcu_read_lock();
+ br_fdb_update(p->br, p, addr);
+ rcu_read_unlock();
+ } else {
+ spin_lock_bh(&p->br->hash_lock);
+ err = fdb_add_entry(p, addr, ndm->ndm_state, nlh->nlmsg_flags);
+ spin_unlock_bh(&p->br->hash_lock);
+ }
return err;
}
@@ -651,7 +687,7 @@ static int fdb_delete_by_addr(struct net_bridge_port *p, const u8 *addr)
if (!fdb)
return -ENOENT;
- fdb_delete(fdb);
+ fdb_delete(p->br, fdb);
return 0;
}
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index ee64287f1290..61f65344e711 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -98,7 +98,7 @@ static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb)
/* called with rcu_read_lock */
void br_deliver(const struct net_bridge_port *to, struct sk_buff *skb)
{
- if (should_deliver(to, skb)) {
+ if (to && should_deliver(to, skb)) {
__br_deliver(to, skb);
return;
}
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index f603e5b0b930..0a942fbccc9a 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -296,10 +296,11 @@ int br_min_mtu(const struct net_bridge *br)
/*
* Recomputes features using slave's features
*/
-u32 br_features_recompute(struct net_bridge *br, u32 features)
+netdev_features_t br_features_recompute(struct net_bridge *br,
+ netdev_features_t features)
{
struct net_bridge_port *p;
- u32 mask;
+ netdev_features_t mask;
if (list_empty(&br->port_list))
return features;
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 995cbe0ac0b2..568d5bf17534 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -24,7 +24,7 @@
#include <linux/slab.h>
#include <linux/timer.h>
#include <net/ip.h>
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
#include <net/mld.h>
#include <net/addrconf.h>
@@ -36,7 +36,7 @@
#define mlock_dereference(X, br) \
rcu_dereference_protected(X, lockdep_is_held(&br->multicast_lock))
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
static inline int ipv6_is_transient_multicast(const struct in6_addr *addr)
{
if (ipv6_addr_is_multicast(addr) && IPV6_ADDR_MC_FLAG_TRANSIENT(addr))
@@ -52,7 +52,7 @@ static inline int br_ip_equal(const struct br_ip *a, const struct br_ip *b)
switch (a->proto) {
case htons(ETH_P_IP):
return a->u.ip4 == b->u.ip4;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
case htons(ETH_P_IPV6):
return ipv6_addr_equal(&a->u.ip6, &b->u.ip6);
#endif
@@ -65,7 +65,7 @@ static inline int __br_ip4_hash(struct net_bridge_mdb_htable *mdb, __be32 ip)
return jhash_1word(mdb->secret, (__force u32)ip) & (mdb->max - 1);
}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
static inline int __br_ip6_hash(struct net_bridge_mdb_htable *mdb,
const struct in6_addr *ip)
{
@@ -79,7 +79,7 @@ static inline int br_ip_hash(struct net_bridge_mdb_htable *mdb,
switch (ip->proto) {
case htons(ETH_P_IP):
return __br_ip4_hash(mdb, ip->u.ip4);
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
case htons(ETH_P_IPV6):
return __br_ip6_hash(mdb, &ip->u.ip6);
#endif
@@ -121,13 +121,13 @@ static struct net_bridge_mdb_entry *br_mdb_ip4_get(
return br_mdb_ip_get(mdb, &br_dst);
}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
static struct net_bridge_mdb_entry *br_mdb_ip6_get(
struct net_bridge_mdb_htable *mdb, const struct in6_addr *dst)
{
struct br_ip br_dst;
- ipv6_addr_copy(&br_dst.u.ip6, dst);
+ br_dst.u.ip6 = *dst;
br_dst.proto = htons(ETH_P_IPV6);
return br_mdb_ip_get(mdb, &br_dst);
@@ -152,9 +152,9 @@ struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br,
case htons(ETH_P_IP):
ip.u.ip4 = ip_hdr(skb)->daddr;
break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
case htons(ETH_P_IPV6):
- ipv6_addr_copy(&ip.u.ip6, &ipv6_hdr(skb)->daddr);
+ ip.u.ip6 = ipv6_hdr(skb)->daddr;
break;
#endif
default:
@@ -411,7 +411,7 @@ out:
return skb;
}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
const struct in6_addr *group)
{
@@ -474,7 +474,7 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
mldq->mld_cksum = 0;
mldq->mld_maxdelay = htons((u16)jiffies_to_msecs(interval));
mldq->mld_reserved = 0;
- ipv6_addr_copy(&mldq->mld_mca, group);
+ mldq->mld_mca = *group;
/* checksum */
mldq->mld_cksum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
@@ -496,7 +496,7 @@ static struct sk_buff *br_multicast_alloc_query(struct net_bridge *br,
switch (addr->proto) {
case htons(ETH_P_IP):
return br_ip4_multicast_alloc_query(br, addr->u.ip4);
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
case htons(ETH_P_IPV6):
return br_ip6_multicast_alloc_query(br, &addr->u.ip6);
#endif
@@ -773,7 +773,7 @@ static int br_ip4_multicast_add_group(struct net_bridge *br,
return br_multicast_add_group(br, port, &br_group);
}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
static int br_ip6_multicast_add_group(struct net_bridge *br,
struct net_bridge_port *port,
const struct in6_addr *group)
@@ -783,7 +783,7 @@ static int br_ip6_multicast_add_group(struct net_bridge *br,
if (!ipv6_is_transient_multicast(group))
return 0;
- ipv6_addr_copy(&br_group.u.ip6, group);
+ br_group.u.ip6 = *group;
br_group.proto = htons(ETH_P_IPV6);
return br_multicast_add_group(br, port, &br_group);
@@ -845,7 +845,7 @@ static void br_multicast_send_query(struct net_bridge *br,
br_group.proto = htons(ETH_P_IP);
__br_multicast_send_query(br, port, &br_group);
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
br_group.proto = htons(ETH_P_IPV6);
__br_multicast_send_query(br, port, &br_group);
#endif
@@ -989,7 +989,7 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br,
return err;
}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
static int br_ip6_multicast_mld2_report(struct net_bridge *br,
struct net_bridge_port *port,
struct sk_buff *skb)
@@ -1185,7 +1185,7 @@ out:
return err;
}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
static int br_ip6_multicast_query(struct net_bridge *br,
struct net_bridge_port *port,
struct sk_buff *skb)
@@ -1334,7 +1334,7 @@ static void br_ip4_multicast_leave_group(struct net_bridge *br,
br_multicast_leave_group(br, port, &br_group);
}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
static void br_ip6_multicast_leave_group(struct net_bridge *br,
struct net_bridge_port *port,
const struct in6_addr *group)
@@ -1344,7 +1344,7 @@ static void br_ip6_multicast_leave_group(struct net_bridge *br,
if (!ipv6_is_transient_multicast(group))
return;
- ipv6_addr_copy(&br_group.u.ip6, group);
+ br_group.u.ip6 = *group;
br_group.proto = htons(ETH_P_IPV6);
br_multicast_leave_group(br, port, &br_group);
@@ -1449,7 +1449,7 @@ err_out:
return err;
}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
static int br_multicast_ipv6_rcv(struct net_bridge *br,
struct net_bridge_port *port,
struct sk_buff *skb)
@@ -1458,6 +1458,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
const struct ipv6hdr *ip6h;
u8 icmp6_type;
u8 nexthdr;
+ __be16 frag_off;
unsigned len;
int offset;
int err;
@@ -1483,7 +1484,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
return -EINVAL;
nexthdr = ip6h->nexthdr;
- offset = ipv6_skip_exthdr(skb, sizeof(*ip6h), &nexthdr);
+ offset = ipv6_skip_exthdr(skb, sizeof(*ip6h), &nexthdr, &frag_off);
if (offset < 0 || nexthdr != IPPROTO_ICMPV6)
return 0;
@@ -1501,6 +1502,8 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
__skb_pull(skb2, offset);
skb_reset_transport_header(skb2);
+ skb_postpull_rcsum(skb2, skb_network_header(skb2),
+ skb_network_header_len(skb2));
icmp6_type = icmp6_hdr(skb2)->icmp6_type;
@@ -1593,7 +1596,7 @@ int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port,
switch (skb->protocol) {
case htons(ETH_P_IP):
return br_multicast_ipv4_rcv(br, port, skb);
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
case htons(ETH_P_IPV6):
return br_multicast_ipv6_rcv(br, port, skb);
#endif
@@ -1770,7 +1773,7 @@ int br_multicast_toggle(struct net_bridge *br, unsigned long val)
int err = 0;
struct net_bridge_mdb_htable *mdb;
- spin_lock(&br->multicast_lock);
+ spin_lock_bh(&br->multicast_lock);
if (br->multicast_disabled == !val)
goto unlock;
@@ -1806,7 +1809,7 @@ rollback:
}
unlock:
- spin_unlock(&br->multicast_lock);
+ spin_unlock_bh(&br->multicast_lock);
return err;
}
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index d6ec3720c77e..84122472656c 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -114,12 +114,18 @@ static struct neighbour *fake_neigh_lookup(const struct dst_entry *dst, const vo
return NULL;
}
+static unsigned int fake_mtu(const struct dst_entry *dst)
+{
+ return dst->dev->mtu;
+}
+
static struct dst_ops fake_dst_ops = {
.family = AF_INET,
.protocol = cpu_to_be16(ETH_P_IP),
.update_pmtu = fake_update_pmtu,
.cow_metrics = fake_cow_metrics,
.neigh_lookup = fake_neigh_lookup,
+ .mtu = fake_mtu,
};
/*
@@ -141,7 +147,7 @@ void br_netfilter_rtable_init(struct net_bridge *br)
rt->dst.dev = br->dev;
rt->dst.path = &rt->dst;
dst_init_metrics(&rt->dst, br_dst_default_metrics, true);
- rt->dst.flags = DST_NOXFRM;
+ rt->dst.flags = DST_NOXFRM | DST_NOPEER;
rt->dst.ops = &fake_dst_ops;
}
@@ -356,7 +362,7 @@ static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb)
if (!skb->dev)
goto free_skb;
dst = skb_dst(skb);
- neigh = dst_get_neighbour(dst);
+ neigh = dst_get_neighbour_noref(dst);
if (neigh->hh.hh_len) {
neigh_hh_bridge(&neigh->hh, skb);
skb->dev = nf_bridge->physindev;
@@ -807,7 +813,7 @@ static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff *skb,
return NF_STOLEN;
}
-#if defined(CONFIG_NF_CONNTRACK_IPV4) || defined(CONFIG_NF_CONNTRACK_IPV4_MODULE)
+#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV4)
static int br_nf_dev_queue_xmit(struct sk_buff *skb)
{
int ret;
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index e5f9ece3c9a0..a1daf8227ed1 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -18,6 +18,7 @@
#include <net/sock.h>
#include "br_private.h"
+#include "br_private_stp.h"
static inline size_t br_nlmsg_size(void)
{
@@ -188,6 +189,11 @@ static int br_rtm_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
p->state = new_state;
br_log_state(p);
+
+ spin_lock_bh(&p->br->lock);
+ br_port_state_selection(p->br);
+ spin_unlock_bh(&p->br->lock);
+
br_ifinfo_notify(RTM_NEWLINK, p);
return 0;
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index d7d6fb05411f..0b67a63ad7a8 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -56,7 +56,7 @@ struct br_ip
{
union {
__be32 ip4;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
struct in6_addr ip6;
#endif
} u;
@@ -348,6 +348,7 @@ extern void br_fdb_fini(void);
extern void br_fdb_flush(struct net_bridge *br);
extern void br_fdb_changeaddr(struct net_bridge_port *p,
const unsigned char *newaddr);
+extern void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr);
extern void br_fdb_cleanup(unsigned long arg);
extern void br_fdb_delete_by_port(struct net_bridge *br,
const struct net_bridge_port *p, int do_all);
@@ -387,7 +388,8 @@ extern int br_add_if(struct net_bridge *br,
extern int br_del_if(struct net_bridge *br,
struct net_device *dev);
extern int br_min_mtu(const struct net_bridge *br);
-extern u32 br_features_recompute(struct net_bridge *br, u32 features);
+extern netdev_features_t br_features_recompute(struct net_bridge *br,
+ netdev_features_t features);
/* br_input.c */
extern int br_handle_frame_finish(struct sk_buff *skb);
@@ -535,7 +537,7 @@ extern void br_stp_port_timer_init(struct net_bridge_port *p);
extern unsigned long br_timer_value(const struct timer_list *timer);
/* br.c */
-#if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)
+#if IS_ENABLED(CONFIG_ATM_LANE)
extern int (*br_fdb_test_addr_hook)(struct net_device *dev, unsigned char *addr);
#endif
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index ad0a3f7cf6cc..dd147d78a588 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -399,25 +399,24 @@ void br_port_state_selection(struct net_bridge *br)
struct net_bridge_port *p;
unsigned int liveports = 0;
- /* Don't change port states if userspace is handling STP */
- if (br->stp_enabled == BR_USER_STP)
- return;
-
list_for_each_entry(p, &br->port_list, list) {
if (p->state == BR_STATE_DISABLED)
continue;
- if (p->port_no == br->root_port) {
- p->config_pending = 0;
- p->topology_change_ack = 0;
- br_make_forwarding(p);
- } else if (br_is_designated_port(p)) {
- del_timer(&p->message_age_timer);
- br_make_forwarding(p);
- } else {
- p->config_pending = 0;
- p->topology_change_ack = 0;
- br_make_blocking(p);
+ /* Don't change port states if userspace is handling STP */
+ if (br->stp_enabled != BR_USER_STP) {
+ if (p->port_no == br->root_port) {
+ p->config_pending = 0;
+ p->topology_change_ack = 0;
+ br_make_forwarding(p);
+ } else if (br_is_designated_port(p)) {
+ del_timer(&p->message_age_timer);
+ br_make_forwarding(p);
+ } else {
+ p->config_pending = 0;
+ p->topology_change_ack = 0;
+ br_make_blocking(p);
+ }
}
if (p->state == BR_STATE_FORWARDING)
diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c
index 2ed0056a39a8..99c85668f551 100644
--- a/net/bridge/netfilter/ebt_ip6.c
+++ b/net/bridge/netfilter/ebt_ip6.c
@@ -55,9 +55,10 @@ ebt_ip6_mt(const struct sk_buff *skb, struct xt_action_param *par)
return false;
if (info->bitmask & EBT_IP6_PROTO) {
uint8_t nexthdr = ih6->nexthdr;
+ __be16 frag_off;
int offset_ph;
- offset_ph = ipv6_skip_exthdr(skb, sizeof(_ip6h), &nexthdr);
+ offset_ph = ipv6_skip_exthdr(skb, sizeof(_ip6h), &nexthdr, &frag_off);
if (offset_ph == -1)
return false;
if (FWINV(info->protocol != nexthdr, EBT_IP6_PROTO))
diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c
index 6e5a8bb9b940..f88ee537fb2b 100644
--- a/net/bridge/netfilter/ebt_log.c
+++ b/net/bridge/netfilter/ebt_log.c
@@ -107,12 +107,13 @@ ebt_log_packet(u_int8_t pf, unsigned int hooknum,
goto out;
}
-#if defined(CONFIG_BRIDGE_EBT_IP6) || defined(CONFIG_BRIDGE_EBT_IP6_MODULE)
+#if IS_ENABLED(CONFIG_BRIDGE_EBT_IP6)
if ((bitmask & EBT_LOG_IP6) && eth_hdr(skb)->h_proto ==
htons(ETH_P_IPV6)) {
const struct ipv6hdr *ih;
struct ipv6hdr _iph;
uint8_t nexthdr;
+ __be16 frag_off;
int offset_ph;
ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph);
@@ -123,7 +124,7 @@ ebt_log_packet(u_int8_t pf, unsigned int hooknum,
printk(" IPv6 SRC=%pI6 IPv6 DST=%pI6, IPv6 priority=0x%01X, Next Header=%d",
&ih->saddr, &ih->daddr, ih->priority, ih->nexthdr);
nexthdr = ih->nexthdr;
- offset_ph = ipv6_skip_exthdr(skb, sizeof(_iph), &nexthdr);
+ offset_ph = ipv6_skip_exthdr(skb, sizeof(_iph), &nexthdr, &frag_off);
if (offset_ph == -1)
goto out;
print_ports(skb, nexthdr, offset_ph);
diff --git a/net/caif/Kconfig b/net/caif/Kconfig
index 529750da9624..936361e5a2b6 100644
--- a/net/caif/Kconfig
+++ b/net/caif/Kconfig
@@ -40,3 +40,14 @@ config CAIF_NETDEV
If you select to build it as a built-in then the main CAIF device must
also be a built-in.
If unsure say Y.
+
+config CAIF_USB
+ tristate "CAIF USB support"
+ depends on CAIF
+ default n
+ ---help---
+ Say Y if you are using CAIF over USB CDC NCM.
+ This can be either built-in or a loadable module,
+ If you select to build it as a built-in then the main CAIF device must
+ also be a built-in.
+ If unsure say N.
diff --git a/net/caif/Makefile b/net/caif/Makefile
index ebcd4e7e6f47..cc2b51154d03 100644
--- a/net/caif/Makefile
+++ b/net/caif/Makefile
@@ -10,5 +10,6 @@ caif-y := caif_dev.o \
obj-$(CONFIG_CAIF) += caif.o
obj-$(CONFIG_CAIF_NETDEV) += chnl_net.o
obj-$(CONFIG_CAIF) += caif_socket.o
+obj-$(CONFIG_CAIF_USB) += caif_usb.o
export-y := caif.o
diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index f1fa1f6e658d..b0ce14fbf6ef 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -17,6 +17,7 @@
#include <linux/netdevice.h>
#include <linux/mutex.h>
#include <linux/module.h>
+#include <linux/spinlock.h>
#include <net/netns/generic.h>
#include <net/net_namespace.h>
#include <net/pkt_sched.h>
@@ -24,6 +25,7 @@
#include <net/caif/caif_layer.h>
#include <net/caif/cfpkt.h>
#include <net/caif/cfcnfg.h>
+#include <net/caif/cfserl.h>
MODULE_LICENSE("GPL");
@@ -33,6 +35,10 @@ struct caif_device_entry {
struct list_head list;
struct net_device *netdev;
int __percpu *pcpu_refcnt;
+ spinlock_t flow_lock;
+ struct sk_buff *xoff_skb;
+ void (*xoff_skb_dtor)(struct sk_buff *skb);
+ bool xoff;
};
struct caif_device_entry_list {
@@ -47,13 +53,14 @@ struct caif_net {
};
static int caif_net_id;
+static int q_high = 50; /* Percent */
struct cfcnfg *get_cfcnfg(struct net *net)
{
struct caif_net *caifn;
- BUG_ON(!net);
caifn = net_generic(net, caif_net_id);
- BUG_ON(!caifn);
+ if (!caifn)
+ return NULL;
return caifn->cfg;
}
EXPORT_SYMBOL(get_cfcnfg);
@@ -61,9 +68,9 @@ EXPORT_SYMBOL(get_cfcnfg);
static struct caif_device_entry_list *caif_device_list(struct net *net)
{
struct caif_net *caifn;
- BUG_ON(!net);
caifn = net_generic(net, caif_net_id);
- BUG_ON(!caifn);
+ if (!caifn)
+ return NULL;
return &caifn->caifdevs;
}
@@ -92,7 +99,8 @@ static struct caif_device_entry *caif_device_alloc(struct net_device *dev)
struct caif_device_entry *caifd;
caifdevs = caif_device_list(dev_net(dev));
- BUG_ON(!caifdevs);
+ if (!caifdevs)
+ return NULL;
caifd = kzalloc(sizeof(*caifd), GFP_KERNEL);
if (!caifd)
@@ -112,7 +120,9 @@ static struct caif_device_entry *caif_get(struct net_device *dev)
struct caif_device_entry_list *caifdevs =
caif_device_list(dev_net(dev));
struct caif_device_entry *caifd;
- BUG_ON(!caifdevs);
+ if (!caifdevs)
+ return NULL;
+
list_for_each_entry_rcu(caifd, &caifdevs->list, list) {
if (caifd->netdev == dev)
return caifd;
@@ -120,15 +130,106 @@ static struct caif_device_entry *caif_get(struct net_device *dev)
return NULL;
}
+void caif_flow_cb(struct sk_buff *skb)
+{
+ struct caif_device_entry *caifd;
+ void (*dtor)(struct sk_buff *skb) = NULL;
+ bool send_xoff;
+
+ WARN_ON(skb->dev == NULL);
+
+ rcu_read_lock();
+ caifd = caif_get(skb->dev);
+ caifd_hold(caifd);
+ rcu_read_unlock();
+
+ spin_lock_bh(&caifd->flow_lock);
+ send_xoff = caifd->xoff;
+ caifd->xoff = 0;
+ if (!WARN_ON(caifd->xoff_skb_dtor == NULL)) {
+ WARN_ON(caifd->xoff_skb != skb);
+ dtor = caifd->xoff_skb_dtor;
+ caifd->xoff_skb = NULL;
+ caifd->xoff_skb_dtor = NULL;
+ }
+ spin_unlock_bh(&caifd->flow_lock);
+
+ if (dtor)
+ dtor(skb);
+
+ if (send_xoff)
+ caifd->layer.up->
+ ctrlcmd(caifd->layer.up,
+ _CAIF_CTRLCMD_PHYIF_FLOW_ON_IND,
+ caifd->layer.id);
+ caifd_put(caifd);
+}
+
static int transmit(struct cflayer *layer, struct cfpkt *pkt)
{
- int err;
+ int err, high = 0, qlen = 0;
+ struct caif_dev_common *caifdev;
struct caif_device_entry *caifd =
container_of(layer, struct caif_device_entry, layer);
struct sk_buff *skb;
+ struct netdev_queue *txq;
+
+ rcu_read_lock_bh();
skb = cfpkt_tonative(pkt);
skb->dev = caifd->netdev;
+ skb_reset_network_header(skb);
+ skb->protocol = htons(ETH_P_CAIF);
+ caifdev = netdev_priv(caifd->netdev);
+
+ /* Check if we need to handle xoff */
+ if (likely(caifd->netdev->tx_queue_len == 0))
+ goto noxoff;
+
+ if (unlikely(caifd->xoff))
+ goto noxoff;
+
+ if (likely(!netif_queue_stopped(caifd->netdev))) {
+ /* If we run with a TX queue, check if the queue is too long*/
+ txq = netdev_get_tx_queue(skb->dev, 0);
+ qlen = qdisc_qlen(rcu_dereference_bh(txq->qdisc));
+
+ if (likely(qlen == 0))
+ goto noxoff;
+
+ high = (caifd->netdev->tx_queue_len * q_high) / 100;
+ if (likely(qlen < high))
+ goto noxoff;
+ }
+
+ /* Hold lock while accessing xoff */
+ spin_lock_bh(&caifd->flow_lock);
+ if (caifd->xoff) {
+ spin_unlock_bh(&caifd->flow_lock);
+ goto noxoff;
+ }
+
+ /*
+ * Handle flow off, we do this by temporary hi-jacking this
+ * skb's destructor function, and replace it with our own
+ * flow-on callback. The callback will set flow-on and call
+ * the original destructor.
+ */
+
+ pr_debug("queue has stopped(%d) or is full (%d > %d)\n",
+ netif_queue_stopped(caifd->netdev),
+ qlen, high);
+ caifd->xoff = 1;
+ caifd->xoff_skb = skb;
+ caifd->xoff_skb_dtor = skb->destructor;
+ skb->destructor = caif_flow_cb;
+ spin_unlock_bh(&caifd->flow_lock);
+
+ caifd->layer.up->ctrlcmd(caifd->layer.up,
+ _CAIF_CTRLCMD_PHYIF_FLOW_OFF_IND,
+ caifd->layer.id);
+noxoff:
+ rcu_read_unlock_bh();
err = dev_queue_xmit(skb);
if (err > 0)
@@ -172,7 +273,10 @@ static int receive(struct sk_buff *skb, struct net_device *dev,
/* Release reference to stack upwards */
caifd_put(caifd);
- return 0;
+
+ if (err != 0)
+ err = NET_RX_DROP;
+ return err;
}
static struct packet_type caif_packet_type __read_mostly = {
@@ -203,6 +307,57 @@ static void dev_flowctrl(struct net_device *dev, int on)
caifd_put(caifd);
}
+void caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev,
+ struct cflayer *link_support, int head_room,
+ struct cflayer **layer, int (**rcv_func)(
+ struct sk_buff *, struct net_device *,
+ struct packet_type *, struct net_device *))
+{
+ struct caif_device_entry *caifd;
+ enum cfcnfg_phy_preference pref;
+ struct cfcnfg *cfg = get_cfcnfg(dev_net(dev));
+ struct caif_device_entry_list *caifdevs;
+
+ caifdevs = caif_device_list(dev_net(dev));
+ if (!cfg || !caifdevs)
+ return;
+ caifd = caif_device_alloc(dev);
+ if (!caifd)
+ return;
+ *layer = &caifd->layer;
+ spin_lock_init(&caifd->flow_lock);
+
+ switch (caifdev->link_select) {
+ case CAIF_LINK_HIGH_BANDW:
+ pref = CFPHYPREF_HIGH_BW;
+ break;
+ case CAIF_LINK_LOW_LATENCY:
+ pref = CFPHYPREF_LOW_LAT;
+ break;
+ default:
+ pref = CFPHYPREF_HIGH_BW;
+ break;
+ }
+ mutex_lock(&caifdevs->lock);
+ list_add_rcu(&caifd->list, &caifdevs->list);
+
+ strncpy(caifd->layer.name, dev->name,
+ sizeof(caifd->layer.name) - 1);
+ caifd->layer.name[sizeof(caifd->layer.name) - 1] = 0;
+ caifd->layer.transmit = transmit;
+ cfcnfg_add_phy_layer(cfg,
+ dev,
+ &caifd->layer,
+ pref,
+ link_support,
+ caifdev->use_fcs,
+ head_room);
+ mutex_unlock(&caifdevs->lock);
+ if (rcv_func)
+ *rcv_func = receive;
+}
+EXPORT_SYMBOL(caif_enroll_dev);
+
/* notify Caif of device events */
static int caif_device_notify(struct notifier_block *me, unsigned long what,
void *arg)
@@ -210,62 +365,40 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what,
struct net_device *dev = arg;
struct caif_device_entry *caifd = NULL;
struct caif_dev_common *caifdev;
- enum cfcnfg_phy_preference pref;
- enum cfcnfg_phy_type phy_type;
struct cfcnfg *cfg;
+ struct cflayer *layer, *link_support;
+ int head_room = 0;
struct caif_device_entry_list *caifdevs;
- if (dev->type != ARPHRD_CAIF)
- return 0;
-
cfg = get_cfcnfg(dev_net(dev));
- if (cfg == NULL)
+ caifdevs = caif_device_list(dev_net(dev));
+ if (!cfg || !caifdevs)
return 0;
- caifdevs = caif_device_list(dev_net(dev));
+ caifd = caif_get(dev);
+ if (caifd == NULL && dev->type != ARPHRD_CAIF)
+ return 0;
switch (what) {
case NETDEV_REGISTER:
- caifd = caif_device_alloc(dev);
- if (!caifd)
- return 0;
+ if (caifd != NULL)
+ break;
caifdev = netdev_priv(dev);
- caifdev->flowctrl = dev_flowctrl;
- caifd->layer.transmit = transmit;
-
- if (caifdev->use_frag)
- phy_type = CFPHYTYPE_FRAG;
- else
- phy_type = CFPHYTYPE_CAIF;
-
- switch (caifdev->link_select) {
- case CAIF_LINK_HIGH_BANDW:
- pref = CFPHYPREF_HIGH_BW;
- break;
- case CAIF_LINK_LOW_LATENCY:
- pref = CFPHYPREF_LOW_LAT;
- break;
- default:
- pref = CFPHYPREF_HIGH_BW;
- break;
+ link_support = NULL;
+ if (caifdev->use_frag) {
+ head_room = 1;
+ link_support = cfserl_create(dev->ifindex,
+ caifdev->use_stx);
+ if (!link_support) {
+ pr_warn("Out of memory\n");
+ break;
+ }
}
- strncpy(caifd->layer.name, dev->name,
- sizeof(caifd->layer.name) - 1);
- caifd->layer.name[sizeof(caifd->layer.name) - 1] = 0;
-
- mutex_lock(&caifdevs->lock);
- list_add_rcu(&caifd->list, &caifdevs->list);
-
- cfcnfg_add_phy_layer(cfg,
- phy_type,
- dev,
- &caifd->layer,
- pref,
- caifdev->use_fcs,
- caifdev->use_stx);
- mutex_unlock(&caifdevs->lock);
+ caif_enroll_dev(dev, caifdev, link_support, head_room,
+ &layer, NULL);
+ caifdev->flowctrl = dev_flowctrl;
break;
case NETDEV_UP:
@@ -277,6 +410,7 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what,
break;
}
+ caifd->xoff = 0;
cfcnfg_set_phy_state(cfg, &caifd->layer, true);
rcu_read_unlock();
@@ -298,6 +432,24 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what,
caifd->layer.up->ctrlcmd(caifd->layer.up,
_CAIF_CTRLCMD_PHYIF_DOWN_IND,
caifd->layer.id);
+
+ spin_lock_bh(&caifd->flow_lock);
+
+ /*
+ * Replace our xoff-destructor with original destructor.
+ * We trust that skb->destructor *always* is called before
+ * the skb reference is invalid. The hijacked SKB destructor
+ * takes the flow_lock so manipulating the skb->destructor here
+ * should be safe.
+ */
+ if (caifd->xoff_skb_dtor != NULL && caifd->xoff_skb != NULL)
+ caifd->xoff_skb->destructor = caifd->xoff_skb_dtor;
+
+ caifd->xoff = 0;
+ caifd->xoff_skb_dtor = NULL;
+ caifd->xoff_skb = NULL;
+
+ spin_unlock_bh(&caifd->flow_lock);
caifd_put(caifd);
break;
@@ -353,15 +505,15 @@ static struct notifier_block caif_device_notifier = {
static int caif_init_net(struct net *net)
{
struct caif_net *caifn = net_generic(net, caif_net_id);
- BUG_ON(!caifn);
+ if (WARN_ON(!caifn))
+ return -EINVAL;
+
INIT_LIST_HEAD(&caifn->caifdevs.list);
mutex_init(&caifn->caifdevs.lock);
caifn->cfg = cfcnfg_create();
- if (!caifn->cfg) {
- pr_warn("can't create cfcnfg\n");
+ if (!caifn->cfg)
return -ENOMEM;
- }
return 0;
}
@@ -371,17 +523,14 @@ static void caif_exit_net(struct net *net)
struct caif_device_entry *caifd, *tmp;
struct caif_device_entry_list *caifdevs =
caif_device_list(net);
- struct cfcnfg *cfg;
+ struct cfcnfg *cfg = get_cfcnfg(net);
+
+ if (!cfg || !caifdevs)
+ return;
rtnl_lock();
mutex_lock(&caifdevs->lock);
- cfg = get_cfcnfg(net);
- if (cfg == NULL) {
- mutex_unlock(&caifdevs->lock);
- return;
- }
-
list_for_each_entry_safe(caifd, tmp, &caifdevs->list, list) {
int i = 0;
list_del_rcu(&caifd->list);
diff --git a/net/caif/caif_usb.c b/net/caif/caif_usb.c
new file mode 100644
index 000000000000..5fc9eca8cd41
--- /dev/null
+++ b/net/caif/caif_usb.c
@@ -0,0 +1,208 @@
+/*
+ * CAIF USB handler
+ * Copyright (C) ST-Ericsson AB 2011
+ * Author: Sjur Brendeland/sjur.brandeland@stericsson.com
+ * License terms: GNU General Public License (GPL) version 2
+ *
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
+
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/slab.h>
+#include <linux/netdevice.h>
+#include <linux/mii.h>
+#include <linux/usb.h>
+#include <linux/usb/usbnet.h>
+#include <net/netns/generic.h>
+#include <net/caif/caif_dev.h>
+#include <net/caif/caif_layer.h>
+#include <net/caif/cfpkt.h>
+#include <net/caif/cfcnfg.h>
+
+MODULE_LICENSE("GPL");
+
+#define CFUSB_PAD_DESCR_SZ 1 /* Alignment descriptor length */
+#define CFUSB_ALIGNMENT 4 /* Number of bytes to align. */
+#define CFUSB_MAX_HEADLEN (CFUSB_PAD_DESCR_SZ + CFUSB_ALIGNMENT-1)
+#define STE_USB_VID 0x04cc /* USB Product ID for ST-Ericsson */
+#define STE_USB_PID_CAIF 0x2306 /* Product id for CAIF Modems */
+
+struct cfusbl {
+ struct cflayer layer;
+ u8 tx_eth_hdr[ETH_HLEN];
+};
+
+static bool pack_added;
+
+static int cfusbl_receive(struct cflayer *layr, struct cfpkt *pkt)
+{
+ u8 hpad;
+
+ /* Remove padding. */
+ cfpkt_extr_head(pkt, &hpad, 1);
+ cfpkt_extr_head(pkt, NULL, hpad);
+ return layr->up->receive(layr->up, pkt);
+}
+
+static int cfusbl_transmit(struct cflayer *layr, struct cfpkt *pkt)
+{
+ struct caif_payload_info *info;
+ u8 hpad;
+ u8 zeros[CFUSB_ALIGNMENT];
+ struct sk_buff *skb;
+ struct cfusbl *usbl = container_of(layr, struct cfusbl, layer);
+
+ skb = cfpkt_tonative(pkt);
+
+ skb_reset_network_header(skb);
+ skb->protocol = htons(ETH_P_IP);
+
+ info = cfpkt_info(pkt);
+ hpad = (info->hdr_len + CFUSB_PAD_DESCR_SZ) & (CFUSB_ALIGNMENT - 1);
+
+ if (skb_headroom(skb) < ETH_HLEN + CFUSB_PAD_DESCR_SZ + hpad) {
+ pr_warn("Headroom to small\n");
+ kfree_skb(skb);
+ return -EIO;
+ }
+ memset(zeros, 0, hpad);
+
+ cfpkt_add_head(pkt, zeros, hpad);
+ cfpkt_add_head(pkt, &hpad, 1);
+ cfpkt_add_head(pkt, usbl->tx_eth_hdr, sizeof(usbl->tx_eth_hdr));
+ return layr->dn->transmit(layr->dn, pkt);
+}
+
+static void cfusbl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
+ int phyid)
+{
+ if (layr->up && layr->up->ctrlcmd)
+ layr->up->ctrlcmd(layr->up, ctrl, layr->id);
+}
+
+struct cflayer *cfusbl_create(int phyid, u8 ethaddr[ETH_ALEN],
+ u8 braddr[ETH_ALEN])
+{
+ struct cfusbl *this = kmalloc(sizeof(struct cfusbl), GFP_ATOMIC);
+
+ if (!this) {
+ pr_warn("Out of memory\n");
+ return NULL;
+ }
+ caif_assert(offsetof(struct cfusbl, layer) == 0);
+
+ memset(this, 0, sizeof(struct cflayer));
+ this->layer.receive = cfusbl_receive;
+ this->layer.transmit = cfusbl_transmit;
+ this->layer.ctrlcmd = cfusbl_ctrlcmd;
+ snprintf(this->layer.name, CAIF_LAYER_NAME_SZ, "usb%d", phyid);
+ this->layer.id = phyid;
+
+ /*
+ * Construct TX ethernet header:
+ * 0-5 destination address
+ * 5-11 source address
+ * 12-13 protocol type
+ */
+ memcpy(&this->tx_eth_hdr[ETH_ALEN], braddr, ETH_ALEN);
+ memcpy(&this->tx_eth_hdr[ETH_ALEN], ethaddr, ETH_ALEN);
+ this->tx_eth_hdr[12] = cpu_to_be16(ETH_P_802_EX1) & 0xff;
+ this->tx_eth_hdr[13] = (cpu_to_be16(ETH_P_802_EX1) >> 8) & 0xff;
+ pr_debug("caif ethernet TX-header dst:%pM src:%pM type:%02x%02x\n",
+ this->tx_eth_hdr, this->tx_eth_hdr + ETH_ALEN,
+ this->tx_eth_hdr[12], this->tx_eth_hdr[13]);
+
+ return (struct cflayer *) this;
+}
+
+static struct packet_type caif_usb_type __read_mostly = {
+ .type = cpu_to_be16(ETH_P_802_EX1),
+};
+
+static int cfusbl_device_notify(struct notifier_block *me, unsigned long what,
+ void *arg)
+{
+ struct net_device *dev = arg;
+ struct caif_dev_common common;
+ struct cflayer *layer, *link_support;
+ struct usbnet *usbnet = netdev_priv(dev);
+ struct usb_device *usbdev = usbnet->udev;
+ struct ethtool_drvinfo drvinfo;
+
+ /*
+ * Quirks: High-jack ethtool to find if we have a NCM device,
+ * and find it's VID/PID.
+ */
+ if (dev->ethtool_ops == NULL || dev->ethtool_ops->get_drvinfo == NULL)
+ return 0;
+
+ dev->ethtool_ops->get_drvinfo(dev, &drvinfo);
+ if (strncmp(drvinfo.driver, "cdc_ncm", 7) != 0)
+ return 0;
+
+ pr_debug("USB CDC NCM device VID:0x%4x PID:0x%4x\n",
+ le16_to_cpu(usbdev->descriptor.idVendor),
+ le16_to_cpu(usbdev->descriptor.idProduct));
+
+ /* Check for VID/PID that supports CAIF */
+ if (!(le16_to_cpu(usbdev->descriptor.idVendor) == STE_USB_VID &&
+ le16_to_cpu(usbdev->descriptor.idProduct) == STE_USB_PID_CAIF))
+ return 0;
+
+ if (what == NETDEV_UNREGISTER)
+ module_put(THIS_MODULE);
+
+ if (what != NETDEV_REGISTER)
+ return 0;
+
+ __module_get(THIS_MODULE);
+
+ memset(&common, 0, sizeof(common));
+ common.use_frag = false;
+ common.use_fcs = false;
+ common.use_stx = false;
+ common.link_select = CAIF_LINK_HIGH_BANDW;
+ common.flowctrl = NULL;
+
+ link_support = cfusbl_create(dev->ifindex, dev->dev_addr,
+ dev->broadcast);
+
+ if (!link_support)
+ return -ENOMEM;
+
+ if (dev->num_tx_queues > 1)
+ pr_warn("USB device uses more than one tx queue\n");
+
+ caif_enroll_dev(dev, &common, link_support, CFUSB_MAX_HEADLEN,
+ &layer, &caif_usb_type.func);
+ if (!pack_added)
+ dev_add_pack(&caif_usb_type);
+ pack_added = true;
+
+ strncpy(layer->name, dev->name,
+ sizeof(layer->name) - 1);
+ layer->name[sizeof(layer->name) - 1] = 0;
+
+ return 0;
+}
+
+static struct notifier_block caif_device_notifier = {
+ .notifier_call = cfusbl_device_notify,
+ .priority = 0,
+};
+
+static int __init cfusbl_init(void)
+{
+ return register_netdevice_notifier(&caif_device_notifier);
+}
+
+static void __exit cfusbl_exit(void)
+{
+ unregister_netdevice_notifier(&caif_device_notifier);
+ dev_remove_pack(&caif_usb_type);
+}
+
+module_init(cfusbl_init);
+module_exit(cfusbl_exit);
diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c
index 00523ecc4ced..598aafb4cb51 100644
--- a/net/caif/cfcnfg.c
+++ b/net/caif/cfcnfg.c
@@ -45,8 +45,8 @@ struct cfcnfg_phyinfo {
/* Interface index */
int ifindex;
- /* Use Start of frame extension */
- bool use_stx;
+ /* Protocol head room added for CAIF link layer */
+ int head_room;
/* Use Start of frame checksum */
bool use_fcs;
@@ -187,11 +187,11 @@ int caif_disconnect_client(struct net *net, struct cflayer *adap_layer)
if (channel_id != 0) {
struct cflayer *servl;
servl = cfmuxl_remove_uplayer(cfg->mux, channel_id);
+ cfctrl_linkdown_req(cfg->ctrl, channel_id, adap_layer);
if (servl != NULL)
layer_set_up(servl, NULL);
} else
pr_debug("nothing to disconnect\n");
- cfctrl_linkdown_req(cfg->ctrl, channel_id, adap_layer);
/* Do RCU sync before initiating cleanup */
synchronize_rcu();
@@ -350,9 +350,7 @@ int caif_connect_client(struct net *net, struct caif_connect_request *conn_req,
*ifindex = phy->ifindex;
*proto_tail = 2;
- *proto_head =
-
- protohead[param.linktype] + (phy->use_stx ? 1 : 0);
+ *proto_head = protohead[param.linktype] + phy->head_room;
rcu_read_unlock();
@@ -460,13 +458,13 @@ unlock:
}
void
-cfcnfg_add_phy_layer(struct cfcnfg *cnfg, enum cfcnfg_phy_type phy_type,
+cfcnfg_add_phy_layer(struct cfcnfg *cnfg,
struct net_device *dev, struct cflayer *phy_layer,
enum cfcnfg_phy_preference pref,
- bool fcs, bool stx)
+ struct cflayer *link_support,
+ bool fcs, int head_room)
{
struct cflayer *frml;
- struct cflayer *phy_driver = NULL;
struct cfcnfg_phyinfo *phyinfo = NULL;
int i;
u8 phyid;
@@ -482,26 +480,13 @@ cfcnfg_add_phy_layer(struct cfcnfg *cnfg, enum cfcnfg_phy_type phy_type,
goto got_phyid;
}
pr_warn("Too many CAIF Link Layers (max 6)\n");
- goto out_err;
+ goto out;
got_phyid:
phyinfo = kzalloc(sizeof(struct cfcnfg_phyinfo), GFP_ATOMIC);
if (!phyinfo)
goto out_err;
- switch (phy_type) {
- case CFPHYTYPE_FRAG:
- phy_driver =
- cfserl_create(CFPHYTYPE_FRAG, phyid, stx);
- if (!phy_driver)
- goto out_err;
- break;
- case CFPHYTYPE_CAIF:
- phy_driver = NULL;
- break;
- default:
- goto out_err;
- }
phy_layer->id = phyid;
phyinfo->pref = pref;
phyinfo->id = phyid;
@@ -509,7 +494,7 @@ got_phyid:
phyinfo->dev_info.dev = dev;
phyinfo->phy_layer = phy_layer;
phyinfo->ifindex = dev->ifindex;
- phyinfo->use_stx = stx;
+ phyinfo->head_room = head_room;
phyinfo->use_fcs = fcs;
frml = cffrml_create(phyid, fcs);
@@ -519,23 +504,23 @@ got_phyid:
phyinfo->frm_layer = frml;
layer_set_up(frml, cnfg->mux);
- if (phy_driver != NULL) {
- phy_driver->id = phyid;
- layer_set_dn(frml, phy_driver);
- layer_set_up(phy_driver, frml);
- layer_set_dn(phy_driver, phy_layer);
- layer_set_up(phy_layer, phy_driver);
+ if (link_support != NULL) {
+ link_support->id = phyid;
+ layer_set_dn(frml, link_support);
+ layer_set_up(link_support, frml);
+ layer_set_dn(link_support, phy_layer);
+ layer_set_up(phy_layer, link_support);
} else {
layer_set_dn(frml, phy_layer);
layer_set_up(phy_layer, frml);
}
list_add_rcu(&phyinfo->node, &cnfg->phys);
+out:
mutex_unlock(&cnfg->lock);
return;
out_err:
- kfree(phy_driver);
kfree(phyinfo);
mutex_unlock(&cnfg->lock);
}
diff --git a/net/caif/cffrml.c b/net/caif/cffrml.c
index f39921171d0d..d3ca87bf23b7 100644
--- a/net/caif/cffrml.c
+++ b/net/caif/cffrml.c
@@ -136,20 +136,21 @@ static int cffrml_receive(struct cflayer *layr, struct cfpkt *pkt)
static int cffrml_transmit(struct cflayer *layr, struct cfpkt *pkt)
{
- int tmp;
u16 chks;
u16 len;
+ __le16 data;
+
struct cffrml *this = container_obj(layr);
if (this->dofcs) {
chks = cfpkt_iterate(pkt, cffrml_checksum, 0xffff);
- tmp = cpu_to_le16(chks);
- cfpkt_add_trail(pkt, &tmp, 2);
+ data = cpu_to_le16(chks);
+ cfpkt_add_trail(pkt, &data, 2);
} else {
cfpkt_pad_trail(pkt, 2);
}
len = cfpkt_getlen(pkt);
- tmp = cpu_to_le16(len);
- cfpkt_add_head(pkt, &tmp, 2);
+ data = cpu_to_le16(len);
+ cfpkt_add_head(pkt, &data, 2);
cfpkt_info(pkt)->hdr_len += 2;
if (cfpkt_erroneous(pkt)) {
pr_err("Packet is erroneous!\n");
diff --git a/net/caif/cfpkt_skbuff.c b/net/caif/cfpkt_skbuff.c
index df08c47183d4..e335ba859b97 100644
--- a/net/caif/cfpkt_skbuff.c
+++ b/net/caif/cfpkt_skbuff.c
@@ -63,7 +63,6 @@ static inline struct cfpkt *skb_to_pkt(struct sk_buff *skb)
return (struct cfpkt *) skb;
}
-
struct cfpkt *cfpkt_fromnative(enum caif_direction dir, void *nativepkt)
{
struct cfpkt *pkt = skb_to_pkt(nativepkt);
@@ -105,14 +104,12 @@ void cfpkt_destroy(struct cfpkt *pkt)
kfree_skb(skb);
}
-
inline bool cfpkt_more(struct cfpkt *pkt)
{
struct sk_buff *skb = pkt_to_skb(pkt);
return skb->len > 0;
}
-
int cfpkt_peek_head(struct cfpkt *pkt, void *data, u16 len)
{
struct sk_buff *skb = pkt_to_skb(pkt);
@@ -144,9 +141,11 @@ int cfpkt_extr_head(struct cfpkt *pkt, void *data, u16 len)
}
from = skb_pull(skb, len);
from -= len;
- memcpy(data, from, len);
+ if (data)
+ memcpy(data, from, len);
return 0;
}
+EXPORT_SYMBOL(cfpkt_extr_head);
int cfpkt_extr_trail(struct cfpkt *pkt, void *dta, u16 len)
{
@@ -170,13 +169,11 @@ int cfpkt_extr_trail(struct cfpkt *pkt, void *dta, u16 len)
return 0;
}
-
int cfpkt_pad_trail(struct cfpkt *pkt, u16 len)
{
return cfpkt_add_body(pkt, NULL, len);
}
-
int cfpkt_add_body(struct cfpkt *pkt, const void *data, u16 len)
{
struct sk_buff *skb = pkt_to_skb(pkt);
@@ -255,21 +252,19 @@ int cfpkt_add_head(struct cfpkt *pkt, const void *data2, u16 len)
memcpy(to, data, len);
return 0;
}
-
+EXPORT_SYMBOL(cfpkt_add_head);
inline int cfpkt_add_trail(struct cfpkt *pkt, const void *data, u16 len)
{
return cfpkt_add_body(pkt, data, len);
}
-
inline u16 cfpkt_getlen(struct cfpkt *pkt)
{
struct sk_buff *skb = pkt_to_skb(pkt);
return skb->len;
}
-
inline u16 cfpkt_iterate(struct cfpkt *pkt,
u16 (*iter_func)(u16, void *, u16),
u16 data)
@@ -287,7 +282,6 @@ inline u16 cfpkt_iterate(struct cfpkt *pkt,
return iter_func(data, pkt->skb.data, cfpkt_getlen(pkt));
}
-
int cfpkt_setlen(struct cfpkt *pkt, u16 len)
{
struct sk_buff *skb = pkt_to_skb(pkt);
@@ -399,3 +393,4 @@ struct caif_payload_info *cfpkt_info(struct cfpkt *pkt)
{
return (struct caif_payload_info *)&pkt_to_skb(pkt)->cb;
}
+EXPORT_SYMBOL(cfpkt_info);
diff --git a/net/caif/cfrfml.c b/net/caif/cfrfml.c
index 81660f809713..6dc75d4f8d94 100644
--- a/net/caif/cfrfml.c
+++ b/net/caif/cfrfml.c
@@ -190,7 +190,7 @@ out:
static int cfrfml_transmit_segment(struct cfrfml *rfml, struct cfpkt *pkt)
{
- caif_assert(cfpkt_getlen(pkt) < rfml->fragment_size);
+ caif_assert(cfpkt_getlen(pkt) < rfml->fragment_size + RFM_HEAD_SIZE);
/* Add info for MUX-layer to route the packet out. */
cfpkt_info(pkt)->channel_id = rfml->serv.layer.id;
diff --git a/net/caif/cfserl.c b/net/caif/cfserl.c
index 797c8d165993..8e68b97f13ee 100644
--- a/net/caif/cfserl.c
+++ b/net/caif/cfserl.c
@@ -31,7 +31,7 @@ static int cfserl_transmit(struct cflayer *layr, struct cfpkt *pkt);
static void cfserl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
int phyid);
-struct cflayer *cfserl_create(int type, int instance, bool use_stx)
+struct cflayer *cfserl_create(int instance, bool use_stx)
{
struct cfserl *this = kzalloc(sizeof(struct cfserl), GFP_ATOMIC);
if (!this)
@@ -40,7 +40,6 @@ struct cflayer *cfserl_create(int type, int instance, bool use_stx)
this->layer.receive = cfserl_receive;
this->layer.transmit = cfserl_transmit;
this->layer.ctrlcmd = cfserl_ctrlcmd;
- this->layer.type = type;
this->usestx = use_stx;
spin_lock_init(&this->sync);
snprintf(this->layer.name, CAIF_LAYER_NAME_SZ, "ser1");
diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c
index 42599e31dcad..3a94eae7abe9 100644
--- a/net/ceph/crush/mapper.c
+++ b/net/ceph/crush/mapper.c
@@ -477,7 +477,6 @@ int crush_do_rule(struct crush_map *map,
int i, j;
int numrep;
int firstn;
- int rc = -1;
BUG_ON(ruleno >= map->max_rules);
@@ -491,23 +490,18 @@ int crush_do_rule(struct crush_map *map,
* that this may or may not correspond to the specific types
* referenced by the crush rule.
*/
- if (force >= 0) {
- if (force >= map->max_devices ||
- map->device_parents[force] == 0) {
- /*dprintk("CRUSH: forcefed device dne\n");*/
- rc = -1; /* force fed device dne */
- goto out;
- }
- if (!is_out(map, weight, force, x)) {
- while (1) {
- force_context[++force_pos] = force;
- if (force >= 0)
- force = map->device_parents[force];
- else
- force = map->bucket_parents[-1-force];
- if (force == 0)
- break;
- }
+ if (force >= 0 &&
+ force < map->max_devices &&
+ map->device_parents[force] != 0 &&
+ !is_out(map, weight, force, x)) {
+ while (1) {
+ force_context[++force_pos] = force;
+ if (force >= 0)
+ force = map->device_parents[force];
+ else
+ force = map->bucket_parents[-1-force];
+ if (force == 0)
+ break;
}
}
@@ -600,10 +594,7 @@ int crush_do_rule(struct crush_map *map,
BUG_ON(1);
}
}
- rc = result_len;
-
-out:
- return rc;
+ return result_len;
}
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 733e46008b89..f4f3f58f5234 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -244,7 +244,7 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
ceph_pagelist_init(req->r_trail);
}
/* create request message; allow space for oid */
- msg_size += 40;
+ msg_size += MAX_OBJ_NAME_SIZE;
if (snapc)
msg_size += sizeof(u64) * snapc->num_snaps;
if (use_mempool)
diff --git a/net/core/Makefile b/net/core/Makefile
index 0d357b1c4e57..674641b13aea 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -3,12 +3,13 @@
#
obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \
- gen_stats.o gen_estimator.o net_namespace.o secure_seq.o
+ gen_stats.o gen_estimator.o net_namespace.o secure_seq.o flow_dissector.o
obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \
- neighbour.o rtnetlink.o utils.o link_watch.o filter.o
+ neighbour.o rtnetlink.o utils.o link_watch.o filter.o \
+ sock_diag.o
obj-$(CONFIG_XFRM) += flow.o
obj-y += net-sysfs.o
@@ -19,3 +20,4 @@ obj-$(CONFIG_FIB_RULES) += fib_rules.o
obj-$(CONFIG_TRACEPOINTS) += net-traces.o
obj-$(CONFIG_NET_DROP_MONITOR) += drop_monitor.o
obj-$(CONFIG_NETWORK_PHY_TIMESTAMPING) += timestamping.o
+obj-$(CONFIG_NETPRIO_CGROUP) += netprio_cgroup.o
diff --git a/net/core/dev.c b/net/core/dev.c
index 6ba50a1e404c..f494675471a9 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -133,10 +133,9 @@
#include <linux/pci.h>
#include <linux/inetdevice.h>
#include <linux/cpu_rmap.h>
-#include <linux/if_tunnel.h>
-#include <linux/if_pppox.h>
-#include <linux/ppp_defs.h>
#include <linux/net_tstamp.h>
+#include <linux/jump_label.h>
+#include <net/flow_keys.h>
#include "net-sysfs.h"
@@ -1320,8 +1319,6 @@ EXPORT_SYMBOL(dev_close);
*/
void dev_disable_lro(struct net_device *dev)
{
- u32 flags;
-
/*
* If we're trying to disable lro on a vlan device
* use the underlying physical device instead
@@ -1329,15 +1326,9 @@ void dev_disable_lro(struct net_device *dev)
if (is_vlan_dev(dev))
dev = vlan_dev_real_dev(dev);
- if (dev->ethtool_ops && dev->ethtool_ops->get_flags)
- flags = dev->ethtool_ops->get_flags(dev);
- else
- flags = ethtool_op_get_flags(dev);
+ dev->wanted_features &= ~NETIF_F_LRO;
+ netdev_update_features(dev);
- if (!(flags & ETH_FLAG_LRO))
- return;
-
- __ethtool_set_flags(dev, flags & ~ETH_FLAG_LRO);
if (unlikely(dev->features & NETIF_F_LRO))
netdev_WARN(dev, "failed to disable LRO!\n");
}
@@ -1396,7 +1387,7 @@ rollback:
for_each_net(net) {
for_each_netdev(net, dev) {
if (dev == last)
- break;
+ goto outroll;
if (dev->flags & IFF_UP) {
nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
@@ -1407,6 +1398,7 @@ rollback:
}
}
+outroll:
raw_notifier_chain_unregister(&netdev_chain, nb);
goto unlock;
}
@@ -1449,34 +1441,55 @@ int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
}
EXPORT_SYMBOL(call_netdevice_notifiers);
-/* When > 0 there are consumers of rx skb time stamps */
-static atomic_t netstamp_needed = ATOMIC_INIT(0);
+static struct jump_label_key netstamp_needed __read_mostly;
+#ifdef HAVE_JUMP_LABEL
+/* We are not allowed to call jump_label_dec() from irq context
+ * If net_disable_timestamp() is called from irq context, defer the
+ * jump_label_dec() calls.
+ */
+static atomic_t netstamp_needed_deferred;
+#endif
void net_enable_timestamp(void)
{
- atomic_inc(&netstamp_needed);
+#ifdef HAVE_JUMP_LABEL
+ int deferred = atomic_xchg(&netstamp_needed_deferred, 0);
+
+ if (deferred) {
+ while (--deferred)
+ jump_label_dec(&netstamp_needed);
+ return;
+ }
+#endif
+ WARN_ON(in_interrupt());
+ jump_label_inc(&netstamp_needed);
}
EXPORT_SYMBOL(net_enable_timestamp);
void net_disable_timestamp(void)
{
- atomic_dec(&netstamp_needed);
+#ifdef HAVE_JUMP_LABEL
+ if (in_interrupt()) {
+ atomic_inc(&netstamp_needed_deferred);
+ return;
+ }
+#endif
+ jump_label_dec(&netstamp_needed);
}
EXPORT_SYMBOL(net_disable_timestamp);
static inline void net_timestamp_set(struct sk_buff *skb)
{
- if (atomic_read(&netstamp_needed))
+ skb->tstamp.tv64 = 0;
+ if (static_branch(&netstamp_needed))
__net_timestamp(skb);
- else
- skb->tstamp.tv64 = 0;
}
-static inline void net_timestamp_check(struct sk_buff *skb)
-{
- if (!skb->tstamp.tv64 && atomic_read(&netstamp_needed))
- __net_timestamp(skb);
-}
+#define net_timestamp_check(COND, SKB) \
+ if (static_branch(&netstamp_needed)) { \
+ if ((COND) && !(SKB)->tstamp.tv64) \
+ __net_timestamp(SKB); \
+ } \
static int net_hwtstamp_validate(struct ifreq *ifr)
{
@@ -1923,7 +1936,8 @@ EXPORT_SYMBOL(skb_checksum_help);
* It may return NULL if the skb requires no segmentation. This is
* only possible when GSO is used for verifying header integrity.
*/
-struct sk_buff *skb_gso_segment(struct sk_buff *skb, u32 features)
+struct sk_buff *skb_gso_segment(struct sk_buff *skb,
+ netdev_features_t features)
{
struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
struct packet_type *ptype;
@@ -1953,9 +1967,9 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, u32 features)
if (dev && dev->ethtool_ops && dev->ethtool_ops->get_drvinfo)
dev->ethtool_ops->get_drvinfo(dev, &info);
- WARN(1, "%s: caps=(0x%lx, 0x%lx) len=%d data_len=%d ip_summed=%d\n",
- info.driver, dev ? dev->features : 0L,
- skb->sk ? skb->sk->sk_route_caps : 0L,
+ WARN(1, "%s: caps=(%pNF, %pNF) len=%d data_len=%d ip_summed=%d\n",
+ info.driver, dev ? &dev->features : NULL,
+ skb->sk ? &skb->sk->sk_route_caps : NULL,
skb->len, skb->data_len, skb->ip_summed);
if (skb_header_cloned(skb) &&
@@ -2064,7 +2078,7 @@ static void dev_gso_skb_destructor(struct sk_buff *skb)
* This function segments the given skb and stores the list of segments
* in skb->next.
*/
-static int dev_gso_segment(struct sk_buff *skb, int features)
+static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features)
{
struct sk_buff *segs;
@@ -2103,7 +2117,7 @@ static inline void skb_orphan_try(struct sk_buff *skb)
}
}
-static bool can_checksum_protocol(unsigned long features, __be16 protocol)
+static bool can_checksum_protocol(netdev_features_t features, __be16 protocol)
{
return ((features & NETIF_F_GEN_CSUM) ||
((features & NETIF_F_V4_CSUM) &&
@@ -2114,7 +2128,8 @@ static bool can_checksum_protocol(unsigned long features, __be16 protocol)
protocol == htons(ETH_P_FCOE)));
}
-static u32 harmonize_features(struct sk_buff *skb, __be16 protocol, u32 features)
+static netdev_features_t harmonize_features(struct sk_buff *skb,
+ __be16 protocol, netdev_features_t features)
{
if (!can_checksum_protocol(features, protocol)) {
features &= ~NETIF_F_ALL_CSUM;
@@ -2126,10 +2141,10 @@ static u32 harmonize_features(struct sk_buff *skb, __be16 protocol, u32 features
return features;
}
-u32 netif_skb_features(struct sk_buff *skb)
+netdev_features_t netif_skb_features(struct sk_buff *skb)
{
__be16 protocol = skb->protocol;
- u32 features = skb->dev->features;
+ netdev_features_t features = skb->dev->features;
if (protocol == htons(ETH_P_8021Q)) {
struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
@@ -2175,7 +2190,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
unsigned int skb_len;
if (likely(!skb->next)) {
- u32 features;
+ netdev_features_t features;
/*
* If device doesn't need skb->dst, release it right now while
@@ -2256,7 +2271,7 @@ gso:
return rc;
}
txq_trans_update(txq);
- if (unlikely(netif_tx_queue_stopped(txq) && skb->next))
+ if (unlikely(netif_xmit_stopped(txq) && skb->next))
return NETDEV_TX_BUSY;
} while (skb->next);
@@ -2456,6 +2471,18 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
return rc;
}
+#if IS_ENABLED(CONFIG_NETPRIO_CGROUP)
+static void skb_update_prio(struct sk_buff *skb)
+{
+ struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap);
+
+ if ((!skb->priority) && (skb->sk) && map)
+ skb->priority = map->priomap[skb->sk->sk_cgrp_prioidx];
+}
+#else
+#define skb_update_prio(skb)
+#endif
+
static DEFINE_PER_CPU(int, xmit_recursion);
#define RECURSION_LIMIT 10
@@ -2496,6 +2523,8 @@ int dev_queue_xmit(struct sk_buff *skb)
*/
rcu_read_lock_bh();
+ skb_update_prio(skb);
+
txq = dev_pick_tx(dev, skb);
q = rcu_dereference_bh(txq->qdisc);
@@ -2530,7 +2559,7 @@ int dev_queue_xmit(struct sk_buff *skb)
HARD_TX_LOCK(dev, txq, cpu);
- if (!netif_tx_queue_stopped(txq)) {
+ if (!netif_xmit_stopped(txq)) {
__this_cpu_inc(xmit_recursion);
rc = dev_hard_start_xmit(skb, dev, txq);
__this_cpu_dec(xmit_recursion);
@@ -2591,123 +2620,28 @@ static inline void ____napi_schedule(struct softnet_data *sd,
*/
void __skb_get_rxhash(struct sk_buff *skb)
{
- int nhoff, hash = 0, poff;
- const struct ipv6hdr *ip6;
- const struct iphdr *ip;
- const struct vlan_hdr *vlan;
- u8 ip_proto;
- u32 addr1, addr2;
- u16 proto;
- union {
- u32 v32;
- u16 v16[2];
- } ports;
-
- nhoff = skb_network_offset(skb);
- proto = skb->protocol;
-
-again:
- switch (proto) {
- case __constant_htons(ETH_P_IP):
-ip:
- if (!pskb_may_pull(skb, sizeof(*ip) + nhoff))
- goto done;
-
- ip = (const struct iphdr *) (skb->data + nhoff);
- if (ip_is_fragment(ip))
- ip_proto = 0;
- else
- ip_proto = ip->protocol;
- addr1 = (__force u32) ip->saddr;
- addr2 = (__force u32) ip->daddr;
- nhoff += ip->ihl * 4;
- break;
- case __constant_htons(ETH_P_IPV6):
-ipv6:
- if (!pskb_may_pull(skb, sizeof(*ip6) + nhoff))
- goto done;
-
- ip6 = (const struct ipv6hdr *) (skb->data + nhoff);
- ip_proto = ip6->nexthdr;
- addr1 = (__force u32) ip6->saddr.s6_addr32[3];
- addr2 = (__force u32) ip6->daddr.s6_addr32[3];
- nhoff += 40;
- break;
- case __constant_htons(ETH_P_8021Q):
- if (!pskb_may_pull(skb, sizeof(*vlan) + nhoff))
- goto done;
- vlan = (const struct vlan_hdr *) (skb->data + nhoff);
- proto = vlan->h_vlan_encapsulated_proto;
- nhoff += sizeof(*vlan);
- goto again;
- case __constant_htons(ETH_P_PPP_SES):
- if (!pskb_may_pull(skb, PPPOE_SES_HLEN + nhoff))
- goto done;
- proto = *((__be16 *) (skb->data + nhoff +
- sizeof(struct pppoe_hdr)));
- nhoff += PPPOE_SES_HLEN;
- switch (proto) {
- case __constant_htons(PPP_IP):
- goto ip;
- case __constant_htons(PPP_IPV6):
- goto ipv6;
- default:
- goto done;
- }
- default:
- goto done;
- }
-
- switch (ip_proto) {
- case IPPROTO_GRE:
- if (pskb_may_pull(skb, nhoff + 16)) {
- u8 *h = skb->data + nhoff;
- __be16 flags = *(__be16 *)h;
+ struct flow_keys keys;
+ u32 hash;
- /*
- * Only look inside GRE if version zero and no
- * routing
- */
- if (!(flags & (GRE_VERSION|GRE_ROUTING))) {
- proto = *(__be16 *)(h + 2);
- nhoff += 4;
- if (flags & GRE_CSUM)
- nhoff += 4;
- if (flags & GRE_KEY)
- nhoff += 4;
- if (flags & GRE_SEQ)
- nhoff += 4;
- goto again;
- }
- }
- break;
- case IPPROTO_IPIP:
- goto again;
- default:
- break;
- }
+ if (!skb_flow_dissect(skb, &keys))
+ return;
- ports.v32 = 0;
- poff = proto_ports_offset(ip_proto);
- if (poff >= 0) {
- nhoff += poff;
- if (pskb_may_pull(skb, nhoff + 4)) {
- ports.v32 = * (__force u32 *) (skb->data + nhoff);
- if (ports.v16[1] < ports.v16[0])
- swap(ports.v16[0], ports.v16[1]);
- skb->l4_rxhash = 1;
- }
+ if (keys.ports) {
+ if ((__force u16)keys.port16[1] < (__force u16)keys.port16[0])
+ swap(keys.port16[0], keys.port16[1]);
+ skb->l4_rxhash = 1;
}
/* get a consistent hash (same value on both flow directions) */
- if (addr2 < addr1)
- swap(addr1, addr2);
+ if ((__force u32)keys.dst < (__force u32)keys.src)
+ swap(keys.dst, keys.src);
- hash = jhash_3words(addr1, addr2, ports.v32, hashrnd);
+ hash = jhash_3words((__force u32)keys.dst,
+ (__force u32)keys.src,
+ (__force u32)keys.ports, hashrnd);
if (!hash)
hash = 1;
-done:
skb->rxhash = hash;
}
EXPORT_SYMBOL(__skb_get_rxhash);
@@ -2718,6 +2652,8 @@ EXPORT_SYMBOL(__skb_get_rxhash);
struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly;
EXPORT_SYMBOL(rps_sock_flow_table);
+struct jump_label_key rps_needed __read_mostly;
+
static struct rps_dev_flow *
set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
struct rps_dev_flow *rflow, u16 next_cpu)
@@ -2997,12 +2933,11 @@ int netif_rx(struct sk_buff *skb)
if (netpoll_rx(skb))
return NET_RX_DROP;
- if (netdev_tstamp_prequeue)
- net_timestamp_check(skb);
+ net_timestamp_check(netdev_tstamp_prequeue, skb);
trace_netif_rx(skb);
#ifdef CONFIG_RPS
- {
+ if (static_branch(&rps_needed)) {
struct rps_dev_flow voidflow, *rflow = &voidflow;
int cpu;
@@ -3017,14 +2952,13 @@ int netif_rx(struct sk_buff *skb)
rcu_read_unlock();
preempt_enable();
- }
-#else
+ } else
+#endif
{
unsigned int qtail;
ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
put_cpu();
}
-#endif
return ret;
}
EXPORT_SYMBOL(netif_rx);
@@ -3230,8 +3164,7 @@ static int __netif_receive_skb(struct sk_buff *skb)
int ret = NET_RX_DROP;
__be16 type;
- if (!netdev_tstamp_prequeue)
- net_timestamp_check(skb);
+ net_timestamp_check(!netdev_tstamp_prequeue, skb);
trace_netif_receive_skb(skb);
@@ -3362,14 +3295,13 @@ out:
*/
int netif_receive_skb(struct sk_buff *skb)
{
- if (netdev_tstamp_prequeue)
- net_timestamp_check(skb);
+ net_timestamp_check(netdev_tstamp_prequeue, skb);
if (skb_defer_rx_timestamp(skb))
return NET_RX_SUCCESS;
#ifdef CONFIG_RPS
- {
+ if (static_branch(&rps_needed)) {
struct rps_dev_flow voidflow, *rflow = &voidflow;
int cpu, ret;
@@ -3380,16 +3312,12 @@ int netif_receive_skb(struct sk_buff *skb)
if (cpu >= 0) {
ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
rcu_read_unlock();
- } else {
- rcu_read_unlock();
- ret = __netif_receive_skb(skb);
+ return ret;
}
-
- return ret;
+ rcu_read_unlock();
}
-#else
- return __netif_receive_skb(skb);
#endif
+ return __netif_receive_skb(skb);
}
EXPORT_SYMBOL(netif_receive_skb);
@@ -4282,6 +4210,12 @@ static int dev_seq_open(struct inode *inode, struct file *file)
sizeof(struct dev_iter_state));
}
+int dev_seq_open_ops(struct inode *inode, struct file *file,
+ const struct seq_operations *ops)
+{
+ return seq_open_net(inode, file, ops, sizeof(struct dev_iter_state));
+}
+
static const struct file_operations dev_seq_fops = {
.owner = THIS_MODULE,
.open = dev_seq_open,
@@ -4532,7 +4466,7 @@ static void dev_change_rx_flags(struct net_device *dev, int flags)
static int __dev_set_promiscuity(struct net_device *dev, int inc)
{
- unsigned short old_flags = dev->flags;
+ unsigned int old_flags = dev->flags;
uid_t uid;
gid_t gid;
@@ -4589,7 +4523,7 @@ static int __dev_set_promiscuity(struct net_device *dev, int inc)
*/
int dev_set_promiscuity(struct net_device *dev, int inc)
{
- unsigned short old_flags = dev->flags;
+ unsigned int old_flags = dev->flags;
int err;
err = __dev_set_promiscuity(dev, inc);
@@ -4616,7 +4550,7 @@ EXPORT_SYMBOL(dev_set_promiscuity);
int dev_set_allmulti(struct net_device *dev, int inc)
{
- unsigned short old_flags = dev->flags;
+ unsigned int old_flags = dev->flags;
ASSERT_RTNL();
@@ -4719,7 +4653,7 @@ EXPORT_SYMBOL(dev_get_flags);
int __dev_change_flags(struct net_device *dev, unsigned int flags)
{
- int old_flags = dev->flags;
+ unsigned int old_flags = dev->flags;
int ret;
ASSERT_RTNL();
@@ -4802,10 +4736,10 @@ void __dev_notify_flags(struct net_device *dev, unsigned int old_flags)
* Change settings on device based state flags. The flags are
* in the userspace exported format.
*/
-int dev_change_flags(struct net_device *dev, unsigned flags)
+int dev_change_flags(struct net_device *dev, unsigned int flags)
{
- int ret, changes;
- int old_flags = dev->flags;
+ int ret;
+ unsigned int changes, old_flags = dev->flags;
ret = __dev_change_flags(dev, flags);
if (ret < 0)
@@ -5362,7 +5296,8 @@ static void rollback_registered(struct net_device *dev)
list_del(&single);
}
-static u32 netdev_fix_features(struct net_device *dev, u32 features)
+static netdev_features_t netdev_fix_features(struct net_device *dev,
+ netdev_features_t features)
{
/* Fix illegal checksum combinations */
if ((features & NETIF_F_HW_CSUM) &&
@@ -5371,12 +5306,6 @@ static u32 netdev_fix_features(struct net_device *dev, u32 features)
features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
}
- if ((features & NETIF_F_NO_CSUM) &&
- (features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
- netdev_warn(dev, "mixed no checksumming and other settings.\n");
- features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
- }
-
/* Fix illegal SG+CSUM combinations. */
if ((features & NETIF_F_SG) &&
!(features & NETIF_F_ALL_CSUM)) {
@@ -5424,7 +5353,7 @@ static u32 netdev_fix_features(struct net_device *dev, u32 features)
int __netdev_update_features(struct net_device *dev)
{
- u32 features;
+ netdev_features_t features;
int err = 0;
ASSERT_RTNL();
@@ -5440,16 +5369,16 @@ int __netdev_update_features(struct net_device *dev)
if (dev->features == features)
return 0;
- netdev_dbg(dev, "Features changed: 0x%08x -> 0x%08x\n",
- dev->features, features);
+ netdev_dbg(dev, "Features changed: %pNF -> %pNF\n",
+ &dev->features, &features);
if (dev->netdev_ops->ndo_set_features)
err = dev->netdev_ops->ndo_set_features(dev, features);
if (unlikely(err < 0)) {
netdev_err(dev,
- "set_features() failed (%d); wanted 0x%08x, left 0x%08x\n",
- err, features, dev->features);
+ "set_features() failed (%d); wanted %pNF, left %pNF\n",
+ err, &features, &dev->features);
return -1;
}
@@ -5548,6 +5477,9 @@ static void netdev_init_one_queue(struct net_device *dev,
queue->xmit_lock_owner = -1;
netdev_queue_numa_node_write(queue, NUMA_NO_NODE);
queue->dev = dev;
+#ifdef CONFIG_BQL
+ dql_init(&queue->dql, HZ);
+#endif
}
static int netif_alloc_netdev_queues(struct net_device *dev)
@@ -5633,11 +5565,12 @@ int register_netdevice(struct net_device *dev)
dev->wanted_features = dev->features & dev->hw_features;
/* Turn on no cache copy if HW is doing checksum */
- dev->hw_features |= NETIF_F_NOCACHE_COPY;
- if ((dev->features & NETIF_F_ALL_CSUM) &&
- !(dev->features & NETIF_F_NO_CSUM)) {
- dev->wanted_features |= NETIF_F_NOCACHE_COPY;
- dev->features |= NETIF_F_NOCACHE_COPY;
+ if (!(dev->flags & IFF_LOOPBACK)) {
+ dev->hw_features |= NETIF_F_NOCACHE_COPY;
+ if (dev->features & NETIF_F_ALL_CSUM) {
+ dev->wanted_features |= NETIF_F_NOCACHE_COPY;
+ dev->features |= NETIF_F_NOCACHE_COPY;
+ }
}
/* Make NETIF_F_HIGHDMA inheritable to VLAN devices.
@@ -6373,7 +6306,8 @@ static int dev_cpu_callback(struct notifier_block *nfb,
* @one to the master device with current feature set @all. Will not
* enable anything that is off in @mask. Returns the new feature set.
*/
-u32 netdev_increment_features(u32 all, u32 one, u32 mask)
+netdev_features_t netdev_increment_features(netdev_features_t all,
+ netdev_features_t one, netdev_features_t mask)
{
if (mask & NETIF_F_GEN_CSUM)
mask |= NETIF_F_ALL_CSUM;
@@ -6382,10 +6316,6 @@ u32 netdev_increment_features(u32 all, u32 one, u32 mask)
all |= one & (NETIF_F_ONE_FOR_ALL|NETIF_F_ALL_CSUM) & mask;
all &= one | ~NETIF_F_ALL_FOR_ALL;
- /* If device needs checksumming, downgrade to it. */
- if (all & (NETIF_F_ALL_CSUM & ~NETIF_F_NO_CSUM))
- all &= ~NETIF_F_NO_CSUM;
-
/* If one device supports hw checksumming, set for all. */
if (all & NETIF_F_GEN_CSUM)
all &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM);
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index 277faef9148d..febba516db62 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -696,8 +696,7 @@ static const struct seq_operations dev_mc_seq_ops = {
static int dev_mc_seq_open(struct inode *inode, struct file *file)
{
- return seq_open_net(inode, file, &dev_mc_seq_ops,
- sizeof(struct seq_net_private));
+ return dev_seq_open_ops(inode, file, &dev_mc_seq_ops);
}
static const struct file_operations dev_mc_seq_fops = {
diff --git a/net/core/dst.c b/net/core/dst.c
index d5e2c4c09107..43d94cedbf7c 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -366,7 +366,7 @@ static void dst_ifdown(struct dst_entry *dst, struct net_device *dev,
dev_hold(dst->dev);
dev_put(dev);
rcu_read_lock();
- neigh = dst_get_neighbour(dst);
+ neigh = dst_get_neighbour_noref(dst);
if (neigh && neigh->dev == dev) {
neigh->dev = dst->dev;
dev_hold(dst->dev);
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index f44481707124..597732c989ca 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -36,235 +36,44 @@ u32 ethtool_op_get_link(struct net_device *dev)
}
EXPORT_SYMBOL(ethtool_op_get_link);
-u32 ethtool_op_get_tx_csum(struct net_device *dev)
-{
- return (dev->features & NETIF_F_ALL_CSUM) != 0;
-}
-EXPORT_SYMBOL(ethtool_op_get_tx_csum);
-
-int ethtool_op_set_tx_csum(struct net_device *dev, u32 data)
-{
- if (data)
- dev->features |= NETIF_F_IP_CSUM;
- else
- dev->features &= ~NETIF_F_IP_CSUM;
-
- return 0;
-}
-EXPORT_SYMBOL(ethtool_op_set_tx_csum);
-
-int ethtool_op_set_tx_hw_csum(struct net_device *dev, u32 data)
-{
- if (data)
- dev->features |= NETIF_F_HW_CSUM;
- else
- dev->features &= ~NETIF_F_HW_CSUM;
-
- return 0;
-}
-EXPORT_SYMBOL(ethtool_op_set_tx_hw_csum);
-
-int ethtool_op_set_tx_ipv6_csum(struct net_device *dev, u32 data)
-{
- if (data)
- dev->features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
- else
- dev->features &= ~(NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
-
- return 0;
-}
-EXPORT_SYMBOL(ethtool_op_set_tx_ipv6_csum);
-
-u32 ethtool_op_get_sg(struct net_device *dev)
-{
- return (dev->features & NETIF_F_SG) != 0;
-}
-EXPORT_SYMBOL(ethtool_op_get_sg);
-
-int ethtool_op_set_sg(struct net_device *dev, u32 data)
-{
- if (data)
- dev->features |= NETIF_F_SG;
- else
- dev->features &= ~NETIF_F_SG;
-
- return 0;
-}
-EXPORT_SYMBOL(ethtool_op_set_sg);
-
-u32 ethtool_op_get_tso(struct net_device *dev)
-{
- return (dev->features & NETIF_F_TSO) != 0;
-}
-EXPORT_SYMBOL(ethtool_op_get_tso);
-
-int ethtool_op_set_tso(struct net_device *dev, u32 data)
-{
- if (data)
- dev->features |= NETIF_F_TSO;
- else
- dev->features &= ~NETIF_F_TSO;
-
- return 0;
-}
-EXPORT_SYMBOL(ethtool_op_set_tso);
-
-u32 ethtool_op_get_ufo(struct net_device *dev)
-{
- return (dev->features & NETIF_F_UFO) != 0;
-}
-EXPORT_SYMBOL(ethtool_op_get_ufo);
-
-int ethtool_op_set_ufo(struct net_device *dev, u32 data)
-{
- if (data)
- dev->features |= NETIF_F_UFO;
- else
- dev->features &= ~NETIF_F_UFO;
- return 0;
-}
-EXPORT_SYMBOL(ethtool_op_set_ufo);
-
-/* the following list of flags are the same as their associated
- * NETIF_F_xxx values in include/linux/netdevice.h
- */
-static const u32 flags_dup_features =
- (ETH_FLAG_LRO | ETH_FLAG_RXVLAN | ETH_FLAG_TXVLAN | ETH_FLAG_NTUPLE |
- ETH_FLAG_RXHASH);
-
-u32 ethtool_op_get_flags(struct net_device *dev)
-{
- /* in the future, this function will probably contain additional
- * handling for flags which are not so easily handled
- * by a simple masking operation
- */
-
- return dev->features & flags_dup_features;
-}
-EXPORT_SYMBOL(ethtool_op_get_flags);
-
-/* Check if device can enable (or disable) particular feature coded in "data"
- * argument. Flags "supported" describe features that can be toggled by device.
- * If feature can not be toggled, it state (enabled or disabled) must match
- * hardcoded device features state, otherwise flags are marked as invalid.
- */
-bool ethtool_invalid_flags(struct net_device *dev, u32 data, u32 supported)
-{
- u32 features = dev->features & flags_dup_features;
- /* "data" can contain only flags_dup_features bits,
- * see __ethtool_set_flags */
-
- return (features & ~supported) != (data & ~supported);
-}
-EXPORT_SYMBOL(ethtool_invalid_flags);
-
-int ethtool_op_set_flags(struct net_device *dev, u32 data, u32 supported)
-{
- if (ethtool_invalid_flags(dev, data, supported))
- return -EINVAL;
-
- dev->features = ((dev->features & ~flags_dup_features) |
- (data & flags_dup_features));
- return 0;
-}
-EXPORT_SYMBOL(ethtool_op_set_flags);
-
/* Handlers for each ethtool command */
-#define ETHTOOL_DEV_FEATURE_WORDS 1
-
-static void ethtool_get_features_compat(struct net_device *dev,
- struct ethtool_get_features_block *features)
-{
- if (!dev->ethtool_ops)
- return;
-
- /* getting RX checksum */
- if (dev->ethtool_ops->get_rx_csum)
- if (dev->ethtool_ops->get_rx_csum(dev))
- features[0].active |= NETIF_F_RXCSUM;
-
- /* mark legacy-changeable features */
- if (dev->ethtool_ops->set_sg)
- features[0].available |= NETIF_F_SG;
- if (dev->ethtool_ops->set_tx_csum)
- features[0].available |= NETIF_F_ALL_CSUM;
- if (dev->ethtool_ops->set_tso)
- features[0].available |= NETIF_F_ALL_TSO;
- if (dev->ethtool_ops->set_rx_csum)
- features[0].available |= NETIF_F_RXCSUM;
- if (dev->ethtool_ops->set_flags)
- features[0].available |= flags_dup_features;
-}
-
-static int ethtool_set_feature_compat(struct net_device *dev,
- int (*legacy_set)(struct net_device *, u32),
- struct ethtool_set_features_block *features, u32 mask)
-{
- u32 do_set;
-
- if (!legacy_set)
- return 0;
-
- if (!(features[0].valid & mask))
- return 0;
-
- features[0].valid &= ~mask;
-
- do_set = !!(features[0].requested & mask);
-
- if (legacy_set(dev, do_set) < 0)
- netdev_info(dev,
- "Legacy feature change (%s) failed for 0x%08x\n",
- do_set ? "set" : "clear", mask);
-
- return 1;
-}
-
-static int ethtool_set_flags_compat(struct net_device *dev,
- int (*legacy_set)(struct net_device *, u32),
- struct ethtool_set_features_block *features, u32 mask)
-{
- u32 value;
-
- if (!legacy_set)
- return 0;
-
- if (!(features[0].valid & mask))
- return 0;
-
- value = dev->features & ~features[0].valid;
- value |= features[0].requested;
-
- features[0].valid &= ~mask;
-
- if (legacy_set(dev, value & mask) < 0)
- netdev_info(dev, "Legacy flags change failed\n");
-
- return 1;
-}
-
-static int ethtool_set_features_compat(struct net_device *dev,
- struct ethtool_set_features_block *features)
-{
- int compat;
-
- if (!dev->ethtool_ops)
- return 0;
-
- compat = ethtool_set_feature_compat(dev, dev->ethtool_ops->set_sg,
- features, NETIF_F_SG);
- compat |= ethtool_set_feature_compat(dev, dev->ethtool_ops->set_tx_csum,
- features, NETIF_F_ALL_CSUM);
- compat |= ethtool_set_feature_compat(dev, dev->ethtool_ops->set_tso,
- features, NETIF_F_ALL_TSO);
- compat |= ethtool_set_feature_compat(dev, dev->ethtool_ops->set_rx_csum,
- features, NETIF_F_RXCSUM);
- compat |= ethtool_set_flags_compat(dev, dev->ethtool_ops->set_flags,
- features, flags_dup_features);
-
- return compat;
-}
+#define ETHTOOL_DEV_FEATURE_WORDS ((NETDEV_FEATURE_COUNT + 31) / 32)
+
+static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] = {
+ [NETIF_F_SG_BIT] = "tx-scatter-gather",
+ [NETIF_F_IP_CSUM_BIT] = "tx-checksum-ipv4",
+ [NETIF_F_HW_CSUM_BIT] = "tx-checksum-ip-generic",
+ [NETIF_F_IPV6_CSUM_BIT] = "tx-checksum-ipv6",
+ [NETIF_F_HIGHDMA_BIT] = "highdma",
+ [NETIF_F_FRAGLIST_BIT] = "tx-scatter-gather-fraglist",
+ [NETIF_F_HW_VLAN_TX_BIT] = "tx-vlan-hw-insert",
+
+ [NETIF_F_HW_VLAN_RX_BIT] = "rx-vlan-hw-parse",
+ [NETIF_F_HW_VLAN_FILTER_BIT] = "rx-vlan-filter",
+ [NETIF_F_VLAN_CHALLENGED_BIT] = "vlan-challenged",
+ [NETIF_F_GSO_BIT] = "tx-generic-segmentation",
+ [NETIF_F_LLTX_BIT] = "tx-lockless",
+ [NETIF_F_NETNS_LOCAL_BIT] = "netns-local",
+ [NETIF_F_GRO_BIT] = "rx-gro",
+ [NETIF_F_LRO_BIT] = "rx-lro",
+
+ [NETIF_F_TSO_BIT] = "tx-tcp-segmentation",
+ [NETIF_F_UFO_BIT] = "tx-udp-fragmentation",
+ [NETIF_F_GSO_ROBUST_BIT] = "tx-gso-robust",
+ [NETIF_F_TSO_ECN_BIT] = "tx-tcp-ecn-segmentation",
+ [NETIF_F_TSO6_BIT] = "tx-tcp6-segmentation",
+ [NETIF_F_FSO_BIT] = "tx-fcoe-segmentation",
+
+ [NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc",
+ [NETIF_F_SCTP_CSUM_BIT] = "tx-checksum-sctp",
+ [NETIF_F_FCOE_MTU_BIT] = "fcoe-mtu",
+ [NETIF_F_NTUPLE_BIT] = "rx-ntuple-filter",
+ [NETIF_F_RXHASH_BIT] = "rx-hashing",
+ [NETIF_F_RXCSUM_BIT] = "rx-checksum",
+ [NETIF_F_NOCACHE_COPY_BIT] = "tx-nocache-copy",
+ [NETIF_F_LOOPBACK_BIT] = "loopback",
+};
static int ethtool_get_features(struct net_device *dev, void __user *useraddr)
{
@@ -272,18 +81,21 @@ static int ethtool_get_features(struct net_device *dev, void __user *useraddr)
.cmd = ETHTOOL_GFEATURES,
.size = ETHTOOL_DEV_FEATURE_WORDS,
};
- struct ethtool_get_features_block features[ETHTOOL_DEV_FEATURE_WORDS] = {
- {
- .available = dev->hw_features,
- .requested = dev->wanted_features,
- .active = dev->features,
- .never_changed = NETIF_F_NEVER_CHANGE,
- },
- };
+ struct ethtool_get_features_block features[ETHTOOL_DEV_FEATURE_WORDS];
u32 __user *sizeaddr;
u32 copy_size;
+ int i;
- ethtool_get_features_compat(dev, features);
+ /* in case feature bits run out again */
+ BUILD_BUG_ON(ETHTOOL_DEV_FEATURE_WORDS * sizeof(u32) > sizeof(netdev_features_t));
+
+ for (i = 0; i < ETHTOOL_DEV_FEATURE_WORDS; ++i) {
+ features[i].available = (u32)(dev->hw_features >> (32 * i));
+ features[i].requested = (u32)(dev->wanted_features >> (32 * i));
+ features[i].active = (u32)(dev->features >> (32 * i));
+ features[i].never_changed =
+ (u32)(NETIF_F_NEVER_CHANGE >> (32 * i));
+ }
sizeaddr = useraddr + offsetof(struct ethtool_gfeatures, size);
if (get_user(copy_size, sizeaddr))
@@ -305,7 +117,8 @@ static int ethtool_set_features(struct net_device *dev, void __user *useraddr)
{
struct ethtool_sfeatures cmd;
struct ethtool_set_features_block features[ETHTOOL_DEV_FEATURE_WORDS];
- int ret = 0;
+ netdev_features_t wanted = 0, valid = 0;
+ int i, ret = 0;
if (copy_from_user(&cmd, useraddr, sizeof(cmd)))
return -EFAULT;
@@ -317,65 +130,29 @@ static int ethtool_set_features(struct net_device *dev, void __user *useraddr)
if (copy_from_user(features, useraddr, sizeof(features)))
return -EFAULT;
- if (features[0].valid & ~NETIF_F_ETHTOOL_BITS)
- return -EINVAL;
+ for (i = 0; i < ETHTOOL_DEV_FEATURE_WORDS; ++i) {
+ valid |= (netdev_features_t)features[i].valid << (32 * i);
+ wanted |= (netdev_features_t)features[i].requested << (32 * i);
+ }
- if (ethtool_set_features_compat(dev, features))
- ret |= ETHTOOL_F_COMPAT;
+ if (valid & ~NETIF_F_ETHTOOL_BITS)
+ return -EINVAL;
- if (features[0].valid & ~dev->hw_features) {
- features[0].valid &= dev->hw_features;
+ if (valid & ~dev->hw_features) {
+ valid &= dev->hw_features;
ret |= ETHTOOL_F_UNSUPPORTED;
}
- dev->wanted_features &= ~features[0].valid;
- dev->wanted_features |= features[0].valid & features[0].requested;
+ dev->wanted_features &= ~valid;
+ dev->wanted_features |= wanted & valid;
__netdev_update_features(dev);
- if ((dev->wanted_features ^ dev->features) & features[0].valid)
+ if ((dev->wanted_features ^ dev->features) & valid)
ret |= ETHTOOL_F_WISH;
return ret;
}
-static const char netdev_features_strings[ETHTOOL_DEV_FEATURE_WORDS * 32][ETH_GSTRING_LEN] = {
- /* NETIF_F_SG */ "tx-scatter-gather",
- /* NETIF_F_IP_CSUM */ "tx-checksum-ipv4",
- /* NETIF_F_NO_CSUM */ "tx-checksum-unneeded",
- /* NETIF_F_HW_CSUM */ "tx-checksum-ip-generic",
- /* NETIF_F_IPV6_CSUM */ "tx-checksum-ipv6",
- /* NETIF_F_HIGHDMA */ "highdma",
- /* NETIF_F_FRAGLIST */ "tx-scatter-gather-fraglist",
- /* NETIF_F_HW_VLAN_TX */ "tx-vlan-hw-insert",
-
- /* NETIF_F_HW_VLAN_RX */ "rx-vlan-hw-parse",
- /* NETIF_F_HW_VLAN_FILTER */ "rx-vlan-filter",
- /* NETIF_F_VLAN_CHALLENGED */ "vlan-challenged",
- /* NETIF_F_GSO */ "tx-generic-segmentation",
- /* NETIF_F_LLTX */ "tx-lockless",
- /* NETIF_F_NETNS_LOCAL */ "netns-local",
- /* NETIF_F_GRO */ "rx-gro",
- /* NETIF_F_LRO */ "rx-lro",
-
- /* NETIF_F_TSO */ "tx-tcp-segmentation",
- /* NETIF_F_UFO */ "tx-udp-fragmentation",
- /* NETIF_F_GSO_ROBUST */ "tx-gso-robust",
- /* NETIF_F_TSO_ECN */ "tx-tcp-ecn-segmentation",
- /* NETIF_F_TSO6 */ "tx-tcp6-segmentation",
- /* NETIF_F_FSO */ "tx-fcoe-segmentation",
- "",
- "",
-
- /* NETIF_F_FCOE_CRC */ "tx-checksum-fcoe-crc",
- /* NETIF_F_SCTP_CSUM */ "tx-checksum-sctp",
- /* NETIF_F_FCOE_MTU */ "fcoe-mtu",
- /* NETIF_F_NTUPLE */ "rx-ntuple-filter",
- /* NETIF_F_RXHASH */ "rx-hashing",
- /* NETIF_F_RXCSUM */ "rx-checksum",
- /* NETIF_F_NOCACHE_COPY */ "tx-nocache-copy",
- /* NETIF_F_LOOPBACK */ "loopback",
-};
-
static int __ethtool_get_sset_count(struct net_device *dev, int sset)
{
const struct ethtool_ops *ops = dev->ethtool_ops;
@@ -402,7 +179,7 @@ static void __ethtool_get_strings(struct net_device *dev,
ops->get_strings(dev, stringset, data);
}
-static u32 ethtool_get_feature_mask(u32 eth_cmd)
+static netdev_features_t ethtool_get_feature_mask(u32 eth_cmd)
{
/* feature masks of legacy discrete ethtool ops */
@@ -433,136 +210,82 @@ static u32 ethtool_get_feature_mask(u32 eth_cmd)
}
}
-static void *__ethtool_get_one_feature_actor(struct net_device *dev, u32 ethcmd)
-{
- const struct ethtool_ops *ops = dev->ethtool_ops;
-
- if (!ops)
- return NULL;
-
- switch (ethcmd) {
- case ETHTOOL_GTXCSUM:
- return ops->get_tx_csum;
- case ETHTOOL_GRXCSUM:
- return ops->get_rx_csum;
- case ETHTOOL_SSG:
- return ops->get_sg;
- case ETHTOOL_STSO:
- return ops->get_tso;
- case ETHTOOL_SUFO:
- return ops->get_ufo;
- default:
- return NULL;
- }
-}
-
-static u32 __ethtool_get_rx_csum_oldbug(struct net_device *dev)
-{
- return !!(dev->features & NETIF_F_ALL_CSUM);
-}
-
static int ethtool_get_one_feature(struct net_device *dev,
char __user *useraddr, u32 ethcmd)
{
- u32 mask = ethtool_get_feature_mask(ethcmd);
+ netdev_features_t mask = ethtool_get_feature_mask(ethcmd);
struct ethtool_value edata = {
.cmd = ethcmd,
.data = !!(dev->features & mask),
};
- /* compatibility with discrete get_ ops */
- if (!(dev->hw_features & mask)) {
- u32 (*actor)(struct net_device *);
-
- actor = __ethtool_get_one_feature_actor(dev, ethcmd);
-
- /* bug compatibility with old get_rx_csum */
- if (ethcmd == ETHTOOL_GRXCSUM && !actor)
- actor = __ethtool_get_rx_csum_oldbug;
-
- if (actor)
- edata.data = actor(dev);
- }
-
if (copy_to_user(useraddr, &edata, sizeof(edata)))
return -EFAULT;
return 0;
}
-static int __ethtool_set_tx_csum(struct net_device *dev, u32 data);
-static int __ethtool_set_rx_csum(struct net_device *dev, u32 data);
-static int __ethtool_set_sg(struct net_device *dev, u32 data);
-static int __ethtool_set_tso(struct net_device *dev, u32 data);
-static int __ethtool_set_ufo(struct net_device *dev, u32 data);
-
static int ethtool_set_one_feature(struct net_device *dev,
void __user *useraddr, u32 ethcmd)
{
struct ethtool_value edata;
- u32 mask;
+ netdev_features_t mask;
if (copy_from_user(&edata, useraddr, sizeof(edata)))
return -EFAULT;
mask = ethtool_get_feature_mask(ethcmd);
mask &= dev->hw_features;
- if (mask) {
- if (edata.data)
- dev->wanted_features |= mask;
- else
- dev->wanted_features &= ~mask;
+ if (!mask)
+ return -EOPNOTSUPP;
- __netdev_update_features(dev);
- return 0;
- }
+ if (edata.data)
+ dev->wanted_features |= mask;
+ else
+ dev->wanted_features &= ~mask;
- /* Driver is not converted to ndo_fix_features or does not
- * support changing this offload. In the latter case it won't
- * have corresponding ethtool_ops field set.
- *
- * Following part is to be removed after all drivers advertise
- * their changeable features in netdev->hw_features and stop
- * using discrete offload setting ops.
- */
+ __netdev_update_features(dev);
- switch (ethcmd) {
- case ETHTOOL_STXCSUM:
- return __ethtool_set_tx_csum(dev, edata.data);
- case ETHTOOL_SRXCSUM:
- return __ethtool_set_rx_csum(dev, edata.data);
- case ETHTOOL_SSG:
- return __ethtool_set_sg(dev, edata.data);
- case ETHTOOL_STSO:
- return __ethtool_set_tso(dev, edata.data);
- case ETHTOOL_SUFO:
- return __ethtool_set_ufo(dev, edata.data);
- default:
- return -EOPNOTSUPP;
- }
+ return 0;
}
-int __ethtool_set_flags(struct net_device *dev, u32 data)
+#define ETH_ALL_FLAGS (ETH_FLAG_LRO | ETH_FLAG_RXVLAN | ETH_FLAG_TXVLAN | \
+ ETH_FLAG_NTUPLE | ETH_FLAG_RXHASH)
+#define ETH_ALL_FEATURES (NETIF_F_LRO | NETIF_F_HW_VLAN_RX | \
+ NETIF_F_HW_VLAN_TX | NETIF_F_NTUPLE | NETIF_F_RXHASH)
+
+static u32 __ethtool_get_flags(struct net_device *dev)
{
- u32 changed;
+ u32 flags = 0;
+
+ if (dev->features & NETIF_F_LRO) flags |= ETH_FLAG_LRO;
+ if (dev->features & NETIF_F_HW_VLAN_RX) flags |= ETH_FLAG_RXVLAN;
+ if (dev->features & NETIF_F_HW_VLAN_TX) flags |= ETH_FLAG_TXVLAN;
+ if (dev->features & NETIF_F_NTUPLE) flags |= ETH_FLAG_NTUPLE;
+ if (dev->features & NETIF_F_RXHASH) flags |= ETH_FLAG_RXHASH;
+
+ return flags;
+}
- if (data & ~flags_dup_features)
+static int __ethtool_set_flags(struct net_device *dev, u32 data)
+{
+ netdev_features_t features = 0, changed;
+
+ if (data & ~ETH_ALL_FLAGS)
return -EINVAL;
- /* legacy set_flags() op */
- if (dev->ethtool_ops->set_flags) {
- if (unlikely(dev->hw_features & flags_dup_features))
- netdev_warn(dev,
- "driver BUG: mixed hw_features and set_flags()\n");
- return dev->ethtool_ops->set_flags(dev, data);
- }
+ if (data & ETH_FLAG_LRO) features |= NETIF_F_LRO;
+ if (data & ETH_FLAG_RXVLAN) features |= NETIF_F_HW_VLAN_RX;
+ if (data & ETH_FLAG_TXVLAN) features |= NETIF_F_HW_VLAN_TX;
+ if (data & ETH_FLAG_NTUPLE) features |= NETIF_F_NTUPLE;
+ if (data & ETH_FLAG_RXHASH) features |= NETIF_F_RXHASH;
/* allow changing only bits set in hw_features */
- changed = (data ^ dev->features) & flags_dup_features;
+ changed = (features ^ dev->features) & ETH_ALL_FEATURES;
if (changed & ~dev->hw_features)
return (changed & dev->hw_features) ? -EINVAL : -EOPNOTSUPP;
dev->wanted_features =
- (dev->wanted_features & ~changed) | (data & dev->hw_features);
+ (dev->wanted_features & ~changed) | (features & changed);
__netdev_update_features(dev);
@@ -792,34 +515,44 @@ err_out:
static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev,
void __user *useraddr)
{
- struct ethtool_rxfh_indir *indir;
- u32 table_size;
- size_t full_size;
+ u32 user_size, dev_size;
+ u32 *indir;
int ret;
- if (!dev->ethtool_ops->get_rxfh_indir)
+ if (!dev->ethtool_ops->get_rxfh_indir_size ||
+ !dev->ethtool_ops->get_rxfh_indir)
+ return -EOPNOTSUPP;
+ dev_size = dev->ethtool_ops->get_rxfh_indir_size(dev);
+ if (dev_size == 0)
return -EOPNOTSUPP;
- if (copy_from_user(&table_size,
+ if (copy_from_user(&user_size,
useraddr + offsetof(struct ethtool_rxfh_indir, size),
- sizeof(table_size)))
+ sizeof(user_size)))
return -EFAULT;
- if (table_size >
- (KMALLOC_MAX_SIZE - sizeof(*indir)) / sizeof(*indir->ring_index))
- return -ENOMEM;
- full_size = sizeof(*indir) + sizeof(*indir->ring_index) * table_size;
- indir = kzalloc(full_size, GFP_USER);
+ if (copy_to_user(useraddr + offsetof(struct ethtool_rxfh_indir, size),
+ &dev_size, sizeof(dev_size)))
+ return -EFAULT;
+
+ /* If the user buffer size is 0, this is just a query for the
+ * device table size. Otherwise, if it's smaller than the
+ * device table size it's an error.
+ */
+ if (user_size < dev_size)
+ return user_size == 0 ? 0 : -EINVAL;
+
+ indir = kcalloc(dev_size, sizeof(indir[0]), GFP_USER);
if (!indir)
return -ENOMEM;
- indir->cmd = ETHTOOL_GRXFHINDIR;
- indir->size = table_size;
ret = dev->ethtool_ops->get_rxfh_indir(dev, indir);
if (ret)
goto out;
- if (copy_to_user(useraddr, indir, full_size))
+ if (copy_to_user(useraddr +
+ offsetof(struct ethtool_rxfh_indir, ring_index[0]),
+ indir, dev_size * sizeof(indir[0])))
ret = -EFAULT;
out:
@@ -830,30 +563,56 @@ out:
static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev,
void __user *useraddr)
{
- struct ethtool_rxfh_indir *indir;
- u32 table_size;
- size_t full_size;
+ struct ethtool_rxnfc rx_rings;
+ u32 user_size, dev_size, i;
+ u32 *indir;
int ret;
- if (!dev->ethtool_ops->set_rxfh_indir)
+ if (!dev->ethtool_ops->get_rxfh_indir_size ||
+ !dev->ethtool_ops->set_rxfh_indir ||
+ !dev->ethtool_ops->get_rxnfc)
+ return -EOPNOTSUPP;
+ dev_size = dev->ethtool_ops->get_rxfh_indir_size(dev);
+ if (dev_size == 0)
return -EOPNOTSUPP;
- if (copy_from_user(&table_size,
+ if (copy_from_user(&user_size,
useraddr + offsetof(struct ethtool_rxfh_indir, size),
- sizeof(table_size)))
+ sizeof(user_size)))
return -EFAULT;
- if (table_size >
- (KMALLOC_MAX_SIZE - sizeof(*indir)) / sizeof(*indir->ring_index))
- return -ENOMEM;
- full_size = sizeof(*indir) + sizeof(*indir->ring_index) * table_size;
- indir = kmalloc(full_size, GFP_USER);
+ if (user_size != 0 && user_size != dev_size)
+ return -EINVAL;
+
+ indir = kcalloc(dev_size, sizeof(indir[0]), GFP_USER);
if (!indir)
return -ENOMEM;
- if (copy_from_user(indir, useraddr, full_size)) {
- ret = -EFAULT;
+ rx_rings.cmd = ETHTOOL_GRXRINGS;
+ ret = dev->ethtool_ops->get_rxnfc(dev, &rx_rings, NULL);
+ if (ret)
goto out;
+
+ if (user_size == 0) {
+ for (i = 0; i < dev_size; i++)
+ indir[i] = ethtool_rxfh_indir_default(i, rx_rings.data);
+ } else {
+ if (copy_from_user(indir,
+ useraddr +
+ offsetof(struct ethtool_rxfh_indir,
+ ring_index[0]),
+ dev_size * sizeof(indir[0]))) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ /* Validate ring indices */
+ for (i = 0; i < dev_size; i++) {
+ if (indir[i] >= rx_rings.data) {
+ ret = -EINVAL;
+ goto out;
+ }
+ }
}
ret = dev->ethtool_ops->set_rxfh_indir(dev, indir);
@@ -1231,81 +990,6 @@ static int ethtool_set_pauseparam(struct net_device *dev, void __user *useraddr)
return dev->ethtool_ops->set_pauseparam(dev, &pauseparam);
}
-static int __ethtool_set_sg(struct net_device *dev, u32 data)
-{
- int err;
-
- if (!dev->ethtool_ops->set_sg)
- return -EOPNOTSUPP;
-
- if (data && !(dev->features & NETIF_F_ALL_CSUM))
- return -EINVAL;
-
- if (!data && dev->ethtool_ops->set_tso) {
- err = dev->ethtool_ops->set_tso(dev, 0);
- if (err)
- return err;
- }
-
- if (!data && dev->ethtool_ops->set_ufo) {
- err = dev->ethtool_ops->set_ufo(dev, 0);
- if (err)
- return err;
- }
- return dev->ethtool_ops->set_sg(dev, data);
-}
-
-static int __ethtool_set_tx_csum(struct net_device *dev, u32 data)
-{
- int err;
-
- if (!dev->ethtool_ops->set_tx_csum)
- return -EOPNOTSUPP;
-
- if (!data && dev->ethtool_ops->set_sg) {
- err = __ethtool_set_sg(dev, 0);
- if (err)
- return err;
- }
-
- return dev->ethtool_ops->set_tx_csum(dev, data);
-}
-
-static int __ethtool_set_rx_csum(struct net_device *dev, u32 data)
-{
- if (!dev->ethtool_ops->set_rx_csum)
- return -EOPNOTSUPP;
-
- if (!data)
- dev->features &= ~NETIF_F_GRO;
-
- return dev->ethtool_ops->set_rx_csum(dev, data);
-}
-
-static int __ethtool_set_tso(struct net_device *dev, u32 data)
-{
- if (!dev->ethtool_ops->set_tso)
- return -EOPNOTSUPP;
-
- if (data && !(dev->features & NETIF_F_SG))
- return -EINVAL;
-
- return dev->ethtool_ops->set_tso(dev, data);
-}
-
-static int __ethtool_set_ufo(struct net_device *dev, u32 data)
-{
- if (!dev->ethtool_ops->set_ufo)
- return -EOPNOTSUPP;
- if (data && !(dev->features & NETIF_F_SG))
- return -EINVAL;
- if (data && !((dev->features & NETIF_F_GEN_CSUM) ||
- (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))
- == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM)))
- return -EINVAL;
- return dev->ethtool_ops->set_ufo(dev, data);
-}
-
static int ethtool_self_test(struct net_device *dev, char __user *useraddr)
{
struct ethtool_test test;
@@ -1771,9 +1455,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
break;
case ETHTOOL_GFLAGS:
rc = ethtool_get_value(dev, useraddr, ethcmd,
- (dev->ethtool_ops->get_flags ?
- dev->ethtool_ops->get_flags :
- ethtool_op_get_flags));
+ __ethtool_get_flags);
break;
case ETHTOOL_SFLAGS:
rc = ethtool_set_value(dev, useraddr, __ethtool_set_flags);
diff --git a/net/core/flow.c b/net/core/flow.c
index 8ae42de9c79e..e318c7e98042 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -358,6 +358,18 @@ void flow_cache_flush(void)
put_online_cpus();
}
+static void flow_cache_flush_task(struct work_struct *work)
+{
+ flow_cache_flush();
+}
+
+static DECLARE_WORK(flow_cache_flush_work, flow_cache_flush_task);
+
+void flow_cache_flush_deferred(void)
+{
+ schedule_work(&flow_cache_flush_work);
+}
+
static int __cpuinit flow_cache_cpu_prepare(struct flow_cache *fc, int cpu)
{
struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu);
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
new file mode 100644
index 000000000000..0985b9b14b80
--- /dev/null
+++ b/net/core/flow_dissector.c
@@ -0,0 +1,143 @@
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/if_vlan.h>
+#include <net/ip.h>
+#include <linux/if_tunnel.h>
+#include <linux/if_pppox.h>
+#include <linux/ppp_defs.h>
+#include <net/flow_keys.h>
+
+/* copy saddr & daddr, possibly using 64bit load/store
+ * Equivalent to : flow->src = iph->saddr;
+ * flow->dst = iph->daddr;
+ */
+static void iph_to_flow_copy_addrs(struct flow_keys *flow, const struct iphdr *iph)
+{
+ BUILD_BUG_ON(offsetof(typeof(*flow), dst) !=
+ offsetof(typeof(*flow), src) + sizeof(flow->src));
+ memcpy(&flow->src, &iph->saddr, sizeof(flow->src) + sizeof(flow->dst));
+}
+
+bool skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow)
+{
+ int poff, nhoff = skb_network_offset(skb);
+ u8 ip_proto;
+ __be16 proto = skb->protocol;
+
+ memset(flow, 0, sizeof(*flow));
+
+again:
+ switch (proto) {
+ case __constant_htons(ETH_P_IP): {
+ const struct iphdr *iph;
+ struct iphdr _iph;
+ip:
+ iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph);
+ if (!iph)
+ return false;
+
+ if (ip_is_fragment(iph))
+ ip_proto = 0;
+ else
+ ip_proto = iph->protocol;
+ iph_to_flow_copy_addrs(flow, iph);
+ nhoff += iph->ihl * 4;
+ break;
+ }
+ case __constant_htons(ETH_P_IPV6): {
+ const struct ipv6hdr *iph;
+ struct ipv6hdr _iph;
+ipv6:
+ iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph);
+ if (!iph)
+ return false;
+
+ ip_proto = iph->nexthdr;
+ flow->src = iph->saddr.s6_addr32[3];
+ flow->dst = iph->daddr.s6_addr32[3];
+ nhoff += sizeof(struct ipv6hdr);
+ break;
+ }
+ case __constant_htons(ETH_P_8021Q): {
+ const struct vlan_hdr *vlan;
+ struct vlan_hdr _vlan;
+
+ vlan = skb_header_pointer(skb, nhoff, sizeof(_vlan), &_vlan);
+ if (!vlan)
+ return false;
+
+ proto = vlan->h_vlan_encapsulated_proto;
+ nhoff += sizeof(*vlan);
+ goto again;
+ }
+ case __constant_htons(ETH_P_PPP_SES): {
+ struct {
+ struct pppoe_hdr hdr;
+ __be16 proto;
+ } *hdr, _hdr;
+ hdr = skb_header_pointer(skb, nhoff, sizeof(_hdr), &_hdr);
+ if (!hdr)
+ return false;
+ proto = hdr->proto;
+ nhoff += PPPOE_SES_HLEN;
+ switch (proto) {
+ case __constant_htons(PPP_IP):
+ goto ip;
+ case __constant_htons(PPP_IPV6):
+ goto ipv6;
+ default:
+ return false;
+ }
+ }
+ default:
+ return false;
+ }
+
+ switch (ip_proto) {
+ case IPPROTO_GRE: {
+ struct gre_hdr {
+ __be16 flags;
+ __be16 proto;
+ } *hdr, _hdr;
+
+ hdr = skb_header_pointer(skb, nhoff, sizeof(_hdr), &_hdr);
+ if (!hdr)
+ return false;
+ /*
+ * Only look inside GRE if version zero and no
+ * routing
+ */
+ if (!(hdr->flags & (GRE_VERSION|GRE_ROUTING))) {
+ proto = hdr->proto;
+ nhoff += 4;
+ if (hdr->flags & GRE_CSUM)
+ nhoff += 4;
+ if (hdr->flags & GRE_KEY)
+ nhoff += 4;
+ if (hdr->flags & GRE_SEQ)
+ nhoff += 4;
+ goto again;
+ }
+ break;
+ }
+ case IPPROTO_IPIP:
+ goto again;
+ default:
+ break;
+ }
+
+ flow->ip_proto = ip_proto;
+ poff = proto_ports_offset(ip_proto);
+ if (poff >= 0) {
+ __be32 *ports, _ports;
+
+ nhoff += poff;
+ ports = skb_header_pointer(skb, nhoff, sizeof(_ports), &_ports);
+ if (ports)
+ flow->ports = *ports;
+ }
+
+ return true;
+}
+EXPORT_SYMBOL(skb_flow_dissect);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 039d51e6c284..e287346e0934 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -238,6 +238,7 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
it to safe state.
*/
skb_queue_purge(&n->arp_queue);
+ n->arp_queue_len_bytes = 0;
n->output = neigh_blackhole;
if (n->nud_state & NUD_VALID)
n->nud_state = NUD_NOARP;
@@ -272,7 +273,7 @@ int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
}
EXPORT_SYMBOL(neigh_ifdown);
-static struct neighbour *neigh_alloc(struct neigh_table *tbl)
+static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev)
{
struct neighbour *n = NULL;
unsigned long now = jiffies;
@@ -287,7 +288,15 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl)
goto out_entries;
}
- n = kmem_cache_zalloc(tbl->kmem_cachep, GFP_ATOMIC);
+ if (tbl->entry_size)
+ n = kzalloc(tbl->entry_size, GFP_ATOMIC);
+ else {
+ int sz = sizeof(*n) + tbl->key_len;
+
+ sz = ALIGN(sz, NEIGH_PRIV_ALIGN);
+ sz += dev->neigh_priv_len;
+ n = kzalloc(sz, GFP_ATOMIC);
+ }
if (!n)
goto out_entries;
@@ -313,11 +322,18 @@ out_entries:
goto out;
}
+static void neigh_get_hash_rnd(u32 *x)
+{
+ get_random_bytes(x, sizeof(*x));
+ *x |= 1;
+}
+
static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
{
size_t size = (1 << shift) * sizeof(struct neighbour *);
struct neigh_hash_table *ret;
struct neighbour __rcu **buckets;
+ int i;
ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
if (!ret)
@@ -334,8 +350,8 @@ static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
}
ret->hash_buckets = buckets;
ret->hash_shift = shift;
- get_random_bytes(&ret->hash_rnd, sizeof(ret->hash_rnd));
- ret->hash_rnd |= 1;
+ for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
+ neigh_get_hash_rnd(&ret->hash_rnd[i]);
return ret;
}
@@ -462,7 +478,7 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
u32 hash_val;
int key_len = tbl->key_len;
int error;
- struct neighbour *n1, *rc, *n = neigh_alloc(tbl);
+ struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
struct neigh_hash_table *nht;
if (!n) {
@@ -480,6 +496,14 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
goto out_neigh_release;
}
+ if (dev->netdev_ops->ndo_neigh_construct) {
+ error = dev->netdev_ops->ndo_neigh_construct(n);
+ if (error < 0) {
+ rc = ERR_PTR(error);
+ goto out_neigh_release;
+ }
+ }
+
/* Device specific setup. */
if (n->parms->neigh_setup &&
(error = n->parms->neigh_setup(n)) < 0) {
@@ -677,18 +701,14 @@ static inline void neigh_parms_put(struct neigh_parms *parms)
neigh_parms_destroy(parms);
}
-static void neigh_destroy_rcu(struct rcu_head *head)
-{
- struct neighbour *neigh = container_of(head, struct neighbour, rcu);
-
- kmem_cache_free(neigh->tbl->kmem_cachep, neigh);
-}
/*
* neighbour must already be out of the table;
*
*/
void neigh_destroy(struct neighbour *neigh)
{
+ struct net_device *dev = neigh->dev;
+
NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);
if (!neigh->dead) {
@@ -702,14 +722,18 @@ void neigh_destroy(struct neighbour *neigh)
printk(KERN_WARNING "Impossible event.\n");
skb_queue_purge(&neigh->arp_queue);
+ neigh->arp_queue_len_bytes = 0;
- dev_put(neigh->dev);
+ if (dev->netdev_ops->ndo_neigh_destroy)
+ dev->netdev_ops->ndo_neigh_destroy(neigh);
+
+ dev_put(dev);
neigh_parms_put(neigh->parms);
NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh);
atomic_dec(&neigh->tbl->entries);
- call_rcu(&neigh->rcu, neigh_destroy_rcu);
+ kfree_rcu(neigh, rcu);
}
EXPORT_SYMBOL(neigh_destroy);
@@ -842,6 +866,7 @@ static void neigh_invalidate(struct neighbour *neigh)
write_lock(&neigh->lock);
}
skb_queue_purge(&neigh->arp_queue);
+ neigh->arp_queue_len_bytes = 0;
}
static void neigh_probe(struct neighbour *neigh)
@@ -980,15 +1005,20 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
if (neigh->nud_state == NUD_INCOMPLETE) {
if (skb) {
- if (skb_queue_len(&neigh->arp_queue) >=
- neigh->parms->queue_len) {
+ while (neigh->arp_queue_len_bytes + skb->truesize >
+ neigh->parms->queue_len_bytes) {
struct sk_buff *buff;
+
buff = __skb_dequeue(&neigh->arp_queue);
+ if (!buff)
+ break;
+ neigh->arp_queue_len_bytes -= buff->truesize;
kfree_skb(buff);
NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
}
skb_dst_force(skb);
__skb_queue_tail(&neigh->arp_queue, skb);
+ neigh->arp_queue_len_bytes += skb->truesize;
}
rc = 1;
}
@@ -1167,7 +1197,7 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
rcu_read_lock();
/* On shaper/eql skb->dst->neighbour != neigh :( */
- if (dst && (n2 = dst_get_neighbour(dst)) != NULL)
+ if (dst && (n2 = dst_get_neighbour_noref(dst)) != NULL)
n1 = n2;
n1->output(n1, skb);
rcu_read_unlock();
@@ -1175,6 +1205,7 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
write_lock_bh(&neigh->lock);
}
skb_queue_purge(&neigh->arp_queue);
+ neigh->arp_queue_len_bytes = 0;
}
out:
if (update_isrouter) {
@@ -1477,11 +1508,6 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl)
tbl->parms.reachable_time =
neigh_rand_reach_time(tbl->parms.base_reachable_time);
- if (!tbl->kmem_cachep)
- tbl->kmem_cachep =
- kmem_cache_create(tbl->id, tbl->entry_size, 0,
- SLAB_HWCACHE_ALIGN|SLAB_PANIC,
- NULL);
tbl->stats = alloc_percpu(struct neigh_statistics);
if (!tbl->stats)
panic("cannot create neighbour cache statistics");
@@ -1566,9 +1592,6 @@ int neigh_table_clear(struct neigh_table *tbl)
free_percpu(tbl->stats);
tbl->stats = NULL;
- kmem_cache_destroy(tbl->kmem_cachep);
- tbl->kmem_cachep = NULL;
-
return 0;
}
EXPORT_SYMBOL(neigh_table_clear);
@@ -1747,7 +1770,11 @@ static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
NLA_PUT_U32(skb, NDTPA_IFINDEX, parms->dev->ifindex);
NLA_PUT_U32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt));
- NLA_PUT_U32(skb, NDTPA_QUEUE_LEN, parms->queue_len);
+ NLA_PUT_U32(skb, NDTPA_QUEUE_LENBYTES, parms->queue_len_bytes);
+ /* approximative value for deprecated QUEUE_LEN (in packets) */
+ NLA_PUT_U32(skb, NDTPA_QUEUE_LEN,
+ DIV_ROUND_UP(parms->queue_len_bytes,
+ SKB_TRUESIZE(ETH_FRAME_LEN)));
NLA_PUT_U32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen);
NLA_PUT_U32(skb, NDTPA_APP_PROBES, parms->app_probes);
NLA_PUT_U32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes);
@@ -1808,7 +1835,7 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
rcu_read_lock_bh();
nht = rcu_dereference_bh(tbl->nht);
- ndc.ndtc_hash_rnd = nht->hash_rnd;
+ ndc.ndtc_hash_rnd = nht->hash_rnd[0];
ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
rcu_read_unlock_bh();
@@ -1974,7 +2001,11 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
switch (i) {
case NDTPA_QUEUE_LEN:
- p->queue_len = nla_get_u32(tbp[i]);
+ p->queue_len_bytes = nla_get_u32(tbp[i]) *
+ SKB_TRUESIZE(ETH_FRAME_LEN);
+ break;
+ case NDTPA_QUEUE_LENBYTES:
+ p->queue_len_bytes = nla_get_u32(tbp[i]);
break;
case NDTPA_PROXY_QLEN:
p->proxy_qlen = nla_get_u32(tbp[i]);
@@ -2397,7 +2428,10 @@ static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
struct net *net = seq_file_net(seq);
struct neigh_table *tbl = state->tbl;
- pn = pn->next;
+ do {
+ pn = pn->next;
+ } while (pn && !net_eq(pneigh_net(pn), net));
+
while (!pn) {
if (++state->bucket > PNEIGH_HASHMASK)
break;
@@ -2635,117 +2669,158 @@ EXPORT_SYMBOL(neigh_app_ns);
#ifdef CONFIG_SYSCTL
-#define NEIGH_VARS_MAX 19
+static int proc_unres_qlen(ctl_table *ctl, int write, void __user *buffer,
+ size_t *lenp, loff_t *ppos)
+{
+ int size, ret;
+ ctl_table tmp = *ctl;
+
+ tmp.data = &size;
+ size = DIV_ROUND_UP(*(int *)ctl->data, SKB_TRUESIZE(ETH_FRAME_LEN));
+ ret = proc_dointvec(&tmp, write, buffer, lenp, ppos);
+ if (write && !ret)
+ *(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
+ return ret;
+}
+
+enum {
+ NEIGH_VAR_MCAST_PROBE,
+ NEIGH_VAR_UCAST_PROBE,
+ NEIGH_VAR_APP_PROBE,
+ NEIGH_VAR_RETRANS_TIME,
+ NEIGH_VAR_BASE_REACHABLE_TIME,
+ NEIGH_VAR_DELAY_PROBE_TIME,
+ NEIGH_VAR_GC_STALETIME,
+ NEIGH_VAR_QUEUE_LEN,
+ NEIGH_VAR_QUEUE_LEN_BYTES,
+ NEIGH_VAR_PROXY_QLEN,
+ NEIGH_VAR_ANYCAST_DELAY,
+ NEIGH_VAR_PROXY_DELAY,
+ NEIGH_VAR_LOCKTIME,
+ NEIGH_VAR_RETRANS_TIME_MS,
+ NEIGH_VAR_BASE_REACHABLE_TIME_MS,
+ NEIGH_VAR_GC_INTERVAL,
+ NEIGH_VAR_GC_THRESH1,
+ NEIGH_VAR_GC_THRESH2,
+ NEIGH_VAR_GC_THRESH3,
+ NEIGH_VAR_MAX
+};
static struct neigh_sysctl_table {
struct ctl_table_header *sysctl_header;
- struct ctl_table neigh_vars[NEIGH_VARS_MAX];
+ struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
char *dev_name;
} neigh_sysctl_template __read_mostly = {
.neigh_vars = {
- {
+ [NEIGH_VAR_MCAST_PROBE] = {
.procname = "mcast_solicit",
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
- {
+ [NEIGH_VAR_UCAST_PROBE] = {
.procname = "ucast_solicit",
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
- {
+ [NEIGH_VAR_APP_PROBE] = {
.procname = "app_solicit",
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
- {
+ [NEIGH_VAR_RETRANS_TIME] = {
.procname = "retrans_time",
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_userhz_jiffies,
},
- {
+ [NEIGH_VAR_BASE_REACHABLE_TIME] = {
.procname = "base_reachable_time",
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
- {
+ [NEIGH_VAR_DELAY_PROBE_TIME] = {
.procname = "delay_first_probe_time",
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
- {
+ [NEIGH_VAR_GC_STALETIME] = {
.procname = "gc_stale_time",
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
- {
+ [NEIGH_VAR_QUEUE_LEN] = {
.procname = "unres_qlen",
.maxlen = sizeof(int),
.mode = 0644,
+ .proc_handler = proc_unres_qlen,
+ },
+ [NEIGH_VAR_QUEUE_LEN_BYTES] = {
+ .procname = "unres_qlen_bytes",
+ .maxlen = sizeof(int),
+ .mode = 0644,
.proc_handler = proc_dointvec,
},
- {
+ [NEIGH_VAR_PROXY_QLEN] = {
.procname = "proxy_qlen",
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
- {
+ [NEIGH_VAR_ANYCAST_DELAY] = {
.procname = "anycast_delay",
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_userhz_jiffies,
},
- {
+ [NEIGH_VAR_PROXY_DELAY] = {
.procname = "proxy_delay",
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_userhz_jiffies,
},
- {
+ [NEIGH_VAR_LOCKTIME] = {
.procname = "locktime",
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_userhz_jiffies,
},
- {
+ [NEIGH_VAR_RETRANS_TIME_MS] = {
.procname = "retrans_time_ms",
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_ms_jiffies,
},
- {
+ [NEIGH_VAR_BASE_REACHABLE_TIME_MS] = {
.procname = "base_reachable_time_ms",
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_ms_jiffies,
},
- {
+ [NEIGH_VAR_GC_INTERVAL] = {
.procname = "gc_interval",
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
- {
+ [NEIGH_VAR_GC_THRESH1] = {
.procname = "gc_thresh1",
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
- {
+ [NEIGH_VAR_GC_THRESH2] = {
.procname = "gc_thresh2",
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
- {
+ [NEIGH_VAR_GC_THRESH3] = {
.procname = "gc_thresh3",
.maxlen = sizeof(int),
.mode = 0644,
@@ -2778,47 +2853,49 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
if (!t)
goto err;
- t->neigh_vars[0].data = &p->mcast_probes;
- t->neigh_vars[1].data = &p->ucast_probes;
- t->neigh_vars[2].data = &p->app_probes;
- t->neigh_vars[3].data = &p->retrans_time;
- t->neigh_vars[4].data = &p->base_reachable_time;
- t->neigh_vars[5].data = &p->delay_probe_time;
- t->neigh_vars[6].data = &p->gc_staletime;
- t->neigh_vars[7].data = &p->queue_len;
- t->neigh_vars[8].data = &p->proxy_qlen;
- t->neigh_vars[9].data = &p->anycast_delay;
- t->neigh_vars[10].data = &p->proxy_delay;
- t->neigh_vars[11].data = &p->locktime;
- t->neigh_vars[12].data = &p->retrans_time;
- t->neigh_vars[13].data = &p->base_reachable_time;
+ t->neigh_vars[NEIGH_VAR_MCAST_PROBE].data = &p->mcast_probes;
+ t->neigh_vars[NEIGH_VAR_UCAST_PROBE].data = &p->ucast_probes;
+ t->neigh_vars[NEIGH_VAR_APP_PROBE].data = &p->app_probes;
+ t->neigh_vars[NEIGH_VAR_RETRANS_TIME].data = &p->retrans_time;
+ t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].data = &p->base_reachable_time;
+ t->neigh_vars[NEIGH_VAR_DELAY_PROBE_TIME].data = &p->delay_probe_time;
+ t->neigh_vars[NEIGH_VAR_GC_STALETIME].data = &p->gc_staletime;
+ t->neigh_vars[NEIGH_VAR_QUEUE_LEN].data = &p->queue_len_bytes;
+ t->neigh_vars[NEIGH_VAR_QUEUE_LEN_BYTES].data = &p->queue_len_bytes;
+ t->neigh_vars[NEIGH_VAR_PROXY_QLEN].data = &p->proxy_qlen;
+ t->neigh_vars[NEIGH_VAR_ANYCAST_DELAY].data = &p->anycast_delay;
+ t->neigh_vars[NEIGH_VAR_PROXY_DELAY].data = &p->proxy_delay;
+ t->neigh_vars[NEIGH_VAR_LOCKTIME].data = &p->locktime;
+ t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].data = &p->retrans_time;
+ t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].data = &p->base_reachable_time;
if (dev) {
dev_name_source = dev->name;
/* Terminate the table early */
- memset(&t->neigh_vars[14], 0, sizeof(t->neigh_vars[14]));
+ memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
+ sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
} else {
dev_name_source = neigh_path[NEIGH_CTL_PATH_DEV].procname;
- t->neigh_vars[14].data = (int *)(p + 1);
- t->neigh_vars[15].data = (int *)(p + 1) + 1;
- t->neigh_vars[16].data = (int *)(p + 1) + 2;
- t->neigh_vars[17].data = (int *)(p + 1) + 3;
+ t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = (int *)(p + 1);
+ t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = (int *)(p + 1) + 1;
+ t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = (int *)(p + 1) + 2;
+ t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = (int *)(p + 1) + 3;
}
if (handler) {
/* RetransTime */
- t->neigh_vars[3].proc_handler = handler;
- t->neigh_vars[3].extra1 = dev;
+ t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
+ t->neigh_vars[NEIGH_VAR_RETRANS_TIME].extra1 = dev;
/* ReachableTime */
- t->neigh_vars[4].proc_handler = handler;
- t->neigh_vars[4].extra1 = dev;
+ t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
+ t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].extra1 = dev;
/* RetransTime (in milliseconds)*/
- t->neigh_vars[12].proc_handler = handler;
- t->neigh_vars[12].extra1 = dev;
+ t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
+ t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].extra1 = dev;
/* ReachableTime (in milliseconds) */
- t->neigh_vars[13].proc_handler = handler;
- t->neigh_vars[13].extra1 = dev;
+ t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
+ t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].extra1 = dev;
}
t->dev_name = kstrdup(dev_name_source, GFP_KERNEL);
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index c71c434a4c05..abf4393a77b3 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -21,6 +21,7 @@
#include <linux/wireless.h>
#include <linux/vmalloc.h>
#include <linux/export.h>
+#include <linux/jiffies.h>
#include <net/wext.h>
#include "net-sysfs.h"
@@ -606,9 +607,12 @@ static ssize_t store_rps_map(struct netdev_rx_queue *queue,
rcu_assign_pointer(queue->rps_map, map);
spin_unlock(&rps_map_lock);
- if (old_map)
+ if (map)
+ jump_label_inc(&rps_needed);
+ if (old_map) {
kfree_rcu(old_map, rcu);
-
+ jump_label_dec(&rps_needed);
+ }
free_cpumask_var(mask);
return len;
}
@@ -618,15 +622,15 @@ static ssize_t show_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
char *buf)
{
struct rps_dev_flow_table *flow_table;
- unsigned int val = 0;
+ unsigned long val = 0;
rcu_read_lock();
flow_table = rcu_dereference(queue->rps_flow_table);
if (flow_table)
- val = flow_table->mask + 1;
+ val = (unsigned long)flow_table->mask + 1;
rcu_read_unlock();
- return sprintf(buf, "%u\n", val);
+ return sprintf(buf, "%lu\n", val);
}
static void rps_dev_flow_table_release_work(struct work_struct *work)
@@ -650,33 +654,46 @@ static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
struct rx_queue_attribute *attr,
const char *buf, size_t len)
{
- unsigned int count;
- char *endp;
+ unsigned long mask, count;
struct rps_dev_flow_table *table, *old_table;
static DEFINE_SPINLOCK(rps_dev_flow_lock);
+ int rc;
if (!capable(CAP_NET_ADMIN))
return -EPERM;
- count = simple_strtoul(buf, &endp, 0);
- if (endp == buf)
- return -EINVAL;
+ rc = kstrtoul(buf, 0, &count);
+ if (rc < 0)
+ return rc;
if (count) {
- int i;
-
- if (count > 1<<30) {
+ mask = count - 1;
+ /* mask = roundup_pow_of_two(count) - 1;
+ * without overflows...
+ */
+ while ((mask | (mask >> 1)) != mask)
+ mask |= (mask >> 1);
+ /* On 64 bit arches, must check mask fits in table->mask (u32),
+ * and on 32bit arches, must check RPS_DEV_FLOW_TABLE_SIZE(mask + 1)
+ * doesnt overflow.
+ */
+#if BITS_PER_LONG > 32
+ if (mask > (unsigned long)(u32)mask)
+ return -EINVAL;
+#else
+ if (mask > (ULONG_MAX - RPS_DEV_FLOW_TABLE_SIZE(1))
+ / sizeof(struct rps_dev_flow)) {
/* Enforce a limit to prevent overflow */
return -EINVAL;
}
- count = roundup_pow_of_two(count);
- table = vmalloc(RPS_DEV_FLOW_TABLE_SIZE(count));
+#endif
+ table = vmalloc(RPS_DEV_FLOW_TABLE_SIZE(mask + 1));
if (!table)
return -ENOMEM;
- table->mask = count - 1;
- for (i = 0; i < count; i++)
- table->flows[i].cpu = RPS_NO_CPU;
+ table->mask = mask;
+ for (count = 0; count <= mask; count++)
+ table->flows[count].cpu = RPS_NO_CPU;
} else
table = NULL;
@@ -780,7 +797,7 @@ net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
#endif
}
-#ifdef CONFIG_XPS
+#ifdef CONFIG_SYSFS
/*
* netdev_queue sysfs structures and functions.
*/
@@ -826,6 +843,133 @@ static const struct sysfs_ops netdev_queue_sysfs_ops = {
.store = netdev_queue_attr_store,
};
+static ssize_t show_trans_timeout(struct netdev_queue *queue,
+ struct netdev_queue_attribute *attribute,
+ char *buf)
+{
+ unsigned long trans_timeout;
+
+ spin_lock_irq(&queue->_xmit_lock);
+ trans_timeout = queue->trans_timeout;
+ spin_unlock_irq(&queue->_xmit_lock);
+
+ return sprintf(buf, "%lu", trans_timeout);
+}
+
+static struct netdev_queue_attribute queue_trans_timeout =
+ __ATTR(tx_timeout, S_IRUGO, show_trans_timeout, NULL);
+
+#ifdef CONFIG_BQL
+/*
+ * Byte queue limits sysfs structures and functions.
+ */
+static ssize_t bql_show(char *buf, unsigned int value)
+{
+ return sprintf(buf, "%u\n", value);
+}
+
+static ssize_t bql_set(const char *buf, const size_t count,
+ unsigned int *pvalue)
+{
+ unsigned int value;
+ int err;
+
+ if (!strcmp(buf, "max") || !strcmp(buf, "max\n"))
+ value = DQL_MAX_LIMIT;
+ else {
+ err = kstrtouint(buf, 10, &value);
+ if (err < 0)
+ return err;
+ if (value > DQL_MAX_LIMIT)
+ return -EINVAL;
+ }
+
+ *pvalue = value;
+
+ return count;
+}
+
+static ssize_t bql_show_hold_time(struct netdev_queue *queue,
+ struct netdev_queue_attribute *attr,
+ char *buf)
+{
+ struct dql *dql = &queue->dql;
+
+ return sprintf(buf, "%u\n", jiffies_to_msecs(dql->slack_hold_time));
+}
+
+static ssize_t bql_set_hold_time(struct netdev_queue *queue,
+ struct netdev_queue_attribute *attribute,
+ const char *buf, size_t len)
+{
+ struct dql *dql = &queue->dql;
+ unsigned value;
+ int err;
+
+ err = kstrtouint(buf, 10, &value);
+ if (err < 0)
+ return err;
+
+ dql->slack_hold_time = msecs_to_jiffies(value);
+
+ return len;
+}
+
+static struct netdev_queue_attribute bql_hold_time_attribute =
+ __ATTR(hold_time, S_IRUGO | S_IWUSR, bql_show_hold_time,
+ bql_set_hold_time);
+
+static ssize_t bql_show_inflight(struct netdev_queue *queue,
+ struct netdev_queue_attribute *attr,
+ char *buf)
+{
+ struct dql *dql = &queue->dql;
+
+ return sprintf(buf, "%u\n", dql->num_queued - dql->num_completed);
+}
+
+static struct netdev_queue_attribute bql_inflight_attribute =
+ __ATTR(inflight, S_IRUGO | S_IWUSR, bql_show_inflight, NULL);
+
+#define BQL_ATTR(NAME, FIELD) \
+static ssize_t bql_show_ ## NAME(struct netdev_queue *queue, \
+ struct netdev_queue_attribute *attr, \
+ char *buf) \
+{ \
+ return bql_show(buf, queue->dql.FIELD); \
+} \
+ \
+static ssize_t bql_set_ ## NAME(struct netdev_queue *queue, \
+ struct netdev_queue_attribute *attr, \
+ const char *buf, size_t len) \
+{ \
+ return bql_set(buf, len, &queue->dql.FIELD); \
+} \
+ \
+static struct netdev_queue_attribute bql_ ## NAME ## _attribute = \
+ __ATTR(NAME, S_IRUGO | S_IWUSR, bql_show_ ## NAME, \
+ bql_set_ ## NAME);
+
+BQL_ATTR(limit, limit)
+BQL_ATTR(limit_max, max_limit)
+BQL_ATTR(limit_min, min_limit)
+
+static struct attribute *dql_attrs[] = {
+ &bql_limit_attribute.attr,
+ &bql_limit_max_attribute.attr,
+ &bql_limit_min_attribute.attr,
+ &bql_hold_time_attribute.attr,
+ &bql_inflight_attribute.attr,
+ NULL
+};
+
+static struct attribute_group dql_group = {
+ .name = "byte_queue_limits",
+ .attrs = dql_attrs,
+};
+#endif /* CONFIG_BQL */
+
+#ifdef CONFIG_XPS
static inline unsigned int get_netdev_queue_index(struct netdev_queue *queue)
{
struct net_device *dev = queue->dev;
@@ -890,6 +1034,52 @@ static DEFINE_MUTEX(xps_map_mutex);
#define xmap_dereference(P) \
rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex))
+static void xps_queue_release(struct netdev_queue *queue)
+{
+ struct net_device *dev = queue->dev;
+ struct xps_dev_maps *dev_maps;
+ struct xps_map *map;
+ unsigned long index;
+ int i, pos, nonempty = 0;
+
+ index = get_netdev_queue_index(queue);
+
+ mutex_lock(&xps_map_mutex);
+ dev_maps = xmap_dereference(dev->xps_maps);
+
+ if (dev_maps) {
+ for_each_possible_cpu(i) {
+ map = xmap_dereference(dev_maps->cpu_map[i]);
+ if (!map)
+ continue;
+
+ for (pos = 0; pos < map->len; pos++)
+ if (map->queues[pos] == index)
+ break;
+
+ if (pos < map->len) {
+ if (map->len > 1)
+ map->queues[pos] =
+ map->queues[--map->len];
+ else {
+ RCU_INIT_POINTER(dev_maps->cpu_map[i],
+ NULL);
+ kfree_rcu(map, rcu);
+ map = NULL;
+ }
+ }
+ if (map)
+ nonempty = 1;
+ }
+
+ if (!nonempty) {
+ RCU_INIT_POINTER(dev->xps_maps, NULL);
+ kfree_rcu(dev_maps, rcu);
+ }
+ }
+ mutex_unlock(&xps_map_mutex);
+}
+
static ssize_t store_xps_map(struct netdev_queue *queue,
struct netdev_queue_attribute *attribute,
const char *buf, size_t len)
@@ -901,7 +1091,7 @@ static ssize_t store_xps_map(struct netdev_queue *queue,
struct xps_map *map, *new_map;
struct xps_dev_maps *dev_maps, *new_dev_maps;
int nonempty = 0;
- int numa_node = -2;
+ int numa_node_id = -2;
if (!capable(CAP_NET_ADMIN))
return -EPERM;
@@ -944,10 +1134,10 @@ static ssize_t store_xps_map(struct netdev_queue *queue,
need_set = cpumask_test_cpu(cpu, mask) && cpu_online(cpu);
#ifdef CONFIG_NUMA
if (need_set) {
- if (numa_node == -2)
- numa_node = cpu_to_node(cpu);
- else if (numa_node != cpu_to_node(cpu))
- numa_node = -1;
+ if (numa_node_id == -2)
+ numa_node_id = cpu_to_node(cpu);
+ else if (numa_node_id != cpu_to_node(cpu))
+ numa_node_id = -1;
}
#endif
if (need_set && pos >= map_len) {
@@ -997,7 +1187,7 @@ static ssize_t store_xps_map(struct netdev_queue *queue,
if (dev_maps)
kfree_rcu(dev_maps, rcu);
- netdev_queue_numa_node_write(queue, (numa_node >= 0) ? numa_node :
+ netdev_queue_numa_node_write(queue, (numa_node_id >= 0) ? numa_node_id :
NUMA_NO_NODE);
mutex_unlock(&xps_map_mutex);
@@ -1020,58 +1210,23 @@ error:
static struct netdev_queue_attribute xps_cpus_attribute =
__ATTR(xps_cpus, S_IRUGO | S_IWUSR, show_xps_map, store_xps_map);
+#endif /* CONFIG_XPS */
static struct attribute *netdev_queue_default_attrs[] = {
+ &queue_trans_timeout.attr,
+#ifdef CONFIG_XPS
&xps_cpus_attribute.attr,
+#endif
NULL
};
static void netdev_queue_release(struct kobject *kobj)
{
struct netdev_queue *queue = to_netdev_queue(kobj);
- struct net_device *dev = queue->dev;
- struct xps_dev_maps *dev_maps;
- struct xps_map *map;
- unsigned long index;
- int i, pos, nonempty = 0;
-
- index = get_netdev_queue_index(queue);
-
- mutex_lock(&xps_map_mutex);
- dev_maps = xmap_dereference(dev->xps_maps);
-
- if (dev_maps) {
- for_each_possible_cpu(i) {
- map = xmap_dereference(dev_maps->cpu_map[i]);
- if (!map)
- continue;
- for (pos = 0; pos < map->len; pos++)
- if (map->queues[pos] == index)
- break;
-
- if (pos < map->len) {
- if (map->len > 1)
- map->queues[pos] =
- map->queues[--map->len];
- else {
- RCU_INIT_POINTER(dev_maps->cpu_map[i],
- NULL);
- kfree_rcu(map, rcu);
- map = NULL;
- }
- }
- if (map)
- nonempty = 1;
- }
-
- if (!nonempty) {
- RCU_INIT_POINTER(dev->xps_maps, NULL);
- kfree_rcu(dev_maps, rcu);
- }
- }
-
- mutex_unlock(&xps_map_mutex);
+#ifdef CONFIG_XPS
+ xps_queue_release(queue);
+#endif
memset(kobj, 0, sizeof(*kobj));
dev_put(queue->dev);
@@ -1092,22 +1247,29 @@ static int netdev_queue_add_kobject(struct net_device *net, int index)
kobj->kset = net->queues_kset;
error = kobject_init_and_add(kobj, &netdev_queue_ktype, NULL,
"tx-%u", index);
- if (error) {
- kobject_put(kobj);
- return error;
- }
+ if (error)
+ goto exit;
+
+#ifdef CONFIG_BQL
+ error = sysfs_create_group(kobj, &dql_group);
+ if (error)
+ goto exit;
+#endif
kobject_uevent(kobj, KOBJ_ADD);
dev_hold(queue->dev);
+ return 0;
+exit:
+ kobject_put(kobj);
return error;
}
-#endif /* CONFIG_XPS */
+#endif /* CONFIG_SYSFS */
int
netdev_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
{
-#ifdef CONFIG_XPS
+#ifdef CONFIG_SYSFS
int i;
int error = 0;
@@ -1119,20 +1281,26 @@ netdev_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
}
}
- while (--i >= new_num)
- kobject_put(&net->_tx[i].kobj);
+ while (--i >= new_num) {
+ struct netdev_queue *queue = net->_tx + i;
+
+#ifdef CONFIG_BQL
+ sysfs_remove_group(&queue->kobj, &dql_group);
+#endif
+ kobject_put(&queue->kobj);
+ }
return error;
#else
return 0;
-#endif
+#endif /* CONFIG_SYSFS */
}
static int register_queue_kobjects(struct net_device *net)
{
int error = 0, txq = 0, rxq = 0, real_rx = 0, real_tx = 0;
-#if defined(CONFIG_RPS) || defined(CONFIG_XPS)
+#ifdef CONFIG_SYSFS
net->queues_kset = kset_create_and_add("queues",
NULL, &net->dev.kobj);
if (!net->queues_kset)
@@ -1173,7 +1341,7 @@ static void remove_queue_kobjects(struct net_device *net)
net_rx_queue_update_kobjects(net, real_rx, 0);
netdev_queue_update_kobjects(net, real_tx, 0);
-#if defined(CONFIG_RPS) || defined(CONFIG_XPS)
+#ifdef CONFIG_SYSFS
kset_unregister(net->queues_kset);
#endif
}
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index cf64c1ffa4cd..0d38808a2305 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -76,7 +76,7 @@ static void queue_process(struct work_struct *work)
local_irq_save(flags);
__netif_tx_lock(txq, smp_processor_id());
- if (netif_tx_queue_frozen_or_stopped(txq) ||
+ if (netif_xmit_frozen_or_stopped(txq) ||
ops->ndo_start_xmit(skb, dev) != NETDEV_TX_OK) {
skb_queue_head(&npinfo->txq, skb);
__netif_tx_unlock(txq);
@@ -317,7 +317,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
for (tries = jiffies_to_usecs(1)/USEC_PER_POLL;
tries > 0; --tries) {
if (__netif_tx_trylock(txq)) {
- if (!netif_tx_queue_stopped(txq)) {
+ if (!netif_xmit_stopped(txq)) {
status = ops->ndo_start_xmit(skb, dev);
if (status == NETDEV_TX_OK)
txq_trans_update(txq);
@@ -422,6 +422,7 @@ static void arp_reply(struct sk_buff *skb)
struct sk_buff *send_skb;
struct netpoll *np, *tmp;
unsigned long flags;
+ int hlen, tlen;
int hits = 0;
if (list_empty(&npinfo->rx_np))
@@ -479,8 +480,9 @@ static void arp_reply(struct sk_buff *skb)
if (tip != np->local_ip)
continue;
- send_skb = find_skb(np, size + LL_ALLOCATED_SPACE(np->dev),
- LL_RESERVED_SPACE(np->dev));
+ hlen = LL_RESERVED_SPACE(np->dev);
+ tlen = np->dev->needed_tailroom;
+ send_skb = find_skb(np, size + hlen + tlen, hlen);
if (!send_skb)
continue;
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
new file mode 100644
index 000000000000..3a9fd4826b75
--- /dev/null
+++ b/net/core/netprio_cgroup.c
@@ -0,0 +1,344 @@
+/*
+ * net/core/netprio_cgroup.c Priority Control Group
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Neil Horman <nhorman@tuxdriver.com>
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <linux/cgroup.h>
+#include <linux/rcupdate.h>
+#include <linux/atomic.h>
+#include <net/rtnetlink.h>
+#include <net/pkt_cls.h>
+#include <net/sock.h>
+#include <net/netprio_cgroup.h>
+
+static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss,
+ struct cgroup *cgrp);
+static void cgrp_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp);
+static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp);
+
+struct cgroup_subsys net_prio_subsys = {
+ .name = "net_prio",
+ .create = cgrp_create,
+ .destroy = cgrp_destroy,
+ .populate = cgrp_populate,
+#ifdef CONFIG_NETPRIO_CGROUP
+ .subsys_id = net_prio_subsys_id,
+#endif
+ .module = THIS_MODULE
+};
+
+#define PRIOIDX_SZ 128
+
+static unsigned long prioidx_map[PRIOIDX_SZ];
+static DEFINE_SPINLOCK(prioidx_map_lock);
+static atomic_t max_prioidx = ATOMIC_INIT(0);
+
+static inline struct cgroup_netprio_state *cgrp_netprio_state(struct cgroup *cgrp)
+{
+ return container_of(cgroup_subsys_state(cgrp, net_prio_subsys_id),
+ struct cgroup_netprio_state, css);
+}
+
+static int get_prioidx(u32 *prio)
+{
+ unsigned long flags;
+ u32 prioidx;
+
+ spin_lock_irqsave(&prioidx_map_lock, flags);
+ prioidx = find_first_zero_bit(prioidx_map, sizeof(unsigned long) * PRIOIDX_SZ);
+ set_bit(prioidx, prioidx_map);
+ spin_unlock_irqrestore(&prioidx_map_lock, flags);
+ if (prioidx == sizeof(unsigned long) * PRIOIDX_SZ)
+ return -ENOSPC;
+
+ atomic_set(&max_prioidx, prioidx);
+ *prio = prioidx;
+ return 0;
+}
+
+static void put_prioidx(u32 idx)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&prioidx_map_lock, flags);
+ clear_bit(idx, prioidx_map);
+ spin_unlock_irqrestore(&prioidx_map_lock, flags);
+}
+
+static void extend_netdev_table(struct net_device *dev, u32 new_len)
+{
+ size_t new_size = sizeof(struct netprio_map) +
+ ((sizeof(u32) * new_len));
+ struct netprio_map *new_priomap = kzalloc(new_size, GFP_KERNEL);
+ struct netprio_map *old_priomap;
+ int i;
+
+ old_priomap = rtnl_dereference(dev->priomap);
+
+ if (!new_priomap) {
+ printk(KERN_WARNING "Unable to alloc new priomap!\n");
+ return;
+ }
+
+ for (i = 0;
+ old_priomap && (i < old_priomap->priomap_len);
+ i++)
+ new_priomap->priomap[i] = old_priomap->priomap[i];
+
+ new_priomap->priomap_len = new_len;
+
+ rcu_assign_pointer(dev->priomap, new_priomap);
+ if (old_priomap)
+ kfree_rcu(old_priomap, rcu);
+}
+
+static void update_netdev_tables(void)
+{
+ struct net_device *dev;
+ u32 max_len = atomic_read(&max_prioidx);
+ struct netprio_map *map;
+
+ rtnl_lock();
+ for_each_netdev(&init_net, dev) {
+ map = rtnl_dereference(dev->priomap);
+ if ((!map) ||
+ (map->priomap_len < max_len))
+ extend_netdev_table(dev, max_len);
+ }
+ rtnl_unlock();
+}
+
+static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss,
+ struct cgroup *cgrp)
+{
+ struct cgroup_netprio_state *cs;
+ int ret;
+
+ cs = kzalloc(sizeof(*cs), GFP_KERNEL);
+ if (!cs)
+ return ERR_PTR(-ENOMEM);
+
+ if (cgrp->parent && cgrp_netprio_state(cgrp->parent)->prioidx) {
+ kfree(cs);
+ return ERR_PTR(-EINVAL);
+ }
+
+ ret = get_prioidx(&cs->prioidx);
+ if (ret != 0) {
+ printk(KERN_WARNING "No space in priority index array\n");
+ kfree(cs);
+ return ERR_PTR(ret);
+ }
+
+ return &cs->css;
+}
+
+static void cgrp_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
+{
+ struct cgroup_netprio_state *cs;
+ struct net_device *dev;
+ struct netprio_map *map;
+
+ cs = cgrp_netprio_state(cgrp);
+ rtnl_lock();
+ for_each_netdev(&init_net, dev) {
+ map = rtnl_dereference(dev->priomap);
+ if (map)
+ map->priomap[cs->prioidx] = 0;
+ }
+ rtnl_unlock();
+ put_prioidx(cs->prioidx);
+ kfree(cs);
+}
+
+static u64 read_prioidx(struct cgroup *cgrp, struct cftype *cft)
+{
+ return (u64)cgrp_netprio_state(cgrp)->prioidx;
+}
+
+static int read_priomap(struct cgroup *cont, struct cftype *cft,
+ struct cgroup_map_cb *cb)
+{
+ struct net_device *dev;
+ u32 prioidx = cgrp_netprio_state(cont)->prioidx;
+ u32 priority;
+ struct netprio_map *map;
+
+ rcu_read_lock();
+ for_each_netdev_rcu(&init_net, dev) {
+ map = rcu_dereference(dev->priomap);
+ priority = map ? map->priomap[prioidx] : 0;
+ cb->fill(cb, dev->name, priority);
+ }
+ rcu_read_unlock();
+ return 0;
+}
+
+static int write_priomap(struct cgroup *cgrp, struct cftype *cft,
+ const char *buffer)
+{
+ char *devname = kstrdup(buffer, GFP_KERNEL);
+ int ret = -EINVAL;
+ u32 prioidx = cgrp_netprio_state(cgrp)->prioidx;
+ unsigned long priority;
+ char *priostr;
+ struct net_device *dev;
+ struct netprio_map *map;
+
+ if (!devname)
+ return -ENOMEM;
+
+ /*
+ * Minimally sized valid priomap string
+ */
+ if (strlen(devname) < 3)
+ goto out_free_devname;
+
+ priostr = strstr(devname, " ");
+ if (!priostr)
+ goto out_free_devname;
+
+ /*
+ *Separate the devname from the associated priority
+ *and advance the priostr poitner to the priority value
+ */
+ *priostr = '\0';
+ priostr++;
+
+ /*
+ * If the priostr points to NULL, we're at the end of the passed
+ * in string, and its not a valid write
+ */
+ if (*priostr == '\0')
+ goto out_free_devname;
+
+ ret = kstrtoul(priostr, 10, &priority);
+ if (ret < 0)
+ goto out_free_devname;
+
+ ret = -ENODEV;
+
+ dev = dev_get_by_name(&init_net, devname);
+ if (!dev)
+ goto out_free_devname;
+
+ update_netdev_tables();
+ ret = 0;
+ rcu_read_lock();
+ map = rcu_dereference(dev->priomap);
+ if (map)
+ map->priomap[prioidx] = priority;
+ rcu_read_unlock();
+ dev_put(dev);
+
+out_free_devname:
+ kfree(devname);
+ return ret;
+}
+
+static struct cftype ss_files[] = {
+ {
+ .name = "prioidx",
+ .read_u64 = read_prioidx,
+ },
+ {
+ .name = "ifpriomap",
+ .read_map = read_priomap,
+ .write_string = write_priomap,
+ },
+};
+
+static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
+{
+ return cgroup_add_files(cgrp, ss, ss_files, ARRAY_SIZE(ss_files));
+}
+
+static int netprio_device_event(struct notifier_block *unused,
+ unsigned long event, void *ptr)
+{
+ struct net_device *dev = ptr;
+ struct netprio_map *old;
+ u32 max_len = atomic_read(&max_prioidx);
+
+ /*
+ * Note this is called with rtnl_lock held so we have update side
+ * protection on our rcu assignments
+ */
+
+ switch (event) {
+
+ case NETDEV_REGISTER:
+ if (max_len)
+ extend_netdev_table(dev, max_len);
+ break;
+ case NETDEV_UNREGISTER:
+ old = rtnl_dereference(dev->priomap);
+ RCU_INIT_POINTER(dev->priomap, NULL);
+ if (old)
+ kfree_rcu(old, rcu);
+ break;
+ }
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block netprio_device_notifier = {
+ .notifier_call = netprio_device_event
+};
+
+static int __init init_cgroup_netprio(void)
+{
+ int ret;
+
+ ret = cgroup_load_subsys(&net_prio_subsys);
+ if (ret)
+ goto out;
+#ifndef CONFIG_NETPRIO_CGROUP
+ smp_wmb();
+ net_prio_subsys_id = net_prio_subsys.subsys_id;
+#endif
+
+ register_netdevice_notifier(&netprio_device_notifier);
+
+out:
+ return ret;
+}
+
+static void __exit exit_cgroup_netprio(void)
+{
+ struct netprio_map *old;
+ struct net_device *dev;
+
+ unregister_netdevice_notifier(&netprio_device_notifier);
+
+ cgroup_unload_subsys(&net_prio_subsys);
+
+#ifndef CONFIG_NETPRIO_CGROUP
+ net_prio_subsys_id = -1;
+ synchronize_rcu();
+#endif
+
+ rtnl_lock();
+ for_each_netdev(&init_net, dev) {
+ old = rtnl_dereference(dev->priomap);
+ RCU_INIT_POINTER(dev->priomap, NULL);
+ if (old)
+ kfree_rcu(old, rcu);
+ }
+ rtnl_unlock();
+}
+
+module_init(init_cgroup_netprio);
+module_exit(exit_cgroup_netprio);
+MODULE_LICENSE("GPL v2");
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 0001c243b35c..449fe0f068f8 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -1304,7 +1304,7 @@ static ssize_t pktgen_if_write(struct file *file,
scan_ip6(buf, pkt_dev->in6_daddr.s6_addr);
snprintf(buf, sizeof(buf), "%pI6c", &pkt_dev->in6_daddr);
- ipv6_addr_copy(&pkt_dev->cur_in6_daddr, &pkt_dev->in6_daddr);
+ pkt_dev->cur_in6_daddr = pkt_dev->in6_daddr;
if (debug)
printk(KERN_DEBUG "pktgen: dst6 set to: %s\n", buf);
@@ -1327,8 +1327,7 @@ static ssize_t pktgen_if_write(struct file *file,
scan_ip6(buf, pkt_dev->min_in6_daddr.s6_addr);
snprintf(buf, sizeof(buf), "%pI6c", &pkt_dev->min_in6_daddr);
- ipv6_addr_copy(&pkt_dev->cur_in6_daddr,
- &pkt_dev->min_in6_daddr);
+ pkt_dev->cur_in6_daddr = pkt_dev->min_in6_daddr;
if (debug)
printk(KERN_DEBUG "pktgen: dst6_min set to: %s\n", buf);
@@ -1371,7 +1370,7 @@ static ssize_t pktgen_if_write(struct file *file,
scan_ip6(buf, pkt_dev->in6_saddr.s6_addr);
snprintf(buf, sizeof(buf), "%pI6c", &pkt_dev->in6_saddr);
- ipv6_addr_copy(&pkt_dev->cur_in6_saddr, &pkt_dev->in6_saddr);
+ pkt_dev->cur_in6_saddr = pkt_dev->in6_saddr;
if (debug)
printk(KERN_DEBUG "pktgen: src6 set to: %s\n", buf);
@@ -2079,9 +2078,7 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev)
ifp = ifp->if_next) {
if (ifp->scope == IFA_LINK &&
!(ifp->flags & IFA_F_TENTATIVE)) {
- ipv6_addr_copy(&pkt_dev->
- cur_in6_saddr,
- &ifp->addr);
+ pkt_dev->cur_in6_saddr = ifp->addr;
err = 0;
break;
}
@@ -2958,8 +2955,8 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
iph->payload_len = htons(sizeof(struct udphdr) + datalen);
iph->nexthdr = IPPROTO_UDP;
- ipv6_addr_copy(&iph->daddr, &pkt_dev->cur_in6_daddr);
- ipv6_addr_copy(&iph->saddr, &pkt_dev->cur_in6_saddr);
+ iph->daddr = pkt_dev->cur_in6_daddr;
+ iph->saddr = pkt_dev->cur_in6_saddr;
skb->mac_header = (skb->network_header - ETH_HLEN -
pkt_dev->pkt_overhead);
@@ -3345,7 +3342,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
__netif_tx_lock_bh(txq);
- if (unlikely(netif_tx_queue_frozen_or_stopped(txq))) {
+ if (unlikely(netif_xmit_frozen_or_stopped(txq))) {
ret = NETDEV_TX_BUSY;
pkt_dev->last_ok = 0;
goto unlock;
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index 182236b2510a..9b570a6a33c5 100644
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -26,10 +26,11 @@
* but then some measure against one socket starving all other sockets
* would be needed.
*
- * It was 128 by default. Experiments with real servers show, that
+ * The minimum value of it is 128. Experiments with real servers show that
* it is absolutely not enough even at 100conn/sec. 256 cures most
- * of problems. This value is adjusted to 128 for very small machines
- * (<=32Mb of memory) and to 1024 on normal or better ones (>=256Mb).
+ * of problems.
+ * This value is adjusted to 128 for low memory machines,
+ * and it will increase in proportion to the memory of machine.
* Note : Dont forget somaxconn that may limit backlog too.
*/
int sysctl_max_syn_backlog = 256;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 9083e82bdae5..dbf2ddafd52d 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -273,6 +273,17 @@ EXPORT_SYMBOL_GPL(rtnl_unregister_all);
static LIST_HEAD(link_ops);
+static const struct rtnl_link_ops *rtnl_link_ops_get(const char *kind)
+{
+ const struct rtnl_link_ops *ops;
+
+ list_for_each_entry(ops, &link_ops, list) {
+ if (!strcmp(ops->kind, kind))
+ return ops;
+ }
+ return NULL;
+}
+
/**
* __rtnl_link_register - Register rtnl_link_ops with rtnetlink.
* @ops: struct rtnl_link_ops * to register
@@ -285,6 +296,9 @@ static LIST_HEAD(link_ops);
*/
int __rtnl_link_register(struct rtnl_link_ops *ops)
{
+ if (rtnl_link_ops_get(ops->kind))
+ return -EEXIST;
+
if (!ops->dellink)
ops->dellink = unregister_netdevice_queue;
@@ -351,17 +365,6 @@ void rtnl_link_unregister(struct rtnl_link_ops *ops)
}
EXPORT_SYMBOL_GPL(rtnl_link_unregister);
-static const struct rtnl_link_ops *rtnl_link_ops_get(const char *kind)
-{
- const struct rtnl_link_ops *ops;
-
- list_for_each_entry(ops, &link_ops, list) {
- if (!strcmp(ops->kind, kind))
- return ops;
- }
- return NULL;
-}
-
static size_t rtnl_link_get_size(const struct net_device *dev)
{
const struct rtnl_link_ops *ops = dev->rtnl_link_ops;
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index 025233de25f9..6fd44606fdd1 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -19,6 +19,7 @@ static int __init net_secret_init(void)
}
late_initcall(net_secret_init);
+#ifdef CONFIG_INET
static u32 seq_scale(u32 seq)
{
/*
@@ -33,8 +34,9 @@ static u32 seq_scale(u32 seq)
*/
return seq + (ktime_to_ns(ktime_get_real()) >> 6);
}
+#endif
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
__u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr,
__be16 sport, __be16 dport)
{
@@ -132,7 +134,7 @@ u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport)
EXPORT_SYMBOL_GPL(secure_ipv4_port_ephemeral);
#endif
-#if defined(CONFIG_IP_DCCP) || defined(CONFIG_IP_DCCP_MODULE)
+#if IS_ENABLED(CONFIG_IP_DCCP)
u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr,
__be16 sport, __be16 dport)
{
@@ -154,7 +156,7 @@ u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr,
}
EXPORT_SYMBOL(secure_dccp_sequence_number);
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr,
__be16 sport, __be16 dport)
{
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 06438f926022..da0c97f2fab4 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -245,6 +245,55 @@ nodata:
EXPORT_SYMBOL(__alloc_skb);
/**
+ * build_skb - build a network buffer
+ * @data: data buffer provided by caller
+ *
+ * Allocate a new &sk_buff. Caller provides space holding head and
+ * skb_shared_info. @data must have been allocated by kmalloc()
+ * The return is the new skb buffer.
+ * On a failure the return is %NULL, and @data is not freed.
+ * Notes :
+ * Before IO, driver allocates only data buffer where NIC put incoming frame
+ * Driver should add room at head (NET_SKB_PAD) and
+ * MUST add room at tail (SKB_DATA_ALIGN(skb_shared_info))
+ * After IO, driver calls build_skb(), to allocate sk_buff and populate it
+ * before giving packet to stack.
+ * RX rings only contains data buffers, not full skbs.
+ */
+struct sk_buff *build_skb(void *data)
+{
+ struct skb_shared_info *shinfo;
+ struct sk_buff *skb;
+ unsigned int size;
+
+ skb = kmem_cache_alloc(skbuff_head_cache, GFP_ATOMIC);
+ if (!skb)
+ return NULL;
+
+ size = ksize(data) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+
+ memset(skb, 0, offsetof(struct sk_buff, tail));
+ skb->truesize = SKB_TRUESIZE(size);
+ atomic_set(&skb->users, 1);
+ skb->head = data;
+ skb->data = data;
+ skb_reset_tail_pointer(skb);
+ skb->end = skb->tail + size;
+#ifdef NET_SKBUFF_DATA_USES_OFFSET
+ skb->mac_header = ~0U;
+#endif
+
+ /* make sure we initialize shinfo sequentially */
+ shinfo = skb_shinfo(skb);
+ memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
+ atomic_set(&shinfo->dataref, 1);
+ kmemcheck_annotate_variable(shinfo->destructor_arg);
+
+ return skb;
+}
+EXPORT_SYMBOL(build_skb);
+
+/**
* __netdev_alloc_skb - allocate an skbuff for rx on a specific device
* @dev: network device to receive on
* @length: length to allocate
@@ -403,7 +452,7 @@ static void skb_release_head_state(struct sk_buff *skb)
WARN_ON(in_irq());
skb->destructor(skb);
}
-#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
nf_conntrack_put(skb->nfct);
#endif
#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
@@ -553,15 +602,14 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
new->ip_summed = old->ip_summed;
skb_copy_queue_mapping(new, old);
new->priority = old->priority;
-#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
+#if IS_ENABLED(CONFIG_IP_VS)
new->ipvs_property = old->ipvs_property;
#endif
new->protocol = old->protocol;
new->mark = old->mark;
new->skb_iif = old->skb_iif;
__nf_copy(new, old);
-#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
- defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
+#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
new->nf_trace = old->nf_trace;
#endif
#ifdef CONFIG_NET_SCHED
@@ -791,8 +839,9 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
EXPORT_SYMBOL(skb_copy);
/**
- * pskb_copy - create copy of an sk_buff with private head.
+ * __pskb_copy - create copy of an sk_buff with private head.
* @skb: buffer to copy
+ * @headroom: headroom of new skb
* @gfp_mask: allocation priority
*
* Make a copy of both an &sk_buff and part of its data, located
@@ -803,16 +852,16 @@ EXPORT_SYMBOL(skb_copy);
* The returned buffer has a reference count of 1.
*/
-struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask)
+struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask)
{
- unsigned int size = skb_end_pointer(skb) - skb->head;
+ unsigned int size = skb_headlen(skb) + headroom;
struct sk_buff *n = alloc_skb(size, gfp_mask);
if (!n)
goto out;
/* Set the data pointer */
- skb_reserve(n, skb_headroom(skb));
+ skb_reserve(n, headroom);
/* Set the tail pointer and length */
skb_put(n, skb_headlen(skb));
/* Copy the bytes */
@@ -848,7 +897,7 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask)
out:
return n;
}
-EXPORT_SYMBOL(pskb_copy);
+EXPORT_SYMBOL(__pskb_copy);
/**
* pskb_expand_head - reallocate header of &sk_buff
@@ -2230,7 +2279,7 @@ static int skb_prepare_for_shift(struct sk_buff *skb)
* @shiftlen: shift up to this many bytes
*
* Attempts to shift up to shiftlen worth of bytes, which may be less than
- * the length of the skb, from tgt to skb. Returns number bytes shifted.
+ * the length of the skb, from skb to tgt. Returns number bytes shifted.
* It's up to caller to free skb if everything was shifted.
*
* If @tgt runs out of frags, the whole operation is aborted.
@@ -2621,7 +2670,7 @@ EXPORT_SYMBOL_GPL(skb_pull_rcsum);
* a pointer to the first in a list of new skbs for the segments.
* In case of error it returns ERR_PTR(err).
*/
-struct sk_buff *skb_segment(struct sk_buff *skb, u32 features)
+struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
{
struct sk_buff *segs = NULL;
struct sk_buff *tail = NULL;
diff --git a/net/core/sock.c b/net/core/sock.c
index cbdf51c0d5ac..002939cfc069 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -111,6 +111,7 @@
#include <linux/init.h>
#include <linux/highmem.h>
#include <linux/user_namespace.h>
+#include <linux/jump_label.h>
#include <asm/uaccess.h>
#include <asm/system.h>
@@ -125,6 +126,7 @@
#include <net/xfrm.h>
#include <linux/ipsec.h>
#include <net/cls_cgroup.h>
+#include <net/netprio_cgroup.h>
#include <linux/filter.h>
@@ -134,6 +136,46 @@
#include <net/tcp.h>
#endif
+static DEFINE_MUTEX(proto_list_mutex);
+static LIST_HEAD(proto_list);
+
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+int mem_cgroup_sockets_init(struct cgroup *cgrp, struct cgroup_subsys *ss)
+{
+ struct proto *proto;
+ int ret = 0;
+
+ mutex_lock(&proto_list_mutex);
+ list_for_each_entry(proto, &proto_list, node) {
+ if (proto->init_cgroup) {
+ ret = proto->init_cgroup(cgrp, ss);
+ if (ret)
+ goto out;
+ }
+ }
+
+ mutex_unlock(&proto_list_mutex);
+ return ret;
+out:
+ list_for_each_entry_continue_reverse(proto, &proto_list, node)
+ if (proto->destroy_cgroup)
+ proto->destroy_cgroup(cgrp, ss);
+ mutex_unlock(&proto_list_mutex);
+ return ret;
+}
+
+void mem_cgroup_sockets_destroy(struct cgroup *cgrp, struct cgroup_subsys *ss)
+{
+ struct proto *proto;
+
+ mutex_lock(&proto_list_mutex);
+ list_for_each_entry_reverse(proto, &proto_list, node)
+ if (proto->destroy_cgroup)
+ proto->destroy_cgroup(cgrp, ss);
+ mutex_unlock(&proto_list_mutex);
+}
+#endif
+
/*
* Each address family might have different locking rules, so we have
* one slock key per address family:
@@ -141,6 +183,9 @@
static struct lock_class_key af_family_keys[AF_MAX];
static struct lock_class_key af_family_slock_keys[AF_MAX];
+struct jump_label_key memcg_socket_limit_enabled;
+EXPORT_SYMBOL(memcg_socket_limit_enabled);
+
/*
* Make lock validator output more readable. (we pre-construct these
* strings build-time, so that runtime initialization of socket
@@ -221,10 +266,16 @@ __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
EXPORT_SYMBOL(sysctl_optmem_max);
-#if defined(CONFIG_CGROUPS) && !defined(CONFIG_NET_CLS_CGROUP)
+#if defined(CONFIG_CGROUPS)
+#if !defined(CONFIG_NET_CLS_CGROUP)
int net_cls_subsys_id = -1;
EXPORT_SYMBOL_GPL(net_cls_subsys_id);
#endif
+#if !defined(CONFIG_NETPRIO_CGROUP)
+int net_prio_subsys_id = -1;
+EXPORT_SYMBOL_GPL(net_prio_subsys_id);
+#endif
+#endif
static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
{
@@ -269,14 +320,14 @@ static void sock_warn_obsolete_bsdism(const char *name)
}
}
-static void sock_disable_timestamp(struct sock *sk, int flag)
+#define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE))
+
+static void sock_disable_timestamp(struct sock *sk, unsigned long flags)
{
- if (sock_flag(sk, flag)) {
- sock_reset_flag(sk, flag);
- if (!sock_flag(sk, SOCK_TIMESTAMP) &&
- !sock_flag(sk, SOCK_TIMESTAMPING_RX_SOFTWARE)) {
+ if (sk->sk_flags & flags) {
+ sk->sk_flags &= ~flags;
+ if (!(sk->sk_flags & SK_FLAGS_TIMESTAMP))
net_disable_timestamp();
- }
}
}
@@ -288,11 +339,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
unsigned long flags;
struct sk_buff_head *list = &sk->sk_receive_queue;
- /* Cast sk->rcvbuf to unsigned... It's pointless, but reduces
- number of warnings when compiling with -W --ANK
- */
- if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
- (unsigned)sk->sk_rcvbuf) {
+ if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) {
atomic_inc(&sk->sk_drops);
trace_sock_rcvqueue_full(sk, skb);
return -ENOMEM;
@@ -682,7 +729,7 @@ set_rcvbuf:
SOCK_TIMESTAMPING_RX_SOFTWARE);
else
sock_disable_timestamp(sk,
- SOCK_TIMESTAMPING_RX_SOFTWARE);
+ (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE));
sock_valbool_flag(sk, SOCK_TIMESTAMPING_SOFTWARE,
val & SOF_TIMESTAMPING_SOFTWARE);
sock_valbool_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE,
@@ -1120,6 +1167,18 @@ void sock_update_classid(struct sock *sk)
sk->sk_classid = classid;
}
EXPORT_SYMBOL(sock_update_classid);
+
+void sock_update_netprioidx(struct sock *sk)
+{
+ struct cgroup_netprio_state *state;
+ if (in_interrupt())
+ return;
+ rcu_read_lock();
+ state = task_netprio_state(current);
+ sk->sk_cgrp_prioidx = state ? state->prioidx : 0;
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(sock_update_netprioidx);
#endif
/**
@@ -1147,6 +1206,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
atomic_set(&sk->sk_wmem_alloc, 1);
sock_update_classid(sk);
+ sock_update_netprioidx(sk);
}
return sk;
@@ -1167,8 +1227,7 @@ static void __sk_free(struct sock *sk)
RCU_INIT_POINTER(sk->sk_filter, NULL);
}
- sock_disable_timestamp(sk, SOCK_TIMESTAMP);
- sock_disable_timestamp(sk, SOCK_TIMESTAMPING_RX_SOFTWARE);
+ sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP);
if (atomic_read(&sk->sk_omem_alloc))
printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n",
@@ -1213,7 +1272,14 @@ void sk_release_kernel(struct sock *sk)
}
EXPORT_SYMBOL(sk_release_kernel);
-struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
+/**
+ * sk_clone_lock - clone a socket, and lock its clone
+ * @sk: the socket to clone
+ * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
+ *
+ * Caller must unlock socket even in error path (bh_unlock_sock(newsk))
+ */
+struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
{
struct sock *newsk;
@@ -1297,16 +1363,15 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
newsk->sk_wq = NULL;
if (newsk->sk_prot->sockets_allocated)
- percpu_counter_inc(newsk->sk_prot->sockets_allocated);
+ sk_sockets_allocated_inc(newsk);
- if (sock_flag(newsk, SOCK_TIMESTAMP) ||
- sock_flag(newsk, SOCK_TIMESTAMPING_RX_SOFTWARE))
+ if (newsk->sk_flags & SK_FLAGS_TIMESTAMP)
net_enable_timestamp();
}
out:
return newsk;
}
-EXPORT_SYMBOL_GPL(sk_clone);
+EXPORT_SYMBOL_GPL(sk_clone_lock);
void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
{
@@ -1686,30 +1751,34 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind)
struct proto *prot = sk->sk_prot;
int amt = sk_mem_pages(size);
long allocated;
+ int parent_status = UNDER_LIMIT;
sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
- allocated = atomic_long_add_return(amt, prot->memory_allocated);
+
+ allocated = sk_memory_allocated_add(sk, amt, &parent_status);
/* Under limit. */
- if (allocated <= prot->sysctl_mem[0]) {
- if (prot->memory_pressure && *prot->memory_pressure)
- *prot->memory_pressure = 0;
+ if (parent_status == UNDER_LIMIT &&
+ allocated <= sk_prot_mem_limits(sk, 0)) {
+ sk_leave_memory_pressure(sk);
return 1;
}
- /* Under pressure. */
- if (allocated > prot->sysctl_mem[1])
- if (prot->enter_memory_pressure)
- prot->enter_memory_pressure(sk);
+ /* Under pressure. (we or our parents) */
+ if ((parent_status > SOFT_LIMIT) ||
+ allocated > sk_prot_mem_limits(sk, 1))
+ sk_enter_memory_pressure(sk);
- /* Over hard limit. */
- if (allocated > prot->sysctl_mem[2])
+ /* Over hard limit (we or our parents) */
+ if ((parent_status == OVER_LIMIT) ||
+ (allocated > sk_prot_mem_limits(sk, 2)))
goto suppress_allocation;
/* guarantee minimum buffer size under pressure */
if (kind == SK_MEM_RECV) {
if (atomic_read(&sk->sk_rmem_alloc) < prot->sysctl_rmem[0])
return 1;
+
} else { /* SK_MEM_SEND */
if (sk->sk_type == SOCK_STREAM) {
if (sk->sk_wmem_queued < prot->sysctl_wmem[0])
@@ -1719,13 +1788,13 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind)
return 1;
}
- if (prot->memory_pressure) {
+ if (sk_has_memory_pressure(sk)) {
int alloc;
- if (!*prot->memory_pressure)
+ if (!sk_under_memory_pressure(sk))
return 1;
- alloc = percpu_counter_read_positive(prot->sockets_allocated);
- if (prot->sysctl_mem[2] > alloc *
+ alloc = sk_sockets_allocated_read_positive(sk);
+ if (sk_prot_mem_limits(sk, 2) > alloc *
sk_mem_pages(sk->sk_wmem_queued +
atomic_read(&sk->sk_rmem_alloc) +
sk->sk_forward_alloc))
@@ -1748,7 +1817,9 @@ suppress_allocation:
/* Alas. Undo changes. */
sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM;
- atomic_long_sub(amt, prot->memory_allocated);
+
+ sk_memory_allocated_sub(sk, amt, parent_status);
+
return 0;
}
EXPORT_SYMBOL(__sk_mem_schedule);
@@ -1759,15 +1830,13 @@ EXPORT_SYMBOL(__sk_mem_schedule);
*/
void __sk_mem_reclaim(struct sock *sk)
{
- struct proto *prot = sk->sk_prot;
-
- atomic_long_sub(sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT,
- prot->memory_allocated);
+ sk_memory_allocated_sub(sk,
+ sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT, 0);
sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1;
- if (prot->memory_pressure && *prot->memory_pressure &&
- (atomic_long_read(prot->memory_allocated) < prot->sysctl_mem[0]))
- *prot->memory_pressure = 0;
+ if (sk_under_memory_pressure(sk) &&
+ (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)))
+ sk_leave_memory_pressure(sk);
}
EXPORT_SYMBOL(__sk_mem_reclaim);
@@ -2138,16 +2207,15 @@ EXPORT_SYMBOL(sock_get_timestampns);
void sock_enable_timestamp(struct sock *sk, int flag)
{
if (!sock_flag(sk, flag)) {
+ unsigned long previous_flags = sk->sk_flags;
+
sock_set_flag(sk, flag);
/*
* we just set one of the two flags which require net
* time stamping, but time stamping might have been on
* already because of the other one
*/
- if (!sock_flag(sk,
- flag == SOCK_TIMESTAMP ?
- SOCK_TIMESTAMPING_RX_SOFTWARE :
- SOCK_TIMESTAMP))
+ if (!(previous_flags & SK_FLAGS_TIMESTAMP))
net_enable_timestamp();
}
}
@@ -2259,9 +2327,6 @@ void sk_common_release(struct sock *sk)
}
EXPORT_SYMBOL(sk_common_release);
-static DEFINE_RWLOCK(proto_list_lock);
-static LIST_HEAD(proto_list);
-
#ifdef CONFIG_PROC_FS
#define PROTO_INUSE_NR 64 /* should be enough for the first time */
struct prot_inuse {
@@ -2410,10 +2475,10 @@ int proto_register(struct proto *prot, int alloc_slab)
}
}
- write_lock(&proto_list_lock);
+ mutex_lock(&proto_list_mutex);
list_add(&prot->node, &proto_list);
assign_proto_idx(prot);
- write_unlock(&proto_list_lock);
+ mutex_unlock(&proto_list_mutex);
return 0;
out_free_timewait_sock_slab_name:
@@ -2436,10 +2501,10 @@ EXPORT_SYMBOL(proto_register);
void proto_unregister(struct proto *prot)
{
- write_lock(&proto_list_lock);
+ mutex_lock(&proto_list_mutex);
release_proto_idx(prot);
list_del(&prot->node);
- write_unlock(&proto_list_lock);
+ mutex_unlock(&proto_list_mutex);
if (prot->slab != NULL) {
kmem_cache_destroy(prot->slab);
@@ -2462,9 +2527,9 @@ EXPORT_SYMBOL(proto_unregister);
#ifdef CONFIG_PROC_FS
static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
- __acquires(proto_list_lock)
+ __acquires(proto_list_mutex)
{
- read_lock(&proto_list_lock);
+ mutex_lock(&proto_list_mutex);
return seq_list_start_head(&proto_list, *pos);
}
@@ -2474,25 +2539,36 @@ static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
}
static void proto_seq_stop(struct seq_file *seq, void *v)
- __releases(proto_list_lock)
+ __releases(proto_list_mutex)
{
- read_unlock(&proto_list_lock);
+ mutex_unlock(&proto_list_mutex);
}
static char proto_method_implemented(const void *method)
{
return method == NULL ? 'n' : 'y';
}
+static long sock_prot_memory_allocated(struct proto *proto)
+{
+ return proto->memory_allocated != NULL ? proto_memory_allocated(proto): -1L;
+}
+
+static char *sock_prot_memory_pressure(struct proto *proto)
+{
+ return proto->memory_pressure != NULL ?
+ proto_memory_pressure(proto) ? "yes" : "no" : "NI";
+}
static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
{
+
seq_printf(seq, "%-9s %4u %6d %6ld %-3s %6u %-3s %-10s "
"%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
proto->name,
proto->obj_size,
sock_prot_inuse_get(seq_file_net(seq), proto),
- proto->memory_allocated != NULL ? atomic_long_read(proto->memory_allocated) : -1L,
- proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI",
+ sock_prot_memory_allocated(proto),
+ sock_prot_memory_pressure(proto),
proto->max_header,
proto->slab == NULL ? "no" : "yes",
module_name(proto->owner),
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
new file mode 100644
index 000000000000..b9868e1fd62c
--- /dev/null
+++ b/net/core/sock_diag.c
@@ -0,0 +1,192 @@
+#include <linux/mutex.h>
+#include <linux/socket.h>
+#include <linux/skbuff.h>
+#include <net/netlink.h>
+#include <net/net_namespace.h>
+#include <linux/module.h>
+#include <linux/rtnetlink.h>
+#include <net/sock.h>
+
+#include <linux/inet_diag.h>
+#include <linux/sock_diag.h>
+
+static struct sock_diag_handler *sock_diag_handlers[AF_MAX];
+static int (*inet_rcv_compat)(struct sk_buff *skb, struct nlmsghdr *nlh);
+static DEFINE_MUTEX(sock_diag_table_mutex);
+
+int sock_diag_check_cookie(void *sk, __u32 *cookie)
+{
+ if ((cookie[0] != INET_DIAG_NOCOOKIE ||
+ cookie[1] != INET_DIAG_NOCOOKIE) &&
+ ((u32)(unsigned long)sk != cookie[0] ||
+ (u32)((((unsigned long)sk) >> 31) >> 1) != cookie[1]))
+ return -ESTALE;
+ else
+ return 0;
+}
+EXPORT_SYMBOL_GPL(sock_diag_check_cookie);
+
+void sock_diag_save_cookie(void *sk, __u32 *cookie)
+{
+ cookie[0] = (u32)(unsigned long)sk;
+ cookie[1] = (u32)(((unsigned long)sk >> 31) >> 1);
+}
+EXPORT_SYMBOL_GPL(sock_diag_save_cookie);
+
+int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attrtype)
+{
+ __u32 *mem;
+
+ mem = RTA_DATA(__RTA_PUT(skb, attrtype, SK_MEMINFO_VARS * sizeof(__u32)));
+
+ mem[SK_MEMINFO_RMEM_ALLOC] = sk_rmem_alloc_get(sk);
+ mem[SK_MEMINFO_RCVBUF] = sk->sk_rcvbuf;
+ mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk);
+ mem[SK_MEMINFO_SNDBUF] = sk->sk_sndbuf;
+ mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc;
+ mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued;
+ mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc);
+
+ return 0;
+
+rtattr_failure:
+ return -EMSGSIZE;
+}
+EXPORT_SYMBOL_GPL(sock_diag_put_meminfo);
+
+void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh))
+{
+ mutex_lock(&sock_diag_table_mutex);
+ inet_rcv_compat = fn;
+ mutex_unlock(&sock_diag_table_mutex);
+}
+EXPORT_SYMBOL_GPL(sock_diag_register_inet_compat);
+
+void sock_diag_unregister_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh))
+{
+ mutex_lock(&sock_diag_table_mutex);
+ inet_rcv_compat = NULL;
+ mutex_unlock(&sock_diag_table_mutex);
+}
+EXPORT_SYMBOL_GPL(sock_diag_unregister_inet_compat);
+
+int sock_diag_register(struct sock_diag_handler *hndl)
+{
+ int err = 0;
+
+ if (hndl->family >= AF_MAX)
+ return -EINVAL;
+
+ mutex_lock(&sock_diag_table_mutex);
+ if (sock_diag_handlers[hndl->family])
+ err = -EBUSY;
+ else
+ sock_diag_handlers[hndl->family] = hndl;
+ mutex_unlock(&sock_diag_table_mutex);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(sock_diag_register);
+
+void sock_diag_unregister(struct sock_diag_handler *hnld)
+{
+ int family = hnld->family;
+
+ if (family >= AF_MAX)
+ return;
+
+ mutex_lock(&sock_diag_table_mutex);
+ BUG_ON(sock_diag_handlers[family] != hnld);
+ sock_diag_handlers[family] = NULL;
+ mutex_unlock(&sock_diag_table_mutex);
+}
+EXPORT_SYMBOL_GPL(sock_diag_unregister);
+
+static inline struct sock_diag_handler *sock_diag_lock_handler(int family)
+{
+ if (sock_diag_handlers[family] == NULL)
+ request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
+ NETLINK_SOCK_DIAG, family);
+
+ mutex_lock(&sock_diag_table_mutex);
+ return sock_diag_handlers[family];
+}
+
+static inline void sock_diag_unlock_handler(struct sock_diag_handler *h)
+{
+ mutex_unlock(&sock_diag_table_mutex);
+}
+
+static int __sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+{
+ int err;
+ struct sock_diag_req *req = NLMSG_DATA(nlh);
+ struct sock_diag_handler *hndl;
+
+ if (nlmsg_len(nlh) < sizeof(*req))
+ return -EINVAL;
+
+ hndl = sock_diag_lock_handler(req->sdiag_family);
+ if (hndl == NULL)
+ err = -ENOENT;
+ else
+ err = hndl->dump(skb, nlh);
+ sock_diag_unlock_handler(hndl);
+
+ return err;
+}
+
+static int sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+{
+ int ret;
+
+ switch (nlh->nlmsg_type) {
+ case TCPDIAG_GETSOCK:
+ case DCCPDIAG_GETSOCK:
+ if (inet_rcv_compat == NULL)
+ request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
+ NETLINK_SOCK_DIAG, AF_INET);
+
+ mutex_lock(&sock_diag_table_mutex);
+ if (inet_rcv_compat != NULL)
+ ret = inet_rcv_compat(skb, nlh);
+ else
+ ret = -EOPNOTSUPP;
+ mutex_unlock(&sock_diag_table_mutex);
+
+ return ret;
+ case SOCK_DIAG_BY_FAMILY:
+ return __sock_diag_rcv_msg(skb, nlh);
+ default:
+ return -EINVAL;
+ }
+}
+
+static DEFINE_MUTEX(sock_diag_mutex);
+
+static void sock_diag_rcv(struct sk_buff *skb)
+{
+ mutex_lock(&sock_diag_mutex);
+ netlink_rcv_skb(skb, &sock_diag_rcv_msg);
+ mutex_unlock(&sock_diag_mutex);
+}
+
+struct sock *sock_diag_nlsk;
+EXPORT_SYMBOL_GPL(sock_diag_nlsk);
+
+static int __init sock_diag_init(void)
+{
+ sock_diag_nlsk = netlink_kernel_create(&init_net, NETLINK_SOCK_DIAG, 0,
+ sock_diag_rcv, NULL, THIS_MODULE);
+ return sock_diag_nlsk == NULL ? -ENOMEM : 0;
+}
+
+static void __exit sock_diag_exit(void)
+{
+ netlink_kernel_release(sock_diag_nlsk);
+}
+
+module_init(sock_diag_init);
+module_exit(sock_diag_exit);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_SOCK_DIAG);
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 77a65f031488..d05559d4d9cd 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -68,8 +68,13 @@ static int rps_sock_flow_sysctl(ctl_table *table, int write,
if (sock_table != orig_sock_table) {
rcu_assign_pointer(rps_sock_flow_table, sock_table);
- synchronize_rcu();
- vfree(orig_sock_table);
+ if (sock_table)
+ jump_label_inc(&rps_needed);
+ if (orig_sock_table) {
+ jump_label_dec(&rps_needed);
+ synchronize_rcu();
+ vfree(orig_sock_table);
+ }
}
}
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index 67164bb6ae4d..f053198e730c 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -29,7 +29,7 @@
#ifdef CONFIG_IP_DCCP_CCID2_DEBUG
-static int ccid2_debug;
+static bool ccid2_debug;
#define ccid2_pr_debug(format, a...) DCCP_PR_DEBUG(ccid2_debug, format, ##a)
#else
#define ccid2_pr_debug(format, a...)
@@ -174,7 +174,7 @@ out:
/*
* Congestion window validation (RFC 2861).
*/
-static int ccid2_do_cwv = 1;
+static bool ccid2_do_cwv = true;
module_param(ccid2_do_cwv, bool, 0644);
MODULE_PARM_DESC(ccid2_do_cwv, "Perform RFC2861 Congestion Window Validation");
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 3d604e1349c0..560627307200 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -38,7 +38,7 @@
#include <asm/unaligned.h>
#ifdef CONFIG_IP_DCCP_CCID3_DEBUG
-static int ccid3_debug;
+static bool ccid3_debug;
#define ccid3_pr_debug(format, a...) DCCP_PR_DEBUG(ccid3_debug, format, ##a)
#else
#define ccid3_pr_debug(format, a...)
diff --git a/net/dccp/ccids/lib/tfrc.c b/net/dccp/ccids/lib/tfrc.c
index 1f94b7e01d39..62b5828acde0 100644
--- a/net/dccp/ccids/lib/tfrc.c
+++ b/net/dccp/ccids/lib/tfrc.c
@@ -8,7 +8,7 @@
#include "tfrc.h"
#ifdef CONFIG_IP_DCCP_TFRC_DEBUG
-int tfrc_debug;
+bool tfrc_debug;
module_param(tfrc_debug, bool, 0644);
MODULE_PARM_DESC(tfrc_debug, "Enable TFRC debug messages");
#endif
diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h
index f8ee3f549770..ed698c42a5fb 100644
--- a/net/dccp/ccids/lib/tfrc.h
+++ b/net/dccp/ccids/lib/tfrc.h
@@ -21,7 +21,7 @@
#include "packet_history.h"
#ifdef CONFIG_IP_DCCP_TFRC_DEBUG
-extern int tfrc_debug;
+extern bool tfrc_debug;
#define tfrc_pr_debug(format, a...) DCCP_PR_DEBUG(tfrc_debug, format, ##a)
#else
#define tfrc_pr_debug(format, a...)
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 583490aaf56f..29d6bb629a6c 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -39,7 +39,7 @@
"%s: " fmt, __func__, ##a)
#ifdef CONFIG_IP_DCCP_DEBUG
-extern int dccp_debug;
+extern bool dccp_debug;
#define dccp_pr_debug(format, a...) DCCP_PR_DEBUG(dccp_debug, format, ##a)
#define dccp_pr_debug_cat(format, a...) DCCP_PRINTK(dccp_debug, format, ##a)
#define dccp_debug(fmt, a...) dccp_pr_debug_cat(KERN_DEBUG fmt, ##a)
@@ -357,7 +357,7 @@ static inline int dccp_bad_service_code(const struct sock *sk,
struct dccp_skb_cb {
union {
struct inet_skb_parm h4;
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
struct inet6_skb_parm h6;
#endif
} header;
diff --git a/net/dccp/diag.c b/net/dccp/diag.c
index b21f261da75e..8f1625753377 100644
--- a/net/dccp/diag.c
+++ b/net/dccp/diag.c
@@ -48,11 +48,23 @@ static void dccp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
dccp_get_info(sk, _info);
}
+static void dccp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
+ struct inet_diag_req *r, struct nlattr *bc)
+{
+ inet_diag_dump_icsk(&dccp_hashinfo, skb, cb, r, bc);
+}
+
+static int dccp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh,
+ struct inet_diag_req *req)
+{
+ return inet_diag_dump_one_icsk(&dccp_hashinfo, in_skb, nlh, req);
+}
+
static const struct inet_diag_handler dccp_diag_handler = {
- .idiag_hashinfo = &dccp_hashinfo,
+ .dump = dccp_diag_dump,
+ .dump_one = dccp_diag_dump_one,
.idiag_get_info = dccp_diag_get_info,
- .idiag_type = DCCPDIAG_GETSOCK,
- .idiag_info_size = sizeof(struct tcp_info),
+ .idiag_type = IPPROTO_DCCP,
};
static int __init dccp_diag_init(void)
@@ -71,4 +83,4 @@ module_exit(dccp_diag_fini);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@mandriva.com>");
MODULE_DESCRIPTION("DCCP inet_diag handler");
-MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_INET_DIAG, DCCPDIAG_GETSOCK);
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-33 /* AF_INET - IPPROTO_DCCP */);
diff --git a/net/dccp/feat.c b/net/dccp/feat.c
index 23cea0ee3101..78a2ad70e1b0 100644
--- a/net/dccp/feat.c
+++ b/net/dccp/feat.c
@@ -490,8 +490,8 @@ static int dccp_feat_push_change(struct list_head *fn_list, u8 feat, u8 local,
new->feat_num = feat;
new->is_local = local;
new->state = FEAT_INITIALISING;
- new->needs_confirm = 0;
- new->empty_confirm = 0;
+ new->needs_confirm = false;
+ new->empty_confirm = false;
new->val = *fval;
new->needs_mandatory = mandatory;
@@ -517,12 +517,12 @@ static int dccp_feat_push_confirm(struct list_head *fn_list, u8 feat, u8 local,
new->feat_num = feat;
new->is_local = local;
new->state = FEAT_STABLE; /* transition in 6.6.2 */
- new->needs_confirm = 1;
+ new->needs_confirm = true;
new->empty_confirm = (fval == NULL);
new->val.nn = 0; /* zeroes the whole structure */
if (!new->empty_confirm)
new->val = *fval;
- new->needs_mandatory = 0;
+ new->needs_mandatory = false;
return 0;
}
@@ -1155,7 +1155,7 @@ static u8 dccp_feat_change_recv(struct list_head *fn, u8 is_mandatory, u8 opt,
}
if (dccp_feat_reconcile(&entry->val, val, len, server, true)) {
- entry->empty_confirm = 0;
+ entry->empty_confirm = false;
} else if (is_mandatory) {
return DCCP_RESET_CODE_MANDATORY_ERROR;
} else if (entry->state == FEAT_INITIALISING) {
@@ -1171,10 +1171,10 @@ static u8 dccp_feat_change_recv(struct list_head *fn, u8 is_mandatory, u8 opt,
defval = dccp_feat_default_value(feat);
if (!dccp_feat_reconcile(&entry->val, &defval, 1, server, true))
return DCCP_RESET_CODE_OPTION_ERROR;
- entry->empty_confirm = 1;
+ entry->empty_confirm = true;
}
- entry->needs_confirm = 1;
- entry->needs_mandatory = 0;
+ entry->needs_confirm = true;
+ entry->needs_mandatory = false;
entry->state = FEAT_STABLE;
return 0;
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 90a919afbed7..1c67fe8ff90d 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -111,6 +111,7 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
inet->inet_sport, inet->inet_dport, sk);
if (IS_ERR(rt)) {
+ err = PTR_ERR(rt);
rt = NULL;
goto failure;
}
@@ -473,10 +474,11 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk,
struct sk_buff *skb)
{
struct rtable *rt;
+ const struct iphdr *iph = ip_hdr(skb);
struct flowi4 fl4 = {
.flowi4_oif = skb_rtable(skb)->rt_iif,
- .daddr = ip_hdr(skb)->saddr,
- .saddr = ip_hdr(skb)->daddr,
+ .daddr = iph->saddr,
+ .saddr = iph->daddr,
.flowi4_tos = RT_CONN_FLAGS(sk),
.flowi4_proto = sk->sk_protocol,
.fl4_sport = dccp_hdr(skb)->dccph_dport,
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 17ee85ce148d..ce903f747e64 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -150,8 +150,8 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
*/
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_proto = IPPROTO_DCCP;
- ipv6_addr_copy(&fl6.daddr, &np->daddr);
- ipv6_addr_copy(&fl6.saddr, &np->saddr);
+ fl6.daddr = np->daddr;
+ fl6.saddr = np->saddr;
fl6.flowi6_oif = sk->sk_bound_dev_if;
fl6.fl6_dport = inet->inet_dport;
fl6.fl6_sport = inet->inet_sport;
@@ -244,8 +244,8 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req,
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_proto = IPPROTO_DCCP;
- ipv6_addr_copy(&fl6.daddr, &ireq6->rmt_addr);
- ipv6_addr_copy(&fl6.saddr, &ireq6->loc_addr);
+ fl6.daddr = ireq6->rmt_addr;
+ fl6.saddr = ireq6->loc_addr;
fl6.flowlabel = 0;
fl6.flowi6_oif = ireq6->iif;
fl6.fl6_dport = inet_rsk(req)->rmt_port;
@@ -270,7 +270,7 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req,
dh->dccph_checksum = dccp_v6_csum_finish(skb,
&ireq6->loc_addr,
&ireq6->rmt_addr);
- ipv6_addr_copy(&fl6.daddr, &ireq6->rmt_addr);
+ fl6.daddr = ireq6->rmt_addr;
err = ip6_xmit(sk, skb, &fl6, opt, np->tclass);
err = net_xmit_eval(err);
}
@@ -313,8 +313,8 @@ static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb)
&rxip6h->daddr);
memset(&fl6, 0, sizeof(fl6));
- ipv6_addr_copy(&fl6.daddr, &rxip6h->saddr);
- ipv6_addr_copy(&fl6.saddr, &rxip6h->daddr);
+ fl6.daddr = rxip6h->saddr;
+ fl6.saddr = rxip6h->daddr;
fl6.flowi6_proto = IPPROTO_DCCP;
fl6.flowi6_oif = inet6_iif(rxskb);
@@ -419,8 +419,8 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
goto drop_and_free;
ireq6 = inet6_rsk(req);
- ipv6_addr_copy(&ireq6->rmt_addr, &ipv6_hdr(skb)->saddr);
- ipv6_addr_copy(&ireq6->loc_addr, &ipv6_hdr(skb)->daddr);
+ ireq6->rmt_addr = ipv6_hdr(skb)->saddr;
+ ireq6->loc_addr = ipv6_hdr(skb)->daddr;
if (ipv6_opt_accepted(sk, skb) ||
np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
@@ -491,7 +491,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
ipv6_addr_set_v4mapped(newinet->inet_saddr, &newnp->saddr);
- ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);
+ newnp->rcv_saddr = newnp->saddr;
inet_csk(newsk)->icsk_af_ops = &dccp_ipv6_mapped;
newsk->sk_backlog_rcv = dccp_v4_do_rcv;
@@ -526,9 +526,9 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_proto = IPPROTO_DCCP;
- ipv6_addr_copy(&fl6.daddr, &ireq6->rmt_addr);
+ fl6.daddr = ireq6->rmt_addr;
final_p = fl6_update_dst(&fl6, opt, &final);
- ipv6_addr_copy(&fl6.saddr, &ireq6->loc_addr);
+ fl6.saddr = ireq6->loc_addr;
fl6.flowi6_oif = sk->sk_bound_dev_if;
fl6.fl6_dport = inet_rsk(req)->rmt_port;
fl6.fl6_sport = inet_rsk(req)->loc_port;
@@ -559,9 +559,9 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
memcpy(newnp, np, sizeof(struct ipv6_pinfo));
- ipv6_addr_copy(&newnp->daddr, &ireq6->rmt_addr);
- ipv6_addr_copy(&newnp->saddr, &ireq6->loc_addr);
- ipv6_addr_copy(&newnp->rcv_saddr, &ireq6->loc_addr);
+ newnp->daddr = ireq6->rmt_addr;
+ newnp->saddr = ireq6->loc_addr;
+ newnp->rcv_saddr = ireq6->loc_addr;
newsk->sk_bound_dev_if = ireq6->iif;
/* Now IPv6 options...
@@ -877,7 +877,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
if (flowlabel == NULL)
return -EINVAL;
- ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
+ usin->sin6_addr = flowlabel->dst;
fl6_sock_release(flowlabel);
}
}
@@ -910,7 +910,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
return -EINVAL;
}
- ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
+ np->daddr = usin->sin6_addr;
np->flow_label = fl6.flowlabel;
/*
@@ -949,8 +949,8 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
saddr = &np->rcv_saddr;
fl6.flowi6_proto = IPPROTO_DCCP;
- ipv6_addr_copy(&fl6.daddr, &np->daddr);
- ipv6_addr_copy(&fl6.saddr, saddr ? saddr : &np->saddr);
+ fl6.daddr = np->daddr;
+ fl6.saddr = saddr ? *saddr : np->saddr;
fl6.flowi6_oif = sk->sk_bound_dev_if;
fl6.fl6_dport = usin->sin6_port;
fl6.fl6_sport = inet->inet_sport;
@@ -966,11 +966,11 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
if (saddr == NULL) {
saddr = &fl6.saddr;
- ipv6_addr_copy(&np->rcv_saddr, saddr);
+ np->rcv_saddr = *saddr;
}
/* set the source address */
- ipv6_addr_copy(&np->saddr, saddr);
+ np->saddr = *saddr;
inet->inet_rcv_saddr = LOOPBACK4_IPV6;
__ip6_dst_store(sk, dst, NULL, NULL);
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index d7041a0963af..5a7f90bbffac 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -53,15 +53,15 @@ void dccp_time_wait(struct sock *sk, int state, int timeo)
if (tw != NULL) {
const struct inet_connection_sock *icsk = inet_csk(sk);
const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1);
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
if (tw->tw_family == PF_INET6) {
const struct ipv6_pinfo *np = inet6_sk(sk);
struct inet6_timewait_sock *tw6;
tw->tw_ipv6_offset = inet6_tw_offset(sk->sk_prot);
tw6 = inet6_twsk((struct sock *)tw);
- ipv6_addr_copy(&tw6->tw_v6_daddr, &np->daddr);
- ipv6_addr_copy(&tw6->tw_v6_rcv_saddr, &np->rcv_saddr);
+ tw6->tw_v6_daddr = np->daddr;
+ tw6->tw_v6_rcv_saddr = np->rcv_saddr;
tw->tw_ipv6only = np->ipv6only;
}
#endif
@@ -100,7 +100,7 @@ struct sock *dccp_create_openreq_child(struct sock *sk,
* (* Generate a new socket and switch to that socket *)
* Set S := new socket for this port pair
*/
- struct sock *newsk = inet_csk_clone(sk, req, GFP_ATOMIC);
+ struct sock *newsk = inet_csk_clone_lock(sk, req, GFP_ATOMIC);
if (newsk != NULL) {
struct dccp_request_sock *dreq = dccp_rsk(req);
diff --git a/net/dccp/options.c b/net/dccp/options.c
index 4b2ab657ac8e..68fa6b7a3e01 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -544,7 +544,7 @@ int dccp_insert_fn_opt(struct sk_buff *skb, u8 type, u8 feat,
}
if (unlikely(val == NULL || len == 0))
- len = repeat_first = 0;
+ len = repeat_first = false;
tot_len = 3 + repeat_first + len;
if (DCCP_SKB_CB(skb)->dccpd_opt_len + tot_len > DCCP_MAX_OPT_LEN) {
diff --git a/net/dccp/probe.c b/net/dccp/probe.c
index 33d0e6297c21..0a8d6ebd9b45 100644
--- a/net/dccp/probe.c
+++ b/net/dccp/probe.c
@@ -152,6 +152,17 @@ static const struct file_operations dccpprobe_fops = {
.llseek = noop_llseek,
};
+static __init int setup_jprobe(void)
+{
+ int ret = register_jprobe(&dccp_send_probe);
+
+ if (ret) {
+ request_module("dccp");
+ ret = register_jprobe(&dccp_send_probe);
+ }
+ return ret;
+}
+
static __init int dccpprobe_init(void)
{
int ret = -ENOMEM;
@@ -163,8 +174,7 @@ static __init int dccpprobe_init(void)
if (!proc_net_fops_create(&init_net, procname, S_IRUSR, &dccpprobe_fops))
goto err0;
- try_then_request_module((ret = register_jprobe(&dccp_send_probe)) == 0,
- "dccp");
+ ret = setup_jprobe();
if (ret)
goto err1;
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index e742f90a6858..7065c0ae1e7b 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -1099,7 +1099,7 @@ module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
#ifdef CONFIG_IP_DCCP_DEBUG
-int dccp_debug;
+bool dccp_debug;
module_param(dccp_debug, bool, 0644);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
index 7f0eb087dc11..befe426491ba 100644
--- a/net/decnet/dn_neigh.c
+++ b/net/decnet/dn_neigh.c
@@ -88,9 +88,9 @@ static const struct neigh_ops dn_phase3_ops = {
static u32 dn_neigh_hash(const void *pkey,
const struct net_device *dev,
- __u32 hash_rnd)
+ __u32 *hash_rnd)
{
- return jhash_2words(*(__u16 *)pkey, 0, hash_rnd);
+ return jhash_2words(*(__u16 *)pkey, 0, hash_rnd[0]);
}
struct neigh_table dn_neigh_table = {
@@ -107,7 +107,7 @@ struct neigh_table dn_neigh_table = {
.gc_staletime = 60 * HZ,
.reachable_time = 30 * HZ,
.delay_probe_time = 5 * HZ,
- .queue_len = 3,
+ .queue_len_bytes = 64*1024,
.ucast_probes = 0,
.app_probes = 0,
.mcast_probes = 0,
@@ -202,7 +202,7 @@ static int dn_neigh_output_packet(struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
struct dn_route *rt = (struct dn_route *)dst;
- struct neighbour *neigh = dst_get_neighbour(dst);
+ struct neighbour *neigh = dst_get_neighbour_noref(dst);
struct net_device *dev = neigh->dev;
char mac_addr[ETH_ALEN];
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index a77d16158eb6..f31ce72dca65 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -112,7 +112,7 @@ static unsigned long dn_rt_deadline;
static int dn_dst_gc(struct dst_ops *ops);
static struct dst_entry *dn_dst_check(struct dst_entry *, __u32);
static unsigned int dn_dst_default_advmss(const struct dst_entry *dst);
-static unsigned int dn_dst_default_mtu(const struct dst_entry *dst);
+static unsigned int dn_dst_mtu(const struct dst_entry *dst);
static void dn_dst_destroy(struct dst_entry *);
static struct dst_entry *dn_dst_negative_advice(struct dst_entry *);
static void dn_dst_link_failure(struct sk_buff *);
@@ -135,7 +135,7 @@ static struct dst_ops dn_dst_ops = {
.gc = dn_dst_gc,
.check = dn_dst_check,
.default_advmss = dn_dst_default_advmss,
- .default_mtu = dn_dst_default_mtu,
+ .mtu = dn_dst_mtu,
.cow_metrics = dst_cow_metrics_generic,
.destroy = dn_dst_destroy,
.negative_advice = dn_dst_negative_advice,
@@ -244,7 +244,7 @@ static int dn_dst_gc(struct dst_ops *ops)
*/
static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu)
{
- struct neighbour *n = dst_get_neighbour(dst);
+ struct neighbour *n = dst_get_neighbour_noref(dst);
u32 min_mtu = 230;
struct dn_dev *dn;
@@ -713,7 +713,7 @@ out:
static int dn_to_neigh_output(struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
- struct neighbour *n = dst_get_neighbour(dst);
+ struct neighbour *n = dst_get_neighbour_noref(dst);
return n->output(n, skb);
}
@@ -728,7 +728,7 @@ static int dn_output(struct sk_buff *skb)
int err = -EINVAL;
- if ((neigh = dst_get_neighbour(dst)) == NULL)
+ if ((neigh = dst_get_neighbour_noref(dst)) == NULL)
goto error;
skb->dev = dev;
@@ -825,9 +825,11 @@ static unsigned int dn_dst_default_advmss(const struct dst_entry *dst)
return dn_mss_from_pmtu(dst->dev, dst_mtu(dst));
}
-static unsigned int dn_dst_default_mtu(const struct dst_entry *dst)
+static unsigned int dn_dst_mtu(const struct dst_entry *dst)
{
- return dst->dev->mtu;
+ unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
+
+ return mtu ? : dst->dev->mtu;
}
static struct neighbour *dn_dst_neigh_lookup(const struct dst_entry *dst, const void *daddr)
@@ -850,7 +852,7 @@ static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res)
}
rt->rt_type = res->type;
- if (dev != NULL && dst_get_neighbour(&rt->dst) == NULL) {
+ if (dev != NULL && dst_get_neighbour_noref(&rt->dst) == NULL) {
n = __neigh_lookup_errno(&dn_neigh_table, &rt->rt_gateway, dev);
if (IS_ERR(n))
return PTR_ERR(n);
diff --git a/net/decnet/dn_timer.c b/net/decnet/dn_timer.c
index 67f691bd4acf..d9c150cc59a9 100644
--- a/net/decnet/dn_timer.c
+++ b/net/decnet/dn_timer.c
@@ -36,16 +36,13 @@ static void dn_slow_timer(unsigned long arg);
void dn_start_slow_timer(struct sock *sk)
{
- sk->sk_timer.expires = jiffies + SLOW_INTERVAL;
- sk->sk_timer.function = dn_slow_timer;
- sk->sk_timer.data = (unsigned long)sk;
-
- add_timer(&sk->sk_timer);
+ setup_timer(&sk->sk_timer, dn_slow_timer, (unsigned long)sk);
+ sk_reset_timer(sk, &sk->sk_timer, jiffies + SLOW_INTERVAL);
}
void dn_stop_slow_timer(struct sock *sk)
{
- del_timer(&sk->sk_timer);
+ sk_stop_timer(sk, &sk->sk_timer);
}
static void dn_slow_timer(unsigned long arg)
@@ -53,12 +50,10 @@ static void dn_slow_timer(unsigned long arg)
struct sock *sk = (struct sock *)arg;
struct dn_scp *scp = DN_SK(sk);
- sock_hold(sk);
bh_lock_sock(sk);
if (sock_owned_by_user(sk)) {
- sk->sk_timer.expires = jiffies + HZ / 10;
- add_timer(&sk->sk_timer);
+ sk_reset_timer(sk, &sk->sk_timer, jiffies + HZ / 10);
goto out;
}
@@ -100,9 +95,7 @@ static void dn_slow_timer(unsigned long arg)
scp->keepalive_fxn(sk);
}
- sk->sk_timer.expires = jiffies + SLOW_INTERVAL;
-
- add_timer(&sk->sk_timer);
+ sk_reset_timer(sk, &sk->sk_timer, jiffies + SLOW_INTERVAL);
out:
bh_unlock_sock(sk);
sock_put(sk);
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index c53ded2a98df..274791cd7a35 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -1,5 +1,5 @@
-menuconfig NET_DSA
- bool "Distributed Switch Architecture support"
+config NET_DSA
+ tristate "Distributed Switch Architecture support"
default n
depends on EXPERIMENTAL && NETDEVICES && !S390
select PHYLIB
@@ -23,38 +23,4 @@ config NET_DSA_TAG_TRAILER
bool
default n
-
-# switch drivers
-config NET_DSA_MV88E6XXX
- bool
- default n
-
-config NET_DSA_MV88E6060
- bool "Marvell 88E6060 ethernet switch chip support"
- select NET_DSA_TAG_TRAILER
- ---help---
- This enables support for the Marvell 88E6060 ethernet switch
- chip.
-
-config NET_DSA_MV88E6XXX_NEED_PPU
- bool
- default n
-
-config NET_DSA_MV88E6131
- bool "Marvell 88E6085/6095/6095F/6131 ethernet switch chip support"
- select NET_DSA_MV88E6XXX
- select NET_DSA_MV88E6XXX_NEED_PPU
- select NET_DSA_TAG_DSA
- ---help---
- This enables support for the Marvell 88E6085/6095/6095F/6131
- ethernet switch chips.
-
-config NET_DSA_MV88E6123_61_65
- bool "Marvell 88E6123/6161/6165 ethernet switch chip support"
- select NET_DSA_MV88E6XXX
- select NET_DSA_TAG_EDSA
- ---help---
- This enables support for the Marvell 88E6123/6161/6165
- ethernet switch chips.
-
endif
diff --git a/net/dsa/Makefile b/net/dsa/Makefile
index 2374faff4dea..7b9fcbbeda5d 100644
--- a/net/dsa/Makefile
+++ b/net/dsa/Makefile
@@ -1,13 +1,8 @@
-# tagging formats
-obj-$(CONFIG_NET_DSA_TAG_DSA) += tag_dsa.o
-obj-$(CONFIG_NET_DSA_TAG_EDSA) += tag_edsa.o
-obj-$(CONFIG_NET_DSA_TAG_TRAILER) += tag_trailer.o
-
-# switch drivers
-obj-$(CONFIG_NET_DSA_MV88E6XXX) += mv88e6xxx.o
-obj-$(CONFIG_NET_DSA_MV88E6060) += mv88e6060.o
-obj-$(CONFIG_NET_DSA_MV88E6123_61_65) += mv88e6123_61_65.o
-obj-$(CONFIG_NET_DSA_MV88E6131) += mv88e6131.o
-
# the core
-obj-$(CONFIG_NET_DSA) += dsa.o slave.o
+obj-$(CONFIG_NET_DSA) += dsa_core.o
+dsa_core-y += dsa.o slave.o
+
+# tagging formats
+dsa_core-$(CONFIG_NET_DSA_TAG_DSA) += tag_dsa.o
+dsa_core-$(CONFIG_NET_DSA_TAG_EDSA) += tag_edsa.o
+dsa_core-$(CONFIG_NET_DSA_TAG_TRAILER) += tag_trailer.o
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 0dc1589343c3..88e7c2f3fa0d 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -29,6 +29,7 @@ void register_switch_driver(struct dsa_switch_driver *drv)
list_add_tail(&drv->list, &dsa_switch_drivers);
mutex_unlock(&dsa_switch_drivers_mutex);
}
+EXPORT_SYMBOL_GPL(register_switch_driver);
void unregister_switch_driver(struct dsa_switch_driver *drv)
{
@@ -36,6 +37,7 @@ void unregister_switch_driver(struct dsa_switch_driver *drv)
list_del_init(&drv->list);
mutex_unlock(&dsa_switch_drivers_mutex);
}
+EXPORT_SYMBOL_GPL(unregister_switch_driver);
static struct dsa_switch_driver *
dsa_switch_probe(struct mii_bus *bus, int sw_addr, char **_name)
@@ -199,29 +201,6 @@ static void dsa_switch_destroy(struct dsa_switch *ds)
}
-/* hooks for ethertype-less tagging formats *********************************/
-/*
- * The original DSA tag format and some other tag formats have no
- * ethertype, which means that we need to add a little hack to the
- * networking receive path to make sure that received frames get
- * the right ->protocol assigned to them when one of those tag
- * formats is in use.
- */
-bool dsa_uses_dsa_tags(void *dsa_ptr)
-{
- struct dsa_switch_tree *dst = dsa_ptr;
-
- return !!(dst->tag_protocol == htons(ETH_P_DSA));
-}
-
-bool dsa_uses_trailer_tags(void *dsa_ptr)
-{
- struct dsa_switch_tree *dst = dsa_ptr;
-
- return !!(dst->tag_protocol == htons(ETH_P_TRAILER));
-}
-
-
/* link polling *************************************************************/
static void dsa_link_poll_work(struct work_struct *ugly)
{
@@ -419,12 +398,36 @@ static struct platform_driver dsa_driver = {
static int __init dsa_init_module(void)
{
- return platform_driver_register(&dsa_driver);
+ int rc;
+
+ rc = platform_driver_register(&dsa_driver);
+ if (rc)
+ return rc;
+
+#ifdef CONFIG_NET_DSA_TAG_DSA
+ dev_add_pack(&dsa_packet_type);
+#endif
+#ifdef CONFIG_NET_DSA_TAG_EDSA
+ dev_add_pack(&edsa_packet_type);
+#endif
+#ifdef CONFIG_NET_DSA_TAG_TRAILER
+ dev_add_pack(&trailer_packet_type);
+#endif
+ return 0;
}
module_init(dsa_init_module);
static void __exit dsa_cleanup_module(void)
{
+#ifdef CONFIG_NET_DSA_TAG_TRAILER
+ dev_remove_pack(&trailer_packet_type);
+#endif
+#ifdef CONFIG_NET_DSA_TAG_EDSA
+ dev_remove_pack(&edsa_packet_type);
+#endif
+#ifdef CONFIG_NET_DSA_TAG_DSA
+ dev_remove_pack(&dsa_packet_type);
+#endif
platform_driver_unregister(&dsa_driver);
}
module_exit(dsa_cleanup_module);
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 4b0ea0540442..d4cf5cc747e3 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -11,97 +11,9 @@
#ifndef __DSA_PRIV_H
#define __DSA_PRIV_H
-#include <linux/list.h>
#include <linux/phy.h>
-#include <linux/timer.h>
-#include <linux/workqueue.h>
#include <net/dsa.h>
-struct dsa_switch {
- /*
- * Parent switch tree, and switch index.
- */
- struct dsa_switch_tree *dst;
- int index;
-
- /*
- * Configuration data for this switch.
- */
- struct dsa_chip_data *pd;
-
- /*
- * The used switch driver.
- */
- struct dsa_switch_driver *drv;
-
- /*
- * Reference to mii bus to use.
- */
- struct mii_bus *master_mii_bus;
-
- /*
- * Slave mii_bus and devices for the individual ports.
- */
- u32 dsa_port_mask;
- u32 phys_port_mask;
- struct mii_bus *slave_mii_bus;
- struct net_device *ports[DSA_MAX_PORTS];
-};
-
-struct dsa_switch_tree {
- /*
- * Configuration data for the platform device that owns
- * this dsa switch tree instance.
- */
- struct dsa_platform_data *pd;
-
- /*
- * Reference to network device to use, and which tagging
- * protocol to use.
- */
- struct net_device *master_netdev;
- __be16 tag_protocol;
-
- /*
- * The switch and port to which the CPU is attached.
- */
- s8 cpu_switch;
- s8 cpu_port;
-
- /*
- * Link state polling.
- */
- int link_poll_needed;
- struct work_struct link_poll_work;
- struct timer_list link_poll_timer;
-
- /*
- * Data for the individual switch chips.
- */
- struct dsa_switch *ds[DSA_MAX_SWITCHES];
-};
-
-static inline bool dsa_is_cpu_port(struct dsa_switch *ds, int p)
-{
- return !!(ds->index == ds->dst->cpu_switch && p == ds->dst->cpu_port);
-}
-
-static inline u8 dsa_upstream_port(struct dsa_switch *ds)
-{
- struct dsa_switch_tree *dst = ds->dst;
-
- /*
- * If this is the root switch (i.e. the switch that connects
- * to the CPU), return the cpu port number on this switch.
- * Else return the (DSA) port number that connects to the
- * switch that is one hop closer to the cpu.
- */
- if (dst->cpu_switch == ds->index)
- return dst->cpu_port;
- else
- return ds->pd->rtable[dst->cpu_switch];
-}
-
struct dsa_slave_priv {
/*
* The linux network interface corresponding to this
@@ -123,44 +35,8 @@ struct dsa_slave_priv {
struct phy_device *phy;
};
-struct dsa_switch_driver {
- struct list_head list;
-
- __be16 tag_protocol;
- int priv_size;
-
- /*
- * Probing and setup.
- */
- char *(*probe)(struct mii_bus *bus, int sw_addr);
- int (*setup)(struct dsa_switch *ds);
- int (*set_addr)(struct dsa_switch *ds, u8 *addr);
-
- /*
- * Access to the switch's PHY registers.
- */
- int (*phy_read)(struct dsa_switch *ds, int port, int regnum);
- int (*phy_write)(struct dsa_switch *ds, int port,
- int regnum, u16 val);
-
- /*
- * Link state polling and IRQ handling.
- */
- void (*poll_link)(struct dsa_switch *ds);
-
- /*
- * ethtool hardware statistics.
- */
- void (*get_strings)(struct dsa_switch *ds, int port, uint8_t *data);
- void (*get_ethtool_stats)(struct dsa_switch *ds,
- int port, uint64_t *data);
- int (*get_sset_count)(struct dsa_switch *ds);
-};
-
/* dsa.c */
extern char dsa_driver_version[];
-void register_switch_driver(struct dsa_switch_driver *type);
-void unregister_switch_driver(struct dsa_switch_driver *type);
/* slave.c */
void dsa_slave_mii_bus_init(struct dsa_switch *ds);
@@ -170,12 +46,15 @@ struct net_device *dsa_slave_create(struct dsa_switch *ds,
/* tag_dsa.c */
netdev_tx_t dsa_xmit(struct sk_buff *skb, struct net_device *dev);
+extern struct packet_type dsa_packet_type;
/* tag_edsa.c */
netdev_tx_t edsa_xmit(struct sk_buff *skb, struct net_device *dev);
+extern struct packet_type edsa_packet_type;
/* tag_trailer.c */
netdev_tx_t trailer_xmit(struct sk_buff *skb, struct net_device *dev);
+extern struct packet_type trailer_packet_type;
#endif
diff --git a/net/dsa/mv88e6060.c b/net/dsa/mv88e6060.c
deleted file mode 100644
index 8f4ff5a2c813..000000000000
--- a/net/dsa/mv88e6060.c
+++ /dev/null
@@ -1,288 +0,0 @@
-/*
- * net/dsa/mv88e6060.c - Driver for Marvell 88e6060 switch chips
- * Copyright (c) 2008-2009 Marvell Semiconductor
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#include <linux/list.h>
-#include <linux/netdevice.h>
-#include <linux/phy.h>
-#include "dsa_priv.h"
-
-#define REG_PORT(p) (8 + (p))
-#define REG_GLOBAL 0x0f
-
-static int reg_read(struct dsa_switch *ds, int addr, int reg)
-{
- return mdiobus_read(ds->master_mii_bus, ds->pd->sw_addr + addr, reg);
-}
-
-#define REG_READ(addr, reg) \
- ({ \
- int __ret; \
- \
- __ret = reg_read(ds, addr, reg); \
- if (__ret < 0) \
- return __ret; \
- __ret; \
- })
-
-
-static int reg_write(struct dsa_switch *ds, int addr, int reg, u16 val)
-{
- return mdiobus_write(ds->master_mii_bus, ds->pd->sw_addr + addr,
- reg, val);
-}
-
-#define REG_WRITE(addr, reg, val) \
- ({ \
- int __ret; \
- \
- __ret = reg_write(ds, addr, reg, val); \
- if (__ret < 0) \
- return __ret; \
- })
-
-static char *mv88e6060_probe(struct mii_bus *bus, int sw_addr)
-{
- int ret;
-
- ret = mdiobus_read(bus, sw_addr + REG_PORT(0), 0x03);
- if (ret >= 0) {
- ret &= 0xfff0;
- if (ret == 0x0600)
- return "Marvell 88E6060";
- }
-
- return NULL;
-}
-
-static int mv88e6060_switch_reset(struct dsa_switch *ds)
-{
- int i;
- int ret;
-
- /*
- * Set all ports to the disabled state.
- */
- for (i = 0; i < 6; i++) {
- ret = REG_READ(REG_PORT(i), 0x04);
- REG_WRITE(REG_PORT(i), 0x04, ret & 0xfffc);
- }
-
- /*
- * Wait for transmit queues to drain.
- */
- msleep(2);
-
- /*
- * Reset the switch.
- */
- REG_WRITE(REG_GLOBAL, 0x0a, 0xa130);
-
- /*
- * Wait up to one second for reset to complete.
- */
- for (i = 0; i < 1000; i++) {
- ret = REG_READ(REG_GLOBAL, 0x00);
- if ((ret & 0x8000) == 0x0000)
- break;
-
- msleep(1);
- }
- if (i == 1000)
- return -ETIMEDOUT;
-
- return 0;
-}
-
-static int mv88e6060_setup_global(struct dsa_switch *ds)
-{
- /*
- * Disable discarding of frames with excessive collisions,
- * set the maximum frame size to 1536 bytes, and mask all
- * interrupt sources.
- */
- REG_WRITE(REG_GLOBAL, 0x04, 0x0800);
-
- /*
- * Enable automatic address learning, set the address
- * database size to 1024 entries, and set the default aging
- * time to 5 minutes.
- */
- REG_WRITE(REG_GLOBAL, 0x0a, 0x2130);
-
- return 0;
-}
-
-static int mv88e6060_setup_port(struct dsa_switch *ds, int p)
-{
- int addr = REG_PORT(p);
-
- /*
- * Do not force flow control, disable Ingress and Egress
- * Header tagging, disable VLAN tunneling, and set the port
- * state to Forwarding. Additionally, if this is the CPU
- * port, enable Ingress and Egress Trailer tagging mode.
- */
- REG_WRITE(addr, 0x04, dsa_is_cpu_port(ds, p) ? 0x4103 : 0x0003);
-
- /*
- * Port based VLAN map: give each port its own address
- * database, allow the CPU port to talk to each of the 'real'
- * ports, and allow each of the 'real' ports to only talk to
- * the CPU port.
- */
- REG_WRITE(addr, 0x06,
- ((p & 0xf) << 12) |
- (dsa_is_cpu_port(ds, p) ?
- ds->phys_port_mask :
- (1 << ds->dst->cpu_port)));
-
- /*
- * Port Association Vector: when learning source addresses
- * of packets, add the address to the address database using
- * a port bitmap that has only the bit for this port set and
- * the other bits clear.
- */
- REG_WRITE(addr, 0x0b, 1 << p);
-
- return 0;
-}
-
-static int mv88e6060_setup(struct dsa_switch *ds)
-{
- int i;
- int ret;
-
- ret = mv88e6060_switch_reset(ds);
- if (ret < 0)
- return ret;
-
- /* @@@ initialise atu */
-
- ret = mv88e6060_setup_global(ds);
- if (ret < 0)
- return ret;
-
- for (i = 0; i < 6; i++) {
- ret = mv88e6060_setup_port(ds, i);
- if (ret < 0)
- return ret;
- }
-
- return 0;
-}
-
-static int mv88e6060_set_addr(struct dsa_switch *ds, u8 *addr)
-{
- REG_WRITE(REG_GLOBAL, 0x01, (addr[0] << 8) | addr[1]);
- REG_WRITE(REG_GLOBAL, 0x02, (addr[2] << 8) | addr[3]);
- REG_WRITE(REG_GLOBAL, 0x03, (addr[4] << 8) | addr[5]);
-
- return 0;
-}
-
-static int mv88e6060_port_to_phy_addr(int port)
-{
- if (port >= 0 && port <= 5)
- return port;
- return -1;
-}
-
-static int mv88e6060_phy_read(struct dsa_switch *ds, int port, int regnum)
-{
- int addr;
-
- addr = mv88e6060_port_to_phy_addr(port);
- if (addr == -1)
- return 0xffff;
-
- return reg_read(ds, addr, regnum);
-}
-
-static int
-mv88e6060_phy_write(struct dsa_switch *ds, int port, int regnum, u16 val)
-{
- int addr;
-
- addr = mv88e6060_port_to_phy_addr(port);
- if (addr == -1)
- return 0xffff;
-
- return reg_write(ds, addr, regnum, val);
-}
-
-static void mv88e6060_poll_link(struct dsa_switch *ds)
-{
- int i;
-
- for (i = 0; i < DSA_MAX_PORTS; i++) {
- struct net_device *dev;
- int uninitialized_var(port_status);
- int link;
- int speed;
- int duplex;
- int fc;
-
- dev = ds->ports[i];
- if (dev == NULL)
- continue;
-
- link = 0;
- if (dev->flags & IFF_UP) {
- port_status = reg_read(ds, REG_PORT(i), 0x00);
- if (port_status < 0)
- continue;
-
- link = !!(port_status & 0x1000);
- }
-
- if (!link) {
- if (netif_carrier_ok(dev)) {
- printk(KERN_INFO "%s: link down\n", dev->name);
- netif_carrier_off(dev);
- }
- continue;
- }
-
- speed = (port_status & 0x0100) ? 100 : 10;
- duplex = (port_status & 0x0200) ? 1 : 0;
- fc = ((port_status & 0xc000) == 0xc000) ? 1 : 0;
-
- if (!netif_carrier_ok(dev)) {
- printk(KERN_INFO "%s: link up, %d Mb/s, %s duplex, "
- "flow control %sabled\n", dev->name,
- speed, duplex ? "full" : "half",
- fc ? "en" : "dis");
- netif_carrier_on(dev);
- }
- }
-}
-
-static struct dsa_switch_driver mv88e6060_switch_driver = {
- .tag_protocol = htons(ETH_P_TRAILER),
- .probe = mv88e6060_probe,
- .setup = mv88e6060_setup,
- .set_addr = mv88e6060_set_addr,
- .phy_read = mv88e6060_phy_read,
- .phy_write = mv88e6060_phy_write,
- .poll_link = mv88e6060_poll_link,
-};
-
-static int __init mv88e6060_init(void)
-{
- register_switch_driver(&mv88e6060_switch_driver);
- return 0;
-}
-module_init(mv88e6060_init);
-
-static void __exit mv88e6060_cleanup(void)
-{
- unregister_switch_driver(&mv88e6060_switch_driver);
-}
-module_exit(mv88e6060_cleanup);
diff --git a/net/dsa/mv88e6123_61_65.c b/net/dsa/mv88e6123_61_65.c
deleted file mode 100644
index 52faaa21a4d9..000000000000
--- a/net/dsa/mv88e6123_61_65.c
+++ /dev/null
@@ -1,447 +0,0 @@
-/*
- * net/dsa/mv88e6123_61_65.c - Marvell 88e6123/6161/6165 switch chip support
- * Copyright (c) 2008-2009 Marvell Semiconductor
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#include <linux/list.h>
-#include <linux/netdevice.h>
-#include <linux/phy.h>
-#include "dsa_priv.h"
-#include "mv88e6xxx.h"
-
-static char *mv88e6123_61_65_probe(struct mii_bus *bus, int sw_addr)
-{
- int ret;
-
- ret = __mv88e6xxx_reg_read(bus, sw_addr, REG_PORT(0), 0x03);
- if (ret >= 0) {
- ret &= 0xfff0;
- if (ret == 0x1210)
- return "Marvell 88E6123";
- if (ret == 0x1610)
- return "Marvell 88E6161";
- if (ret == 0x1650)
- return "Marvell 88E6165";
- }
-
- return NULL;
-}
-
-static int mv88e6123_61_65_switch_reset(struct dsa_switch *ds)
-{
- int i;
- int ret;
-
- /*
- * Set all ports to the disabled state.
- */
- for (i = 0; i < 8; i++) {
- ret = REG_READ(REG_PORT(i), 0x04);
- REG_WRITE(REG_PORT(i), 0x04, ret & 0xfffc);
- }
-
- /*
- * Wait for transmit queues to drain.
- */
- msleep(2);
-
- /*
- * Reset the switch.
- */
- REG_WRITE(REG_GLOBAL, 0x04, 0xc400);
-
- /*
- * Wait up to one second for reset to complete.
- */
- for (i = 0; i < 1000; i++) {
- ret = REG_READ(REG_GLOBAL, 0x00);
- if ((ret & 0xc800) == 0xc800)
- break;
-
- msleep(1);
- }
- if (i == 1000)
- return -ETIMEDOUT;
-
- return 0;
-}
-
-static int mv88e6123_61_65_setup_global(struct dsa_switch *ds)
-{
- int ret;
- int i;
-
- /*
- * Disable the PHY polling unit (since there won't be any
- * external PHYs to poll), don't discard packets with
- * excessive collisions, and mask all interrupt sources.
- */
- REG_WRITE(REG_GLOBAL, 0x04, 0x0000);
-
- /*
- * Set the default address aging time to 5 minutes, and
- * enable address learn messages to be sent to all message
- * ports.
- */
- REG_WRITE(REG_GLOBAL, 0x0a, 0x0148);
-
- /*
- * Configure the priority mapping registers.
- */
- ret = mv88e6xxx_config_prio(ds);
- if (ret < 0)
- return ret;
-
- /*
- * Configure the upstream port, and configure the upstream
- * port as the port to which ingress and egress monitor frames
- * are to be sent.
- */
- REG_WRITE(REG_GLOBAL, 0x1a, (dsa_upstream_port(ds) * 0x1110));
-
- /*
- * Disable remote management for now, and set the switch's
- * DSA device number.
- */
- REG_WRITE(REG_GLOBAL, 0x1c, ds->index & 0x1f);
-
- /*
- * Send all frames with destination addresses matching
- * 01:80:c2:00:00:2x to the CPU port.
- */
- REG_WRITE(REG_GLOBAL2, 0x02, 0xffff);
-
- /*
- * Send all frames with destination addresses matching
- * 01:80:c2:00:00:0x to the CPU port.
- */
- REG_WRITE(REG_GLOBAL2, 0x03, 0xffff);
-
- /*
- * Disable the loopback filter, disable flow control
- * messages, disable flood broadcast override, disable
- * removing of provider tags, disable ATU age violation
- * interrupts, disable tag flow control, force flow
- * control priority to the highest, and send all special
- * multicast frames to the CPU at the highest priority.
- */
- REG_WRITE(REG_GLOBAL2, 0x05, 0x00ff);
-
- /*
- * Program the DSA routing table.
- */
- for (i = 0; i < 32; i++) {
- int nexthop;
-
- nexthop = 0x1f;
- if (i != ds->index && i < ds->dst->pd->nr_chips)
- nexthop = ds->pd->rtable[i] & 0x1f;
-
- REG_WRITE(REG_GLOBAL2, 0x06, 0x8000 | (i << 8) | nexthop);
- }
-
- /*
- * Clear all trunk masks.
- */
- for (i = 0; i < 8; i++)
- REG_WRITE(REG_GLOBAL2, 0x07, 0x8000 | (i << 12) | 0xff);
-
- /*
- * Clear all trunk mappings.
- */
- for (i = 0; i < 16; i++)
- REG_WRITE(REG_GLOBAL2, 0x08, 0x8000 | (i << 11));
-
- /*
- * Disable ingress rate limiting by resetting all ingress
- * rate limit registers to their initial state.
- */
- for (i = 0; i < 6; i++)
- REG_WRITE(REG_GLOBAL2, 0x09, 0x9000 | (i << 8));
-
- /*
- * Initialise cross-chip port VLAN table to reset defaults.
- */
- REG_WRITE(REG_GLOBAL2, 0x0b, 0x9000);
-
- /*
- * Clear the priority override table.
- */
- for (i = 0; i < 16; i++)
- REG_WRITE(REG_GLOBAL2, 0x0f, 0x8000 | (i << 8));
-
- /* @@@ initialise AVB (22/23) watchdog (27) sdet (29) registers */
-
- return 0;
-}
-
-static int mv88e6123_61_65_setup_port(struct dsa_switch *ds, int p)
-{
- int addr = REG_PORT(p);
- u16 val;
-
- /*
- * MAC Forcing register: don't force link, speed, duplex
- * or flow control state to any particular values on physical
- * ports, but force the CPU port and all DSA ports to 1000 Mb/s
- * full duplex.
- */
- if (dsa_is_cpu_port(ds, p) || ds->dsa_port_mask & (1 << p))
- REG_WRITE(addr, 0x01, 0x003e);
- else
- REG_WRITE(addr, 0x01, 0x0003);
-
- /*
- * Do not limit the period of time that this port can be
- * paused for by the remote end or the period of time that
- * this port can pause the remote end.
- */
- REG_WRITE(addr, 0x02, 0x0000);
-
- /*
- * Port Control: disable Drop-on-Unlock, disable Drop-on-Lock,
- * disable Header mode, enable IGMP/MLD snooping, disable VLAN
- * tunneling, determine priority by looking at 802.1p and IP
- * priority fields (IP prio has precedence), and set STP state
- * to Forwarding.
- *
- * If this is the CPU link, use DSA or EDSA tagging depending
- * on which tagging mode was configured.
- *
- * If this is a link to another switch, use DSA tagging mode.
- *
- * If this is the upstream port for this switch, enable
- * forwarding of unknown unicasts and multicasts.
- */
- val = 0x0433;
- if (dsa_is_cpu_port(ds, p)) {
- if (ds->dst->tag_protocol == htons(ETH_P_EDSA))
- val |= 0x3300;
- else
- val |= 0x0100;
- }
- if (ds->dsa_port_mask & (1 << p))
- val |= 0x0100;
- if (p == dsa_upstream_port(ds))
- val |= 0x000c;
- REG_WRITE(addr, 0x04, val);
-
- /*
- * Port Control 1: disable trunking. Also, if this is the
- * CPU port, enable learn messages to be sent to this port.
- */
- REG_WRITE(addr, 0x05, dsa_is_cpu_port(ds, p) ? 0x8000 : 0x0000);
-
- /*
- * Port based VLAN map: give each port its own address
- * database, allow the CPU port to talk to each of the 'real'
- * ports, and allow each of the 'real' ports to only talk to
- * the upstream port.
- */
- val = (p & 0xf) << 12;
- if (dsa_is_cpu_port(ds, p))
- val |= ds->phys_port_mask;
- else
- val |= 1 << dsa_upstream_port(ds);
- REG_WRITE(addr, 0x06, val);
-
- /*
- * Default VLAN ID and priority: don't set a default VLAN
- * ID, and set the default packet priority to zero.
- */
- REG_WRITE(addr, 0x07, 0x0000);
-
- /*
- * Port Control 2: don't force a good FCS, set the maximum
- * frame size to 10240 bytes, don't let the switch add or
- * strip 802.1q tags, don't discard tagged or untagged frames
- * on this port, do a destination address lookup on all
- * received packets as usual, disable ARP mirroring and don't
- * send a copy of all transmitted/received frames on this port
- * to the CPU.
- */
- REG_WRITE(addr, 0x08, 0x2080);
-
- /*
- * Egress rate control: disable egress rate control.
- */
- REG_WRITE(addr, 0x09, 0x0001);
-
- /*
- * Egress rate control 2: disable egress rate control.
- */
- REG_WRITE(addr, 0x0a, 0x0000);
-
- /*
- * Port Association Vector: when learning source addresses
- * of packets, add the address to the address database using
- * a port bitmap that has only the bit for this port set and
- * the other bits clear.
- */
- REG_WRITE(addr, 0x0b, 1 << p);
-
- /*
- * Port ATU control: disable limiting the number of address
- * database entries that this port is allowed to use.
- */
- REG_WRITE(addr, 0x0c, 0x0000);
-
- /*
- * Priorit Override: disable DA, SA and VTU priority override.
- */
- REG_WRITE(addr, 0x0d, 0x0000);
-
- /*
- * Port Ethertype: use the Ethertype DSA Ethertype value.
- */
- REG_WRITE(addr, 0x0f, ETH_P_EDSA);
-
- /*
- * Tag Remap: use an identity 802.1p prio -> switch prio
- * mapping.
- */
- REG_WRITE(addr, 0x18, 0x3210);
-
- /*
- * Tag Remap 2: use an identity 802.1p prio -> switch prio
- * mapping.
- */
- REG_WRITE(addr, 0x19, 0x7654);
-
- return 0;
-}
-
-static int mv88e6123_61_65_setup(struct dsa_switch *ds)
-{
- struct mv88e6xxx_priv_state *ps = (void *)(ds + 1);
- int i;
- int ret;
-
- mutex_init(&ps->smi_mutex);
- mutex_init(&ps->stats_mutex);
-
- ret = mv88e6123_61_65_switch_reset(ds);
- if (ret < 0)
- return ret;
-
- /* @@@ initialise vtu and atu */
-
- ret = mv88e6123_61_65_setup_global(ds);
- if (ret < 0)
- return ret;
-
- for (i = 0; i < 6; i++) {
- ret = mv88e6123_61_65_setup_port(ds, i);
- if (ret < 0)
- return ret;
- }
-
- return 0;
-}
-
-static int mv88e6123_61_65_port_to_phy_addr(int port)
-{
- if (port >= 0 && port <= 4)
- return port;
- return -1;
-}
-
-static int
-mv88e6123_61_65_phy_read(struct dsa_switch *ds, int port, int regnum)
-{
- int addr = mv88e6123_61_65_port_to_phy_addr(port);
- return mv88e6xxx_phy_read(ds, addr, regnum);
-}
-
-static int
-mv88e6123_61_65_phy_write(struct dsa_switch *ds,
- int port, int regnum, u16 val)
-{
- int addr = mv88e6123_61_65_port_to_phy_addr(port);
- return mv88e6xxx_phy_write(ds, addr, regnum, val);
-}
-
-static struct mv88e6xxx_hw_stat mv88e6123_61_65_hw_stats[] = {
- { "in_good_octets", 8, 0x00, },
- { "in_bad_octets", 4, 0x02, },
- { "in_unicast", 4, 0x04, },
- { "in_broadcasts", 4, 0x06, },
- { "in_multicasts", 4, 0x07, },
- { "in_pause", 4, 0x16, },
- { "in_undersize", 4, 0x18, },
- { "in_fragments", 4, 0x19, },
- { "in_oversize", 4, 0x1a, },
- { "in_jabber", 4, 0x1b, },
- { "in_rx_error", 4, 0x1c, },
- { "in_fcs_error", 4, 0x1d, },
- { "out_octets", 8, 0x0e, },
- { "out_unicast", 4, 0x10, },
- { "out_broadcasts", 4, 0x13, },
- { "out_multicasts", 4, 0x12, },
- { "out_pause", 4, 0x15, },
- { "excessive", 4, 0x11, },
- { "collisions", 4, 0x1e, },
- { "deferred", 4, 0x05, },
- { "single", 4, 0x14, },
- { "multiple", 4, 0x17, },
- { "out_fcs_error", 4, 0x03, },
- { "late", 4, 0x1f, },
- { "hist_64bytes", 4, 0x08, },
- { "hist_65_127bytes", 4, 0x09, },
- { "hist_128_255bytes", 4, 0x0a, },
- { "hist_256_511bytes", 4, 0x0b, },
- { "hist_512_1023bytes", 4, 0x0c, },
- { "hist_1024_max_bytes", 4, 0x0d, },
-};
-
-static void
-mv88e6123_61_65_get_strings(struct dsa_switch *ds, int port, uint8_t *data)
-{
- mv88e6xxx_get_strings(ds, ARRAY_SIZE(mv88e6123_61_65_hw_stats),
- mv88e6123_61_65_hw_stats, port, data);
-}
-
-static void
-mv88e6123_61_65_get_ethtool_stats(struct dsa_switch *ds,
- int port, uint64_t *data)
-{
- mv88e6xxx_get_ethtool_stats(ds, ARRAY_SIZE(mv88e6123_61_65_hw_stats),
- mv88e6123_61_65_hw_stats, port, data);
-}
-
-static int mv88e6123_61_65_get_sset_count(struct dsa_switch *ds)
-{
- return ARRAY_SIZE(mv88e6123_61_65_hw_stats);
-}
-
-static struct dsa_switch_driver mv88e6123_61_65_switch_driver = {
- .tag_protocol = cpu_to_be16(ETH_P_EDSA),
- .priv_size = sizeof(struct mv88e6xxx_priv_state),
- .probe = mv88e6123_61_65_probe,
- .setup = mv88e6123_61_65_setup,
- .set_addr = mv88e6xxx_set_addr_indirect,
- .phy_read = mv88e6123_61_65_phy_read,
- .phy_write = mv88e6123_61_65_phy_write,
- .poll_link = mv88e6xxx_poll_link,
- .get_strings = mv88e6123_61_65_get_strings,
- .get_ethtool_stats = mv88e6123_61_65_get_ethtool_stats,
- .get_sset_count = mv88e6123_61_65_get_sset_count,
-};
-
-static int __init mv88e6123_61_65_init(void)
-{
- register_switch_driver(&mv88e6123_61_65_switch_driver);
- return 0;
-}
-module_init(mv88e6123_61_65_init);
-
-static void __exit mv88e6123_61_65_cleanup(void)
-{
- unregister_switch_driver(&mv88e6123_61_65_switch_driver);
-}
-module_exit(mv88e6123_61_65_cleanup);
diff --git a/net/dsa/mv88e6131.c b/net/dsa/mv88e6131.c
deleted file mode 100644
index 9bd1061fa4ee..000000000000
--- a/net/dsa/mv88e6131.c
+++ /dev/null
@@ -1,443 +0,0 @@
-/*
- * net/dsa/mv88e6131.c - Marvell 88e6095/6095f/6131 switch chip support
- * Copyright (c) 2008-2009 Marvell Semiconductor
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#include <linux/list.h>
-#include <linux/netdevice.h>
-#include <linux/phy.h>
-#include "dsa_priv.h"
-#include "mv88e6xxx.h"
-
-/*
- * Switch product IDs
- */
-#define ID_6085 0x04a0
-#define ID_6095 0x0950
-#define ID_6131 0x1060
-
-static char *mv88e6131_probe(struct mii_bus *bus, int sw_addr)
-{
- int ret;
-
- ret = __mv88e6xxx_reg_read(bus, sw_addr, REG_PORT(0), 0x03);
- if (ret >= 0) {
- ret &= 0xfff0;
- if (ret == ID_6085)
- return "Marvell 88E6085";
- if (ret == ID_6095)
- return "Marvell 88E6095/88E6095F";
- if (ret == ID_6131)
- return "Marvell 88E6131";
- }
-
- return NULL;
-}
-
-static int mv88e6131_switch_reset(struct dsa_switch *ds)
-{
- int i;
- int ret;
-
- /*
- * Set all ports to the disabled state.
- */
- for (i = 0; i < 11; i++) {
- ret = REG_READ(REG_PORT(i), 0x04);
- REG_WRITE(REG_PORT(i), 0x04, ret & 0xfffc);
- }
-
- /*
- * Wait for transmit queues to drain.
- */
- msleep(2);
-
- /*
- * Reset the switch.
- */
- REG_WRITE(REG_GLOBAL, 0x04, 0xc400);
-
- /*
- * Wait up to one second for reset to complete.
- */
- for (i = 0; i < 1000; i++) {
- ret = REG_READ(REG_GLOBAL, 0x00);
- if ((ret & 0xc800) == 0xc800)
- break;
-
- msleep(1);
- }
- if (i == 1000)
- return -ETIMEDOUT;
-
- return 0;
-}
-
-static int mv88e6131_setup_global(struct dsa_switch *ds)
-{
- int ret;
- int i;
-
- /*
- * Enable the PHY polling unit, don't discard packets with
- * excessive collisions, use a weighted fair queueing scheme
- * to arbitrate between packet queues, set the maximum frame
- * size to 1632, and mask all interrupt sources.
- */
- REG_WRITE(REG_GLOBAL, 0x04, 0x4400);
-
- /*
- * Set the default address aging time to 5 minutes, and
- * enable address learn messages to be sent to all message
- * ports.
- */
- REG_WRITE(REG_GLOBAL, 0x0a, 0x0148);
-
- /*
- * Configure the priority mapping registers.
- */
- ret = mv88e6xxx_config_prio(ds);
- if (ret < 0)
- return ret;
-
- /*
- * Set the VLAN ethertype to 0x8100.
- */
- REG_WRITE(REG_GLOBAL, 0x19, 0x8100);
-
- /*
- * Disable ARP mirroring, and configure the upstream port as
- * the port to which ingress and egress monitor frames are to
- * be sent.
- */
- REG_WRITE(REG_GLOBAL, 0x1a, (dsa_upstream_port(ds) * 0x1100) | 0x00f0);
-
- /*
- * Disable cascade port functionality unless this device
- * is used in a cascade configuration, and set the switch's
- * DSA device number.
- */
- if (ds->dst->pd->nr_chips > 1)
- REG_WRITE(REG_GLOBAL, 0x1c, 0xf000 | (ds->index & 0x1f));
- else
- REG_WRITE(REG_GLOBAL, 0x1c, 0xe000 | (ds->index & 0x1f));
-
- /*
- * Send all frames with destination addresses matching
- * 01:80:c2:00:00:0x to the CPU port.
- */
- REG_WRITE(REG_GLOBAL2, 0x03, 0xffff);
-
- /*
- * Ignore removed tag data on doubly tagged packets, disable
- * flow control messages, force flow control priority to the
- * highest, and send all special multicast frames to the CPU
- * port at the highest priority.
- */
- REG_WRITE(REG_GLOBAL2, 0x05, 0x00ff);
-
- /*
- * Program the DSA routing table.
- */
- for (i = 0; i < 32; i++) {
- int nexthop;
-
- nexthop = 0x1f;
- if (i != ds->index && i < ds->dst->pd->nr_chips)
- nexthop = ds->pd->rtable[i] & 0x1f;
-
- REG_WRITE(REG_GLOBAL2, 0x06, 0x8000 | (i << 8) | nexthop);
- }
-
- /*
- * Clear all trunk masks.
- */
- for (i = 0; i < 8; i++)
- REG_WRITE(REG_GLOBAL2, 0x07, 0x8000 | (i << 12) | 0x7ff);
-
- /*
- * Clear all trunk mappings.
- */
- for (i = 0; i < 16; i++)
- REG_WRITE(REG_GLOBAL2, 0x08, 0x8000 | (i << 11));
-
- /*
- * Force the priority of IGMP/MLD snoop frames and ARP frames
- * to the highest setting.
- */
- REG_WRITE(REG_GLOBAL2, 0x0f, 0x00ff);
-
- return 0;
-}
-
-static int mv88e6131_setup_port(struct dsa_switch *ds, int p)
-{
- struct mv88e6xxx_priv_state *ps = (void *)(ds + 1);
- int addr = REG_PORT(p);
- u16 val;
-
- /*
- * MAC Forcing register: don't force link, speed, duplex
- * or flow control state to any particular values on physical
- * ports, but force the CPU port and all DSA ports to 1000 Mb/s
- * (100 Mb/s on 6085) full duplex.
- */
- if (dsa_is_cpu_port(ds, p) || ds->dsa_port_mask & (1 << p))
- if (ps->id == ID_6085)
- REG_WRITE(addr, 0x01, 0x003d); /* 100 Mb/s */
- else
- REG_WRITE(addr, 0x01, 0x003e); /* 1000 Mb/s */
- else
- REG_WRITE(addr, 0x01, 0x0003);
-
- /*
- * Port Control: disable Core Tag, disable Drop-on-Lock,
- * transmit frames unmodified, disable Header mode,
- * enable IGMP/MLD snoop, disable DoubleTag, disable VLAN
- * tunneling, determine priority by looking at 802.1p and
- * IP priority fields (IP prio has precedence), and set STP
- * state to Forwarding.
- *
- * If this is the upstream port for this switch, enable
- * forwarding of unknown unicasts, and enable DSA tagging
- * mode.
- *
- * If this is the link to another switch, use DSA tagging
- * mode, but do not enable forwarding of unknown unicasts.
- */
- val = 0x0433;
- if (p == dsa_upstream_port(ds)) {
- val |= 0x0104;
- /*
- * On 6085, unknown multicast forward is controlled
- * here rather than in Port Control 2 register.
- */
- if (ps->id == ID_6085)
- val |= 0x0008;
- }
- if (ds->dsa_port_mask & (1 << p))
- val |= 0x0100;
- REG_WRITE(addr, 0x04, val);
-
- /*
- * Port Control 1: disable trunking. Also, if this is the
- * CPU port, enable learn messages to be sent to this port.
- */
- REG_WRITE(addr, 0x05, dsa_is_cpu_port(ds, p) ? 0x8000 : 0x0000);
-
- /*
- * Port based VLAN map: give each port its own address
- * database, allow the CPU port to talk to each of the 'real'
- * ports, and allow each of the 'real' ports to only talk to
- * the upstream port.
- */
- val = (p & 0xf) << 12;
- if (dsa_is_cpu_port(ds, p))
- val |= ds->phys_port_mask;
- else
- val |= 1 << dsa_upstream_port(ds);
- REG_WRITE(addr, 0x06, val);
-
- /*
- * Default VLAN ID and priority: don't set a default VLAN
- * ID, and set the default packet priority to zero.
- */
- REG_WRITE(addr, 0x07, 0x0000);
-
- /*
- * Port Control 2: don't force a good FCS, don't use
- * VLAN-based, source address-based or destination
- * address-based priority overrides, don't let the switch
- * add or strip 802.1q tags, don't discard tagged or
- * untagged frames on this port, do a destination address
- * lookup on received packets as usual, don't send a copy
- * of all transmitted/received frames on this port to the
- * CPU, and configure the upstream port number.
- *
- * If this is the upstream port for this switch, enable
- * forwarding of unknown multicast addresses.
- */
- if (ps->id == ID_6085)
- /*
- * on 6085, bits 3:0 are reserved, bit 6 control ARP
- * mirroring, and multicast forward is handled in
- * Port Control register.
- */
- REG_WRITE(addr, 0x08, 0x0080);
- else {
- val = 0x0080 | dsa_upstream_port(ds);
- if (p == dsa_upstream_port(ds))
- val |= 0x0040;
- REG_WRITE(addr, 0x08, val);
- }
-
- /*
- * Rate Control: disable ingress rate limiting.
- */
- REG_WRITE(addr, 0x09, 0x0000);
-
- /*
- * Rate Control 2: disable egress rate limiting.
- */
- REG_WRITE(addr, 0x0a, 0x0000);
-
- /*
- * Port Association Vector: when learning source addresses
- * of packets, add the address to the address database using
- * a port bitmap that has only the bit for this port set and
- * the other bits clear.
- */
- REG_WRITE(addr, 0x0b, 1 << p);
-
- /*
- * Tag Remap: use an identity 802.1p prio -> switch prio
- * mapping.
- */
- REG_WRITE(addr, 0x18, 0x3210);
-
- /*
- * Tag Remap 2: use an identity 802.1p prio -> switch prio
- * mapping.
- */
- REG_WRITE(addr, 0x19, 0x7654);
-
- return 0;
-}
-
-static int mv88e6131_setup(struct dsa_switch *ds)
-{
- struct mv88e6xxx_priv_state *ps = (void *)(ds + 1);
- int i;
- int ret;
-
- mutex_init(&ps->smi_mutex);
- mv88e6xxx_ppu_state_init(ds);
- mutex_init(&ps->stats_mutex);
-
- ps->id = REG_READ(REG_PORT(0), 0x03) & 0xfff0;
-
- ret = mv88e6131_switch_reset(ds);
- if (ret < 0)
- return ret;
-
- /* @@@ initialise vtu and atu */
-
- ret = mv88e6131_setup_global(ds);
- if (ret < 0)
- return ret;
-
- for (i = 0; i < 11; i++) {
- ret = mv88e6131_setup_port(ds, i);
- if (ret < 0)
- return ret;
- }
-
- return 0;
-}
-
-static int mv88e6131_port_to_phy_addr(int port)
-{
- if (port >= 0 && port <= 11)
- return port;
- return -1;
-}
-
-static int
-mv88e6131_phy_read(struct dsa_switch *ds, int port, int regnum)
-{
- int addr = mv88e6131_port_to_phy_addr(port);
- return mv88e6xxx_phy_read_ppu(ds, addr, regnum);
-}
-
-static int
-mv88e6131_phy_write(struct dsa_switch *ds,
- int port, int regnum, u16 val)
-{
- int addr = mv88e6131_port_to_phy_addr(port);
- return mv88e6xxx_phy_write_ppu(ds, addr, regnum, val);
-}
-
-static struct mv88e6xxx_hw_stat mv88e6131_hw_stats[] = {
- { "in_good_octets", 8, 0x00, },
- { "in_bad_octets", 4, 0x02, },
- { "in_unicast", 4, 0x04, },
- { "in_broadcasts", 4, 0x06, },
- { "in_multicasts", 4, 0x07, },
- { "in_pause", 4, 0x16, },
- { "in_undersize", 4, 0x18, },
- { "in_fragments", 4, 0x19, },
- { "in_oversize", 4, 0x1a, },
- { "in_jabber", 4, 0x1b, },
- { "in_rx_error", 4, 0x1c, },
- { "in_fcs_error", 4, 0x1d, },
- { "out_octets", 8, 0x0e, },
- { "out_unicast", 4, 0x10, },
- { "out_broadcasts", 4, 0x13, },
- { "out_multicasts", 4, 0x12, },
- { "out_pause", 4, 0x15, },
- { "excessive", 4, 0x11, },
- { "collisions", 4, 0x1e, },
- { "deferred", 4, 0x05, },
- { "single", 4, 0x14, },
- { "multiple", 4, 0x17, },
- { "out_fcs_error", 4, 0x03, },
- { "late", 4, 0x1f, },
- { "hist_64bytes", 4, 0x08, },
- { "hist_65_127bytes", 4, 0x09, },
- { "hist_128_255bytes", 4, 0x0a, },
- { "hist_256_511bytes", 4, 0x0b, },
- { "hist_512_1023bytes", 4, 0x0c, },
- { "hist_1024_max_bytes", 4, 0x0d, },
-};
-
-static void
-mv88e6131_get_strings(struct dsa_switch *ds, int port, uint8_t *data)
-{
- mv88e6xxx_get_strings(ds, ARRAY_SIZE(mv88e6131_hw_stats),
- mv88e6131_hw_stats, port, data);
-}
-
-static void
-mv88e6131_get_ethtool_stats(struct dsa_switch *ds,
- int port, uint64_t *data)
-{
- mv88e6xxx_get_ethtool_stats(ds, ARRAY_SIZE(mv88e6131_hw_stats),
- mv88e6131_hw_stats, port, data);
-}
-
-static int mv88e6131_get_sset_count(struct dsa_switch *ds)
-{
- return ARRAY_SIZE(mv88e6131_hw_stats);
-}
-
-static struct dsa_switch_driver mv88e6131_switch_driver = {
- .tag_protocol = cpu_to_be16(ETH_P_DSA),
- .priv_size = sizeof(struct mv88e6xxx_priv_state),
- .probe = mv88e6131_probe,
- .setup = mv88e6131_setup,
- .set_addr = mv88e6xxx_set_addr_direct,
- .phy_read = mv88e6131_phy_read,
- .phy_write = mv88e6131_phy_write,
- .poll_link = mv88e6xxx_poll_link,
- .get_strings = mv88e6131_get_strings,
- .get_ethtool_stats = mv88e6131_get_ethtool_stats,
- .get_sset_count = mv88e6131_get_sset_count,
-};
-
-static int __init mv88e6131_init(void)
-{
- register_switch_driver(&mv88e6131_switch_driver);
- return 0;
-}
-module_init(mv88e6131_init);
-
-static void __exit mv88e6131_cleanup(void)
-{
- unregister_switch_driver(&mv88e6131_switch_driver);
-}
-module_exit(mv88e6131_cleanup);
diff --git a/net/dsa/mv88e6xxx.c b/net/dsa/mv88e6xxx.c
deleted file mode 100644
index efe661a9def4..000000000000
--- a/net/dsa/mv88e6xxx.c
+++ /dev/null
@@ -1,522 +0,0 @@
-/*
- * net/dsa/mv88e6xxx.c - Marvell 88e6xxx switch chip support
- * Copyright (c) 2008 Marvell Semiconductor
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#include <linux/list.h>
-#include <linux/netdevice.h>
-#include <linux/phy.h>
-#include "dsa_priv.h"
-#include "mv88e6xxx.h"
-
-/*
- * If the switch's ADDR[4:0] strap pins are strapped to zero, it will
- * use all 32 SMI bus addresses on its SMI bus, and all switch registers
- * will be directly accessible on some {device address,register address}
- * pair. If the ADDR[4:0] pins are not strapped to zero, the switch
- * will only respond to SMI transactions to that specific address, and
- * an indirect addressing mechanism needs to be used to access its
- * registers.
- */
-static int mv88e6xxx_reg_wait_ready(struct mii_bus *bus, int sw_addr)
-{
- int ret;
- int i;
-
- for (i = 0; i < 16; i++) {
- ret = mdiobus_read(bus, sw_addr, 0);
- if (ret < 0)
- return ret;
-
- if ((ret & 0x8000) == 0)
- return 0;
- }
-
- return -ETIMEDOUT;
-}
-
-int __mv88e6xxx_reg_read(struct mii_bus *bus, int sw_addr, int addr, int reg)
-{
- int ret;
-
- if (sw_addr == 0)
- return mdiobus_read(bus, addr, reg);
-
- /*
- * Wait for the bus to become free.
- */
- ret = mv88e6xxx_reg_wait_ready(bus, sw_addr);
- if (ret < 0)
- return ret;
-
- /*
- * Transmit the read command.
- */
- ret = mdiobus_write(bus, sw_addr, 0, 0x9800 | (addr << 5) | reg);
- if (ret < 0)
- return ret;
-
- /*
- * Wait for the read command to complete.
- */
- ret = mv88e6xxx_reg_wait_ready(bus, sw_addr);
- if (ret < 0)
- return ret;
-
- /*
- * Read the data.
- */
- ret = mdiobus_read(bus, sw_addr, 1);
- if (ret < 0)
- return ret;
-
- return ret & 0xffff;
-}
-
-int mv88e6xxx_reg_read(struct dsa_switch *ds, int addr, int reg)
-{
- struct mv88e6xxx_priv_state *ps = (void *)(ds + 1);
- int ret;
-
- mutex_lock(&ps->smi_mutex);
- ret = __mv88e6xxx_reg_read(ds->master_mii_bus,
- ds->pd->sw_addr, addr, reg);
- mutex_unlock(&ps->smi_mutex);
-
- return ret;
-}
-
-int __mv88e6xxx_reg_write(struct mii_bus *bus, int sw_addr, int addr,
- int reg, u16 val)
-{
- int ret;
-
- if (sw_addr == 0)
- return mdiobus_write(bus, addr, reg, val);
-
- /*
- * Wait for the bus to become free.
- */
- ret = mv88e6xxx_reg_wait_ready(bus, sw_addr);
- if (ret < 0)
- return ret;
-
- /*
- * Transmit the data to write.
- */
- ret = mdiobus_write(bus, sw_addr, 1, val);
- if (ret < 0)
- return ret;
-
- /*
- * Transmit the write command.
- */
- ret = mdiobus_write(bus, sw_addr, 0, 0x9400 | (addr << 5) | reg);
- if (ret < 0)
- return ret;
-
- /*
- * Wait for the write command to complete.
- */
- ret = mv88e6xxx_reg_wait_ready(bus, sw_addr);
- if (ret < 0)
- return ret;
-
- return 0;
-}
-
-int mv88e6xxx_reg_write(struct dsa_switch *ds, int addr, int reg, u16 val)
-{
- struct mv88e6xxx_priv_state *ps = (void *)(ds + 1);
- int ret;
-
- mutex_lock(&ps->smi_mutex);
- ret = __mv88e6xxx_reg_write(ds->master_mii_bus,
- ds->pd->sw_addr, addr, reg, val);
- mutex_unlock(&ps->smi_mutex);
-
- return ret;
-}
-
-int mv88e6xxx_config_prio(struct dsa_switch *ds)
-{
- /*
- * Configure the IP ToS mapping registers.
- */
- REG_WRITE(REG_GLOBAL, 0x10, 0x0000);
- REG_WRITE(REG_GLOBAL, 0x11, 0x0000);
- REG_WRITE(REG_GLOBAL, 0x12, 0x5555);
- REG_WRITE(REG_GLOBAL, 0x13, 0x5555);
- REG_WRITE(REG_GLOBAL, 0x14, 0xaaaa);
- REG_WRITE(REG_GLOBAL, 0x15, 0xaaaa);
- REG_WRITE(REG_GLOBAL, 0x16, 0xffff);
- REG_WRITE(REG_GLOBAL, 0x17, 0xffff);
-
- /*
- * Configure the IEEE 802.1p priority mapping register.
- */
- REG_WRITE(REG_GLOBAL, 0x18, 0xfa41);
-
- return 0;
-}
-
-int mv88e6xxx_set_addr_direct(struct dsa_switch *ds, u8 *addr)
-{
- REG_WRITE(REG_GLOBAL, 0x01, (addr[0] << 8) | addr[1]);
- REG_WRITE(REG_GLOBAL, 0x02, (addr[2] << 8) | addr[3]);
- REG_WRITE(REG_GLOBAL, 0x03, (addr[4] << 8) | addr[5]);
-
- return 0;
-}
-
-int mv88e6xxx_set_addr_indirect(struct dsa_switch *ds, u8 *addr)
-{
- int i;
- int ret;
-
- for (i = 0; i < 6; i++) {
- int j;
-
- /*
- * Write the MAC address byte.
- */
- REG_WRITE(REG_GLOBAL2, 0x0d, 0x8000 | (i << 8) | addr[i]);
-
- /*
- * Wait for the write to complete.
- */
- for (j = 0; j < 16; j++) {
- ret = REG_READ(REG_GLOBAL2, 0x0d);
- if ((ret & 0x8000) == 0)
- break;
- }
- if (j == 16)
- return -ETIMEDOUT;
- }
-
- return 0;
-}
-
-int mv88e6xxx_phy_read(struct dsa_switch *ds, int addr, int regnum)
-{
- if (addr >= 0)
- return mv88e6xxx_reg_read(ds, addr, regnum);
- return 0xffff;
-}
-
-int mv88e6xxx_phy_write(struct dsa_switch *ds, int addr, int regnum, u16 val)
-{
- if (addr >= 0)
- return mv88e6xxx_reg_write(ds, addr, regnum, val);
- return 0;
-}
-
-#ifdef CONFIG_NET_DSA_MV88E6XXX_NEED_PPU
-static int mv88e6xxx_ppu_disable(struct dsa_switch *ds)
-{
- int ret;
- int i;
-
- ret = REG_READ(REG_GLOBAL, 0x04);
- REG_WRITE(REG_GLOBAL, 0x04, ret & ~0x4000);
-
- for (i = 0; i < 1000; i++) {
- ret = REG_READ(REG_GLOBAL, 0x00);
- msleep(1);
- if ((ret & 0xc000) != 0xc000)
- return 0;
- }
-
- return -ETIMEDOUT;
-}
-
-static int mv88e6xxx_ppu_enable(struct dsa_switch *ds)
-{
- int ret;
- int i;
-
- ret = REG_READ(REG_GLOBAL, 0x04);
- REG_WRITE(REG_GLOBAL, 0x04, ret | 0x4000);
-
- for (i = 0; i < 1000; i++) {
- ret = REG_READ(REG_GLOBAL, 0x00);
- msleep(1);
- if ((ret & 0xc000) == 0xc000)
- return 0;
- }
-
- return -ETIMEDOUT;
-}
-
-static void mv88e6xxx_ppu_reenable_work(struct work_struct *ugly)
-{
- struct mv88e6xxx_priv_state *ps;
-
- ps = container_of(ugly, struct mv88e6xxx_priv_state, ppu_work);
- if (mutex_trylock(&ps->ppu_mutex)) {
- struct dsa_switch *ds = ((struct dsa_switch *)ps) - 1;
-
- if (mv88e6xxx_ppu_enable(ds) == 0)
- ps->ppu_disabled = 0;
- mutex_unlock(&ps->ppu_mutex);
- }
-}
-
-static void mv88e6xxx_ppu_reenable_timer(unsigned long _ps)
-{
- struct mv88e6xxx_priv_state *ps = (void *)_ps;
-
- schedule_work(&ps->ppu_work);
-}
-
-static int mv88e6xxx_ppu_access_get(struct dsa_switch *ds)
-{
- struct mv88e6xxx_priv_state *ps = (void *)(ds + 1);
- int ret;
-
- mutex_lock(&ps->ppu_mutex);
-
- /*
- * If the PHY polling unit is enabled, disable it so that
- * we can access the PHY registers. If it was already
- * disabled, cancel the timer that is going to re-enable
- * it.
- */
- if (!ps->ppu_disabled) {
- ret = mv88e6xxx_ppu_disable(ds);
- if (ret < 0) {
- mutex_unlock(&ps->ppu_mutex);
- return ret;
- }
- ps->ppu_disabled = 1;
- } else {
- del_timer(&ps->ppu_timer);
- ret = 0;
- }
-
- return ret;
-}
-
-static void mv88e6xxx_ppu_access_put(struct dsa_switch *ds)
-{
- struct mv88e6xxx_priv_state *ps = (void *)(ds + 1);
-
- /*
- * Schedule a timer to re-enable the PHY polling unit.
- */
- mod_timer(&ps->ppu_timer, jiffies + msecs_to_jiffies(10));
- mutex_unlock(&ps->ppu_mutex);
-}
-
-void mv88e6xxx_ppu_state_init(struct dsa_switch *ds)
-{
- struct mv88e6xxx_priv_state *ps = (void *)(ds + 1);
-
- mutex_init(&ps->ppu_mutex);
- INIT_WORK(&ps->ppu_work, mv88e6xxx_ppu_reenable_work);
- init_timer(&ps->ppu_timer);
- ps->ppu_timer.data = (unsigned long)ps;
- ps->ppu_timer.function = mv88e6xxx_ppu_reenable_timer;
-}
-
-int mv88e6xxx_phy_read_ppu(struct dsa_switch *ds, int addr, int regnum)
-{
- int ret;
-
- ret = mv88e6xxx_ppu_access_get(ds);
- if (ret >= 0) {
- ret = mv88e6xxx_reg_read(ds, addr, regnum);
- mv88e6xxx_ppu_access_put(ds);
- }
-
- return ret;
-}
-
-int mv88e6xxx_phy_write_ppu(struct dsa_switch *ds, int addr,
- int regnum, u16 val)
-{
- int ret;
-
- ret = mv88e6xxx_ppu_access_get(ds);
- if (ret >= 0) {
- ret = mv88e6xxx_reg_write(ds, addr, regnum, val);
- mv88e6xxx_ppu_access_put(ds);
- }
-
- return ret;
-}
-#endif
-
-void mv88e6xxx_poll_link(struct dsa_switch *ds)
-{
- int i;
-
- for (i = 0; i < DSA_MAX_PORTS; i++) {
- struct net_device *dev;
- int uninitialized_var(port_status);
- int link;
- int speed;
- int duplex;
- int fc;
-
- dev = ds->ports[i];
- if (dev == NULL)
- continue;
-
- link = 0;
- if (dev->flags & IFF_UP) {
- port_status = mv88e6xxx_reg_read(ds, REG_PORT(i), 0x00);
- if (port_status < 0)
- continue;
-
- link = !!(port_status & 0x0800);
- }
-
- if (!link) {
- if (netif_carrier_ok(dev)) {
- printk(KERN_INFO "%s: link down\n", dev->name);
- netif_carrier_off(dev);
- }
- continue;
- }
-
- switch (port_status & 0x0300) {
- case 0x0000:
- speed = 10;
- break;
- case 0x0100:
- speed = 100;
- break;
- case 0x0200:
- speed = 1000;
- break;
- default:
- speed = -1;
- break;
- }
- duplex = (port_status & 0x0400) ? 1 : 0;
- fc = (port_status & 0x8000) ? 1 : 0;
-
- if (!netif_carrier_ok(dev)) {
- printk(KERN_INFO "%s: link up, %d Mb/s, %s duplex, "
- "flow control %sabled\n", dev->name,
- speed, duplex ? "full" : "half",
- fc ? "en" : "dis");
- netif_carrier_on(dev);
- }
- }
-}
-
-static int mv88e6xxx_stats_wait(struct dsa_switch *ds)
-{
- int ret;
- int i;
-
- for (i = 0; i < 10; i++) {
- ret = REG_READ(REG_GLOBAL, 0x1d);
- if ((ret & 0x8000) == 0)
- return 0;
- }
-
- return -ETIMEDOUT;
-}
-
-static int mv88e6xxx_stats_snapshot(struct dsa_switch *ds, int port)
-{
- int ret;
-
- /*
- * Snapshot the hardware statistics counters for this port.
- */
- REG_WRITE(REG_GLOBAL, 0x1d, 0xdc00 | port);
-
- /*
- * Wait for the snapshotting to complete.
- */
- ret = mv88e6xxx_stats_wait(ds);
- if (ret < 0)
- return ret;
-
- return 0;
-}
-
-static void mv88e6xxx_stats_read(struct dsa_switch *ds, int stat, u32 *val)
-{
- u32 _val;
- int ret;
-
- *val = 0;
-
- ret = mv88e6xxx_reg_write(ds, REG_GLOBAL, 0x1d, 0xcc00 | stat);
- if (ret < 0)
- return;
-
- ret = mv88e6xxx_stats_wait(ds);
- if (ret < 0)
- return;
-
- ret = mv88e6xxx_reg_read(ds, REG_GLOBAL, 0x1e);
- if (ret < 0)
- return;
-
- _val = ret << 16;
-
- ret = mv88e6xxx_reg_read(ds, REG_GLOBAL, 0x1f);
- if (ret < 0)
- return;
-
- *val = _val | ret;
-}
-
-void mv88e6xxx_get_strings(struct dsa_switch *ds,
- int nr_stats, struct mv88e6xxx_hw_stat *stats,
- int port, uint8_t *data)
-{
- int i;
-
- for (i = 0; i < nr_stats; i++) {
- memcpy(data + i * ETH_GSTRING_LEN,
- stats[i].string, ETH_GSTRING_LEN);
- }
-}
-
-void mv88e6xxx_get_ethtool_stats(struct dsa_switch *ds,
- int nr_stats, struct mv88e6xxx_hw_stat *stats,
- int port, uint64_t *data)
-{
- struct mv88e6xxx_priv_state *ps = (void *)(ds + 1);
- int ret;
- int i;
-
- mutex_lock(&ps->stats_mutex);
-
- ret = mv88e6xxx_stats_snapshot(ds, port);
- if (ret < 0) {
- mutex_unlock(&ps->stats_mutex);
- return;
- }
-
- /*
- * Read each of the counters.
- */
- for (i = 0; i < nr_stats; i++) {
- struct mv88e6xxx_hw_stat *s = stats + i;
- u32 low;
- u32 high;
-
- mv88e6xxx_stats_read(ds, s->reg, &low);
- if (s->sizeof_stat == 8)
- mv88e6xxx_stats_read(ds, s->reg + 1, &high);
- else
- high = 0;
-
- data[i] = (((u64)high) << 32) | low;
- }
-
- mutex_unlock(&ps->stats_mutex);
-}
diff --git a/net/dsa/mv88e6xxx.h b/net/dsa/mv88e6xxx.h
deleted file mode 100644
index 61156ca26a0d..000000000000
--- a/net/dsa/mv88e6xxx.h
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * net/dsa/mv88e6xxx.h - Marvell 88e6xxx switch chip support
- * Copyright (c) 2008 Marvell Semiconductor
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#ifndef __MV88E6XXX_H
-#define __MV88E6XXX_H
-
-#define REG_PORT(p) (0x10 + (p))
-#define REG_GLOBAL 0x1b
-#define REG_GLOBAL2 0x1c
-
-struct mv88e6xxx_priv_state {
- /*
- * When using multi-chip addressing, this mutex protects
- * access to the indirect access registers. (In single-chip
- * mode, this mutex is effectively useless.)
- */
- struct mutex smi_mutex;
-
-#ifdef CONFIG_NET_DSA_MV88E6XXX_NEED_PPU
- /*
- * Handles automatic disabling and re-enabling of the PHY
- * polling unit.
- */
- struct mutex ppu_mutex;
- int ppu_disabled;
- struct work_struct ppu_work;
- struct timer_list ppu_timer;
-#endif
-
- /*
- * This mutex serialises access to the statistics unit.
- * Hold this mutex over snapshot + dump sequences.
- */
- struct mutex stats_mutex;
-
- int id; /* switch product id */
-};
-
-struct mv88e6xxx_hw_stat {
- char string[ETH_GSTRING_LEN];
- int sizeof_stat;
- int reg;
-};
-
-int __mv88e6xxx_reg_read(struct mii_bus *bus, int sw_addr, int addr, int reg);
-int mv88e6xxx_reg_read(struct dsa_switch *ds, int addr, int reg);
-int __mv88e6xxx_reg_write(struct mii_bus *bus, int sw_addr, int addr,
- int reg, u16 val);
-int mv88e6xxx_reg_write(struct dsa_switch *ds, int addr, int reg, u16 val);
-int mv88e6xxx_config_prio(struct dsa_switch *ds);
-int mv88e6xxx_set_addr_direct(struct dsa_switch *ds, u8 *addr);
-int mv88e6xxx_set_addr_indirect(struct dsa_switch *ds, u8 *addr);
-int mv88e6xxx_phy_read(struct dsa_switch *ds, int addr, int regnum);
-int mv88e6xxx_phy_write(struct dsa_switch *ds, int addr, int regnum, u16 val);
-void mv88e6xxx_ppu_state_init(struct dsa_switch *ds);
-int mv88e6xxx_phy_read_ppu(struct dsa_switch *ds, int addr, int regnum);
-int mv88e6xxx_phy_write_ppu(struct dsa_switch *ds, int addr,
- int regnum, u16 val);
-void mv88e6xxx_poll_link(struct dsa_switch *ds);
-void mv88e6xxx_get_strings(struct dsa_switch *ds,
- int nr_stats, struct mv88e6xxx_hw_stat *stats,
- int port, uint8_t *data);
-void mv88e6xxx_get_ethtool_stats(struct dsa_switch *ds,
- int nr_stats, struct mv88e6xxx_hw_stat *stats,
- int port, uint64_t *data);
-
-#define REG_READ(addr, reg) \
- ({ \
- int __ret; \
- \
- __ret = mv88e6xxx_reg_read(ds, addr, reg); \
- if (__ret < 0) \
- return __ret; \
- __ret; \
- })
-
-#define REG_WRITE(addr, reg, val) \
- ({ \
- int __ret; \
- \
- __ret = mv88e6xxx_reg_write(ds, addr, reg, val); \
- if (__ret < 0) \
- return __ret; \
- })
-
-
-
-#endif
diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c
index 98dfe80b4538..cacce1e22f9c 100644
--- a/net/dsa/tag_dsa.c
+++ b/net/dsa/tag_dsa.c
@@ -186,20 +186,7 @@ out:
return 0;
}
-static struct packet_type dsa_packet_type __read_mostly = {
+struct packet_type dsa_packet_type __read_mostly = {
.type = cpu_to_be16(ETH_P_DSA),
.func = dsa_rcv,
};
-
-static int __init dsa_init_module(void)
-{
- dev_add_pack(&dsa_packet_type);
- return 0;
-}
-module_init(dsa_init_module);
-
-static void __exit dsa_cleanup_module(void)
-{
- dev_remove_pack(&dsa_packet_type);
-}
-module_exit(dsa_cleanup_module);
diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c
index 6f383322ad25..e70c43c25e64 100644
--- a/net/dsa/tag_edsa.c
+++ b/net/dsa/tag_edsa.c
@@ -205,20 +205,7 @@ out:
return 0;
}
-static struct packet_type edsa_packet_type __read_mostly = {
+struct packet_type edsa_packet_type __read_mostly = {
.type = cpu_to_be16(ETH_P_EDSA),
.func = edsa_rcv,
};
-
-static int __init edsa_init_module(void)
-{
- dev_add_pack(&edsa_packet_type);
- return 0;
-}
-module_init(edsa_init_module);
-
-static void __exit edsa_cleanup_module(void)
-{
- dev_remove_pack(&edsa_packet_type);
-}
-module_exit(edsa_cleanup_module);
diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c
index d6d7d0add3cb..94bc260d015d 100644
--- a/net/dsa/tag_trailer.c
+++ b/net/dsa/tag_trailer.c
@@ -114,20 +114,7 @@ out:
return 0;
}
-static struct packet_type trailer_packet_type __read_mostly = {
+struct packet_type trailer_packet_type __read_mostly = {
.type = cpu_to_be16(ETH_P_TRAILER),
.func = trailer_rcv,
};
-
-static int __init trailer_init_module(void)
-{
- dev_add_pack(&trailer_packet_type);
- return 0;
-}
-module_init(trailer_init_module);
-
-static void __exit trailer_cleanup_module(void)
-{
- dev_remove_pack(&trailer_packet_type);
-}
-module_exit(trailer_cleanup_module);
diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c
index 1c1f26c5d672..7e717cb35ad1 100644
--- a/net/econet/af_econet.c
+++ b/net/econet/af_econet.c
@@ -322,6 +322,7 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
/* Real hardware Econet. We're not worthy etc. */
#ifdef CONFIG_ECONET_NATIVE
unsigned short proto = 0;
+ int hlen, tlen;
int res;
if (len + 15 > dev->mtu) {
@@ -331,12 +332,14 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
dev_hold(dev);
- skb = sock_alloc_send_skb(sk, len + LL_ALLOCATED_SPACE(dev),
+ hlen = LL_RESERVED_SPACE(dev);
+ tlen = dev->needed_tailroom;
+ skb = sock_alloc_send_skb(sk, len + hlen + tlen,
msg->msg_flags & MSG_DONTWAIT, &err);
if (skb == NULL)
goto out_unlock;
- skb_reserve(skb, LL_RESERVED_SPACE(dev));
+ skb_reserve(skb, hlen);
skb_reset_network_header(skb);
eb = (struct ec_cb *)&skb->cb;
diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c
index 19d6aefe97d4..e4ecc1eef98c 100644
--- a/net/ieee802154/6lowpan.c
+++ b/net/ieee802154/6lowpan.c
@@ -50,8 +50,6 @@
* SUCH DAMAGE.
*/
-#define DEBUG
-
#include <linux/bitops.h>
#include <linux/if_arp.h>
#include <linux/module.h>
@@ -113,6 +111,20 @@ struct lowpan_dev_record {
struct list_head list;
};
+struct lowpan_fragment {
+ struct sk_buff *skb; /* skb to be assembled */
+ spinlock_t lock; /* concurency lock */
+ u16 length; /* length to be assemled */
+ u32 bytes_rcv; /* bytes received */
+ u16 tag; /* current fragment tag */
+ struct timer_list timer; /* assembling timer */
+ struct list_head list; /* fragments list */
+};
+
+static unsigned short fragment_tag;
+static LIST_HEAD(lowpan_fragments);
+spinlock_t flist_lock;
+
static inline struct
lowpan_dev_info *lowpan_dev_info(const struct net_device *dev)
{
@@ -234,6 +246,50 @@ lowpan_uncompress_addr(struct sk_buff *skb, struct in6_addr *ipaddr,
return 0;
}
+static void
+lowpan_compress_udp_header(u8 **hc06_ptr, struct sk_buff *skb)
+{
+ struct udphdr *uh = udp_hdr(skb);
+
+ pr_debug("(%s): UDP header compression\n", __func__);
+
+ if (((uh->source & LOWPAN_NHC_UDP_4BIT_MASK) ==
+ LOWPAN_NHC_UDP_4BIT_PORT) &&
+ ((uh->dest & LOWPAN_NHC_UDP_4BIT_MASK) ==
+ LOWPAN_NHC_UDP_4BIT_PORT)) {
+ pr_debug("(%s): both ports compression to 4 bits\n", __func__);
+ **hc06_ptr = LOWPAN_NHC_UDP_CS_P_11;
+ **(hc06_ptr + 1) = /* subtraction is faster */
+ (u8)((uh->dest - LOWPAN_NHC_UDP_4BIT_PORT) +
+ ((uh->source & LOWPAN_NHC_UDP_4BIT_PORT) << 4));
+ *hc06_ptr += 2;
+ } else if ((uh->dest & LOWPAN_NHC_UDP_8BIT_MASK) ==
+ LOWPAN_NHC_UDP_8BIT_PORT) {
+ pr_debug("(%s): remove 8 bits of dest\n", __func__);
+ **hc06_ptr = LOWPAN_NHC_UDP_CS_P_01;
+ memcpy(*hc06_ptr + 1, &uh->source, 2);
+ **(hc06_ptr + 3) = (u8)(uh->dest - LOWPAN_NHC_UDP_8BIT_PORT);
+ *hc06_ptr += 4;
+ } else if ((uh->source & LOWPAN_NHC_UDP_8BIT_MASK) ==
+ LOWPAN_NHC_UDP_8BIT_PORT) {
+ pr_debug("(%s): remove 8 bits of source\n", __func__);
+ **hc06_ptr = LOWPAN_NHC_UDP_CS_P_10;
+ memcpy(*hc06_ptr + 1, &uh->dest, 2);
+ **(hc06_ptr + 3) = (u8)(uh->source - LOWPAN_NHC_UDP_8BIT_PORT);
+ *hc06_ptr += 4;
+ } else {
+ pr_debug("(%s): can't compress header\n", __func__);
+ **hc06_ptr = LOWPAN_NHC_UDP_CS_P_00;
+ memcpy(*hc06_ptr + 1, &uh->source, 2);
+ memcpy(*hc06_ptr + 3, &uh->dest, 2);
+ *hc06_ptr += 5;
+ }
+
+ /* checksum is always inline */
+ memcpy(*hc06_ptr, &uh->check, 2);
+ *hc06_ptr += 2;
+}
+
static u8 lowpan_fetch_skb_u8(struct sk_buff *skb)
{
u8 ret;
@@ -244,6 +300,73 @@ static u8 lowpan_fetch_skb_u8(struct sk_buff *skb)
return ret;
}
+static u16 lowpan_fetch_skb_u16(struct sk_buff *skb)
+{
+ u16 ret;
+
+ BUG_ON(!pskb_may_pull(skb, 2));
+
+ ret = skb->data[0] | (skb->data[1] << 8);
+ skb_pull(skb, 2);
+ return ret;
+}
+
+static int
+lowpan_uncompress_udp_header(struct sk_buff *skb)
+{
+ struct udphdr *uh = udp_hdr(skb);
+ u8 tmp;
+
+ tmp = lowpan_fetch_skb_u8(skb);
+
+ if ((tmp & LOWPAN_NHC_UDP_MASK) == LOWPAN_NHC_UDP_ID) {
+ pr_debug("(%s): UDP header uncompression\n", __func__);
+ switch (tmp & LOWPAN_NHC_UDP_CS_P_11) {
+ case LOWPAN_NHC_UDP_CS_P_00:
+ memcpy(&uh->source, &skb->data[0], 2);
+ memcpy(&uh->dest, &skb->data[2], 2);
+ skb_pull(skb, 4);
+ break;
+ case LOWPAN_NHC_UDP_CS_P_01:
+ memcpy(&uh->source, &skb->data[0], 2);
+ uh->dest =
+ skb->data[2] + LOWPAN_NHC_UDP_8BIT_PORT;
+ skb_pull(skb, 3);
+ break;
+ case LOWPAN_NHC_UDP_CS_P_10:
+ uh->source = skb->data[0] + LOWPAN_NHC_UDP_8BIT_PORT;
+ memcpy(&uh->dest, &skb->data[1], 2);
+ skb_pull(skb, 3);
+ break;
+ case LOWPAN_NHC_UDP_CS_P_11:
+ uh->source =
+ LOWPAN_NHC_UDP_4BIT_PORT + (skb->data[0] >> 4);
+ uh->dest =
+ LOWPAN_NHC_UDP_4BIT_PORT + (skb->data[0] & 0x0f);
+ skb_pull(skb, 1);
+ break;
+ default:
+ pr_debug("(%s) ERROR: unknown UDP format\n", __func__);
+ goto err;
+ break;
+ }
+
+ pr_debug("(%s): uncompressed UDP ports: src = %d, dst = %d\n",
+ __func__, uh->source, uh->dest);
+
+ /* copy checksum */
+ memcpy(&uh->check, &skb->data[0], 2);
+ skb_pull(skb, 2);
+ } else {
+ pr_debug("(%s): ERROR: unsupported NH format\n", __func__);
+ goto err;
+ }
+
+ return 0;
+err:
+ return -EINVAL;
+}
+
static int lowpan_header_create(struct sk_buff *skb,
struct net_device *dev,
unsigned short type, const void *_daddr,
@@ -342,8 +465,6 @@ static int lowpan_header_create(struct sk_buff *skb,
if (hdr->nexthdr == UIP_PROTO_UDP)
iphc0 |= LOWPAN_IPHC_NH_C;
-/* TODO: next header compression */
-
if ((iphc0 & LOWPAN_IPHC_NH_C) == 0) {
*hc06_ptr = hdr->nexthdr;
hc06_ptr += 1;
@@ -431,8 +552,9 @@ static int lowpan_header_create(struct sk_buff *skb,
}
}
- /* TODO: UDP header compression */
- /* TODO: Next Header compression */
+ /* UDP header compression */
+ if (hdr->nexthdr == UIP_PROTO_UDP)
+ lowpan_compress_udp_header(&hc06_ptr, skb);
head[0] = iphc0;
head[1] = iphc1;
@@ -467,6 +589,7 @@ static int lowpan_header_create(struct sk_buff *skb,
memcpy(&(sa.hwaddr), saddr, 8);
mac_cb(skb)->flags = IEEE802154_FC_TYPE_DATA;
+
return dev_hard_header(skb, lowpan_dev_info(dev)->real_dev,
type, (void *)&da, (void *)&sa, skb->len);
}
@@ -511,6 +634,21 @@ static int lowpan_skb_deliver(struct sk_buff *skb, struct ipv6hdr *hdr)
return stat;
}
+static void lowpan_fragment_timer_expired(unsigned long entry_addr)
+{
+ struct lowpan_fragment *entry = (struct lowpan_fragment *)entry_addr;
+
+ pr_debug("%s: timer expired for frame with tag %d\n", __func__,
+ entry->tag);
+
+ spin_lock(&flist_lock);
+ list_del(&entry->list);
+ spin_unlock(&flist_lock);
+
+ dev_kfree_skb(entry->skb);
+ kfree(entry);
+}
+
static int
lowpan_process_data(struct sk_buff *skb)
{
@@ -525,6 +663,107 @@ lowpan_process_data(struct sk_buff *skb)
if (skb->len < 2)
goto drop;
iphc0 = lowpan_fetch_skb_u8(skb);
+
+ /* fragments assembling */
+ switch (iphc0 & LOWPAN_DISPATCH_MASK) {
+ case LOWPAN_DISPATCH_FRAG1:
+ case LOWPAN_DISPATCH_FRAGN:
+ {
+ struct lowpan_fragment *frame;
+ u8 len, offset;
+ u16 tag;
+ bool found = false;
+
+ len = lowpan_fetch_skb_u8(skb); /* frame length */
+ tag = lowpan_fetch_skb_u16(skb);
+
+ /*
+ * check if frame assembling with the same tag is
+ * already in progress
+ */
+ spin_lock(&flist_lock);
+
+ list_for_each_entry(frame, &lowpan_fragments, list)
+ if (frame->tag == tag) {
+ found = true;
+ break;
+ }
+
+ /* alloc new frame structure */
+ if (!found) {
+ frame = kzalloc(sizeof(struct lowpan_fragment),
+ GFP_ATOMIC);
+ if (!frame)
+ goto unlock_and_drop;
+
+ INIT_LIST_HEAD(&frame->list);
+
+ frame->length = (iphc0 & 7) | (len << 3);
+ frame->tag = tag;
+
+ /* allocate buffer for frame assembling */
+ frame->skb = alloc_skb(frame->length +
+ sizeof(struct ipv6hdr), GFP_ATOMIC);
+
+ if (!frame->skb) {
+ kfree(frame);
+ goto unlock_and_drop;
+ }
+
+ frame->skb->priority = skb->priority;
+ frame->skb->dev = skb->dev;
+
+ /* reserve headroom for uncompressed ipv6 header */
+ skb_reserve(frame->skb, sizeof(struct ipv6hdr));
+ skb_put(frame->skb, frame->length);
+
+ init_timer(&frame->timer);
+ /* time out is the same as for ipv6 - 60 sec */
+ frame->timer.expires = jiffies + LOWPAN_FRAG_TIMEOUT;
+ frame->timer.data = (unsigned long)frame;
+ frame->timer.function = lowpan_fragment_timer_expired;
+
+ add_timer(&frame->timer);
+
+ list_add_tail(&frame->list, &lowpan_fragments);
+ }
+
+ if ((iphc0 & LOWPAN_DISPATCH_MASK) == LOWPAN_DISPATCH_FRAG1)
+ goto unlock_and_drop;
+
+ offset = lowpan_fetch_skb_u8(skb); /* fetch offset */
+
+ /* if payload fits buffer, copy it */
+ if (likely((offset * 8 + skb->len) <= frame->length))
+ skb_copy_to_linear_data_offset(frame->skb, offset * 8,
+ skb->data, skb->len);
+ else
+ goto unlock_and_drop;
+
+ frame->bytes_rcv += skb->len;
+
+ /* frame assembling complete */
+ if ((frame->bytes_rcv == frame->length) &&
+ frame->timer.expires > jiffies) {
+ /* if timer haven't expired - first of all delete it */
+ del_timer(&frame->timer);
+ list_del(&frame->list);
+ spin_unlock(&flist_lock);
+
+ dev_kfree_skb(skb);
+ skb = frame->skb;
+ kfree(frame);
+ iphc0 = lowpan_fetch_skb_u8(skb);
+ break;
+ }
+ spin_unlock(&flist_lock);
+
+ return kfree_skb(skb), 0;
+ }
+ default:
+ break;
+ }
+
iphc1 = lowpan_fetch_skb_u8(skb);
_saddr = mac_cb(skb)->sa.hwaddr;
@@ -659,7 +898,10 @@ lowpan_process_data(struct sk_buff *skb)
goto drop;
}
- /* TODO: UDP header parse */
+ /* UDP data uncompression */
+ if (iphc0 & LOWPAN_IPHC_NH_C)
+ if (lowpan_uncompress_udp_header(skb))
+ goto drop;
/* Not fragmented package */
hdr.payload_len = htons(skb->len);
@@ -674,6 +916,9 @@ lowpan_process_data(struct sk_buff *skb)
lowpan_raw_dump_table(__func__, "raw header dump", (u8 *)&hdr,
sizeof(hdr));
return lowpan_skb_deliver(skb, &hdr);
+
+unlock_and_drop:
+ spin_unlock(&flist_lock);
drop:
kfree_skb(skb);
return -EINVAL;
@@ -692,18 +937,115 @@ static int lowpan_set_address(struct net_device *dev, void *p)
return 0;
}
+static int lowpan_get_mac_header_length(struct sk_buff *skb)
+{
+ /*
+ * Currently long addressing mode is supported only, so the overall
+ * header size is 21:
+ * FC SeqNum DPAN DA SA Sec
+ * 2 + 1 + 2 + 8 + 8 + 0 = 21
+ */
+ return 21;
+}
+
+static int
+lowpan_fragment_xmit(struct sk_buff *skb, u8 *head,
+ int mlen, int plen, int offset)
+{
+ struct sk_buff *frag;
+ int hlen, ret;
+
+ /* if payload length is zero, therefore it's a first fragment */
+ hlen = (plen == 0 ? LOWPAN_FRAG1_HEAD_SIZE : LOWPAN_FRAGN_HEAD_SIZE);
+
+ lowpan_raw_dump_inline(__func__, "6lowpan fragment header", head, hlen);
+
+ frag = dev_alloc_skb(hlen + mlen + plen + IEEE802154_MFR_SIZE);
+ if (!frag)
+ return -ENOMEM;
+
+ frag->priority = skb->priority;
+ frag->dev = skb->dev;
+
+ /* copy header, MFR and payload */
+ memcpy(skb_put(frag, mlen), skb->data, mlen);
+ memcpy(skb_put(frag, hlen), head, hlen);
+
+ if (plen)
+ skb_copy_from_linear_data_offset(skb, offset + mlen,
+ skb_put(frag, plen), plen);
+
+ lowpan_raw_dump_table(__func__, " raw fragment dump", frag->data,
+ frag->len);
+
+ ret = dev_queue_xmit(frag);
+
+ return ret;
+}
+
+static int
+lowpan_skb_fragmentation(struct sk_buff *skb)
+{
+ int err, header_length, payload_length, tag, offset = 0;
+ u8 head[5];
+
+ header_length = lowpan_get_mac_header_length(skb);
+ payload_length = skb->len - header_length;
+ tag = fragment_tag++;
+
+ /* first fragment header */
+ head[0] = LOWPAN_DISPATCH_FRAG1 | (payload_length & 0x7);
+ head[1] = (payload_length >> 3) & 0xff;
+ head[2] = tag & 0xff;
+ head[3] = tag >> 8;
+
+ err = lowpan_fragment_xmit(skb, head, header_length, 0, 0);
+
+ /* next fragment header */
+ head[0] &= ~LOWPAN_DISPATCH_FRAG1;
+ head[0] |= LOWPAN_DISPATCH_FRAGN;
+
+ while ((payload_length - offset > 0) && (err >= 0)) {
+ int len = LOWPAN_FRAG_SIZE;
+
+ head[4] = offset / 8;
+
+ if (payload_length - offset < len)
+ len = payload_length - offset;
+
+ err = lowpan_fragment_xmit(skb, head, header_length,
+ len, offset);
+ offset += len;
+ }
+
+ return err;
+}
+
static netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *dev)
{
- int err = 0;
+ int err = -1;
pr_debug("(%s): package xmit\n", __func__);
skb->dev = lowpan_dev_info(dev)->real_dev;
if (skb->dev == NULL) {
pr_debug("(%s) ERROR: no real wpan device found\n", __func__);
- dev_kfree_skb(skb);
- } else
+ goto error;
+ }
+
+ if (skb->len <= IEEE802154_MTU) {
err = dev_queue_xmit(skb);
+ goto out;
+ }
+
+ pr_debug("(%s): frame is too big, fragmentation is needed\n",
+ __func__);
+ err = lowpan_skb_fragmentation(skb);
+error:
+ dev_kfree_skb(skb);
+out:
+ if (err < 0)
+ pr_debug("(%s): ERROR: xmit failed\n", __func__);
return (err < 0 ? NETDEV_TX_BUSY : NETDEV_TX_OK);
}
@@ -730,13 +1072,12 @@ static void lowpan_setup(struct net_device *dev)
dev->addr_len = IEEE802154_ADDR_LEN;
memset(dev->broadcast, 0xff, IEEE802154_ADDR_LEN);
dev->type = ARPHRD_IEEE802154;
- dev->features = NETIF_F_NO_CSUM;
/* Frame Control + Sequence Number + Address fields + Security Header */
dev->hard_header_len = 2 + 1 + 20 + 14;
dev->needed_tailroom = 2; /* FCS */
dev->mtu = 1281;
dev->tx_queue_len = 0;
- dev->flags = IFF_NOARP | IFF_BROADCAST;
+ dev->flags = IFF_BROADCAST | IFF_MULTICAST;
dev->watchdog_timeo = 0;
dev->netdev_ops = &lowpan_netdev_ops;
@@ -765,8 +1106,15 @@ static int lowpan_rcv(struct sk_buff *skb, struct net_device *dev,
goto drop;
/* check that it's our buffer */
- if ((skb->data[0] & 0xe0) == 0x60)
+ switch (skb->data[0] & 0xe0) {
+ case LOWPAN_DISPATCH_IPHC: /* ipv6 datagram */
+ case LOWPAN_DISPATCH_FRAG1: /* first fragment header */
+ case LOWPAN_DISPATCH_FRAGN: /* next fragments headers */
lowpan_process_data(skb);
+ break;
+ default:
+ break;
+ }
return NET_RX_SUCCESS;
diff --git a/net/ieee802154/6lowpan.h b/net/ieee802154/6lowpan.h
index 5d8cf80b930d..aeff3f310482 100644
--- a/net/ieee802154/6lowpan.h
+++ b/net/ieee802154/6lowpan.h
@@ -159,6 +159,24 @@
#define LOWPAN_DISPATCH_FRAG1 0xc0 /* 11000xxx */
#define LOWPAN_DISPATCH_FRAGN 0xe0 /* 11100xxx */
+#define LOWPAN_DISPATCH_MASK 0xf8 /* 11111000 */
+
+#define LOWPAN_FRAG_TIMEOUT (HZ * 60) /* time-out 60 sec */
+
+#define LOWPAN_FRAG1_HEAD_SIZE 0x4
+#define LOWPAN_FRAGN_HEAD_SIZE 0x5
+
+/*
+ * According IEEE802.15.4 standard:
+ * - MTU is 127 octets
+ * - maximum MHR size is 37 octets
+ * - MFR size is 2 octets
+ *
+ * so minimal payload size that we may guarantee is:
+ * MTU - MHR - MFR = 88 octets
+ */
+#define LOWPAN_FRAG_SIZE 88
+
/*
* Values of fields within the IPHC encoding first byte
* (C stands for compressed and I for inline)
@@ -201,6 +219,11 @@
#define LOWPAN_NHC_UDP_CHECKSUMC 0x04
#define LOWPAN_NHC_UDP_CHECKSUMI 0x00
+#define LOWPAN_NHC_UDP_4BIT_PORT 0xF0B0
+#define LOWPAN_NHC_UDP_4BIT_MASK 0xFFF0
+#define LOWPAN_NHC_UDP_8BIT_PORT 0xF000
+#define LOWPAN_NHC_UDP_8BIT_MASK 0xFF00
+
/* values for port compression, _with checksum_ ie bit 5 set to 0 */
#define LOWPAN_NHC_UDP_CS_P_00 0xF0 /* all inline */
#define LOWPAN_NHC_UDP_CS_P_01 0xF1 /* source 16bit inline,
diff --git a/net/ieee802154/dgram.c b/net/ieee802154/dgram.c
index faecf648123f..1b09eaabaac1 100644
--- a/net/ieee802154/dgram.c
+++ b/net/ieee802154/dgram.c
@@ -209,6 +209,7 @@ static int dgram_sendmsg(struct kiocb *iocb, struct sock *sk,
unsigned mtu;
struct sk_buff *skb;
struct dgram_sock *ro = dgram_sk(sk);
+ int hlen, tlen;
int err;
if (msg->msg_flags & MSG_OOB) {
@@ -229,13 +230,15 @@ static int dgram_sendmsg(struct kiocb *iocb, struct sock *sk,
mtu = dev->mtu;
pr_debug("name = %s, mtu = %u\n", dev->name, mtu);
- skb = sock_alloc_send_skb(sk, LL_ALLOCATED_SPACE(dev) + size,
+ hlen = LL_RESERVED_SPACE(dev);
+ tlen = dev->needed_tailroom;
+ skb = sock_alloc_send_skb(sk, hlen + tlen + size,
msg->msg_flags & MSG_DONTWAIT,
&err);
if (!skb)
goto out_dev;
- skb_reserve(skb, LL_RESERVED_SPACE(dev));
+ skb_reserve(skb, hlen);
skb_reset_network_header(skb);
diff --git a/net/ieee802154/raw.c b/net/ieee802154/raw.c
index 10970ca85748..f96bae8fd330 100644
--- a/net/ieee802154/raw.c
+++ b/net/ieee802154/raw.c
@@ -108,6 +108,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
struct net_device *dev;
unsigned mtu;
struct sk_buff *skb;
+ int hlen, tlen;
int err;
if (msg->msg_flags & MSG_OOB) {
@@ -137,12 +138,14 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
goto out_dev;
}
- skb = sock_alloc_send_skb(sk, LL_ALLOCATED_SPACE(dev) + size,
+ hlen = LL_RESERVED_SPACE(dev);
+ tlen = dev->needed_tailroom;
+ skb = sock_alloc_send_skb(sk, hlen + tlen + size,
msg->msg_flags & MSG_DONTWAIT, &err);
if (!skb)
goto out_dev;
- skb_reserve(skb, LL_RESERVED_SPACE(dev));
+ skb_reserve(skb, hlen);
skb_reset_mac_header(skb);
skb_reset_network_header(skb);
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index cbb505ba9324..1a8f93bd2d4f 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -409,6 +409,10 @@ config INET_TCP_DIAG
depends on INET_DIAG
def_tristate INET_DIAG
+config INET_UDP_DIAG
+ depends on INET_DIAG
+ def_tristate INET_DIAG && IPV6
+
menuconfig TCP_CONG_ADVANCED
bool "TCP: advanced congestion control"
---help---
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index f2dc69cffb57..ff75d3bbcd6a 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -34,6 +34,7 @@ obj-$(CONFIG_IP_PNP) += ipconfig.o
obj-$(CONFIG_NETFILTER) += netfilter.o netfilter/
obj-$(CONFIG_INET_DIAG) += inet_diag.o
obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o
+obj-$(CONFIG_INET_UDP_DIAG) += udp_diag.o
obj-$(CONFIG_NET_TCPPROBE) += tcp_probe.o
obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o
obj-$(CONFIG_TCP_CONG_CUBIC) += tcp_cubic.o
@@ -47,6 +48,7 @@ obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o
obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
+obj-$(CONFIG_CGROUP_MEM_RES_CTLR_KMEM) += tcp_memcontrol.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 1b5096a9875a..f7b5670744f0 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1250,7 +1250,8 @@ out:
return err;
}
-static struct sk_buff *inet_gso_segment(struct sk_buff *skb, u32 features)
+static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
+ netdev_features_t features)
{
struct sk_buff *segs = ERR_PTR(-EINVAL);
struct iphdr *iph;
@@ -1572,9 +1573,9 @@ static __net_init int ipv4_mib_init_net(struct net *net)
sizeof(struct icmp_mib),
__alignof__(struct icmp_mib)) < 0)
goto err_icmp_mib;
- if (snmp_mib_init((void __percpu **)net->mib.icmpmsg_statistics,
- sizeof(struct icmpmsg_mib),
- __alignof__(struct icmpmsg_mib)) < 0)
+ net->mib.icmpmsg_statistics = kzalloc(sizeof(struct icmpmsg_mib),
+ GFP_KERNEL);
+ if (!net->mib.icmpmsg_statistics)
goto err_icmpmsg_mib;
tcp_mib_init(net);
@@ -1598,7 +1599,7 @@ err_tcp_mib:
static __net_exit void ipv4_mib_exit_net(struct net *net)
{
- snmp_mib_free((void __percpu **)net->mib.icmpmsg_statistics);
+ kfree(net->mib.icmpmsg_statistics);
snmp_mib_free((void __percpu **)net->mib.icmp_statistics);
snmp_mib_free((void __percpu **)net->mib.udplite_statistics);
snmp_mib_free((void __percpu **)net->mib.udp_statistics);
@@ -1671,6 +1672,8 @@ static int __init inet_init(void)
ip_static_sysctl_init();
#endif
+ tcp_prot.sysctl_mem = init_net.ipv4.sysctl_tcp_mem;
+
/*
* Add all the base protocols.
*/
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index c1f4154552fc..36d14406261e 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -136,8 +136,6 @@ static void ah_output_done(struct crypto_async_request *base, int err)
memcpy(top_iph+1, iph+1, top_iph->ihl*4 - sizeof(struct iphdr));
}
- err = ah->nexthdr;
-
kfree(AH_SKB_CB(skb)->tmp);
xfrm_output_resume(skb, err);
}
@@ -264,12 +262,12 @@ static void ah_input_done(struct crypto_async_request *base, int err)
if (err)
goto out;
+ err = ah->nexthdr;
+
skb->network_header += ah_hlen;
memcpy(skb_network_header(skb), work_iph, ihl);
__skb_pull(skb, ah_hlen + ihl);
skb_set_transport_header(skb, -ihl);
-
- err = ah->nexthdr;
out:
kfree(AH_SKB_CB(skb)->tmp);
xfrm_input_resume(skb, err);
@@ -371,8 +369,6 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
if (err == -EINPROGRESS)
goto out;
- if (err == -EBUSY)
- err = NET_XMIT_DROP;
goto out_free;
}
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 96a164aa1367..59402be133f0 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -112,11 +112,6 @@
#include <net/arp.h>
#include <net/ax25.h>
#include <net/netrom.h>
-#if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE)
-#include <net/atmclip.h>
-struct neigh_table *clip_tbl_hook;
-EXPORT_SYMBOL(clip_tbl_hook);
-#endif
#include <asm/system.h>
#include <linux/uaccess.h>
@@ -126,7 +121,7 @@ EXPORT_SYMBOL(clip_tbl_hook);
/*
* Interface to generic neighbour cache.
*/
-static u32 arp_hash(const void *pkey, const struct net_device *dev, __u32 rnd);
+static u32 arp_hash(const void *pkey, const struct net_device *dev, __u32 *hash_rnd);
static int arp_constructor(struct neighbour *neigh);
static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb);
static void arp_error_report(struct neighbour *neigh, struct sk_buff *skb);
@@ -164,7 +159,6 @@ static const struct neigh_ops arp_broken_ops = {
struct neigh_table arp_tbl = {
.family = AF_INET,
- .entry_size = sizeof(struct neighbour) + 4,
.key_len = 4,
.hash = arp_hash,
.constructor = arp_constructor,
@@ -177,7 +171,7 @@ struct neigh_table arp_tbl = {
.gc_staletime = 60 * HZ,
.reachable_time = 30 * HZ,
.delay_probe_time = 5 * HZ,
- .queue_len = 3,
+ .queue_len_bytes = 64*1024,
.ucast_probes = 3,
.mcast_probes = 3,
.anycast_delay = 1 * HZ,
@@ -221,9 +215,9 @@ int arp_mc_map(__be32 addr, u8 *haddr, struct net_device *dev, int dir)
static u32 arp_hash(const void *pkey,
const struct net_device *dev,
- __u32 hash_rnd)
+ __u32 *hash_rnd)
{
- return arp_hashfn(*(u32 *)pkey, dev, hash_rnd);
+ return arp_hashfn(*(u32 *)pkey, dev, *hash_rnd);
}
static int arp_constructor(struct neighbour *neigh)
@@ -283,9 +277,9 @@ static int arp_constructor(struct neighbour *neigh)
default:
break;
case ARPHRD_ROSE:
-#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
+#if IS_ENABLED(CONFIG_AX25)
case ARPHRD_AX25:
-#if defined(CONFIG_NETROM) || defined(CONFIG_NETROM_MODULE)
+#if IS_ENABLED(CONFIG_NETROM)
case ARPHRD_NETROM:
#endif
neigh->ops = &arp_broken_ops;
@@ -592,16 +586,18 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip,
struct sk_buff *skb;
struct arphdr *arp;
unsigned char *arp_ptr;
+ int hlen = LL_RESERVED_SPACE(dev);
+ int tlen = dev->needed_tailroom;
/*
* Allocate a buffer
*/
- skb = alloc_skb(arp_hdr_len(dev) + LL_ALLOCATED_SPACE(dev), GFP_ATOMIC);
+ skb = alloc_skb(arp_hdr_len(dev) + hlen + tlen, GFP_ATOMIC);
if (skb == NULL)
return NULL;
- skb_reserve(skb, LL_RESERVED_SPACE(dev));
+ skb_reserve(skb, hlen);
skb_reset_network_header(skb);
arp = (struct arphdr *) skb_put(skb, arp_hdr_len(dev));
skb->dev = dev;
@@ -633,13 +629,13 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip,
arp->ar_pro = htons(ETH_P_IP);
break;
-#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
+#if IS_ENABLED(CONFIG_AX25)
case ARPHRD_AX25:
arp->ar_hrd = htons(ARPHRD_AX25);
arp->ar_pro = htons(AX25_P_IP);
break;
-#if defined(CONFIG_NETROM) || defined(CONFIG_NETROM_MODULE)
+#if IS_ENABLED(CONFIG_NETROM)
case ARPHRD_NETROM:
arp->ar_hrd = htons(ARPHRD_NETROM);
arp->ar_pro = htons(AX25_P_IP);
@@ -647,13 +643,13 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip,
#endif
#endif
-#if defined(CONFIG_FDDI) || defined(CONFIG_FDDI_MODULE)
+#if IS_ENABLED(CONFIG_FDDI)
case ARPHRD_FDDI:
arp->ar_hrd = htons(ARPHRD_ETHER);
arp->ar_pro = htons(ETH_P_IP);
break;
#endif
-#if defined(CONFIG_TR) || defined(CONFIG_TR_MODULE)
+#if IS_ENABLED(CONFIG_TR)
case ARPHRD_IEEE802_TR:
arp->ar_hrd = htons(ARPHRD_IEEE802);
arp->ar_pro = htons(ETH_P_IP);
@@ -1040,7 +1036,7 @@ static int arp_req_set(struct net *net, struct arpreq *r,
return -EINVAL;
}
switch (dev->type) {
-#if defined(CONFIG_FDDI) || defined(CONFIG_FDDI_MODULE)
+#if IS_ENABLED(CONFIG_FDDI)
case ARPHRD_FDDI:
/*
* According to RFC 1390, FDDI devices should accept ARP
@@ -1286,7 +1282,7 @@ void __init arp_init(void)
}
#ifdef CONFIG_PROC_FS
-#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
+#if IS_ENABLED(CONFIG_AX25)
/* ------------------------------------------------------------------------ */
/*
@@ -1334,7 +1330,7 @@ static void arp_format_neigh_entry(struct seq_file *seq,
read_lock(&n->lock);
/* Convert hardware address to XX:XX:XX:XX ... form. */
-#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
+#if IS_ENABLED(CONFIG_AX25)
if (hatype == ARPHRD_AX25 || hatype == ARPHRD_NETROM)
ax2asc2((ax25_address *)n->ha, hbuffer);
else {
@@ -1347,7 +1343,7 @@ static void arp_format_neigh_entry(struct seq_file *seq,
if (k != 0)
--k;
hbuffer[k] = 0;
-#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
+#if IS_ENABLED(CONFIG_AX25)
}
#endif
sprintf(tbuf, "%pI4", n->primary_key);
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index c6b5092f29a1..65f01dc47565 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1490,7 +1490,9 @@ static int devinet_conf_proc(ctl_table *ctl, int write,
void __user *buffer,
size_t *lenp, loff_t *ppos)
{
+ int old_value = *(int *)ctl->data;
int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
+ int new_value = *(int *)ctl->data;
if (write) {
struct ipv4_devconf *cnf = ctl->extra1;
@@ -1501,6 +1503,9 @@ static int devinet_conf_proc(ctl_table *ctl, int write,
if (cnf == net->ipv4.devconf_dflt)
devinet_copy_dflt_conf(net, i);
+ if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1)
+ if ((new_value == 0) && (old_value != 0))
+ rt_cache_flush(net, 0);
}
return ret;
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 46339ba7a2d3..799fc790b3cf 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -67,6 +67,7 @@ int fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res)
return err;
}
+EXPORT_SYMBOL_GPL(fib_lookup);
static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp,
int flags, struct fib_lookup_arg *arg)
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 37b671185c81..d04b13ae18fe 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1607,6 +1607,7 @@ found:
rcu_read_unlock();
return ret;
}
+EXPORT_SYMBOL_GPL(fib_table_lookup);
/*
* Remove the leaf and return parent.
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index c7472eff2d51..fa057d105bef 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -304,9 +304,11 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
struct igmpv3_report *pig;
struct net *net = dev_net(dev);
struct flowi4 fl4;
+ int hlen = LL_RESERVED_SPACE(dev);
+ int tlen = dev->needed_tailroom;
while (1) {
- skb = alloc_skb(size + LL_ALLOCATED_SPACE(dev),
+ skb = alloc_skb(size + hlen + tlen,
GFP_ATOMIC | __GFP_NOWARN);
if (skb)
break;
@@ -327,7 +329,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
skb_dst_set(skb, &rt->dst);
skb->dev = dev;
- skb_reserve(skb, LL_RESERVED_SPACE(dev));
+ skb_reserve(skb, hlen);
skb_reset_network_header(skb);
pip = ip_hdr(skb);
@@ -647,6 +649,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
__be32 group = pmc ? pmc->multiaddr : 0;
struct flowi4 fl4;
__be32 dst;
+ int hlen, tlen;
if (type == IGMPV3_HOST_MEMBERSHIP_REPORT)
return igmpv3_send_report(in_dev, pmc);
@@ -661,7 +664,9 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
if (IS_ERR(rt))
return -1;
- skb = alloc_skb(IGMP_SIZE+LL_ALLOCATED_SPACE(dev), GFP_ATOMIC);
+ hlen = LL_RESERVED_SPACE(dev);
+ tlen = dev->needed_tailroom;
+ skb = alloc_skb(IGMP_SIZE + hlen + tlen, GFP_ATOMIC);
if (skb == NULL) {
ip_rt_put(rt);
return -1;
@@ -669,7 +674,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
skb_dst_set(skb, &rt->dst);
- skb_reserve(skb, LL_RESERVED_SPACE(dev));
+ skb_reserve(skb, hlen);
skb_reset_network_header(skb);
iph = ip_hdr(skb);
@@ -1574,7 +1579,7 @@ out_unlock:
* Add multicast single-source filter to the interface list
*/
static int ip_mc_add1_src(struct ip_mc_list *pmc, int sfmode,
- __be32 *psfsrc, int delta)
+ __be32 *psfsrc)
{
struct ip_sf_list *psf, *psf_prev;
@@ -1709,14 +1714,15 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
pmc->sfcount[sfmode]++;
err = 0;
for (i=0; i<sfcount; i++) {
- err = ip_mc_add1_src(pmc, sfmode, &psfsrc[i], delta);
+ err = ip_mc_add1_src(pmc, sfmode, &psfsrc[i]);
if (err)
break;
}
if (err) {
int j;
- pmc->sfcount[sfmode]--;
+ if (!delta)
+ pmc->sfcount[sfmode]--;
for (j=0; j<i; j++)
(void) ip_mc_del1_src(pmc, sfmode, &psfsrc[j]);
} else if (isexclude != (pmc->sfcount[MCAST_EXCLUDE] != 0)) {
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index c14d88ad348d..2e4e24476c4c 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -418,7 +418,7 @@ static inline u32 inet_synq_hash(const __be32 raddr, const __be16 rport,
return jhash_2words((__force u32)raddr, (__force u32)rport, rnd) & (synq_hsize - 1);
}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
#define AF_INET_FAMILY(fam) ((fam) == AF_INET)
#else
#define AF_INET_FAMILY(fam) 1
@@ -588,10 +588,19 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
}
EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_prune);
-struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req,
- const gfp_t priority)
+/**
+ * inet_csk_clone_lock - clone an inet socket, and lock its clone
+ * @sk: the socket to clone
+ * @req: request_sock
+ * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
+ *
+ * Caller must unlock socket even in error path (bh_unlock_sock(newsk))
+ */
+struct sock *inet_csk_clone_lock(const struct sock *sk,
+ const struct request_sock *req,
+ const gfp_t priority)
{
- struct sock *newsk = sk_clone(sk, priority);
+ struct sock *newsk = sk_clone_lock(sk, priority);
if (newsk != NULL) {
struct inet_connection_sock *newicsk = inet_csk(newsk);
@@ -615,7 +624,7 @@ struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req,
}
return newsk;
}
-EXPORT_SYMBOL_GPL(inet_csk_clone);
+EXPORT_SYMBOL_GPL(inet_csk_clone_lock);
/*
* At this point, there should be no process reference to this
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index f5e2bdaef949..2240a8e8c44d 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -33,6 +33,7 @@
#include <linux/stddef.h>
#include <linux/inet_diag.h>
+#include <linux/sock_diag.h>
static const struct inet_diag_handler **inet_diag_table;
@@ -45,24 +46,22 @@ struct inet_diag_entry {
u16 userlocks;
};
-static struct sock *idiagnl;
-
#define INET_DIAG_PUT(skb, attrtype, attrlen) \
RTA_DATA(__RTA_PUT(skb, attrtype, attrlen))
static DEFINE_MUTEX(inet_diag_table_mutex);
-static const struct inet_diag_handler *inet_diag_lock_handler(int type)
+static const struct inet_diag_handler *inet_diag_lock_handler(int proto)
{
- if (!inet_diag_table[type])
- request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
- NETLINK_INET_DIAG, type);
+ if (!inet_diag_table[proto])
+ request_module("net-pf-%d-proto-%d-type-%d-%d", PF_NETLINK,
+ NETLINK_SOCK_DIAG, AF_INET, proto);
mutex_lock(&inet_diag_table_mutex);
- if (!inet_diag_table[type])
+ if (!inet_diag_table[proto])
return ERR_PTR(-ENOENT);
- return inet_diag_table[type];
+ return inet_diag_table[proto];
}
static inline void inet_diag_unlock_handler(
@@ -71,21 +70,21 @@ static inline void inet_diag_unlock_handler(
mutex_unlock(&inet_diag_table_mutex);
}
-static int inet_csk_diag_fill(struct sock *sk,
- struct sk_buff *skb,
- int ext, u32 pid, u32 seq, u16 nlmsg_flags,
+int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
+ struct sk_buff *skb, struct inet_diag_req *req,
+ u32 pid, u32 seq, u16 nlmsg_flags,
const struct nlmsghdr *unlh)
{
const struct inet_sock *inet = inet_sk(sk);
- const struct inet_connection_sock *icsk = inet_csk(sk);
struct inet_diag_msg *r;
struct nlmsghdr *nlh;
void *info = NULL;
struct inet_diag_meminfo *minfo = NULL;
unsigned char *b = skb_tail_pointer(skb);
const struct inet_diag_handler *handler;
+ int ext = req->idiag_ext;
- handler = inet_diag_table[unlh->nlmsg_type];
+ handler = inet_diag_table[req->sdiag_protocol];
BUG_ON(handler == NULL);
nlh = NLMSG_PUT(skb, pid, seq, unlh->nlmsg_type, sizeof(*r));
@@ -97,47 +96,55 @@ static int inet_csk_diag_fill(struct sock *sk,
if (ext & (1 << (INET_DIAG_MEMINFO - 1)))
minfo = INET_DIAG_PUT(skb, INET_DIAG_MEMINFO, sizeof(*minfo));
- if (ext & (1 << (INET_DIAG_INFO - 1)))
- info = INET_DIAG_PUT(skb, INET_DIAG_INFO,
- handler->idiag_info_size);
-
- if ((ext & (1 << (INET_DIAG_CONG - 1))) && icsk->icsk_ca_ops) {
- const size_t len = strlen(icsk->icsk_ca_ops->name);
-
- strcpy(INET_DIAG_PUT(skb, INET_DIAG_CONG, len + 1),
- icsk->icsk_ca_ops->name);
- }
-
- if ((ext & (1 << (INET_DIAG_TOS - 1))) && (sk->sk_family != AF_INET6))
- RTA_PUT_U8(skb, INET_DIAG_TOS, inet->tos);
-
r->idiag_family = sk->sk_family;
r->idiag_state = sk->sk_state;
r->idiag_timer = 0;
r->idiag_retrans = 0;
r->id.idiag_if = sk->sk_bound_dev_if;
- r->id.idiag_cookie[0] = (u32)(unsigned long)sk;
- r->id.idiag_cookie[1] = (u32)(((unsigned long)sk >> 31) >> 1);
+ sock_diag_save_cookie(sk, r->id.idiag_cookie);
r->id.idiag_sport = inet->inet_sport;
r->id.idiag_dport = inet->inet_dport;
r->id.idiag_src[0] = inet->inet_rcv_saddr;
r->id.idiag_dst[0] = inet->inet_daddr;
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+ /* IPv6 dual-stack sockets use inet->tos for IPv4 connections,
+ * hence this needs to be included regardless of socket family.
+ */
+ if (ext & (1 << (INET_DIAG_TOS - 1)))
+ RTA_PUT_U8(skb, INET_DIAG_TOS, inet->tos);
+
+#if IS_ENABLED(CONFIG_IPV6)
if (r->idiag_family == AF_INET6) {
const struct ipv6_pinfo *np = inet6_sk(sk);
- ipv6_addr_copy((struct in6_addr *)r->id.idiag_src,
- &np->rcv_saddr);
- ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst,
- &np->daddr);
- if (ext & (1 << (INET_DIAG_TOS - 1)))
- RTA_PUT_U8(skb, INET_DIAG_TOS, np->tclass);
+ *(struct in6_addr *)r->id.idiag_src = np->rcv_saddr;
+ *(struct in6_addr *)r->id.idiag_dst = np->daddr;
+ if (ext & (1 << (INET_DIAG_TCLASS - 1)))
+ RTA_PUT_U8(skb, INET_DIAG_TCLASS, np->tclass);
}
#endif
+ r->idiag_uid = sock_i_uid(sk);
+ r->idiag_inode = sock_i_ino(sk);
+
+ if (minfo) {
+ minfo->idiag_rmem = sk_rmem_alloc_get(sk);
+ minfo->idiag_wmem = sk->sk_wmem_queued;
+ minfo->idiag_fmem = sk->sk_forward_alloc;
+ minfo->idiag_tmem = sk_wmem_alloc_get(sk);
+ }
+
+ if (ext & (1 << (INET_DIAG_SKMEMINFO - 1)))
+ if (sock_diag_put_meminfo(sk, skb, INET_DIAG_SKMEMINFO))
+ goto rtattr_failure;
+
+ if (icsk == NULL) {
+ r->idiag_rqueue = r->idiag_wqueue = 0;
+ goto out;
+ }
+
#define EXPIRES_IN_MS(tmo) DIV_ROUND_UP((tmo - jiffies) * 1000, HZ)
if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
@@ -158,14 +165,14 @@ static int inet_csk_diag_fill(struct sock *sk,
}
#undef EXPIRES_IN_MS
- r->idiag_uid = sock_i_uid(sk);
- r->idiag_inode = sock_i_ino(sk);
+ if (ext & (1 << (INET_DIAG_INFO - 1)))
+ info = INET_DIAG_PUT(skb, INET_DIAG_INFO, sizeof(struct tcp_info));
- if (minfo) {
- minfo->idiag_rmem = sk_rmem_alloc_get(sk);
- minfo->idiag_wmem = sk->sk_wmem_queued;
- minfo->idiag_fmem = sk->sk_forward_alloc;
- minfo->idiag_tmem = sk_wmem_alloc_get(sk);
+ if ((ext & (1 << (INET_DIAG_CONG - 1))) && icsk->icsk_ca_ops) {
+ const size_t len = strlen(icsk->icsk_ca_ops->name);
+
+ strcpy(INET_DIAG_PUT(skb, INET_DIAG_CONG, len + 1),
+ icsk->icsk_ca_ops->name);
}
handler->idiag_get_info(sk, r, info);
@@ -174,6 +181,7 @@ static int inet_csk_diag_fill(struct sock *sk,
icsk->icsk_ca_ops && icsk->icsk_ca_ops->get_info)
icsk->icsk_ca_ops->get_info(sk, ext, skb);
+out:
nlh->nlmsg_len = skb_tail_pointer(skb) - b;
return skb->len;
@@ -182,10 +190,20 @@ nlmsg_failure:
nlmsg_trim(skb, b);
return -EMSGSIZE;
}
+EXPORT_SYMBOL_GPL(inet_sk_diag_fill);
+
+static int inet_csk_diag_fill(struct sock *sk,
+ struct sk_buff *skb, struct inet_diag_req *req,
+ u32 pid, u32 seq, u16 nlmsg_flags,
+ const struct nlmsghdr *unlh)
+{
+ return inet_sk_diag_fill(sk, inet_csk(sk),
+ skb, req, pid, seq, nlmsg_flags, unlh);
+}
static int inet_twsk_diag_fill(struct inet_timewait_sock *tw,
- struct sk_buff *skb, int ext, u32 pid,
- u32 seq, u16 nlmsg_flags,
+ struct sk_buff *skb, struct inet_diag_req *req,
+ u32 pid, u32 seq, u16 nlmsg_flags,
const struct nlmsghdr *unlh)
{
long tmo;
@@ -206,8 +224,7 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw,
r->idiag_family = tw->tw_family;
r->idiag_retrans = 0;
r->id.idiag_if = tw->tw_bound_dev_if;
- r->id.idiag_cookie[0] = (u32)(unsigned long)tw;
- r->id.idiag_cookie[1] = (u32)(((unsigned long)tw >> 31) >> 1);
+ sock_diag_save_cookie(tw, r->id.idiag_cookie);
r->id.idiag_sport = tw->tw_sport;
r->id.idiag_dport = tw->tw_dport;
r->id.idiag_src[0] = tw->tw_rcv_saddr;
@@ -219,15 +236,13 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw,
r->idiag_wqueue = 0;
r->idiag_uid = 0;
r->idiag_inode = 0;
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
if (tw->tw_family == AF_INET6) {
const struct inet6_timewait_sock *tw6 =
inet6_twsk((struct sock *)tw);
- ipv6_addr_copy((struct in6_addr *)r->id.idiag_src,
- &tw6->tw_v6_rcv_saddr);
- ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst,
- &tw6->tw_v6_daddr);
+ *(struct in6_addr *)r->id.idiag_src = tw6->tw_v6_rcv_saddr;
+ *(struct in6_addr *)r->id.idiag_dst = tw6->tw_v6_daddr;
}
#endif
nlh->nlmsg_len = skb_tail_pointer(skb) - previous_tail;
@@ -238,42 +253,31 @@ nlmsg_failure:
}
static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
- int ext, u32 pid, u32 seq, u16 nlmsg_flags,
+ struct inet_diag_req *r, u32 pid, u32 seq, u16 nlmsg_flags,
const struct nlmsghdr *unlh)
{
if (sk->sk_state == TCP_TIME_WAIT)
return inet_twsk_diag_fill((struct inet_timewait_sock *)sk,
- skb, ext, pid, seq, nlmsg_flags,
+ skb, r, pid, seq, nlmsg_flags,
unlh);
- return inet_csk_diag_fill(sk, skb, ext, pid, seq, nlmsg_flags, unlh);
+ return inet_csk_diag_fill(sk, skb, r, pid, seq, nlmsg_flags, unlh);
}
-static int inet_diag_get_exact(struct sk_buff *in_skb,
- const struct nlmsghdr *nlh)
+int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_skb,
+ const struct nlmsghdr *nlh, struct inet_diag_req *req)
{
int err;
struct sock *sk;
- struct inet_diag_req *req = NLMSG_DATA(nlh);
struct sk_buff *rep;
- struct inet_hashinfo *hashinfo;
- const struct inet_diag_handler *handler;
-
- handler = inet_diag_lock_handler(nlh->nlmsg_type);
- if (IS_ERR(handler)) {
- err = PTR_ERR(handler);
- goto unlock;
- }
- hashinfo = handler->idiag_hashinfo;
err = -EINVAL;
-
- if (req->idiag_family == AF_INET) {
+ if (req->sdiag_family == AF_INET) {
sk = inet_lookup(&init_net, hashinfo, req->id.idiag_dst[0],
req->id.idiag_dport, req->id.idiag_src[0],
req->id.idiag_sport, req->id.idiag_if);
}
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
- else if (req->idiag_family == AF_INET6) {
+#if IS_ENABLED(CONFIG_IPV6)
+ else if (req->sdiag_family == AF_INET6) {
sk = inet6_lookup(&init_net, hashinfo,
(struct in6_addr *)req->id.idiag_dst,
req->id.idiag_dport,
@@ -283,29 +287,26 @@ static int inet_diag_get_exact(struct sk_buff *in_skb,
}
#endif
else {
- goto unlock;
+ goto out_nosk;
}
err = -ENOENT;
if (sk == NULL)
- goto unlock;
+ goto out_nosk;
- err = -ESTALE;
- if ((req->id.idiag_cookie[0] != INET_DIAG_NOCOOKIE ||
- req->id.idiag_cookie[1] != INET_DIAG_NOCOOKIE) &&
- ((u32)(unsigned long)sk != req->id.idiag_cookie[0] ||
- (u32)((((unsigned long)sk) >> 31) >> 1) != req->id.idiag_cookie[1]))
+ err = sock_diag_check_cookie(sk, req->id.idiag_cookie);
+ if (err)
goto out;
err = -ENOMEM;
rep = alloc_skb(NLMSG_SPACE((sizeof(struct inet_diag_msg) +
sizeof(struct inet_diag_meminfo) +
- handler->idiag_info_size + 64)),
+ sizeof(struct tcp_info) + 64)),
GFP_KERNEL);
if (!rep)
goto out;
- err = sk_diag_fill(sk, rep, req->idiag_ext,
+ err = sk_diag_fill(sk, rep, req,
NETLINK_CB(in_skb).pid,
nlh->nlmsg_seq, 0, nlh);
if (err < 0) {
@@ -313,7 +314,7 @@ static int inet_diag_get_exact(struct sk_buff *in_skb,
kfree_skb(rep);
goto out;
}
- err = netlink_unicast(idiagnl, rep, NETLINK_CB(in_skb).pid,
+ err = netlink_unicast(sock_diag_nlsk, rep, NETLINK_CB(in_skb).pid,
MSG_DONTWAIT);
if (err > 0)
err = 0;
@@ -325,8 +326,25 @@ out:
else
sock_put(sk);
}
-unlock:
+out_nosk:
+ return err;
+}
+EXPORT_SYMBOL_GPL(inet_diag_dump_one_icsk);
+
+static int inet_diag_get_exact(struct sk_buff *in_skb,
+ const struct nlmsghdr *nlh,
+ struct inet_diag_req *req)
+{
+ const struct inet_diag_handler *handler;
+ int err;
+
+ handler = inet_diag_lock_handler(req->sdiag_protocol);
+ if (IS_ERR(handler))
+ err = PTR_ERR(handler);
+ else
+ err = handler->dump_one(in_skb, nlh, req);
inet_diag_unlock_handler(handler);
+
return err;
}
@@ -357,9 +375,12 @@ static int bitstring_match(const __be32 *a1, const __be32 *a2, int bits)
}
-static int inet_diag_bc_run(const void *bc, int len,
- const struct inet_diag_entry *entry)
+static int inet_diag_bc_run(const struct nlattr *_bc,
+ const struct inet_diag_entry *entry)
{
+ const void *bc = nla_data(_bc);
+ int len = nla_len(_bc);
+
while (len > 0) {
int yes = 1;
const struct inet_diag_bc_op *op = bc;
@@ -433,6 +454,35 @@ static int inet_diag_bc_run(const void *bc, int len,
return len == 0;
}
+int inet_diag_bc_sk(const struct nlattr *bc, struct sock *sk)
+{
+ struct inet_diag_entry entry;
+ struct inet_sock *inet = inet_sk(sk);
+
+ if (bc == NULL)
+ return 1;
+
+ entry.family = sk->sk_family;
+#if IS_ENABLED(CONFIG_IPV6)
+ if (entry.family == AF_INET6) {
+ struct ipv6_pinfo *np = inet6_sk(sk);
+
+ entry.saddr = np->rcv_saddr.s6_addr32;
+ entry.daddr = np->daddr.s6_addr32;
+ } else
+#endif
+ {
+ entry.saddr = &inet->inet_rcv_saddr;
+ entry.daddr = &inet->inet_daddr;
+ }
+ entry.sport = inet->inet_num;
+ entry.dport = ntohs(inet->inet_dport);
+ entry.userlocks = sk->sk_userlocks;
+
+ return inet_diag_bc_run(bc, &entry);
+}
+EXPORT_SYMBOL_GPL(inet_diag_bc_sk);
+
static int valid_cc(const void *bc, int len, int cc)
{
while (len >= 0) {
@@ -489,57 +539,29 @@ static int inet_diag_bc_audit(const void *bytecode, int bytecode_len)
static int inet_csk_diag_dump(struct sock *sk,
struct sk_buff *skb,
- struct netlink_callback *cb)
+ struct netlink_callback *cb,
+ struct inet_diag_req *r,
+ const struct nlattr *bc)
{
- struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
-
- if (nlmsg_attrlen(cb->nlh, sizeof(*r))) {
- struct inet_diag_entry entry;
- const struct nlattr *bc = nlmsg_find_attr(cb->nlh,
- sizeof(*r),
- INET_DIAG_REQ_BYTECODE);
- struct inet_sock *inet = inet_sk(sk);
-
- entry.family = sk->sk_family;
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
- if (entry.family == AF_INET6) {
- struct ipv6_pinfo *np = inet6_sk(sk);
-
- entry.saddr = np->rcv_saddr.s6_addr32;
- entry.daddr = np->daddr.s6_addr32;
- } else
-#endif
- {
- entry.saddr = &inet->inet_rcv_saddr;
- entry.daddr = &inet->inet_daddr;
- }
- entry.sport = inet->inet_num;
- entry.dport = ntohs(inet->inet_dport);
- entry.userlocks = sk->sk_userlocks;
+ if (!inet_diag_bc_sk(bc, sk))
+ return 0;
- if (!inet_diag_bc_run(nla_data(bc), nla_len(bc), &entry))
- return 0;
- }
-
- return inet_csk_diag_fill(sk, skb, r->idiag_ext,
+ return inet_csk_diag_fill(sk, skb, r,
NETLINK_CB(cb->skb).pid,
cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh);
}
static int inet_twsk_diag_dump(struct inet_timewait_sock *tw,
struct sk_buff *skb,
- struct netlink_callback *cb)
+ struct netlink_callback *cb,
+ struct inet_diag_req *r,
+ const struct nlattr *bc)
{
- struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
-
- if (nlmsg_attrlen(cb->nlh, sizeof(*r))) {
+ if (bc != NULL) {
struct inet_diag_entry entry;
- const struct nlattr *bc = nlmsg_find_attr(cb->nlh,
- sizeof(*r),
- INET_DIAG_REQ_BYTECODE);
entry.family = tw->tw_family;
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
if (tw->tw_family == AF_INET6) {
struct inet6_timewait_sock *tw6 =
inet6_twsk((struct sock *)tw);
@@ -555,11 +577,11 @@ static int inet_twsk_diag_dump(struct inet_timewait_sock *tw,
entry.dport = ntohs(tw->tw_dport);
entry.userlocks = 0;
- if (!inet_diag_bc_run(nla_data(bc), nla_len(bc), &entry))
+ if (!inet_diag_bc_run(bc, &entry))
return 0;
}
- return inet_twsk_diag_fill(tw, skb, r->idiag_ext,
+ return inet_twsk_diag_fill(tw, skb, r,
NETLINK_CB(cb->skb).pid,
cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh);
}
@@ -585,8 +607,7 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
r->idiag_retrans = req->retrans;
r->id.idiag_if = sk->sk_bound_dev_if;
- r->id.idiag_cookie[0] = (u32)(unsigned long)req;
- r->id.idiag_cookie[1] = (u32)(((unsigned long)req >> 31) >> 1);
+ sock_diag_save_cookie(req, r->id.idiag_cookie);
tmo = req->expires - jiffies;
if (tmo < 0)
@@ -601,12 +622,10 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
r->idiag_wqueue = 0;
r->idiag_uid = sock_i_uid(sk);
r->idiag_inode = 0;
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
if (r->idiag_family == AF_INET6) {
- ipv6_addr_copy((struct in6_addr *)r->id.idiag_src,
- &inet6_rsk(req)->loc_addr);
- ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst,
- &inet6_rsk(req)->rmt_addr);
+ *(struct in6_addr *)r->id.idiag_src = inet6_rsk(req)->loc_addr;
+ *(struct in6_addr *)r->id.idiag_dst = inet6_rsk(req)->rmt_addr;
}
#endif
nlh->nlmsg_len = skb_tail_pointer(skb) - b;
@@ -619,13 +638,13 @@ nlmsg_failure:
}
static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
- struct netlink_callback *cb)
+ struct netlink_callback *cb,
+ struct inet_diag_req *r,
+ const struct nlattr *bc)
{
struct inet_diag_entry entry;
- struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
struct inet_connection_sock *icsk = inet_csk(sk);
struct listen_sock *lopt;
- const struct nlattr *bc = NULL;
struct inet_sock *inet = inet_sk(sk);
int j, s_j;
int reqnum, s_reqnum;
@@ -645,9 +664,7 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
if (!lopt || !lopt->qlen)
goto out;
- if (nlmsg_attrlen(cb->nlh, sizeof(*r))) {
- bc = nlmsg_find_attr(cb->nlh, sizeof(*r),
- INET_DIAG_REQ_BYTECODE);
+ if (bc != NULL) {
entry.sport = inet->inet_num;
entry.userlocks = sk->sk_userlocks;
}
@@ -667,21 +684,20 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
if (bc) {
entry.saddr =
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
(entry.family == AF_INET6) ?
inet6_rsk(req)->loc_addr.s6_addr32 :
#endif
&ireq->loc_addr;
entry.daddr =
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
(entry.family == AF_INET6) ?
inet6_rsk(req)->rmt_addr.s6_addr32 :
#endif
&ireq->rmt_addr;
entry.dport = ntohs(ireq->rmt_port);
- if (!inet_diag_bc_run(nla_data(bc),
- nla_len(bc), &entry))
+ if (!inet_diag_bc_run(bc, &entry))
continue;
}
@@ -704,19 +720,11 @@ out:
return err;
}
-static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
+void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb,
+ struct netlink_callback *cb, struct inet_diag_req *r, struct nlattr *bc)
{
int i, num;
int s_i, s_num;
- struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
- const struct inet_diag_handler *handler;
- struct inet_hashinfo *hashinfo;
-
- handler = inet_diag_lock_handler(cb->nlh->nlmsg_type);
- if (IS_ERR(handler))
- goto unlock;
-
- hashinfo = handler->idiag_hashinfo;
s_i = cb->args[1];
s_num = num = cb->args[2];
@@ -741,6 +749,10 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
continue;
}
+ if (r->sdiag_family != AF_UNSPEC &&
+ sk->sk_family != r->sdiag_family)
+ goto next_listen;
+
if (r->id.idiag_sport != inet->inet_sport &&
r->id.idiag_sport)
goto next_listen;
@@ -750,7 +762,7 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
cb->args[3] > 0)
goto syn_recv;
- if (inet_csk_diag_dump(sk, skb, cb) < 0) {
+ if (inet_csk_diag_dump(sk, skb, cb, r, bc) < 0) {
spin_unlock_bh(&ilb->lock);
goto done;
}
@@ -759,7 +771,7 @@ syn_recv:
if (!(r->idiag_states & TCPF_SYN_RECV))
goto next_listen;
- if (inet_diag_dump_reqs(skb, sk, cb) < 0) {
+ if (inet_diag_dump_reqs(skb, sk, cb, r, bc) < 0) {
spin_unlock_bh(&ilb->lock);
goto done;
}
@@ -781,7 +793,7 @@ skip_listen_ht:
}
if (!(r->idiag_states & ~(TCPF_LISTEN | TCPF_SYN_RECV)))
- goto unlock;
+ goto out;
for (i = s_i; i <= hashinfo->ehash_mask; i++) {
struct inet_ehash_bucket *head = &hashinfo->ehash[i];
@@ -806,13 +818,16 @@ skip_listen_ht:
goto next_normal;
if (!(r->idiag_states & (1 << sk->sk_state)))
goto next_normal;
+ if (r->sdiag_family != AF_UNSPEC &&
+ sk->sk_family != r->sdiag_family)
+ goto next_normal;
if (r->id.idiag_sport != inet->inet_sport &&
r->id.idiag_sport)
goto next_normal;
if (r->id.idiag_dport != inet->inet_dport &&
r->id.idiag_dport)
goto next_normal;
- if (inet_csk_diag_dump(sk, skb, cb) < 0) {
+ if (inet_csk_diag_dump(sk, skb, cb, r, bc) < 0) {
spin_unlock_bh(lock);
goto done;
}
@@ -828,13 +843,16 @@ next_normal:
if (num < s_num)
goto next_dying;
+ if (r->sdiag_family != AF_UNSPEC &&
+ tw->tw_family != r->sdiag_family)
+ goto next_dying;
if (r->id.idiag_sport != tw->tw_sport &&
r->id.idiag_sport)
goto next_dying;
if (r->id.idiag_dport != tw->tw_dport &&
r->id.idiag_dport)
goto next_dying;
- if (inet_twsk_diag_dump(tw, skb, cb) < 0) {
+ if (inet_twsk_diag_dump(tw, skb, cb, r, bc) < 0) {
spin_unlock_bh(lock);
goto done;
}
@@ -848,15 +866,85 @@ next_dying:
done:
cb->args[1] = i;
cb->args[2] = num;
-unlock:
+out:
+ ;
+}
+EXPORT_SYMBOL_GPL(inet_diag_dump_icsk);
+
+static int __inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
+ struct inet_diag_req *r, struct nlattr *bc)
+{
+ const struct inet_diag_handler *handler;
+
+ handler = inet_diag_lock_handler(r->sdiag_protocol);
+ if (!IS_ERR(handler))
+ handler->dump(skb, cb, r, bc);
inet_diag_unlock_handler(handler);
+
return skb->len;
}
-static int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
+ struct nlattr *bc = NULL;
int hdrlen = sizeof(struct inet_diag_req);
+ if (nlmsg_attrlen(cb->nlh, hdrlen))
+ bc = nlmsg_find_attr(cb->nlh, hdrlen, INET_DIAG_REQ_BYTECODE);
+
+ return __inet_diag_dump(skb, cb, (struct inet_diag_req *)NLMSG_DATA(cb->nlh), bc);
+}
+
+static inline int inet_diag_type2proto(int type)
+{
+ switch (type) {
+ case TCPDIAG_GETSOCK:
+ return IPPROTO_TCP;
+ case DCCPDIAG_GETSOCK:
+ return IPPROTO_DCCP;
+ default:
+ return 0;
+ }
+}
+
+static int inet_diag_dump_compat(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct inet_diag_req_compat *rc = NLMSG_DATA(cb->nlh);
+ struct inet_diag_req req;
+ struct nlattr *bc = NULL;
+ int hdrlen = sizeof(struct inet_diag_req_compat);
+
+ req.sdiag_family = AF_UNSPEC; /* compatibility */
+ req.sdiag_protocol = inet_diag_type2proto(cb->nlh->nlmsg_type);
+ req.idiag_ext = rc->idiag_ext;
+ req.idiag_states = rc->idiag_states;
+ req.id = rc->id;
+
+ if (nlmsg_attrlen(cb->nlh, hdrlen))
+ bc = nlmsg_find_attr(cb->nlh, hdrlen, INET_DIAG_REQ_BYTECODE);
+
+ return __inet_diag_dump(skb, cb, &req, bc);
+}
+
+static int inet_diag_get_exact_compat(struct sk_buff *in_skb,
+ const struct nlmsghdr *nlh)
+{
+ struct inet_diag_req_compat *rc = NLMSG_DATA(nlh);
+ struct inet_diag_req req;
+
+ req.sdiag_family = rc->idiag_family;
+ req.sdiag_protocol = inet_diag_type2proto(nlh->nlmsg_type);
+ req.idiag_ext = rc->idiag_ext;
+ req.idiag_states = rc->idiag_states;
+ req.id = rc->id;
+
+ return inet_diag_get_exact(in_skb, nlh, &req);
+}
+
+static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh)
+{
+ int hdrlen = sizeof(struct inet_diag_req_compat);
+
if (nlh->nlmsg_type >= INET_DIAG_GETSOCK_MAX ||
nlmsg_len(nlh) < hdrlen)
return -EINVAL;
@@ -873,28 +961,54 @@ static int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
return -EINVAL;
}
- return netlink_dump_start(idiagnl, skb, nlh,
- inet_diag_dump, NULL, 0);
+ return netlink_dump_start(sock_diag_nlsk, skb, nlh,
+ inet_diag_dump_compat, NULL, 0);
}
- return inet_diag_get_exact(skb, nlh);
+ return inet_diag_get_exact_compat(skb, nlh);
}
-static DEFINE_MUTEX(inet_diag_mutex);
-
-static void inet_diag_rcv(struct sk_buff *skb)
+static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
{
- mutex_lock(&inet_diag_mutex);
- netlink_rcv_skb(skb, &inet_diag_rcv_msg);
- mutex_unlock(&inet_diag_mutex);
+ int hdrlen = sizeof(struct inet_diag_req);
+
+ if (nlmsg_len(h) < hdrlen)
+ return -EINVAL;
+
+ if (h->nlmsg_flags & NLM_F_DUMP) {
+ if (nlmsg_attrlen(h, hdrlen)) {
+ struct nlattr *attr;
+ attr = nlmsg_find_attr(h, hdrlen,
+ INET_DIAG_REQ_BYTECODE);
+ if (attr == NULL ||
+ nla_len(attr) < sizeof(struct inet_diag_bc_op) ||
+ inet_diag_bc_audit(nla_data(attr), nla_len(attr)))
+ return -EINVAL;
+ }
+
+ return netlink_dump_start(sock_diag_nlsk, skb, h,
+ inet_diag_dump, NULL, 0);
+ }
+
+ return inet_diag_get_exact(skb, h, (struct inet_diag_req *)NLMSG_DATA(h));
}
+static struct sock_diag_handler inet_diag_handler = {
+ .family = AF_INET,
+ .dump = inet_diag_handler_dump,
+};
+
+static struct sock_diag_handler inet6_diag_handler = {
+ .family = AF_INET6,
+ .dump = inet_diag_handler_dump,
+};
+
int inet_diag_register(const struct inet_diag_handler *h)
{
const __u16 type = h->idiag_type;
int err = -EINVAL;
- if (type >= INET_DIAG_GETSOCK_MAX)
+ if (type >= IPPROTO_MAX)
goto out;
mutex_lock(&inet_diag_table_mutex);
@@ -913,7 +1027,7 @@ void inet_diag_unregister(const struct inet_diag_handler *h)
{
const __u16 type = h->idiag_type;
- if (type >= INET_DIAG_GETSOCK_MAX)
+ if (type >= IPPROTO_MAX)
return;
mutex_lock(&inet_diag_table_mutex);
@@ -924,7 +1038,7 @@ EXPORT_SYMBOL_GPL(inet_diag_unregister);
static int __init inet_diag_init(void)
{
- const int inet_diag_table_size = (INET_DIAG_GETSOCK_MAX *
+ const int inet_diag_table_size = (IPPROTO_MAX *
sizeof(struct inet_diag_handler *));
int err = -ENOMEM;
@@ -932,25 +1046,35 @@ static int __init inet_diag_init(void)
if (!inet_diag_table)
goto out;
- idiagnl = netlink_kernel_create(&init_net, NETLINK_INET_DIAG, 0,
- inet_diag_rcv, NULL, THIS_MODULE);
- if (idiagnl == NULL)
- goto out_free_table;
- err = 0;
+ err = sock_diag_register(&inet_diag_handler);
+ if (err)
+ goto out_free_nl;
+
+ err = sock_diag_register(&inet6_diag_handler);
+ if (err)
+ goto out_free_inet;
+
+ sock_diag_register_inet_compat(inet_diag_rcv_msg_compat);
out:
return err;
-out_free_table:
+
+out_free_inet:
+ sock_diag_unregister(&inet_diag_handler);
+out_free_nl:
kfree(inet_diag_table);
goto out;
}
static void __exit inet_diag_exit(void)
{
- netlink_kernel_release(idiagnl);
+ sock_diag_unregister(&inet6_diag_handler);
+ sock_diag_unregister(&inet_diag_handler);
+ sock_diag_unregister_inet_compat(inet_diag_rcv_msg_compat);
kfree(inet_diag_table);
}
module_init(inet_diag_init);
module_exit(inet_diag_exit);
MODULE_LICENSE("GPL");
-MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_INET_DIAG);
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2 /* AF_INET */);
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 10 /* AF_INET6 */);
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 3b34d1c86270..29a07b6c7168 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -84,7 +84,7 @@ int ip_forward(struct sk_buff *skb)
rt = skb_rtable(skb);
- if (opt->is_strictroute && ip_hdr(skb)->daddr != rt->rt_gateway)
+ if (opt->is_strictroute && opt->nexthop != rt->rt_gateway)
goto sr_failed;
if (unlikely(skb->len > dst_mtu(&rt->dst) && !skb_is_gso(skb) &&
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index d55110e93120..2b53a1f7abf6 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -46,7 +46,7 @@
#include <net/rtnetlink.h>
#include <net/gre.h>
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
@@ -171,7 +171,7 @@ struct pcpu_tstats {
unsigned long rx_bytes;
unsigned long tx_packets;
unsigned long tx_bytes;
-};
+} __attribute__((aligned(4*sizeof(unsigned long))));
static struct net_device_stats *ipgre_get_stats(struct net_device *dev)
{
@@ -729,9 +729,9 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
if ((dst = rt->rt_gateway) == 0)
goto tx_error_icmp;
}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
else if (skb->protocol == htons(ETH_P_IPV6)) {
- struct neighbour *neigh = dst_get_neighbour(skb_dst(skb));
+ struct neighbour *neigh = dst_get_neighbour_noref(skb_dst(skb));
const struct in6_addr *addr6;
int addr_type;
@@ -799,7 +799,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
goto tx_error;
}
}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
else if (skb->protocol == htons(ETH_P_IPV6)) {
struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
@@ -835,6 +835,8 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
(skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
+ if (max_headroom > dev->needed_headroom)
+ dev->needed_headroom = max_headroom;
if (!new_skb) {
ip_rt_put(rt);
dev->stats.tx_dropped++;
@@ -873,7 +875,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
if ((iph->ttl = tiph->ttl) == 0) {
if (skb->protocol == htons(ETH_P_IP))
iph->ttl = old_iph->ttl;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
else if (skb->protocol == htons(ETH_P_IPV6))
iph->ttl = ((const struct ipv6hdr *)old_iph)->hop_limit;
#endif
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index ec93335901dd..1e60f7679075 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -568,12 +568,13 @@ void ip_forward_options(struct sk_buff *skb)
) {
if (srrptr + 3 > srrspace)
break;
- if (memcmp(&ip_hdr(skb)->daddr, &optptr[srrptr-1], 4) == 0)
+ if (memcmp(&opt->nexthop, &optptr[srrptr-1], 4) == 0)
break;
}
if (srrptr + 3 <= srrspace) {
opt->is_changed = 1;
ip_rt_get_source(&optptr[srrptr-1], skb, rt);
+ ip_hdr(skb)->daddr = opt->nexthop;
optptr[2] = srrptr+4;
} else if (net_ratelimit())
printk(KERN_CRIT "ip_forward(): Argh! Destination lost!\n");
@@ -640,6 +641,7 @@ int ip_options_rcv_srr(struct sk_buff *skb)
}
if (srrptr <= srrspace) {
opt->srr_is_hit = 1;
+ opt->nexthop = nexthop;
opt->is_changed = 1;
}
return 0;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 0bc95f3977d2..ff302bde8890 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -206,7 +206,7 @@ static inline int ip_finish_output2(struct sk_buff *skb)
}
rcu_read_lock();
- neigh = dst_get_neighbour(dst);
+ neigh = dst_get_neighbour_noref(dst);
if (neigh) {
int res = neigh_output(neigh, skb);
@@ -319,6 +319,20 @@ int ip_output(struct sk_buff *skb)
!(IPCB(skb)->flags & IPSKB_REROUTED));
}
+/*
+ * copy saddr and daddr, possibly using 64bit load/stores
+ * Equivalent to :
+ * iph->saddr = fl4->saddr;
+ * iph->daddr = fl4->daddr;
+ */
+static void ip_copy_addrs(struct iphdr *iph, const struct flowi4 *fl4)
+{
+ BUILD_BUG_ON(offsetof(typeof(*fl4), daddr) !=
+ offsetof(typeof(*fl4), saddr) + sizeof(fl4->saddr));
+ memcpy(&iph->saddr, &fl4->saddr,
+ sizeof(fl4->saddr) + sizeof(fl4->daddr));
+}
+
int ip_queue_xmit(struct sk_buff *skb, struct flowi *fl)
{
struct sock *sk = skb->sk;
@@ -381,8 +395,8 @@ packet_routed:
iph->frag_off = 0;
iph->ttl = ip_select_ttl(inet, &rt->dst);
iph->protocol = sk->sk_protocol;
- iph->saddr = fl4->saddr;
- iph->daddr = fl4->daddr;
+ ip_copy_addrs(iph, fl4);
+
/* Transport layer set skb->h.foo itself. */
if (inet_opt && inet_opt->opt.optlen) {
@@ -1337,8 +1351,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
ip_select_ident(iph, &rt->dst, sk);
iph->ttl = ttl;
iph->protocol = sk->sk_protocol;
- iph->saddr = fl4->saddr;
- iph->daddr = fl4->daddr;
+ ip_copy_addrs(iph, fl4);
if (opt) {
iph->ihl += opt->optlen>>2;
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 09ff51bf16a4..8aa87c19fa00 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -37,7 +37,7 @@
#include <net/route.h>
#include <net/xfrm.h>
#include <net/compat.h>
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
#include <net/transp_v6.h>
#endif
@@ -55,20 +55,13 @@
/*
* SOL_IP control messages.
*/
+#define PKTINFO_SKB_CB(__skb) ((struct in_pktinfo *)((__skb)->cb))
static void ip_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
{
- struct in_pktinfo info;
- struct rtable *rt = skb_rtable(skb);
+ struct in_pktinfo info = *PKTINFO_SKB_CB(skb);
info.ipi_addr.s_addr = ip_hdr(skb)->daddr;
- if (rt) {
- info.ipi_ifindex = rt->rt_iif;
- info.ipi_spec_dst.s_addr = rt->rt_spec_dst;
- } else {
- info.ipi_ifindex = 0;
- info.ipi_spec_dst.s_addr = 0;
- }
put_cmsg(msg, SOL_IP, IP_PKTINFO, sizeof(info), &info);
}
@@ -515,7 +508,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
sock_owned_by_user(sk));
if (inet->is_icsk) {
struct inet_connection_sock *icsk = inet_csk(sk);
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
if (sk->sk_family == PF_INET ||
(!((1 << sk->sk_state) &
(TCPF_LISTEN | TCPF_CLOSE)) &&
@@ -526,7 +519,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
if (opt)
icsk->icsk_ext_hdr_len += opt->opt.optlen;
icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
}
#endif
}
@@ -992,20 +985,28 @@ e_inval:
}
/**
- * ip_queue_rcv_skb - Queue an skb into sock receive queue
+ * ipv4_pktinfo_prepare - transfert some info from rtable to skb
* @sk: socket
* @skb: buffer
*
- * Queues an skb into socket receive queue. If IP_CMSG_PKTINFO option
- * is not set, we drop skb dst entry now, while dst cache line is hot.
+ * To support IP_CMSG_PKTINFO option, we store rt_iif and rt_spec_dst
+ * in skb->cb[] before dst drop.
+ * This way, receiver doesnt make cache line misses to read rtable.
*/
-int ip_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+void ipv4_pktinfo_prepare(struct sk_buff *skb)
{
- if (!(inet_sk(sk)->cmsg_flags & IP_CMSG_PKTINFO))
- skb_dst_drop(skb);
- return sock_queue_rcv_skb(sk, skb);
+ struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(skb);
+ const struct rtable *rt = skb_rtable(skb);
+
+ if (rt) {
+ pktinfo->ipi_ifindex = rt->rt_iif;
+ pktinfo->ipi_spec_dst.s_addr = rt->rt_spec_dst;
+ } else {
+ pktinfo->ipi_ifindex = 0;
+ pktinfo->ipi_spec_dst.s_addr = 0;
+ }
+ skb_dst_drop(skb);
}
-EXPORT_SYMBOL(ip_queue_rcv_skb);
int ip_setsockopt(struct sock *sk, int level,
int optname, char __user *optval, unsigned int optlen)
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 0da2afc97f32..7e4ec9fc2cef 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -253,6 +253,10 @@ static int __init ic_open_devs(void)
}
}
+ /* no point in waiting if we could not bring up at least one device */
+ if (!ic_first_dev)
+ goto have_carrier;
+
/* wait for a carrier on at least one device */
start = jiffies;
while (jiffies - start < msecs_to_jiffies(CONF_CARRIER_TIMEOUT)) {
@@ -763,13 +767,15 @@ static void __init ic_bootp_send_if(struct ic_device *d, unsigned long jiffies_d
struct sk_buff *skb;
struct bootp_pkt *b;
struct iphdr *h;
+ int hlen = LL_RESERVED_SPACE(dev);
+ int tlen = dev->needed_tailroom;
/* Allocate packet */
- skb = alloc_skb(sizeof(struct bootp_pkt) + LL_ALLOCATED_SPACE(dev) + 15,
+ skb = alloc_skb(sizeof(struct bootp_pkt) + hlen + tlen + 15,
GFP_KERNEL);
if (!skb)
return;
- skb_reserve(skb, LL_RESERVED_SPACE(dev));
+ skb_reserve(skb, hlen);
b = (struct bootp_pkt *) skb_put(skb, sizeof(struct bootp_pkt));
memset(b, 0, sizeof(struct bootp_pkt));
@@ -822,8 +828,13 @@ static void __init ic_bootp_send_if(struct ic_device *d, unsigned long jiffies_d
skb->dev = dev;
skb->protocol = htons(ETH_P_IP);
if (dev_hard_header(skb, dev, ntohs(skb->protocol),
- dev->broadcast, dev->dev_addr, skb->len) < 0 ||
- dev_queue_xmit(skb) < 0)
+ dev->broadcast, dev->dev_addr, skb->len) < 0) {
+ kfree_skb(skb);
+ printk("E");
+ return;
+ }
+
+ if (dev_queue_xmit(skb) < 0)
printk("E");
}
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 065effd8349a..413ed1ba7a5a 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -148,7 +148,7 @@ struct pcpu_tstats {
unsigned long rx_bytes;
unsigned long tx_packets;
unsigned long tx_bytes;
-};
+} __attribute__((aligned(4*sizeof(unsigned long))));
static struct net_device_stats *ipip_get_stats(struct net_device *dev)
{
@@ -285,6 +285,8 @@ static struct ip_tunnel * ipip_tunnel_locate(struct net *net,
if (register_netdevice(dev) < 0)
goto failed_free;
+ strcpy(nt->parms.name, dev->name);
+
dev_hold(dev);
ipip_tunnel_link(ipn, nt);
return nt;
@@ -759,7 +761,6 @@ static int ipip_tunnel_init(struct net_device *dev)
struct ip_tunnel *tunnel = netdev_priv(dev);
tunnel->dev = dev;
- strcpy(tunnel->parms.name, dev->name);
memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
@@ -825,6 +826,7 @@ static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head)
static int __net_init ipip_init_net(struct net *net)
{
struct ipip_net *ipn = net_generic(net, ipip_net_id);
+ struct ip_tunnel *t;
int err;
ipn->tunnels[0] = ipn->tunnels_wc;
@@ -848,6 +850,9 @@ static int __net_init ipip_init_net(struct net *net)
if ((err = register_netdev(ipn->fb_tunnel_dev)))
goto err_reg_dev;
+ t = netdev_priv(ipn->fb_tunnel_dev);
+
+ strcpy(t->parms.name, ipn->fb_tunnel_dev->name);
return 0;
err_reg_dev:
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 76a7f07b38b6..8e54490ee3f4 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1520,7 +1520,6 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v
struct mr_table *mrt;
struct vif_device *v;
int ct;
- LIST_HEAD(list);
if (event != NETDEV_UNREGISTER)
return NOTIFY_DONE;
@@ -1529,10 +1528,9 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v
v = &mrt->vif_table[0];
for (ct = 0; ct < mrt->maxvif; ct++, v++) {
if (v->dev == dev)
- vif_delete(mrt, ct, 1, &list);
+ vif_delete(mrt, ct, 1, NULL);
}
}
- unregister_netdevice_many(&list);
return NOTIFY_DONE;
}
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index 9899619ab9b8..4f47e064e262 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -64,7 +64,8 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
/* Change in oif may mean change in hh_len. */
hh_len = skb_dst(skb)->dev->hard_header_len;
if (skb_headroom(skb) < hh_len &&
- pskb_expand_head(skb, hh_len - skb_headroom(skb), 0, GFP_ATOMIC))
+ pskb_expand_head(skb, HH_DATA_ALIGN(hh_len - skb_headroom(skb)),
+ 0, GFP_ATOMIC))
return -1;
return 0;
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 1dfc18a03fd4..74dfc9e5211f 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -27,7 +27,7 @@ config NF_CONNTRACK_IPV4
config NF_CONNTRACK_PROC_COMPAT
bool "proc/sysctl compatibility with old connection tracking"
- depends on NF_CONNTRACK_IPV4
+ depends on NF_CONNTRACK_PROCFS && NF_CONNTRACK_IPV4
default y
help
This option enables /proc and sysctl compatibility with the old
@@ -76,11 +76,21 @@ config IP_NF_MATCH_AH
config IP_NF_MATCH_ECN
tristate '"ecn" match support'
depends on NETFILTER_ADVANCED
- help
- This option adds a `ECN' match, which allows you to match against
- the IPv4 and TCP header ECN fields.
+ select NETFILTER_XT_MATCH_ECN
+ ---help---
+ This is a backwards-compat option for the user's convenience
+ (e.g. when running oldconfig). It selects
+ CONFIG_NETFILTER_XT_MATCH_ECN.
+
+config IP_NF_MATCH_RPFILTER
+ tristate '"rpfilter" reverse path filter match support'
+ depends on NETFILTER_ADVANCED
+ ---help---
+ This option allows you to match packets whose replies would
+ go out via the interface the packet came in.
To compile it as a module, choose M here. If unsure, say N.
+ The module will be called ipt_rpfilter.
config IP_NF_MATCH_TTL
tristate '"ttl" match support'
@@ -325,7 +335,6 @@ config IP_NF_TARGET_TTL
# raw + specific targets
config IP_NF_RAW
tristate 'raw table support (required for NOTRACK/TRACE)'
- depends on NETFILTER_ADVANCED
help
This option adds a `raw' table to iptables. This table is the very
first in the netfilter framework and hooks in at the PREROUTING
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index dca2082ec683..213a462b739b 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -49,7 +49,7 @@ obj-$(CONFIG_IP_NF_SECURITY) += iptable_security.o
# matches
obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o
-obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o
+obj-$(CONFIG_IP_NF_MATCH_RPFILTER) += ipt_rpfilter.o
# targets
obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index e59aabd0eae4..a057fe64debd 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -404,6 +404,7 @@ __ipq_rcv_skb(struct sk_buff *skb)
int status, type, pid, flags;
unsigned int nlmsglen, skblen;
struct nlmsghdr *nlh;
+ bool enable_timestamp = false;
skblen = skb->len;
if (skblen < sizeof(*nlh))
@@ -441,12 +442,13 @@ __ipq_rcv_skb(struct sk_buff *skb)
RCV_SKB_FAIL(-EBUSY);
}
} else {
- net_enable_timestamp();
+ enable_timestamp = true;
peer_pid = pid;
}
spin_unlock_bh(&queue_lock);
-
+ if (enable_timestamp)
+ net_enable_timestamp();
status = ipq_receive_peer(NLMSG_DATA(nlh), type,
nlmsglen - NLMSG_LENGTH(0));
if (status < 0)
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index 9931152a78b5..2f210c79dc87 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -30,9 +30,9 @@ MODULE_DESCRIPTION("Xtables: automatic-address SNAT");
/* FIXME: Multiple targets. --RR */
static int masquerade_tg_check(const struct xt_tgchk_param *par)
{
- const struct nf_nat_multi_range_compat *mr = par->targinfo;
+ const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
- if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) {
+ if (mr->range[0].flags & NF_NAT_RANGE_MAP_IPS) {
pr_debug("bad MAP_IPS.\n");
return -EINVAL;
}
@@ -49,8 +49,8 @@ masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par)
struct nf_conn *ct;
struct nf_conn_nat *nat;
enum ip_conntrack_info ctinfo;
- struct nf_nat_range newrange;
- const struct nf_nat_multi_range_compat *mr;
+ struct nf_nat_ipv4_range newrange;
+ const struct nf_nat_ipv4_multi_range_compat *mr;
const struct rtable *rt;
__be32 newsrc;
@@ -79,13 +79,13 @@ masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par)
nat->masq_index = par->out->ifindex;
/* Transfer from original range. */
- newrange = ((struct nf_nat_range)
- { mr->range[0].flags | IP_NAT_RANGE_MAP_IPS,
+ newrange = ((struct nf_nat_ipv4_range)
+ { mr->range[0].flags | NF_NAT_RANGE_MAP_IPS,
newsrc, newsrc,
mr->range[0].min, mr->range[0].max });
/* Hand modified range to generic setup. */
- return nf_nat_setup_info(ct, &newrange, IP_NAT_MANIP_SRC);
+ return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
}
static int
@@ -139,7 +139,7 @@ static struct xt_target masquerade_tg_reg __read_mostly = {
.name = "MASQUERADE",
.family = NFPROTO_IPV4,
.target = masquerade_tg,
- .targetsize = sizeof(struct nf_nat_multi_range_compat),
+ .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat),
.table = "nat",
.hooks = 1 << NF_INET_POST_ROUTING,
.checkentry = masquerade_tg_check,
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c
index 6cdb298f1035..b5bfbbabf70d 100644
--- a/net/ipv4/netfilter/ipt_NETMAP.c
+++ b/net/ipv4/netfilter/ipt_NETMAP.c
@@ -24,9 +24,9 @@ MODULE_DESCRIPTION("Xtables: 1:1 NAT mapping of IPv4 subnets");
static int netmap_tg_check(const struct xt_tgchk_param *par)
{
- const struct nf_nat_multi_range_compat *mr = par->targinfo;
+ const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
- if (!(mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)) {
+ if (!(mr->range[0].flags & NF_NAT_RANGE_MAP_IPS)) {
pr_debug("bad MAP_IPS.\n");
return -EINVAL;
}
@@ -43,8 +43,8 @@ netmap_tg(struct sk_buff *skb, const struct xt_action_param *par)
struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
__be32 new_ip, netmask;
- const struct nf_nat_multi_range_compat *mr = par->targinfo;
- struct nf_nat_range newrange;
+ const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
+ struct nf_nat_ipv4_range newrange;
NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING ||
par->hooknum == NF_INET_POST_ROUTING ||
@@ -61,8 +61,8 @@ netmap_tg(struct sk_buff *skb, const struct xt_action_param *par)
new_ip = ip_hdr(skb)->saddr & ~netmask;
new_ip |= mr->range[0].min_ip & netmask;
- newrange = ((struct nf_nat_range)
- { mr->range[0].flags | IP_NAT_RANGE_MAP_IPS,
+ newrange = ((struct nf_nat_ipv4_range)
+ { mr->range[0].flags | NF_NAT_RANGE_MAP_IPS,
new_ip, new_ip,
mr->range[0].min, mr->range[0].max });
@@ -74,7 +74,7 @@ static struct xt_target netmap_tg_reg __read_mostly = {
.name = "NETMAP",
.family = NFPROTO_IPV4,
.target = netmap_tg,
- .targetsize = sizeof(struct nf_nat_multi_range_compat),
+ .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat),
.table = "nat",
.hooks = (1 << NF_INET_PRE_ROUTING) |
(1 << NF_INET_POST_ROUTING) |
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c
index 18a0656505a0..7c0103a5203e 100644
--- a/net/ipv4/netfilter/ipt_REDIRECT.c
+++ b/net/ipv4/netfilter/ipt_REDIRECT.c
@@ -28,9 +28,9 @@ MODULE_DESCRIPTION("Xtables: Connection redirection to localhost");
/* FIXME: Take multiple ranges --RR */
static int redirect_tg_check(const struct xt_tgchk_param *par)
{
- const struct nf_nat_multi_range_compat *mr = par->targinfo;
+ const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
- if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) {
+ if (mr->range[0].flags & NF_NAT_RANGE_MAP_IPS) {
pr_debug("bad MAP_IPS.\n");
return -EINVAL;
}
@@ -47,8 +47,8 @@ redirect_tg(struct sk_buff *skb, const struct xt_action_param *par)
struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
__be32 newdst;
- const struct nf_nat_multi_range_compat *mr = par->targinfo;
- struct nf_nat_range newrange;
+ const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
+ struct nf_nat_ipv4_range newrange;
NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING ||
par->hooknum == NF_INET_LOCAL_OUT);
@@ -76,20 +76,20 @@ redirect_tg(struct sk_buff *skb, const struct xt_action_param *par)
}
/* Transfer from original range. */
- newrange = ((struct nf_nat_range)
- { mr->range[0].flags | IP_NAT_RANGE_MAP_IPS,
+ newrange = ((struct nf_nat_ipv4_range)
+ { mr->range[0].flags | NF_NAT_RANGE_MAP_IPS,
newdst, newdst,
mr->range[0].min, mr->range[0].max });
/* Hand modified range to generic setup. */
- return nf_nat_setup_info(ct, &newrange, IP_NAT_MANIP_DST);
+ return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_DST);
}
static struct xt_target redirect_tg_reg __read_mostly = {
.name = "REDIRECT",
.family = NFPROTO_IPV4,
.target = redirect_tg,
- .targetsize = sizeof(struct nf_nat_multi_range_compat),
+ .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat),
.table = "nat",
.hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT),
.checkentry = redirect_tg_check,
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index b5508151e547..ba5756d20165 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -65,7 +65,7 @@ static unsigned int flushtimeout = 10;
module_param(flushtimeout, uint, 0600);
MODULE_PARM_DESC(flushtimeout, "buffer flush timeout (hundredths of a second)");
-static int nflog = 1;
+static bool nflog = true;
module_param(nflog, bool, 0400);
MODULE_PARM_DESC(nflog, "register as internal netfilter logging module");
diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c
deleted file mode 100644
index 2b57e52c746c..000000000000
--- a/net/ipv4/netfilter/ipt_ecn.c
+++ /dev/null
@@ -1,127 +0,0 @@
-/* IP tables module for matching the value of the IPv4 and TCP ECN bits
- *
- * (C) 2002 by Harald Welte <laforge@gnumonks.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <net/ip.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/tcp.h>
-
-#include <linux/netfilter/x_tables.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ipt_ecn.h>
-
-MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
-MODULE_DESCRIPTION("Xtables: Explicit Congestion Notification (ECN) flag match for IPv4");
-MODULE_LICENSE("GPL");
-
-static inline bool match_ip(const struct sk_buff *skb,
- const struct ipt_ecn_info *einfo)
-{
- return ((ip_hdr(skb)->tos & IPT_ECN_IP_MASK) == einfo->ip_ect) ^
- !!(einfo->invert & IPT_ECN_OP_MATCH_IP);
-}
-
-static inline bool match_tcp(const struct sk_buff *skb,
- const struct ipt_ecn_info *einfo,
- bool *hotdrop)
-{
- struct tcphdr _tcph;
- const struct tcphdr *th;
-
- /* In practice, TCP match does this, so can't fail. But let's
- * be good citizens.
- */
- th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph);
- if (th == NULL) {
- *hotdrop = false;
- return false;
- }
-
- if (einfo->operation & IPT_ECN_OP_MATCH_ECE) {
- if (einfo->invert & IPT_ECN_OP_MATCH_ECE) {
- if (th->ece == 1)
- return false;
- } else {
- if (th->ece == 0)
- return false;
- }
- }
-
- if (einfo->operation & IPT_ECN_OP_MATCH_CWR) {
- if (einfo->invert & IPT_ECN_OP_MATCH_CWR) {
- if (th->cwr == 1)
- return false;
- } else {
- if (th->cwr == 0)
- return false;
- }
- }
-
- return true;
-}
-
-static bool ecn_mt(const struct sk_buff *skb, struct xt_action_param *par)
-{
- const struct ipt_ecn_info *info = par->matchinfo;
-
- if (info->operation & IPT_ECN_OP_MATCH_IP)
- if (!match_ip(skb, info))
- return false;
-
- if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR)) {
- if (!match_tcp(skb, info, &par->hotdrop))
- return false;
- }
-
- return true;
-}
-
-static int ecn_mt_check(const struct xt_mtchk_param *par)
-{
- const struct ipt_ecn_info *info = par->matchinfo;
- const struct ipt_ip *ip = par->entryinfo;
-
- if (info->operation & IPT_ECN_OP_MATCH_MASK)
- return -EINVAL;
-
- if (info->invert & IPT_ECN_OP_MATCH_MASK)
- return -EINVAL;
-
- if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR) &&
- (ip->proto != IPPROTO_TCP || ip->invflags & IPT_INV_PROTO)) {
- pr_info("cannot match TCP bits in rule for non-tcp packets\n");
- return -EINVAL;
- }
-
- return 0;
-}
-
-static struct xt_match ecn_mt_reg __read_mostly = {
- .name = "ecn",
- .family = NFPROTO_IPV4,
- .match = ecn_mt,
- .matchsize = sizeof(struct ipt_ecn_info),
- .checkentry = ecn_mt_check,
- .me = THIS_MODULE,
-};
-
-static int __init ecn_mt_init(void)
-{
- return xt_register_match(&ecn_mt_reg);
-}
-
-static void __exit ecn_mt_exit(void)
-{
- xt_unregister_match(&ecn_mt_reg);
-}
-
-module_init(ecn_mt_init);
-module_exit(ecn_mt_exit);
diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c
new file mode 100644
index 000000000000..31371be8174b
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_rpfilter.c
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2011 Florian Westphal <fw@strlen.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * based on fib_frontend.c; Author: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/ip.h>
+#include <net/ip.h>
+#include <net/ip_fib.h>
+#include <net/route.h>
+
+#include <linux/netfilter/xt_rpfilter.h>
+#include <linux/netfilter/x_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Florian Westphal <fw@strlen.de>");
+MODULE_DESCRIPTION("iptables: ipv4 reverse path filter match");
+
+/* don't try to find route from mcast/bcast/zeronet */
+static __be32 rpfilter_get_saddr(__be32 addr)
+{
+ if (ipv4_is_multicast(addr) || ipv4_is_lbcast(addr) ||
+ ipv4_is_zeronet(addr))
+ return 0;
+ return addr;
+}
+
+static bool rpfilter_lookup_reverse(struct flowi4 *fl4,
+ const struct net_device *dev, u8 flags)
+{
+ struct fib_result res;
+ bool dev_match;
+ struct net *net = dev_net(dev);
+ int ret __maybe_unused;
+
+ if (fib_lookup(net, fl4, &res))
+ return false;
+
+ if (res.type != RTN_UNICAST) {
+ if (res.type != RTN_LOCAL || !(flags & XT_RPFILTER_ACCEPT_LOCAL))
+ return false;
+ }
+ dev_match = false;
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+ for (ret = 0; ret < res.fi->fib_nhs; ret++) {
+ struct fib_nh *nh = &res.fi->fib_nh[ret];
+
+ if (nh->nh_dev == dev) {
+ dev_match = true;
+ break;
+ }
+ }
+#else
+ if (FIB_RES_DEV(res) == dev)
+ dev_match = true;
+#endif
+ if (dev_match || flags & XT_RPFILTER_LOOSE)
+ return FIB_RES_NH(res).nh_scope <= RT_SCOPE_HOST;
+ return dev_match;
+}
+
+static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_rpfilter_info *info;
+ const struct iphdr *iph;
+ struct flowi4 flow;
+ bool invert;
+
+ info = par->matchinfo;
+ invert = info->flags & XT_RPFILTER_INVERT;
+
+ if (par->in->flags & IFF_LOOPBACK)
+ return true ^ invert;
+
+ iph = ip_hdr(skb);
+ if (ipv4_is_multicast(iph->daddr)) {
+ if (ipv4_is_zeronet(iph->saddr))
+ return ipv4_is_local_multicast(iph->daddr) ^ invert;
+ flow.flowi4_iif = 0;
+ } else {
+ flow.flowi4_iif = dev_net(par->in)->loopback_dev->ifindex;
+ }
+
+ flow.daddr = iph->saddr;
+ flow.saddr = rpfilter_get_saddr(iph->daddr);
+ flow.flowi4_oif = 0;
+ flow.flowi4_mark = info->flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0;
+ flow.flowi4_tos = RT_TOS(iph->tos);
+ flow.flowi4_scope = RT_SCOPE_UNIVERSE;
+
+ return rpfilter_lookup_reverse(&flow, par->in, info->flags) ^ invert;
+}
+
+static int rpfilter_check(const struct xt_mtchk_param *par)
+{
+ const struct xt_rpfilter_info *info = par->matchinfo;
+ unsigned int options = ~XT_RPFILTER_OPTION_MASK;
+ if (info->flags & options) {
+ pr_info("unknown options encountered");
+ return -EINVAL;
+ }
+
+ if (strcmp(par->table, "mangle") != 0 &&
+ strcmp(par->table, "raw") != 0) {
+ pr_info("match only valid in the \'raw\' "
+ "or \'mangle\' tables, not \'%s\'.\n", par->table);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static struct xt_match rpfilter_mt_reg __read_mostly = {
+ .name = "rpfilter",
+ .family = NFPROTO_IPV4,
+ .checkentry = rpfilter_check,
+ .match = rpfilter_mt,
+ .matchsize = sizeof(struct xt_rpfilter_info),
+ .hooks = (1 << NF_INET_PRE_ROUTING),
+ .me = THIS_MODULE
+};
+
+static int __init rpfilter_mt_init(void)
+{
+ return xt_register_match(&rpfilter_mt_reg);
+}
+
+static void __exit rpfilter_mt_exit(void)
+{
+ xt_unregister_match(&rpfilter_mt_reg);
+}
+
+module_init(rpfilter_mt_init);
+module_exit(rpfilter_mt_exit);
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index c37641e819f2..0e58f09e59fb 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -52,7 +52,7 @@ iptable_filter_hook(unsigned int hook, struct sk_buff *skb,
static struct nf_hook_ops *filter_ops __read_mostly;
/* Default to forward because I got too much mail already. */
-static int forward = NF_ACCEPT;
+static bool forward = NF_ACCEPT;
module_param(forward, bool, 0000);
static int __net_init iptable_filter_net_init(struct net *net)
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 447bc5cfdc6c..acdd002bb540 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -30,7 +30,6 @@
#include <net/netfilter/nf_nat_helper.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_l3proto.h>
-#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_zones.h>
static DEFINE_SPINLOCK(nf_nat_lock);
@@ -57,7 +56,7 @@ hash_by_src(const struct net *net, u16 zone,
/* Original src, to ensure we map it consistently if poss. */
hash = jhash_3words((__force u32)tuple->src.u3.ip,
(__force u32)tuple->src.u.all ^ zone,
- tuple->dst.protonum, 0);
+ tuple->dst.protonum, nf_conntrack_hash_rnd);
return ((u64)hash * net->ipv4.nat_htable_size) >> 32;
}
@@ -82,14 +81,14 @@ EXPORT_SYMBOL(nf_nat_used_tuple);
* that meet the constraints of range. */
static int
in_range(const struct nf_conntrack_tuple *tuple,
- const struct nf_nat_range *range)
+ const struct nf_nat_ipv4_range *range)
{
const struct nf_nat_protocol *proto;
int ret = 0;
/* If we are supposed to map IPs, then we must be in the
range specified, otherwise let this drag us onto a new src IP. */
- if (range->flags & IP_NAT_RANGE_MAP_IPS) {
+ if (range->flags & NF_NAT_RANGE_MAP_IPS) {
if (ntohl(tuple->src.u3.ip) < ntohl(range->min_ip) ||
ntohl(tuple->src.u3.ip) > ntohl(range->max_ip))
return 0;
@@ -97,8 +96,8 @@ in_range(const struct nf_conntrack_tuple *tuple,
rcu_read_lock();
proto = __nf_nat_proto_find(tuple->dst.protonum);
- if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) ||
- proto->in_range(tuple, IP_NAT_MANIP_SRC,
+ if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) ||
+ proto->in_range(tuple, NF_NAT_MANIP_SRC,
&range->min, &range->max))
ret = 1;
rcu_read_unlock();
@@ -123,7 +122,7 @@ static int
find_appropriate_src(struct net *net, u16 zone,
const struct nf_conntrack_tuple *tuple,
struct nf_conntrack_tuple *result,
- const struct nf_nat_range *range)
+ const struct nf_nat_ipv4_range *range)
{
unsigned int h = hash_by_src(net, zone, tuple);
const struct nf_conn_nat *nat;
@@ -157,7 +156,7 @@ find_appropriate_src(struct net *net, u16 zone,
*/
static void
find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple,
- const struct nf_nat_range *range,
+ const struct nf_nat_ipv4_range *range,
const struct nf_conn *ct,
enum nf_nat_manip_type maniptype)
{
@@ -166,10 +165,10 @@ find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple,
u_int32_t minip, maxip, j;
/* No IP mapping? Do nothing. */
- if (!(range->flags & IP_NAT_RANGE_MAP_IPS))
+ if (!(range->flags & NF_NAT_RANGE_MAP_IPS))
return;
- if (maniptype == IP_NAT_MANIP_SRC)
+ if (maniptype == NF_NAT_MANIP_SRC)
var_ipp = &tuple->src.u3.ip;
else
var_ipp = &tuple->dst.u3.ip;
@@ -189,7 +188,7 @@ find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple,
minip = ntohl(range->min_ip);
maxip = ntohl(range->max_ip);
j = jhash_2words((__force u32)tuple->src.u3.ip,
- range->flags & IP_NAT_RANGE_PERSISTENT ?
+ range->flags & NF_NAT_RANGE_PERSISTENT ?
0 : (__force u32)tuple->dst.u3.ip ^ zone, 0);
j = ((u64)j * (maxip - minip + 1)) >> 32;
*var_ipp = htonl(minip + j);
@@ -204,7 +203,7 @@ find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple,
static void
get_unique_tuple(struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_tuple *orig_tuple,
- const struct nf_nat_range *range,
+ const struct nf_nat_ipv4_range *range,
struct nf_conn *ct,
enum nf_nat_manip_type maniptype)
{
@@ -219,8 +218,8 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
This is only required for source (ie. NAT/masq) mappings.
So far, we don't do local source mappings, so multiple
manips not an issue. */
- if (maniptype == IP_NAT_MANIP_SRC &&
- !(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) {
+ if (maniptype == NF_NAT_MANIP_SRC &&
+ !(range->flags & NF_NAT_RANGE_PROTO_RANDOM)) {
/* try the original tuple first */
if (in_range(orig_tuple, range)) {
if (!nf_nat_used_tuple(orig_tuple, ct)) {
@@ -247,8 +246,8 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
proto = __nf_nat_proto_find(orig_tuple->dst.protonum);
/* Only bother mapping if it's not already in range and unique */
- if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) {
- if (range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) {
+ if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM)) {
+ if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) {
if (proto->in_range(tuple, maniptype, &range->min,
&range->max) &&
(range->min.all == range->max.all ||
@@ -267,7 +266,7 @@ out:
unsigned int
nf_nat_setup_info(struct nf_conn *ct,
- const struct nf_nat_range *range,
+ const struct nf_nat_ipv4_range *range,
enum nf_nat_manip_type maniptype)
{
struct net *net = nf_ct_net(ct);
@@ -284,8 +283,8 @@ nf_nat_setup_info(struct nf_conn *ct,
}
}
- NF_CT_ASSERT(maniptype == IP_NAT_MANIP_SRC ||
- maniptype == IP_NAT_MANIP_DST);
+ NF_CT_ASSERT(maniptype == NF_NAT_MANIP_SRC ||
+ maniptype == NF_NAT_MANIP_DST);
BUG_ON(nf_nat_initialized(ct, maniptype));
/* What we've got will look like inverse of reply. Normally
@@ -306,13 +305,13 @@ nf_nat_setup_info(struct nf_conn *ct,
nf_conntrack_alter_reply(ct, &reply);
/* Non-atomic: we own this at the moment. */
- if (maniptype == IP_NAT_MANIP_SRC)
+ if (maniptype == NF_NAT_MANIP_SRC)
ct->status |= IPS_SRC_NAT;
else
ct->status |= IPS_DST_NAT;
}
- if (maniptype == IP_NAT_MANIP_SRC) {
+ if (maniptype == NF_NAT_MANIP_SRC) {
unsigned int srchash;
srchash = hash_by_src(net, nf_ct_zone(ct),
@@ -327,7 +326,7 @@ nf_nat_setup_info(struct nf_conn *ct,
}
/* It's done. */
- if (maniptype == IP_NAT_MANIP_DST)
+ if (maniptype == NF_NAT_MANIP_DST)
ct->status |= IPS_DST_NAT_DONE;
else
ct->status |= IPS_SRC_NAT_DONE;
@@ -361,7 +360,7 @@ manip_pkt(u_int16_t proto,
iph = (void *)skb->data + iphdroff;
- if (maniptype == IP_NAT_MANIP_SRC) {
+ if (maniptype == NF_NAT_MANIP_SRC) {
csum_replace4(&iph->check, iph->saddr, target->src.u3.ip);
iph->saddr = target->src.u3.ip;
} else {
@@ -381,7 +380,7 @@ unsigned int nf_nat_packet(struct nf_conn *ct,
unsigned long statusbit;
enum nf_nat_manip_type mtype = HOOK2MANIP(hooknum);
- if (mtype == IP_NAT_MANIP_SRC)
+ if (mtype == NF_NAT_MANIP_SRC)
statusbit = IPS_SRC_NAT;
else
statusbit = IPS_DST_NAT;
@@ -414,8 +413,7 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
struct icmphdr icmp;
struct iphdr ip;
} *inside;
- const struct nf_conntrack_l4proto *l4proto;
- struct nf_conntrack_tuple inner, target;
+ struct nf_conntrack_tuple target;
int hdrlen = ip_hdrlen(skb);
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
unsigned long statusbit;
@@ -447,7 +445,7 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
return 0;
}
- if (manip == IP_NAT_MANIP_SRC)
+ if (manip == NF_NAT_MANIP_SRC)
statusbit = IPS_SRC_NAT;
else
statusbit = IPS_DST_NAT;
@@ -463,16 +461,6 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
"dir %s\n", skb, manip,
dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY");
- /* rcu_read_lock()ed by nf_hook_slow */
- l4proto = __nf_ct_l4proto_find(PF_INET, inside->ip.protocol);
-
- if (!nf_ct_get_tuple(skb, hdrlen + sizeof(struct icmphdr),
- (hdrlen +
- sizeof(struct icmphdr) + inside->ip.ihl * 4),
- (u_int16_t)AF_INET, inside->ip.protocol,
- &inner, l3proto, l4proto))
- return 0;
-
/* Change inner back to look like incoming packet. We do the
opposite manip on this hook to normal, because it might not
pass all hooks (locally-generated ICMP). Consider incoming
@@ -575,26 +563,6 @@ static struct nf_ct_ext_type nat_extend __read_mostly = {
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_conntrack.h>
-static const struct nf_nat_protocol *
-nf_nat_proto_find_get(u_int8_t protonum)
-{
- const struct nf_nat_protocol *p;
-
- rcu_read_lock();
- p = __nf_nat_proto_find(protonum);
- if (!try_module_get(p->me))
- p = &nf_nat_unknown_protocol;
- rcu_read_unlock();
-
- return p;
-}
-
-static void
-nf_nat_proto_put(const struct nf_nat_protocol *p)
-{
- module_put(p->me);
-}
-
static const struct nla_policy protonat_nla_policy[CTA_PROTONAT_MAX+1] = {
[CTA_PROTONAT_PORT_MIN] = { .type = NLA_U16 },
[CTA_PROTONAT_PORT_MAX] = { .type = NLA_U16 },
@@ -602,7 +570,7 @@ static const struct nla_policy protonat_nla_policy[CTA_PROTONAT_MAX+1] = {
static int nfnetlink_parse_nat_proto(struct nlattr *attr,
const struct nf_conn *ct,
- struct nf_nat_range *range)
+ struct nf_nat_ipv4_range *range)
{
struct nlattr *tb[CTA_PROTONAT_MAX+1];
const struct nf_nat_protocol *npt;
@@ -612,21 +580,23 @@ static int nfnetlink_parse_nat_proto(struct nlattr *attr,
if (err < 0)
return err;
- npt = nf_nat_proto_find_get(nf_ct_protonum(ct));
+ rcu_read_lock();
+ npt = __nf_nat_proto_find(nf_ct_protonum(ct));
if (npt->nlattr_to_range)
err = npt->nlattr_to_range(tb, range);
- nf_nat_proto_put(npt);
+ rcu_read_unlock();
return err;
}
static const struct nla_policy nat_nla_policy[CTA_NAT_MAX+1] = {
[CTA_NAT_MINIP] = { .type = NLA_U32 },
[CTA_NAT_MAXIP] = { .type = NLA_U32 },
+ [CTA_NAT_PROTO] = { .type = NLA_NESTED },
};
static int
nfnetlink_parse_nat(const struct nlattr *nat,
- const struct nf_conn *ct, struct nf_nat_range *range)
+ const struct nf_conn *ct, struct nf_nat_ipv4_range *range)
{
struct nlattr *tb[CTA_NAT_MAX+1];
int err;
@@ -646,7 +616,7 @@ nfnetlink_parse_nat(const struct nlattr *nat,
range->max_ip = nla_get_be32(tb[CTA_NAT_MAXIP]);
if (range->min_ip)
- range->flags |= IP_NAT_RANGE_MAP_IPS;
+ range->flags |= NF_NAT_RANGE_MAP_IPS;
if (!tb[CTA_NAT_PROTO])
return 0;
@@ -663,7 +633,7 @@ nfnetlink_parse_nat_setup(struct nf_conn *ct,
enum nf_nat_manip_type manip,
const struct nlattr *attr)
{
- struct nf_nat_range range;
+ struct nf_nat_ipv4_range range;
if (nfnetlink_parse_nat(attr, ct, &range) < 0)
return -EINVAL;
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
index b9a1136addbd..dc1dd912baf4 100644
--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -398,7 +398,7 @@ static int nat_h245(struct sk_buff *skb, struct nf_conn *ct,
static void ip_nat_q931_expect(struct nf_conn *new,
struct nf_conntrack_expect *this)
{
- struct nf_nat_range range;
+ struct nf_nat_ipv4_range range;
if (this->tuple.src.u3.ip != 0) { /* Only accept calls from GK */
nf_nat_follow_master(new, this);
@@ -409,16 +409,16 @@ static void ip_nat_q931_expect(struct nf_conn *new,
BUG_ON(new->status & IPS_NAT_DONE_MASK);
/* Change src to where master sends to */
- range.flags = IP_NAT_RANGE_MAP_IPS;
+ range.flags = NF_NAT_RANGE_MAP_IPS;
range.min_ip = range.max_ip = new->tuplehash[!this->dir].tuple.src.u3.ip;
- nf_nat_setup_info(new, &range, IP_NAT_MANIP_SRC);
+ nf_nat_setup_info(new, &range, NF_NAT_MANIP_SRC);
/* For DST manip, map port here to where it's expected. */
- range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
+ range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED);
range.min = range.max = this->saved_proto;
range.min_ip = range.max_ip =
new->master->tuplehash[!this->dir].tuple.src.u3.ip;
- nf_nat_setup_info(new, &range, IP_NAT_MANIP_DST);
+ nf_nat_setup_info(new, &range, NF_NAT_MANIP_DST);
}
/****************************************************************************/
@@ -496,21 +496,21 @@ static int nat_q931(struct sk_buff *skb, struct nf_conn *ct,
static void ip_nat_callforwarding_expect(struct nf_conn *new,
struct nf_conntrack_expect *this)
{
- struct nf_nat_range range;
+ struct nf_nat_ipv4_range range;
/* This must be a fresh one. */
BUG_ON(new->status & IPS_NAT_DONE_MASK);
/* Change src to where master sends to */
- range.flags = IP_NAT_RANGE_MAP_IPS;
+ range.flags = NF_NAT_RANGE_MAP_IPS;
range.min_ip = range.max_ip = new->tuplehash[!this->dir].tuple.src.u3.ip;
- nf_nat_setup_info(new, &range, IP_NAT_MANIP_SRC);
+ nf_nat_setup_info(new, &range, NF_NAT_MANIP_SRC);
/* For DST manip, map port here to where it's expected. */
- range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
+ range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED);
range.min = range.max = this->saved_proto;
range.min_ip = range.max_ip = this->saved_ip;
- nf_nat_setup_info(new, &range, IP_NAT_MANIP_DST);
+ nf_nat_setup_info(new, &range, NF_NAT_MANIP_DST);
}
/****************************************************************************/
diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c
index ebc5f8894f99..af65958f6308 100644
--- a/net/ipv4/netfilter/nf_nat_helper.c
+++ b/net/ipv4/netfilter/nf_nat_helper.c
@@ -253,12 +253,6 @@ nf_nat_mangle_udp_packet(struct sk_buff *skb,
struct udphdr *udph;
int datalen, oldlen;
- /* UDP helpers might accidentally mangle the wrong packet */
- iph = ip_hdr(skb);
- if (skb->len < iph->ihl*4 + sizeof(*udph) +
- match_offset + match_len)
- return 0;
-
if (!skb_make_writable(skb, skb->len))
return 0;
@@ -430,22 +424,22 @@ nf_nat_seq_adjust(struct sk_buff *skb,
void nf_nat_follow_master(struct nf_conn *ct,
struct nf_conntrack_expect *exp)
{
- struct nf_nat_range range;
+ struct nf_nat_ipv4_range range;
/* This must be a fresh one. */
BUG_ON(ct->status & IPS_NAT_DONE_MASK);
/* Change src to where master sends to */
- range.flags = IP_NAT_RANGE_MAP_IPS;
+ range.flags = NF_NAT_RANGE_MAP_IPS;
range.min_ip = range.max_ip
= ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip;
- nf_nat_setup_info(ct, &range, IP_NAT_MANIP_SRC);
+ nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC);
/* For DST manip, map port here to where it's expected. */
- range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
+ range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED);
range.min = range.max = exp->saved_proto;
range.min_ip = range.max_ip
= ct->master->tuplehash[!exp->dir].tuple.src.u3.ip;
- nf_nat_setup_info(ct, &range, IP_NAT_MANIP_DST);
+ nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST);
}
EXPORT_SYMBOL(nf_nat_follow_master);
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c
index 3e8284ba46b8..c273d58980ae 100644
--- a/net/ipv4/netfilter/nf_nat_pptp.c
+++ b/net/ipv4/netfilter/nf_nat_pptp.c
@@ -47,7 +47,7 @@ static void pptp_nat_expected(struct nf_conn *ct,
struct nf_conntrack_tuple t;
const struct nf_ct_pptp_master *ct_pptp_info;
const struct nf_nat_pptp *nat_pptp_info;
- struct nf_nat_range range;
+ struct nf_nat_ipv4_range range;
ct_pptp_info = &nfct_help(master)->help.ct_pptp_info;
nat_pptp_info = &nfct_nat(master)->help.nat_pptp_info;
@@ -88,24 +88,24 @@ static void pptp_nat_expected(struct nf_conn *ct,
BUG_ON(ct->status & IPS_NAT_DONE_MASK);
/* Change src to where master sends to */
- range.flags = IP_NAT_RANGE_MAP_IPS;
+ range.flags = NF_NAT_RANGE_MAP_IPS;
range.min_ip = range.max_ip
= ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip;
if (exp->dir == IP_CT_DIR_ORIGINAL) {
- range.flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
+ range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
range.min = range.max = exp->saved_proto;
}
- nf_nat_setup_info(ct, &range, IP_NAT_MANIP_SRC);
+ nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC);
/* For DST manip, map port here to where it's expected. */
- range.flags = IP_NAT_RANGE_MAP_IPS;
+ range.flags = NF_NAT_RANGE_MAP_IPS;
range.min_ip = range.max_ip
= ct->master->tuplehash[!exp->dir].tuple.src.u3.ip;
if (exp->dir == IP_CT_DIR_REPLY) {
- range.flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
+ range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
range.min = range.max = exp->saved_proto;
}
- nf_nat_setup_info(ct, &range, IP_NAT_MANIP_DST);
+ nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST);
}
/* outbound packets == from PNS to PAC */
diff --git a/net/ipv4/netfilter/nf_nat_proto_common.c b/net/ipv4/netfilter/nf_nat_proto_common.c
index a3d997618602..9993bc93e102 100644
--- a/net/ipv4/netfilter/nf_nat_proto_common.c
+++ b/net/ipv4/netfilter/nf_nat_proto_common.c
@@ -26,7 +26,7 @@ bool nf_nat_proto_in_range(const struct nf_conntrack_tuple *tuple,
{
__be16 port;
- if (maniptype == IP_NAT_MANIP_SRC)
+ if (maniptype == NF_NAT_MANIP_SRC)
port = tuple->src.u.all;
else
port = tuple->dst.u.all;
@@ -37,7 +37,7 @@ bool nf_nat_proto_in_range(const struct nf_conntrack_tuple *tuple,
EXPORT_SYMBOL_GPL(nf_nat_proto_in_range);
void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple,
- const struct nf_nat_range *range,
+ const struct nf_nat_ipv4_range *range,
enum nf_nat_manip_type maniptype,
const struct nf_conn *ct,
u_int16_t *rover)
@@ -46,15 +46,15 @@ void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple,
__be16 *portptr;
u_int16_t off;
- if (maniptype == IP_NAT_MANIP_SRC)
+ if (maniptype == NF_NAT_MANIP_SRC)
portptr = &tuple->src.u.all;
else
portptr = &tuple->dst.u.all;
/* If no range specified... */
- if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) {
+ if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) {
/* If it's dst rewrite, can't change port */
- if (maniptype == IP_NAT_MANIP_DST)
+ if (maniptype == NF_NAT_MANIP_DST)
return;
if (ntohs(*portptr) < 1024) {
@@ -75,9 +75,9 @@ void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple,
range_size = ntohs(range->max.all) - min + 1;
}
- if (range->flags & IP_NAT_RANGE_PROTO_RANDOM)
+ if (range->flags & NF_NAT_RANGE_PROTO_RANDOM)
off = secure_ipv4_port_ephemeral(tuple->src.u3.ip, tuple->dst.u3.ip,
- maniptype == IP_NAT_MANIP_SRC
+ maniptype == NF_NAT_MANIP_SRC
? tuple->dst.u.all
: tuple->src.u.all);
else
@@ -87,7 +87,7 @@ void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple,
*portptr = htons(min + off % range_size);
if (++i != range_size && nf_nat_used_tuple(tuple, ct))
continue;
- if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM))
+ if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM))
*rover = off;
return;
}
@@ -96,31 +96,19 @@ void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple,
EXPORT_SYMBOL_GPL(nf_nat_proto_unique_tuple);
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
-int nf_nat_proto_range_to_nlattr(struct sk_buff *skb,
- const struct nf_nat_range *range)
-{
- NLA_PUT_BE16(skb, CTA_PROTONAT_PORT_MIN, range->min.all);
- NLA_PUT_BE16(skb, CTA_PROTONAT_PORT_MAX, range->max.all);
- return 0;
-
-nla_put_failure:
- return -1;
-}
-EXPORT_SYMBOL_GPL(nf_nat_proto_nlattr_to_range);
-
int nf_nat_proto_nlattr_to_range(struct nlattr *tb[],
- struct nf_nat_range *range)
+ struct nf_nat_ipv4_range *range)
{
if (tb[CTA_PROTONAT_PORT_MIN]) {
range->min.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MIN]);
range->max.all = range->min.tcp.port;
- range->flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
+ range->flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
}
if (tb[CTA_PROTONAT_PORT_MAX]) {
range->max.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MAX]);
- range->flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
+ range->flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
}
return 0;
}
-EXPORT_SYMBOL_GPL(nf_nat_proto_range_to_nlattr);
+EXPORT_SYMBOL_GPL(nf_nat_proto_nlattr_to_range);
#endif
diff --git a/net/ipv4/netfilter/nf_nat_proto_dccp.c b/net/ipv4/netfilter/nf_nat_proto_dccp.c
index 570faf2667b2..3f67138d187c 100644
--- a/net/ipv4/netfilter/nf_nat_proto_dccp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_dccp.c
@@ -24,7 +24,7 @@ static u_int16_t dccp_port_rover;
static void
dccp_unique_tuple(struct nf_conntrack_tuple *tuple,
- const struct nf_nat_range *range,
+ const struct nf_nat_ipv4_range *range,
enum nf_nat_manip_type maniptype,
const struct nf_conn *ct)
{
@@ -54,7 +54,7 @@ dccp_manip_pkt(struct sk_buff *skb,
iph = (struct iphdr *)(skb->data + iphdroff);
hdr = (struct dccp_hdr *)(skb->data + hdroff);
- if (maniptype == IP_NAT_MANIP_SRC) {
+ if (maniptype == NF_NAT_MANIP_SRC) {
oldip = iph->saddr;
newip = tuple->src.u3.ip;
newport = tuple->src.u.dccp.port;
@@ -80,12 +80,10 @@ dccp_manip_pkt(struct sk_buff *skb,
static const struct nf_nat_protocol nf_nat_protocol_dccp = {
.protonum = IPPROTO_DCCP,
- .me = THIS_MODULE,
.manip_pkt = dccp_manip_pkt,
.in_range = nf_nat_proto_in_range,
.unique_tuple = dccp_unique_tuple,
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
- .range_to_nlattr = nf_nat_proto_range_to_nlattr,
.nlattr_to_range = nf_nat_proto_nlattr_to_range,
#endif
};
diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c
index bc8d83a31c73..46ba0b9ab985 100644
--- a/net/ipv4/netfilter/nf_nat_proto_gre.c
+++ b/net/ipv4/netfilter/nf_nat_proto_gre.c
@@ -39,7 +39,7 @@ MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE");
/* generate unique tuple ... */
static void
gre_unique_tuple(struct nf_conntrack_tuple *tuple,
- const struct nf_nat_range *range,
+ const struct nf_nat_ipv4_range *range,
enum nf_nat_manip_type maniptype,
const struct nf_conn *ct)
{
@@ -52,12 +52,12 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple,
if (!ct->master)
return;
- if (maniptype == IP_NAT_MANIP_SRC)
+ if (maniptype == NF_NAT_MANIP_SRC)
keyptr = &tuple->src.u.gre.key;
else
keyptr = &tuple->dst.u.gre.key;
- if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) {
+ if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) {
pr_debug("%p: NATing GRE PPTP\n", ct);
min = 1;
range_size = 0xffff;
@@ -99,7 +99,7 @@ gre_manip_pkt(struct sk_buff *skb, unsigned int iphdroff,
/* we only have destination manip of a packet, since 'source key'
* is not present in the packet itself */
- if (maniptype != IP_NAT_MANIP_DST)
+ if (maniptype != NF_NAT_MANIP_DST)
return true;
switch (greh->version) {
case GRE_VERSION_1701:
@@ -119,12 +119,10 @@ gre_manip_pkt(struct sk_buff *skb, unsigned int iphdroff,
static const struct nf_nat_protocol gre = {
.protonum = IPPROTO_GRE,
- .me = THIS_MODULE,
.manip_pkt = gre_manip_pkt,
.in_range = nf_nat_proto_in_range,
.unique_tuple = gre_unique_tuple,
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
- .range_to_nlattr = nf_nat_proto_range_to_nlattr,
.nlattr_to_range = nf_nat_proto_nlattr_to_range,
#endif
};
diff --git a/net/ipv4/netfilter/nf_nat_proto_icmp.c b/net/ipv4/netfilter/nf_nat_proto_icmp.c
index 9f4dc1235dc7..b35172851bae 100644
--- a/net/ipv4/netfilter/nf_nat_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_icmp.c
@@ -30,7 +30,7 @@ icmp_in_range(const struct nf_conntrack_tuple *tuple,
static void
icmp_unique_tuple(struct nf_conntrack_tuple *tuple,
- const struct nf_nat_range *range,
+ const struct nf_nat_ipv4_range *range,
enum nf_nat_manip_type maniptype,
const struct nf_conn *ct)
{
@@ -40,7 +40,7 @@ icmp_unique_tuple(struct nf_conntrack_tuple *tuple,
range_size = ntohs(range->max.icmp.id) - ntohs(range->min.icmp.id) + 1;
/* If no range specified... */
- if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED))
+ if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED))
range_size = 0xFFFF;
for (i = 0; ; ++id) {
@@ -74,12 +74,10 @@ icmp_manip_pkt(struct sk_buff *skb,
const struct nf_nat_protocol nf_nat_protocol_icmp = {
.protonum = IPPROTO_ICMP,
- .me = THIS_MODULE,
.manip_pkt = icmp_manip_pkt,
.in_range = icmp_in_range,
.unique_tuple = icmp_unique_tuple,
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
- .range_to_nlattr = nf_nat_proto_range_to_nlattr,
.nlattr_to_range = nf_nat_proto_nlattr_to_range,
#endif
};
diff --git a/net/ipv4/netfilter/nf_nat_proto_sctp.c b/net/ipv4/netfilter/nf_nat_proto_sctp.c
index bd5a80a62a5b..3cce9b6c1c29 100644
--- a/net/ipv4/netfilter/nf_nat_proto_sctp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_sctp.c
@@ -19,7 +19,7 @@ static u_int16_t nf_sctp_port_rover;
static void
sctp_unique_tuple(struct nf_conntrack_tuple *tuple,
- const struct nf_nat_range *range,
+ const struct nf_nat_ipv4_range *range,
enum nf_nat_manip_type maniptype,
const struct nf_conn *ct)
{
@@ -46,7 +46,7 @@ sctp_manip_pkt(struct sk_buff *skb,
iph = (struct iphdr *)(skb->data + iphdroff);
hdr = (struct sctphdr *)(skb->data + hdroff);
- if (maniptype == IP_NAT_MANIP_SRC) {
+ if (maniptype == NF_NAT_MANIP_SRC) {
/* Get rid of src ip and src pt */
oldip = iph->saddr;
newip = tuple->src.u3.ip;
@@ -70,12 +70,10 @@ sctp_manip_pkt(struct sk_buff *skb,
static const struct nf_nat_protocol nf_nat_protocol_sctp = {
.protonum = IPPROTO_SCTP,
- .me = THIS_MODULE,
.manip_pkt = sctp_manip_pkt,
.in_range = nf_nat_proto_in_range,
.unique_tuple = sctp_unique_tuple,
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
- .range_to_nlattr = nf_nat_proto_range_to_nlattr,
.nlattr_to_range = nf_nat_proto_nlattr_to_range,
#endif
};
diff --git a/net/ipv4/netfilter/nf_nat_proto_tcp.c b/net/ipv4/netfilter/nf_nat_proto_tcp.c
index 0d67bb80130f..9fb4b4e72bbf 100644
--- a/net/ipv4/netfilter/nf_nat_proto_tcp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_tcp.c
@@ -23,7 +23,7 @@ static u_int16_t tcp_port_rover;
static void
tcp_unique_tuple(struct nf_conntrack_tuple *tuple,
- const struct nf_nat_range *range,
+ const struct nf_nat_ipv4_range *range,
enum nf_nat_manip_type maniptype,
const struct nf_conn *ct)
{
@@ -55,7 +55,7 @@ tcp_manip_pkt(struct sk_buff *skb,
iph = (struct iphdr *)(skb->data + iphdroff);
hdr = (struct tcphdr *)(skb->data + hdroff);
- if (maniptype == IP_NAT_MANIP_SRC) {
+ if (maniptype == NF_NAT_MANIP_SRC) {
/* Get rid of src ip and src pt */
oldip = iph->saddr;
newip = tuple->src.u3.ip;
@@ -82,12 +82,10 @@ tcp_manip_pkt(struct sk_buff *skb,
const struct nf_nat_protocol nf_nat_protocol_tcp = {
.protonum = IPPROTO_TCP,
- .me = THIS_MODULE,
.manip_pkt = tcp_manip_pkt,
.in_range = nf_nat_proto_in_range,
.unique_tuple = tcp_unique_tuple,
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
- .range_to_nlattr = nf_nat_proto_range_to_nlattr,
.nlattr_to_range = nf_nat_proto_nlattr_to_range,
#endif
};
diff --git a/net/ipv4/netfilter/nf_nat_proto_udp.c b/net/ipv4/netfilter/nf_nat_proto_udp.c
index 0b1b8601cba7..9883336e628f 100644
--- a/net/ipv4/netfilter/nf_nat_proto_udp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_udp.c
@@ -22,7 +22,7 @@ static u_int16_t udp_port_rover;
static void
udp_unique_tuple(struct nf_conntrack_tuple *tuple,
- const struct nf_nat_range *range,
+ const struct nf_nat_ipv4_range *range,
enum nf_nat_manip_type maniptype,
const struct nf_conn *ct)
{
@@ -47,7 +47,7 @@ udp_manip_pkt(struct sk_buff *skb,
iph = (struct iphdr *)(skb->data + iphdroff);
hdr = (struct udphdr *)(skb->data + hdroff);
- if (maniptype == IP_NAT_MANIP_SRC) {
+ if (maniptype == NF_NAT_MANIP_SRC) {
/* Get rid of src ip and src pt */
oldip = iph->saddr;
newip = tuple->src.u3.ip;
@@ -73,12 +73,10 @@ udp_manip_pkt(struct sk_buff *skb,
const struct nf_nat_protocol nf_nat_protocol_udp = {
.protonum = IPPROTO_UDP,
- .me = THIS_MODULE,
.manip_pkt = udp_manip_pkt,
.in_range = nf_nat_proto_in_range,
.unique_tuple = udp_unique_tuple,
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
- .range_to_nlattr = nf_nat_proto_range_to_nlattr,
.nlattr_to_range = nf_nat_proto_nlattr_to_range,
#endif
};
diff --git a/net/ipv4/netfilter/nf_nat_proto_udplite.c b/net/ipv4/netfilter/nf_nat_proto_udplite.c
index f83ef23e2ab7..d24d10a7beb2 100644
--- a/net/ipv4/netfilter/nf_nat_proto_udplite.c
+++ b/net/ipv4/netfilter/nf_nat_proto_udplite.c
@@ -21,7 +21,7 @@ static u_int16_t udplite_port_rover;
static void
udplite_unique_tuple(struct nf_conntrack_tuple *tuple,
- const struct nf_nat_range *range,
+ const struct nf_nat_ipv4_range *range,
enum nf_nat_manip_type maniptype,
const struct nf_conn *ct)
{
@@ -47,7 +47,7 @@ udplite_manip_pkt(struct sk_buff *skb,
iph = (struct iphdr *)(skb->data + iphdroff);
hdr = (struct udphdr *)(skb->data + hdroff);
- if (maniptype == IP_NAT_MANIP_SRC) {
+ if (maniptype == NF_NAT_MANIP_SRC) {
/* Get rid of src ip and src pt */
oldip = iph->saddr;
newip = tuple->src.u3.ip;
@@ -72,12 +72,10 @@ udplite_manip_pkt(struct sk_buff *skb,
static const struct nf_nat_protocol nf_nat_protocol_udplite = {
.protonum = IPPROTO_UDPLITE,
- .me = THIS_MODULE,
.manip_pkt = udplite_manip_pkt,
.in_range = nf_nat_proto_in_range,
.unique_tuple = udplite_unique_tuple,
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
- .range_to_nlattr = nf_nat_proto_range_to_nlattr,
.nlattr_to_range = nf_nat_proto_nlattr_to_range,
#endif
};
diff --git a/net/ipv4/netfilter/nf_nat_proto_unknown.c b/net/ipv4/netfilter/nf_nat_proto_unknown.c
index a50f2bc1c732..e0afe8112b1c 100644
--- a/net/ipv4/netfilter/nf_nat_proto_unknown.c
+++ b/net/ipv4/netfilter/nf_nat_proto_unknown.c
@@ -27,7 +27,7 @@ static bool unknown_in_range(const struct nf_conntrack_tuple *tuple,
}
static void unknown_unique_tuple(struct nf_conntrack_tuple *tuple,
- const struct nf_nat_range *range,
+ const struct nf_nat_ipv4_range *range,
enum nf_nat_manip_type maniptype,
const struct nf_conn *ct)
{
@@ -46,7 +46,6 @@ unknown_manip_pkt(struct sk_buff *skb,
}
const struct nf_nat_protocol nf_nat_unknown_protocol = {
- /* .me isn't set: getting a ref to this cannot fail. */
.manip_pkt = unknown_manip_pkt,
.in_range = unknown_in_range,
.unique_tuple = unknown_unique_tuple,
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
index 733c9abc1cbd..d2a9dc314e0e 100644
--- a/net/ipv4/netfilter/nf_nat_rule.c
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -44,7 +44,7 @@ ipt_snat_target(struct sk_buff *skb, const struct xt_action_param *par)
{
struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
- const struct nf_nat_multi_range_compat *mr = par->targinfo;
+ const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING ||
par->hooknum == NF_INET_LOCAL_IN);
@@ -56,7 +56,7 @@ ipt_snat_target(struct sk_buff *skb, const struct xt_action_param *par)
ctinfo == IP_CT_RELATED_REPLY));
NF_CT_ASSERT(par->out != NULL);
- return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_SRC);
+ return nf_nat_setup_info(ct, &mr->range[0], NF_NAT_MANIP_SRC);
}
static unsigned int
@@ -64,7 +64,7 @@ ipt_dnat_target(struct sk_buff *skb, const struct xt_action_param *par)
{
struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
- const struct nf_nat_multi_range_compat *mr = par->targinfo;
+ const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING ||
par->hooknum == NF_INET_LOCAL_OUT);
@@ -74,12 +74,12 @@ ipt_dnat_target(struct sk_buff *skb, const struct xt_action_param *par)
/* Connection must be valid and new. */
NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
- return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_DST);
+ return nf_nat_setup_info(ct, &mr->range[0], NF_NAT_MANIP_DST);
}
static int ipt_snat_checkentry(const struct xt_tgchk_param *par)
{
- const struct nf_nat_multi_range_compat *mr = par->targinfo;
+ const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
/* Must be a valid range */
if (mr->rangesize != 1) {
@@ -91,7 +91,7 @@ static int ipt_snat_checkentry(const struct xt_tgchk_param *par)
static int ipt_dnat_checkentry(const struct xt_tgchk_param *par)
{
- const struct nf_nat_multi_range_compat *mr = par->targinfo;
+ const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
/* Must be a valid range */
if (mr->rangesize != 1) {
@@ -105,13 +105,13 @@ static unsigned int
alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
{
/* Force range to this IP; let proto decide mapping for
- per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED).
+ per-proto parts (hence not NF_NAT_RANGE_PROTO_SPECIFIED).
*/
- struct nf_nat_range range;
+ struct nf_nat_ipv4_range range;
range.flags = 0;
pr_debug("Allocating NULL binding for %p (%pI4)\n", ct,
- HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC ?
+ HOOK2MANIP(hooknum) == NF_NAT_MANIP_SRC ?
&ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip :
&ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip);
@@ -140,7 +140,7 @@ int nf_nat_rule_find(struct sk_buff *skb,
static struct xt_target ipt_snat_reg __read_mostly = {
.name = "SNAT",
.target = ipt_snat_target,
- .targetsize = sizeof(struct nf_nat_multi_range_compat),
+ .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat),
.table = "nat",
.hooks = (1 << NF_INET_POST_ROUTING) | (1 << NF_INET_LOCAL_IN),
.checkentry = ipt_snat_checkentry,
@@ -150,7 +150,7 @@ static struct xt_target ipt_snat_reg __read_mostly = {
static struct xt_target ipt_dnat_reg __read_mostly = {
.name = "DNAT",
.target = ipt_dnat_target,
- .targetsize = sizeof(struct nf_nat_multi_range_compat),
+ .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat),
.table = "nat",
.hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT),
.checkentry = ipt_dnat_checkentry,
diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c
index 78844d9208f1..d0319f96269f 100644
--- a/net/ipv4/netfilter/nf_nat_sip.c
+++ b/net/ipv4/netfilter/nf_nat_sip.c
@@ -249,25 +249,25 @@ static void ip_nat_sip_seq_adjust(struct sk_buff *skb, s16 off)
static void ip_nat_sip_expected(struct nf_conn *ct,
struct nf_conntrack_expect *exp)
{
- struct nf_nat_range range;
+ struct nf_nat_ipv4_range range;
/* This must be a fresh one. */
BUG_ON(ct->status & IPS_NAT_DONE_MASK);
/* For DST manip, map port here to where it's expected. */
- range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
+ range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED);
range.min = range.max = exp->saved_proto;
range.min_ip = range.max_ip = exp->saved_ip;
- nf_nat_setup_info(ct, &range, IP_NAT_MANIP_DST);
+ nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST);
/* Change src to where master sends to, but only if the connection
* actually came from the same source. */
if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip ==
ct->master->tuplehash[exp->dir].tuple.src.u3.ip) {
- range.flags = IP_NAT_RANGE_MAP_IPS;
+ range.flags = NF_NAT_RANGE_MAP_IPS;
range.min_ip = range.max_ip
= ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip;
- nf_nat_setup_info(ct, &range, IP_NAT_MANIP_SRC);
+ nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC);
}
}
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c
index 92900482edea..3828a4229822 100644
--- a/net/ipv4/netfilter/nf_nat_standalone.c
+++ b/net/ipv4/netfilter/nf_nat_standalone.c
@@ -137,7 +137,7 @@ nf_nat_fn(unsigned int hooknum,
return ret;
} else
pr_debug("Already setup manip %s for ct %p\n",
- maniptype == IP_NAT_MANIP_SRC ? "SRC" : "DST",
+ maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST",
ct);
break;
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index a06f73fdb3c0..43d4c3b22369 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -339,7 +339,6 @@ void ping_err(struct sk_buff *skb, u32 info)
sk = ping_v4_lookup(net, iph->daddr, iph->saddr,
ntohs(icmph->un.echo.id), skb->dev->ifindex);
if (sk == NULL) {
- ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
pr_debug("no socket, dropping\n");
return; /* No socket for error */
}
@@ -679,7 +678,6 @@ static int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
pr_debug("ping_queue_rcv_skb(sk=%p,sk->num=%d,skb=%p)\n",
inet_sk(sk), inet_sk(sk)->inet_num, skb);
if (sock_queue_rcv_skb(sk, skb) < 0) {
- ICMP_INC_STATS_BH(sock_net(sk), ICMP_MIB_INERRORS);
kfree_skb(skb);
pr_debug("ping_queue_rcv_skb -> failed\n");
return -1;
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 466ea8bb7a4d..3569d8ecaeac 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -56,17 +56,17 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
local_bh_disable();
orphans = percpu_counter_sum_positive(&tcp_orphan_count);
- sockets = percpu_counter_sum_positive(&tcp_sockets_allocated);
+ sockets = proto_sockets_allocated_sum_positive(&tcp_prot);
local_bh_enable();
socket_seq_show(seq);
seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %ld\n",
sock_prot_inuse_get(net, &tcp_prot), orphans,
tcp_death_row.tw_count, sockets,
- atomic_long_read(&tcp_memory_allocated));
+ proto_memory_allocated(&tcp_prot));
seq_printf(seq, "UDP: inuse %d mem %ld\n",
sock_prot_inuse_get(net, &udp_prot),
- atomic_long_read(&udp_memory_allocated));
+ proto_memory_allocated(&udp_prot));
seq_printf(seq, "UDPLITE: inuse %d\n",
sock_prot_inuse_get(net, &udplite_prot));
seq_printf(seq, "RAW: inuse %d\n",
@@ -288,7 +288,7 @@ static void icmpmsg_put(struct seq_file *seq)
count = 0;
for (i = 0; i < ICMPMSG_MIB_MAX; i++) {
- val = snmp_fold_field((void __percpu **) net->mib.icmpmsg_statistics, i);
+ val = atomic_long_read(&net->mib.icmpmsg_statistics->mibs[i]);
if (val) {
type[count] = i;
vals[count++] = val;
@@ -307,6 +307,7 @@ static void icmp_put(struct seq_file *seq)
{
int i;
struct net *net = seq->private;
+ atomic_long_t *ptr = net->mib.icmpmsg_statistics->mibs;
seq_puts(seq, "\nIcmp: InMsgs InErrors");
for (i=0; icmpmibmap[i].name != NULL; i++)
@@ -319,15 +320,13 @@ static void icmp_put(struct seq_file *seq)
snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_INERRORS));
for (i=0; icmpmibmap[i].name != NULL; i++)
seq_printf(seq, " %lu",
- snmp_fold_field((void __percpu **) net->mib.icmpmsg_statistics,
- icmpmibmap[i].index));
+ atomic_long_read(ptr + icmpmibmap[i].index));
seq_printf(seq, " %lu %lu",
snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_OUTMSGS),
snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_OUTERRORS));
for (i=0; icmpmibmap[i].name != NULL; i++)
seq_printf(seq, " %lu",
- snmp_fold_field((void __percpu **) net->mib.icmpmsg_statistics,
- icmpmibmap[i].index | 0x100));
+ atomic_long_read(ptr + (icmpmibmap[i].index | 0x100)));
}
/*
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 007e2eb769d3..3ccda5ae8a27 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -292,7 +292,8 @@ static int raw_rcv_skb(struct sock * sk, struct sk_buff * skb)
{
/* Charge it to the socket. */
- if (ip_queue_rcv_skb(sk, skb) < 0) {
+ ipv4_pktinfo_prepare(skb);
+ if (sock_queue_rcv_skb(sk, skb) < 0) {
kfree_skb(skb);
return NET_RX_DROP;
}
@@ -327,6 +328,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
unsigned int iphlen;
int err;
struct rtable *rt = *rtp;
+ int hlen, tlen;
if (length > rt->dst.dev->mtu) {
ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport,
@@ -336,12 +338,14 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
if (flags&MSG_PROBE)
goto out;
+ hlen = LL_RESERVED_SPACE(rt->dst.dev);
+ tlen = rt->dst.dev->needed_tailroom;
skb = sock_alloc_send_skb(sk,
- length + LL_ALLOCATED_SPACE(rt->dst.dev) + 15,
+ length + hlen + tlen + 15,
flags & MSG_DONTWAIT, &err);
if (skb == NULL)
goto error;
- skb_reserve(skb, LL_RESERVED_SPACE(rt->dst.dev));
+ skb_reserve(skb, hlen);
skb->priority = sk->sk_priority;
skb->mark = sk->sk_mark;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 155138d8ec8b..bcacf54e5418 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -91,6 +91,7 @@
#include <linux/rcupdate.h>
#include <linux/times.h>
#include <linux/slab.h>
+#include <linux/prefetch.h>
#include <net/dst.h>
#include <net/net_namespace.h>
#include <net/protocol.h>
@@ -108,11 +109,10 @@
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
-#include <net/atmclip.h>
#include <net/secure_seq.h>
#define RT_FL_TOS(oldflp4) \
- ((u32)(oldflp4->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK)))
+ ((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))
#define IP_MAX_MTU 0xFFF0
@@ -120,6 +120,7 @@
static int ip_rt_max_size;
static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT;
+static int ip_rt_gc_interval __read_mostly = 60 * HZ;
static int ip_rt_gc_min_interval __read_mostly = HZ / 2;
static int ip_rt_redirect_number __read_mostly = 9;
static int ip_rt_redirect_load __read_mostly = HZ / 50;
@@ -131,6 +132,10 @@ static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ;
static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20;
static int ip_rt_min_advmss __read_mostly = 256;
static int rt_chain_length_max __read_mostly = 20;
+static int redirect_genid;
+
+static struct delayed_work expires_work;
+static unsigned long expires_ljiffies;
/*
* Interface to generic destination cache.
@@ -138,7 +143,7 @@ static int rt_chain_length_max __read_mostly = 20;
static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie);
static unsigned int ipv4_default_advmss(const struct dst_entry *dst);
-static unsigned int ipv4_default_mtu(const struct dst_entry *dst);
+static unsigned int ipv4_mtu(const struct dst_entry *dst);
static void ipv4_dst_destroy(struct dst_entry *dst);
static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
static void ipv4_link_failure(struct sk_buff *skb);
@@ -193,7 +198,7 @@ static struct dst_ops ipv4_dst_ops = {
.gc = rt_garbage_collect,
.check = ipv4_dst_check,
.default_advmss = ipv4_default_advmss,
- .default_mtu = ipv4_default_mtu,
+ .mtu = ipv4_mtu,
.cow_metrics = ipv4_cow_metrics,
.destroy = ipv4_dst_destroy,
.ifdown = ipv4_dst_ifdown,
@@ -416,9 +421,13 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v)
else {
struct rtable *r = v;
struct neighbour *n;
- int len;
+ int len, HHUptod;
+
+ rcu_read_lock();
+ n = dst_get_neighbour_noref(&r->dst);
+ HHUptod = (n && (n->nud_state & NUD_CONNECTED)) ? 1 : 0;
+ rcu_read_unlock();
- n = dst_get_neighbour(&r->dst);
seq_printf(seq, "%s\t%08X\t%08X\t%8X\t%d\t%u\t%d\t"
"%08X\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X%n",
r->dst.dev ? r->dst.dev->name : "*",
@@ -432,7 +441,7 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v)
dst_metric(&r->dst, RTAX_RTTVAR)),
r->rt_key_tos,
-1,
- (n && (n->nud_state & NUD_CONNECTED)) ? 1 : 0,
+ HHUptod,
r->rt_spec_dst, &len);
seq_printf(seq, "%*s\n", 127 - len, "");
@@ -825,6 +834,97 @@ static int has_noalias(const struct rtable *head, const struct rtable *rth)
return ONE;
}
+static void rt_check_expire(void)
+{
+ static unsigned int rover;
+ unsigned int i = rover, goal;
+ struct rtable *rth;
+ struct rtable __rcu **rthp;
+ unsigned long samples = 0;
+ unsigned long sum = 0, sum2 = 0;
+ unsigned long delta;
+ u64 mult;
+
+ delta = jiffies - expires_ljiffies;
+ expires_ljiffies = jiffies;
+ mult = ((u64)delta) << rt_hash_log;
+ if (ip_rt_gc_timeout > 1)
+ do_div(mult, ip_rt_gc_timeout);
+ goal = (unsigned int)mult;
+ if (goal > rt_hash_mask)
+ goal = rt_hash_mask + 1;
+ for (; goal > 0; goal--) {
+ unsigned long tmo = ip_rt_gc_timeout;
+ unsigned long length;
+
+ i = (i + 1) & rt_hash_mask;
+ rthp = &rt_hash_table[i].chain;
+
+ if (need_resched())
+ cond_resched();
+
+ samples++;
+
+ if (rcu_dereference_raw(*rthp) == NULL)
+ continue;
+ length = 0;
+ spin_lock_bh(rt_hash_lock_addr(i));
+ while ((rth = rcu_dereference_protected(*rthp,
+ lockdep_is_held(rt_hash_lock_addr(i)))) != NULL) {
+ prefetch(rth->dst.rt_next);
+ if (rt_is_expired(rth)) {
+ *rthp = rth->dst.rt_next;
+ rt_free(rth);
+ continue;
+ }
+ if (rth->dst.expires) {
+ /* Entry is expired even if it is in use */
+ if (time_before_eq(jiffies, rth->dst.expires)) {
+nofree:
+ tmo >>= 1;
+ rthp = &rth->dst.rt_next;
+ /*
+ * We only count entries on
+ * a chain with equal hash inputs once
+ * so that entries for different QOS
+ * levels, and other non-hash input
+ * attributes don't unfairly skew
+ * the length computation
+ */
+ length += has_noalias(rt_hash_table[i].chain, rth);
+ continue;
+ }
+ } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout))
+ goto nofree;
+
+ /* Cleanup aged off entries. */
+ *rthp = rth->dst.rt_next;
+ rt_free(rth);
+ }
+ spin_unlock_bh(rt_hash_lock_addr(i));
+ sum += length;
+ sum2 += length*length;
+ }
+ if (samples) {
+ unsigned long avg = sum / samples;
+ unsigned long sd = int_sqrt(sum2 / samples - avg*avg);
+ rt_chain_length_max = max_t(unsigned long,
+ ip_rt_gc_elasticity,
+ (avg + 4*sd) >> FRACT_BITS);
+ }
+ rover = i;
+}
+
+/*
+ * rt_worker_func() is run in process context.
+ * we call rt_check_expire() to scan part of the hash table
+ */
+static void rt_worker_func(struct work_struct *work)
+{
+ rt_check_expire();
+ schedule_delayed_work(&expires_work, ip_rt_gc_interval);
+}
+
/*
* Perturbation of rt_genid by a small quantity [1..256]
* Using 8 bits of shuffling ensure we can call rt_cache_invalidate()
@@ -837,6 +937,7 @@ static void rt_cache_invalidate(struct net *net)
get_random_bytes(&shuffle, sizeof(shuffle));
atomic_add(shuffle + 1U, &net->ipv4.rt_genid);
+ redirect_genid++;
}
/*
@@ -1013,23 +1114,18 @@ static int slow_chain_length(const struct rtable *head)
static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, const void *daddr)
{
- struct neigh_table *tbl = &arp_tbl;
static const __be32 inaddr_any = 0;
struct net_device *dev = dst->dev;
const __be32 *pkey = daddr;
struct neighbour *n;
-#if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE)
- if (dev->type == ARPHRD_ATM)
- tbl = clip_tbl_hook;
-#endif
if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT))
pkey = &inaddr_any;
- n = __ipv4_neigh_lookup(tbl, dev, *(__force u32 *)pkey);
+ n = __ipv4_neigh_lookup(&arp_tbl, dev, *(__force u32 *)pkey);
if (n)
return n;
- return neigh_create(tbl, pkey, dev);
+ return neigh_create(&arp_tbl, pkey, dev);
}
static int rt_bind_neighbour(struct rtable *rt)
@@ -1265,7 +1361,7 @@ void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)
{
struct rtable *rt = (struct rtable *) dst;
- if (rt) {
+ if (rt && !(rt->dst.flags & DST_NOPEER)) {
if (rt->peer == NULL)
rt_bind_peer(rt, rt->rt_dst, 1);
@@ -1276,7 +1372,7 @@ void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)
iph->id = htons(inet_getid(rt->peer, more));
return;
}
- } else
+ } else if (!rt)
printk(KERN_DEBUG "rt_bind_peer(0) @%p\n",
__builtin_return_address(0));
@@ -1304,16 +1400,40 @@ static void rt_del(unsigned hash, struct rtable *rt)
spin_unlock_bh(rt_hash_lock_addr(hash));
}
+static void check_peer_redir(struct dst_entry *dst, struct inet_peer *peer)
+{
+ struct rtable *rt = (struct rtable *) dst;
+ __be32 orig_gw = rt->rt_gateway;
+ struct neighbour *n, *old_n;
+
+ dst_confirm(&rt->dst);
+
+ rt->rt_gateway = peer->redirect_learned.a4;
+
+ n = ipv4_neigh_lookup(&rt->dst, &rt->rt_gateway);
+ if (IS_ERR(n)) {
+ rt->rt_gateway = orig_gw;
+ return;
+ }
+ old_n = xchg(&rt->dst._neighbour, n);
+ if (old_n)
+ neigh_release(old_n);
+ if (!(n->nud_state & NUD_VALID)) {
+ neigh_event_send(n, NULL);
+ } else {
+ rt->rt_flags |= RTCF_REDIRECTED;
+ call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n);
+ }
+}
+
/* called in rcu_read_lock() section */
void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
__be32 saddr, struct net_device *dev)
{
int s, i;
struct in_device *in_dev = __in_dev_get_rcu(dev);
- struct rtable *rt;
__be32 skeys[2] = { saddr, 0 };
int ikeys[2] = { dev->ifindex, 0 };
- struct flowi4 fl4;
struct inet_peer *peer;
struct net *net;
@@ -1336,33 +1456,44 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
goto reject_redirect;
}
- memset(&fl4, 0, sizeof(fl4));
- fl4.daddr = daddr;
for (s = 0; s < 2; s++) {
for (i = 0; i < 2; i++) {
- fl4.flowi4_oif = ikeys[i];
- fl4.saddr = skeys[s];
- rt = __ip_route_output_key(net, &fl4);
- if (IS_ERR(rt))
- continue;
-
- if (rt->dst.error || rt->dst.dev != dev ||
- rt->rt_gateway != old_gw) {
- ip_rt_put(rt);
- continue;
- }
-
- if (!rt->peer)
- rt_bind_peer(rt, rt->rt_dst, 1);
+ unsigned int hash;
+ struct rtable __rcu **rthp;
+ struct rtable *rt;
+
+ hash = rt_hash(daddr, skeys[s], ikeys[i], rt_genid(net));
+
+ rthp = &rt_hash_table[hash].chain;
+
+ while ((rt = rcu_dereference(*rthp)) != NULL) {
+ rthp = &rt->dst.rt_next;
+
+ if (rt->rt_key_dst != daddr ||
+ rt->rt_key_src != skeys[s] ||
+ rt->rt_oif != ikeys[i] ||
+ rt_is_input_route(rt) ||
+ rt_is_expired(rt) ||
+ !net_eq(dev_net(rt->dst.dev), net) ||
+ rt->dst.error ||
+ rt->dst.dev != dev ||
+ rt->rt_gateway != old_gw)
+ continue;
- peer = rt->peer;
- if (peer) {
- peer->redirect_learned.a4 = new_gw;
- atomic_inc(&__rt_peer_genid);
+ if (!rt->peer)
+ rt_bind_peer(rt, rt->rt_dst, 1);
+
+ peer = rt->peer;
+ if (peer) {
+ if (peer->redirect_learned.a4 != new_gw ||
+ peer->redirect_genid != redirect_genid) {
+ peer->redirect_learned.a4 = new_gw;
+ peer->redirect_genid = redirect_genid;
+ atomic_inc(&__rt_peer_genid);
+ }
+ check_peer_redir(&rt->dst, peer);
+ }
}
-
- ip_rt_put(rt);
- return;
}
}
return;
@@ -1649,40 +1780,9 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
}
}
-static int check_peer_redir(struct dst_entry *dst, struct inet_peer *peer)
-{
- struct rtable *rt = (struct rtable *) dst;
- __be32 orig_gw = rt->rt_gateway;
- struct neighbour *n, *old_n;
-
- dst_confirm(&rt->dst);
-
- rt->rt_gateway = peer->redirect_learned.a4;
-
- n = ipv4_neigh_lookup(&rt->dst, &rt->rt_gateway);
- if (IS_ERR(n))
- return PTR_ERR(n);
- old_n = xchg(&rt->dst._neighbour, n);
- if (old_n)
- neigh_release(old_n);
- if (!n || !(n->nud_state & NUD_VALID)) {
- if (n)
- neigh_event_send(n, NULL);
- rt->rt_gateway = orig_gw;
- return -EAGAIN;
- } else {
- rt->rt_flags |= RTCF_REDIRECTED;
- call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n);
- }
- return 0;
-}
-static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
+static void ipv4_validate_peer(struct rtable *rt)
{
- struct rtable *rt = (struct rtable *) dst;
-
- if (rt_is_expired(rt))
- return NULL;
if (rt->rt_peer_genid != rt_peer_genid()) {
struct inet_peer *peer;
@@ -1691,17 +1791,26 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
peer = rt->peer;
if (peer) {
- check_peer_pmtu(dst, peer);
+ check_peer_pmtu(&rt->dst, peer);
+ if (peer->redirect_genid != redirect_genid)
+ peer->redirect_learned.a4 = 0;
if (peer->redirect_learned.a4 &&
- peer->redirect_learned.a4 != rt->rt_gateway) {
- if (check_peer_redir(dst, peer))
- return NULL;
- }
+ peer->redirect_learned.a4 != rt->rt_gateway)
+ check_peer_redir(&rt->dst, peer);
}
rt->rt_peer_genid = rt_peer_genid();
}
+}
+
+static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
+{
+ struct rtable *rt = (struct rtable *) dst;
+
+ if (rt_is_expired(rt))
+ return NULL;
+ ipv4_validate_peer(rt);
return dst;
}
@@ -1806,12 +1915,17 @@ static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
return advmss;
}
-static unsigned int ipv4_default_mtu(const struct dst_entry *dst)
+static unsigned int ipv4_mtu(const struct dst_entry *dst)
{
- unsigned int mtu = dst->dev->mtu;
+ const struct rtable *rt = (const struct rtable *) dst;
+ unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
+
+ if (mtu && rt_is_output_route(rt))
+ return mtu;
+
+ mtu = dst->dev->mtu;
if (unlikely(dst_metric_locked(dst, RTAX_MTU))) {
- const struct rtable *rt = (const struct rtable *) dst;
if (rt->rt_gateway != rt->rt_dst && mtu > 576)
mtu = 576;
@@ -1844,6 +1958,8 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4,
dst_init_metrics(&rt->dst, peer->metrics, false);
check_peer_pmtu(&rt->dst, peer);
+ if (peer->redirect_genid != redirect_genid)
+ peer->redirect_learned.a4 = 0;
if (peer->redirect_learned.a4 &&
peer->redirect_learned.a4 != rt->rt_gateway) {
rt->rt_gateway = peer->redirect_learned.a4;
@@ -2349,6 +2465,7 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr,
rth->rt_mark == skb->mark &&
net_eq(dev_net(rth->dst.dev), net) &&
!rt_is_expired(rth)) {
+ ipv4_validate_peer(rth);
if (noref) {
dst_use_noref(&rth->dst, jiffies);
skb_dst_set_noref(skb, &rth->dst);
@@ -2407,11 +2524,11 @@ EXPORT_SYMBOL(ip_route_input_common);
static struct rtable *__mkroute_output(const struct fib_result *res,
const struct flowi4 *fl4,
__be32 orig_daddr, __be32 orig_saddr,
- int orig_oif, struct net_device *dev_out,
+ int orig_oif, __u8 orig_rtos,
+ struct net_device *dev_out,
unsigned int flags)
{
struct fib_info *fi = res->fi;
- u32 tos = RT_FL_TOS(fl4);
struct in_device *in_dev;
u16 type = res->type;
struct rtable *rth;
@@ -2462,7 +2579,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
rth->rt_genid = rt_genid(dev_net(dev_out));
rth->rt_flags = flags;
rth->rt_type = type;
- rth->rt_key_tos = tos;
+ rth->rt_key_tos = orig_rtos;
rth->rt_dst = fl4->daddr;
rth->rt_src = fl4->saddr;
rth->rt_route_iif = 0;
@@ -2512,7 +2629,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
static struct rtable *ip_route_output_slow(struct net *net, struct flowi4 *fl4)
{
struct net_device *dev_out = NULL;
- u32 tos = RT_FL_TOS(fl4);
+ __u8 tos = RT_FL_TOS(fl4);
unsigned int flags = 0;
struct fib_result res;
struct rtable *rth;
@@ -2688,7 +2805,7 @@ static struct rtable *ip_route_output_slow(struct net *net, struct flowi4 *fl4)
make_route:
rth = __mkroute_output(&res, fl4, orig_daddr, orig_saddr, orig_oif,
- dev_out, flags);
+ tos, dev_out, flags);
if (!IS_ERR(rth)) {
unsigned int hash;
@@ -2724,6 +2841,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *flp4)
(IPTOS_RT_MASK | RTO_ONLINK)) &&
net_eq(dev_net(rth->dst.dev), net) &&
!rt_is_expired(rth)) {
+ ipv4_validate_peer(rth);
dst_use(&rth->dst, jiffies);
RT_CACHE_STAT_INC(out_hit);
rcu_read_unlock_bh();
@@ -2747,9 +2865,11 @@ static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 coo
return NULL;
}
-static unsigned int ipv4_blackhole_default_mtu(const struct dst_entry *dst)
+static unsigned int ipv4_blackhole_mtu(const struct dst_entry *dst)
{
- return 0;
+ unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
+
+ return mtu ? : dst->dev->mtu;
}
static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
@@ -2767,7 +2887,7 @@ static struct dst_ops ipv4_dst_blackhole_ops = {
.protocol = cpu_to_be16(ETH_P_IP),
.destroy = ipv4_dst_destroy,
.check = ipv4_blackhole_dst_check,
- .default_mtu = ipv4_blackhole_default_mtu,
+ .mtu = ipv4_blackhole_mtu,
.default_advmss = ipv4_default_advmss,
.update_pmtu = ipv4_rt_blackhole_update_pmtu,
.cow_metrics = ipv4_rt_blackhole_cow_metrics,
@@ -2845,7 +2965,7 @@ static int rt_fill_info(struct net *net,
struct rtable *rt = skb_rtable(skb);
struct rtmsg *r;
struct nlmsghdr *nlh;
- long expires = 0;
+ unsigned long expires = 0;
const struct inet_peer *peer = rt->peer;
u32 id = 0, ts = 0, tsage = 0, error;
@@ -2902,8 +3022,12 @@ static int rt_fill_info(struct net *net,
tsage = get_seconds() - peer->tcp_ts_stamp;
}
expires = ACCESS_ONCE(peer->pmtu_expires);
- if (expires)
- expires -= jiffies;
+ if (expires) {
+ if (time_before(jiffies, expires))
+ expires -= jiffies;
+ else
+ expires = 0;
+ }
}
if (rt_is_input_route(rt)) {
@@ -3145,6 +3269,13 @@ static ctl_table ipv4_route_table[] = {
.proc_handler = proc_dointvec_jiffies,
},
{
+ .procname = "gc_interval",
+ .data = &ip_rt_gc_interval,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ {
.procname = "redirect_load",
.data = &ip_rt_redirect_load,
.maxlen = sizeof(int),
@@ -3354,6 +3485,11 @@ int __init ip_rt_init(void)
devinet_init();
ip_fib_init();
+ INIT_DELAYED_WORK_DEFERRABLE(&expires_work, rt_worker_func);
+ expires_ljiffies = jiffies;
+ schedule_delayed_work(&expires_work,
+ net_random() % ip_rt_gc_interval + ip_rt_gc_interval);
+
if (ip_rt_proc_init())
printk(KERN_ERR "Unable to create route proc files\n");
#ifdef CONFIG_XFRM
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 90f6544c13e2..51fdbb490437 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -245,7 +245,7 @@ bool cookie_check_timestamp(struct tcp_options_received *tcp_opt, bool *ecn_ok)
if (!sysctl_tcp_timestamps)
return false;
- tcp_opt->sack_ok = (options >> 4) & 0x1;
+ tcp_opt->sack_ok = (options & (1 << 4)) ? TCP_SACK_SEEN : 0;
*ecn_ok = (options >> 5) & 1;
if (*ecn_ok && !sysctl_tcp_ecn)
return false;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 69fd7201129a..4aa7e9dc0cbb 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -14,6 +14,7 @@
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/nsproxy.h>
+#include <linux/swap.h>
#include <net/snmp.h>
#include <net/icmp.h>
#include <net/ip.h>
@@ -23,6 +24,7 @@
#include <net/cipso_ipv4.h>
#include <net/inet_frag.h>
#include <net/ping.h>
+#include <net/tcp_memcontrol.h>
static int zero;
static int tcp_retr1_max = 255;
@@ -73,7 +75,7 @@ static int ipv4_local_port_range(ctl_table *table, int write,
}
-void inet_get_ping_group_range_table(struct ctl_table *table, gid_t *low, gid_t *high)
+static void inet_get_ping_group_range_table(struct ctl_table *table, gid_t *low, gid_t *high)
{
gid_t *data = table->data;
unsigned seq;
@@ -86,7 +88,7 @@ void inet_get_ping_group_range_table(struct ctl_table *table, gid_t *low, gid_t
}
/* Update system visible IP port range */
-static void set_ping_group_range(struct ctl_table *table, int range[2])
+static void set_ping_group_range(struct ctl_table *table, gid_t range[2])
{
gid_t *data = table->data;
write_seqlock(&sysctl_local_ports.lock);
@@ -174,6 +176,49 @@ static int proc_allowed_congestion_control(ctl_table *ctl,
return ret;
}
+static int ipv4_tcp_mem(ctl_table *ctl, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ int ret;
+ unsigned long vec[3];
+ struct net *net = current->nsproxy->net_ns;
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+ struct mem_cgroup *memcg;
+#endif
+
+ ctl_table tmp = {
+ .data = &vec,
+ .maxlen = sizeof(vec),
+ .mode = ctl->mode,
+ };
+
+ if (!write) {
+ ctl->data = &net->ipv4.sysctl_tcp_mem;
+ return proc_doulongvec_minmax(ctl, write, buffer, lenp, ppos);
+ }
+
+ ret = proc_doulongvec_minmax(&tmp, write, buffer, lenp, ppos);
+ if (ret)
+ return ret;
+
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+ rcu_read_lock();
+ memcg = mem_cgroup_from_task(current);
+
+ tcp_prot_mem(memcg, vec[0], 0);
+ tcp_prot_mem(memcg, vec[1], 1);
+ tcp_prot_mem(memcg, vec[2], 2);
+ rcu_read_unlock();
+#endif
+
+ net->ipv4.sysctl_tcp_mem[0] = vec[0];
+ net->ipv4.sysctl_tcp_mem[1] = vec[1];
+ net->ipv4.sysctl_tcp_mem[2] = vec[2];
+
+ return 0;
+}
+
static struct ctl_table ipv4_table[] = {
{
.procname = "tcp_timestamps",
@@ -433,13 +478,6 @@ static struct ctl_table ipv4_table[] = {
.proc_handler = proc_dointvec
},
{
- .procname = "tcp_mem",
- .data = &sysctl_tcp_mem,
- .maxlen = sizeof(sysctl_tcp_mem),
- .mode = 0644,
- .proc_handler = proc_doulongvec_minmax
- },
- {
.procname = "tcp_wmem",
.data = &sysctl_tcp_wmem,
.maxlen = sizeof(sysctl_tcp_wmem),
@@ -721,6 +759,12 @@ static struct ctl_table ipv4_net_table[] = {
.mode = 0644,
.proc_handler = ipv4_ping_group_range,
},
+ {
+ .procname = "tcp_mem",
+ .maxlen = sizeof(init_net.ipv4.sysctl_tcp_mem),
+ .mode = 0644,
+ .proc_handler = ipv4_tcp_mem,
+ },
{ }
};
@@ -734,6 +778,7 @@ EXPORT_SYMBOL_GPL(net_ipv4_ctl_path);
static __net_init int ipv4_sysctl_init_net(struct net *net)
{
struct ctl_table *table;
+ unsigned long limit;
table = ipv4_net_table;
if (!net_eq(net, &init_net)) {
@@ -769,6 +814,12 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
net->ipv4.sysctl_rt_cache_rebuild_count = 4;
+ limit = nr_free_buffer_pages() / 8;
+ limit = max(limit, 128UL);
+ net->ipv4.sysctl_tcp_mem[0] = limit / 4 * 3;
+ net->ipv4.sysctl_tcp_mem[1] = limit;
+ net->ipv4.sysctl_tcp_mem[2] = net->ipv4.sysctl_tcp_mem[0] * 2;
+
net->ipv4.ipv4_hdr = register_net_sysctl_table(net,
net_ipv4_ctl_path, table);
if (net->ipv4.ipv4_hdr == NULL)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 34f5db1e1c8b..9bcdec3ad772 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -282,11 +282,9 @@ int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
struct percpu_counter tcp_orphan_count;
EXPORT_SYMBOL_GPL(tcp_orphan_count);
-long sysctl_tcp_mem[3] __read_mostly;
int sysctl_tcp_wmem[3] __read_mostly;
int sysctl_tcp_rmem[3] __read_mostly;
-EXPORT_SYMBOL(sysctl_tcp_mem);
EXPORT_SYMBOL(sysctl_tcp_rmem);
EXPORT_SYMBOL(sysctl_tcp_wmem);
@@ -888,18 +886,18 @@ int tcp_sendpage(struct sock *sk, struct page *page, int offset,
}
EXPORT_SYMBOL(tcp_sendpage);
-#define TCP_PAGE(sk) (sk->sk_sndmsg_page)
-#define TCP_OFF(sk) (sk->sk_sndmsg_off)
-
-static inline int select_size(const struct sock *sk, int sg)
+static inline int select_size(const struct sock *sk, bool sg)
{
const struct tcp_sock *tp = tcp_sk(sk);
int tmp = tp->mss_cache;
if (sg) {
- if (sk_can_gso(sk))
- tmp = 0;
- else {
+ if (sk_can_gso(sk)) {
+ /* Small frames wont use a full page:
+ * Payload will immediately follow tcp header.
+ */
+ tmp = SKB_WITH_OVERHEAD(2048 - MAX_TCP_HEADER);
+ } else {
int pgbreak = SKB_MAX_HEAD(MAX_TCP_HEADER);
if (tmp >= pgbreak &&
@@ -917,9 +915,9 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
struct iovec *iov;
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
- int iovlen, flags;
+ int iovlen, flags, err, copied;
int mss_now, size_goal;
- int sg, err, copied;
+ bool sg;
long timeo;
lock_sock(sk);
@@ -946,7 +944,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
goto out_err;
- sg = sk->sk_route_caps & NETIF_F_SG;
+ sg = !!(sk->sk_route_caps & NETIF_F_SG);
while (--iovlen >= 0) {
size_t seglen = iov->iov_len;
@@ -1005,8 +1003,13 @@ new_segment:
} else {
int merge = 0;
int i = skb_shinfo(skb)->nr_frags;
- struct page *page = TCP_PAGE(sk);
- int off = TCP_OFF(sk);
+ struct page *page = sk->sk_sndmsg_page;
+ int off;
+
+ if (page && page_count(page) == 1)
+ sk->sk_sndmsg_off = 0;
+
+ off = sk->sk_sndmsg_off;
if (skb_can_coalesce(skb, i, page, off) &&
off != PAGE_SIZE) {
@@ -1023,7 +1026,7 @@ new_segment:
} else if (page) {
if (off == PAGE_SIZE) {
put_page(page);
- TCP_PAGE(sk) = page = NULL;
+ sk->sk_sndmsg_page = page = NULL;
off = 0;
}
} else
@@ -1049,9 +1052,9 @@ new_segment:
/* If this page was new, give it to the
* socket so it does not get leaked.
*/
- if (!TCP_PAGE(sk)) {
- TCP_PAGE(sk) = page;
- TCP_OFF(sk) = 0;
+ if (!sk->sk_sndmsg_page) {
+ sk->sk_sndmsg_page = page;
+ sk->sk_sndmsg_off = 0;
}
goto do_error;
}
@@ -1061,15 +1064,15 @@ new_segment:
skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
} else {
skb_fill_page_desc(skb, i, page, off, copy);
- if (TCP_PAGE(sk)) {
+ if (sk->sk_sndmsg_page) {
get_page(page);
} else if (off + copy < PAGE_SIZE) {
get_page(page);
- TCP_PAGE(sk) = page;
+ sk->sk_sndmsg_page = page;
}
}
- TCP_OFF(sk) = off + copy;
+ sk->sk_sndmsg_off = off + copy;
}
if (!copied)
@@ -2653,7 +2656,8 @@ int compat_tcp_getsockopt(struct sock *sk, int level, int optname,
EXPORT_SYMBOL(compat_tcp_getsockopt);
#endif
-struct sk_buff *tcp_tso_segment(struct sk_buff *skb, u32 features)
+struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
+ netdev_features_t features)
{
struct sk_buff *segs = ERR_PTR(-EINVAL);
struct tcphdr *th;
@@ -3272,14 +3276,9 @@ void __init tcp_init(void)
sysctl_tcp_max_orphans = cnt / 2;
sysctl_max_syn_backlog = max(128, cnt / 256);
- limit = nr_free_buffer_pages() / 8;
- limit = max(limit, 128UL);
- sysctl_tcp_mem[0] = limit / 4 * 3;
- sysctl_tcp_mem[1] = limit;
- sysctl_tcp_mem[2] = sysctl_tcp_mem[0] * 2;
-
/* Set per-socket limits to no more than 1/128 the pressure threshold */
- limit = ((unsigned long)sysctl_tcp_mem[1]) << (PAGE_SHIFT - 7);
+ limit = ((unsigned long)init_net.ipv4.sysctl_tcp_mem[1])
+ << (PAGE_SHIFT - 7);
max_share = min(4UL*1024*1024, limit);
sysctl_tcp_wmem[0] = SK_MEM_QUANTUM;
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 850c737e08e2..fc6d475f488f 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -292,7 +292,7 @@ int tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight)
left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd &&
left * tp->mss_cache < sk->sk_gso_max_size)
return 1;
- return left <= tcp_max_burst(tp);
+ return left <= tcp_max_tso_deferred_mss(tp);
}
EXPORT_SYMBOL_GPL(tcp_is_cwnd_limited);
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index 939edb3b8e4d..8cd357a8be79 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -34,11 +34,23 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
tcp_get_info(sk, info);
}
+static void tcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
+ struct inet_diag_req *r, struct nlattr *bc)
+{
+ inet_diag_dump_icsk(&tcp_hashinfo, skb, cb, r, bc);
+}
+
+static int tcp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh,
+ struct inet_diag_req *req)
+{
+ return inet_diag_dump_one_icsk(&tcp_hashinfo, in_skb, nlh, req);
+}
+
static const struct inet_diag_handler tcp_diag_handler = {
- .idiag_hashinfo = &tcp_hashinfo,
+ .dump = tcp_diag_dump,
+ .dump_one = tcp_diag_dump_one,
.idiag_get_info = tcp_diag_get_info,
- .idiag_type = TCPDIAG_GETSOCK,
- .idiag_info_size = sizeof(struct tcp_info),
+ .idiag_type = IPPROTO_TCP,
};
static int __init tcp_diag_init(void)
@@ -54,4 +66,4 @@ static void __exit tcp_diag_exit(void)
module_init(tcp_diag_init);
module_exit(tcp_diag_exit);
MODULE_LICENSE("GPL");
-MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_INET_DIAG, TCPDIAG_GETSOCK);
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-6 /* AF_INET - IPPROTO_TCP */);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 52b5c2d0ecd0..2877c3e09587 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -322,7 +322,7 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
/* Check #1 */
if (tp->rcv_ssthresh < tp->window_clamp &&
(int)tp->rcv_ssthresh < tcp_space(sk) &&
- !tcp_memory_pressure) {
+ !sk_under_memory_pressure(sk)) {
int incr;
/* Check #2. Increase window, if skb with such overhead
@@ -411,8 +411,8 @@ static void tcp_clamp_window(struct sock *sk)
if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
!(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
- !tcp_memory_pressure &&
- atomic_long_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) {
+ !sk_under_memory_pressure(sk) &&
+ sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) {
sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
sysctl_tcp_rmem[2]);
}
@@ -865,13 +865,13 @@ static void tcp_disable_fack(struct tcp_sock *tp)
/* RFC3517 uses different metric in lost marker => reset on change */
if (tcp_is_fack(tp))
tp->lost_skb_hint = NULL;
- tp->rx_opt.sack_ok &= ~2;
+ tp->rx_opt.sack_ok &= ~TCP_FACK_ENABLED;
}
/* Take a notice that peer is sending D-SACKs */
static void tcp_dsack_seen(struct tcp_sock *tp)
{
- tp->rx_opt.sack_ok |= 4;
+ tp->rx_opt.sack_ok |= TCP_DSACK_SEEN;
}
/* Initialize metrics on socket. */
@@ -2663,7 +2663,7 @@ static void DBGUNDO(struct sock *sk, const char *msg)
tp->snd_ssthresh, tp->prior_ssthresh,
tp->packets_out);
}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
else if (sk->sk_family == AF_INET6) {
struct ipv6_pinfo *np = inet6_sk(sk);
printk(KERN_DEBUG "Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n",
@@ -2858,7 +2858,7 @@ static void tcp_try_keep_open(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
int state = TCP_CA_Open;
- if (tcp_left_out(tp) || tcp_any_retrans_done(sk) || tp->undo_marker)
+ if (tcp_left_out(tp) || tcp_any_retrans_done(sk))
state = TCP_CA_Disorder;
if (inet_csk(sk)->icsk_ca_state != state) {
@@ -2881,7 +2881,8 @@ static void tcp_try_to_open(struct sock *sk, int flag)
if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
tcp_try_keep_open(sk);
- tcp_moderate_cwnd(tp);
+ if (inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
+ tcp_moderate_cwnd(tp);
} else {
tcp_cwnd_down(sk, flag);
}
@@ -3009,11 +3010,11 @@ static void tcp_update_cwnd_in_recovery(struct sock *sk, int newly_acked_sacked,
* tcp_xmit_retransmit_queue().
*/
static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
- int newly_acked_sacked, int flag)
+ int newly_acked_sacked, bool is_dupack,
+ int flag)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
- int is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
(tcp_fackets_out(tp) > tp->reordering));
int fast_rexmit = 0, mib_idx;
@@ -3066,17 +3067,6 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
}
break;
- case TCP_CA_Disorder:
- tcp_try_undo_dsack(sk);
- if (!tp->undo_marker ||
- /* For SACK case do not Open to allow to undo
- * catching for all duplicate ACKs. */
- tcp_is_reno(tp) || tp->snd_una != tp->high_seq) {
- tp->undo_marker = 0;
- tcp_set_ca_state(sk, TCP_CA_Open);
- }
- break;
-
case TCP_CA_Recovery:
if (tcp_is_reno(tp))
tcp_reset_reno_sack(tp);
@@ -3117,7 +3107,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
tcp_add_reno_sack(sk);
}
- if (icsk->icsk_ca_state == TCP_CA_Disorder)
+ if (icsk->icsk_ca_state <= TCP_CA_Disorder)
tcp_try_undo_dsack(sk);
if (!tcp_time_to_recover(sk)) {
@@ -3681,10 +3671,12 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
u32 prior_snd_una = tp->snd_una;
u32 ack_seq = TCP_SKB_CB(skb)->seq;
u32 ack = TCP_SKB_CB(skb)->ack_seq;
+ bool is_dupack = false;
u32 prior_in_flight;
u32 prior_fackets;
int prior_packets;
int prior_sacked = tp->sacked_out;
+ int pkts_acked = 0;
int newly_acked_sacked = 0;
int frto_cwnd = 0;
@@ -3757,6 +3749,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
/* See if we can take anything off of the retransmit queue. */
flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una);
+ pkts_acked = prior_packets - tp->packets_out;
newly_acked_sacked = (prior_packets - prior_sacked) -
(tp->packets_out - tp->sacked_out);
@@ -3771,8 +3764,9 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
if ((flag & FLAG_DATA_ACKED) && !frto_cwnd &&
tcp_may_raise_cwnd(sk, flag))
tcp_cong_avoid(sk, ack, prior_in_flight);
- tcp_fastretrans_alert(sk, prior_packets - tp->packets_out,
- newly_acked_sacked, flag);
+ is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
+ tcp_fastretrans_alert(sk, pkts_acked, newly_acked_sacked,
+ is_dupack, flag);
} else {
if ((flag & FLAG_DATA_ACKED) && !frto_cwnd)
tcp_cong_avoid(sk, ack, prior_in_flight);
@@ -3784,6 +3778,10 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
return 1;
no_queue:
+ /* If data was DSACKed, see if we can undo a cwnd reduction. */
+ if (flag & FLAG_DSACKING_ACK)
+ tcp_fastretrans_alert(sk, pkts_acked, newly_acked_sacked,
+ is_dupack, flag);
/* If this ack opens up a zero window, clear backoff. It was
* being used to time the probes, and is probably far higher than
* it needs to be for normal retransmission.
@@ -3797,10 +3795,14 @@ invalid_ack:
return -1;
old_ack:
+ /* If data was SACKed, tag it and see if we should send more data.
+ * If data was DSACKed, see if we can undo a cwnd reduction.
+ */
if (TCP_SKB_CB(skb)->sacked) {
- tcp_sacktag_write_queue(sk, skb, prior_snd_una);
- if (icsk->icsk_ca_state == TCP_CA_Open)
- tcp_try_keep_open(sk);
+ flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una);
+ newly_acked_sacked = tp->sacked_out - prior_sacked;
+ tcp_fastretrans_alert(sk, pkts_acked, newly_acked_sacked,
+ is_dupack, flag);
}
SOCK_DEBUG(sk, "Ack %u before %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
@@ -3876,7 +3878,7 @@ void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *o
case TCPOPT_SACK_PERM:
if (opsize == TCPOLEN_SACK_PERM && th->syn &&
!estab && sysctl_tcp_sack) {
- opt_rx->sack_ok = 1;
+ opt_rx->sack_ok = TCP_SACK_SEEN;
tcp_sack_reset(opt_rx);
}
break;
@@ -4864,7 +4866,7 @@ static int tcp_prune_queue(struct sock *sk)
if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
tcp_clamp_window(sk);
- else if (tcp_memory_pressure)
+ else if (sk_under_memory_pressure(sk))
tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
tcp_collapse_ofo_queue(sk);
@@ -4930,11 +4932,11 @@ static int tcp_should_expand_sndbuf(const struct sock *sk)
return 0;
/* If we are under global TCP memory pressure, do not expand. */
- if (tcp_memory_pressure)
+ if (sk_under_memory_pressure(sk))
return 0;
/* If we are under soft global TCP memory pressure, do not expand. */
- if (atomic_long_read(&tcp_memory_allocated) >= sysctl_tcp_mem[0])
+ if (sk_memory_allocated(sk) >= sk_prot_mem_limits(sk, 0))
return 0;
/* If we filled the congestion window, do not expand. */
@@ -5809,6 +5811,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
goto discard;
if (th->syn) {
+ if (th->fin)
+ goto discard;
if (icsk->icsk_af_ops->conn_request(sk, skb) < 0)
return 1;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index a7443159c400..1eb4ad57670e 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -73,6 +73,7 @@
#include <net/xfrm.h>
#include <net/netdma.h>
#include <net/secure_seq.h>
+#include <net/tcp_memcontrol.h>
#include <linux/inet.h>
#include <linux/ipv6.h>
@@ -1510,6 +1511,8 @@ exit:
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
return NULL;
put_and_exit:
+ tcp_clear_xmit_timers(newsk);
+ tcp_cleanup_congestion_control(newsk);
bh_unlock_sock(newsk);
sock_put(newsk);
goto exit;
@@ -1915,7 +1918,8 @@ static int tcp_v4_init_sock(struct sock *sk)
sk->sk_rcvbuf = sysctl_tcp_rmem[1];
local_bh_disable();
- percpu_counter_inc(&tcp_sockets_allocated);
+ sock_update_memcg(sk);
+ sk_sockets_allocated_inc(sk);
local_bh_enable();
return 0;
@@ -1971,7 +1975,8 @@ void tcp_v4_destroy_sock(struct sock *sk)
tp->cookie_values = NULL;
}
- percpu_counter_dec(&tcp_sockets_allocated);
+ sk_sockets_allocated_dec(sk);
+ sock_release_memcg(sk);
}
EXPORT_SYMBOL(tcp_v4_destroy_sock);
@@ -2618,7 +2623,6 @@ struct proto tcp_prot = {
.orphan_count = &tcp_orphan_count,
.memory_allocated = &tcp_memory_allocated,
.memory_pressure = &tcp_memory_pressure,
- .sysctl_mem = sysctl_tcp_mem,
.sysctl_wmem = sysctl_tcp_wmem,
.sysctl_rmem = sysctl_tcp_rmem,
.max_header = MAX_TCP_HEADER,
@@ -2632,10 +2636,14 @@ struct proto tcp_prot = {
.compat_setsockopt = compat_tcp_setsockopt,
.compat_getsockopt = compat_tcp_getsockopt,
#endif
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+ .init_cgroup = tcp_init_cgroup,
+ .destroy_cgroup = tcp_destroy_cgroup,
+ .proto_cgroup = tcp_proto_cgroup,
+#endif
};
EXPORT_SYMBOL(tcp_prot);
-
static int __net_init tcp_sk_init(struct net *net)
{
return inet_ctl_sock_create(&net->ipv4.tcp_sock,
diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c
new file mode 100644
index 000000000000..7fed04f875c1
--- /dev/null
+++ b/net/ipv4/tcp_memcontrol.c
@@ -0,0 +1,272 @@
+#include <net/tcp.h>
+#include <net/tcp_memcontrol.h>
+#include <net/sock.h>
+#include <net/ip.h>
+#include <linux/nsproxy.h>
+#include <linux/memcontrol.h>
+#include <linux/module.h>
+
+static u64 tcp_cgroup_read(struct cgroup *cont, struct cftype *cft);
+static int tcp_cgroup_write(struct cgroup *cont, struct cftype *cft,
+ const char *buffer);
+static int tcp_cgroup_reset(struct cgroup *cont, unsigned int event);
+
+static struct cftype tcp_files[] = {
+ {
+ .name = "kmem.tcp.limit_in_bytes",
+ .write_string = tcp_cgroup_write,
+ .read_u64 = tcp_cgroup_read,
+ .private = RES_LIMIT,
+ },
+ {
+ .name = "kmem.tcp.usage_in_bytes",
+ .read_u64 = tcp_cgroup_read,
+ .private = RES_USAGE,
+ },
+ {
+ .name = "kmem.tcp.failcnt",
+ .private = RES_FAILCNT,
+ .trigger = tcp_cgroup_reset,
+ .read_u64 = tcp_cgroup_read,
+ },
+ {
+ .name = "kmem.tcp.max_usage_in_bytes",
+ .private = RES_MAX_USAGE,
+ .trigger = tcp_cgroup_reset,
+ .read_u64 = tcp_cgroup_read,
+ },
+};
+
+static inline struct tcp_memcontrol *tcp_from_cgproto(struct cg_proto *cg_proto)
+{
+ return container_of(cg_proto, struct tcp_memcontrol, cg_proto);
+}
+
+static void memcg_tcp_enter_memory_pressure(struct sock *sk)
+{
+ if (sk->sk_cgrp->memory_pressure)
+ *sk->sk_cgrp->memory_pressure = 1;
+}
+EXPORT_SYMBOL(memcg_tcp_enter_memory_pressure);
+
+int tcp_init_cgroup(struct cgroup *cgrp, struct cgroup_subsys *ss)
+{
+ /*
+ * The root cgroup does not use res_counters, but rather,
+ * rely on the data already collected by the network
+ * subsystem
+ */
+ struct res_counter *res_parent = NULL;
+ struct cg_proto *cg_proto, *parent_cg;
+ struct tcp_memcontrol *tcp;
+ struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
+ struct mem_cgroup *parent = parent_mem_cgroup(memcg);
+ struct net *net = current->nsproxy->net_ns;
+
+ cg_proto = tcp_prot.proto_cgroup(memcg);
+ if (!cg_proto)
+ goto create_files;
+
+ tcp = tcp_from_cgproto(cg_proto);
+
+ tcp->tcp_prot_mem[0] = net->ipv4.sysctl_tcp_mem[0];
+ tcp->tcp_prot_mem[1] = net->ipv4.sysctl_tcp_mem[1];
+ tcp->tcp_prot_mem[2] = net->ipv4.sysctl_tcp_mem[2];
+ tcp->tcp_memory_pressure = 0;
+
+ parent_cg = tcp_prot.proto_cgroup(parent);
+ if (parent_cg)
+ res_parent = parent_cg->memory_allocated;
+
+ res_counter_init(&tcp->tcp_memory_allocated, res_parent);
+ percpu_counter_init(&tcp->tcp_sockets_allocated, 0);
+
+ cg_proto->enter_memory_pressure = memcg_tcp_enter_memory_pressure;
+ cg_proto->memory_pressure = &tcp->tcp_memory_pressure;
+ cg_proto->sysctl_mem = tcp->tcp_prot_mem;
+ cg_proto->memory_allocated = &tcp->tcp_memory_allocated;
+ cg_proto->sockets_allocated = &tcp->tcp_sockets_allocated;
+ cg_proto->memcg = memcg;
+
+create_files:
+ return cgroup_add_files(cgrp, ss, tcp_files,
+ ARRAY_SIZE(tcp_files));
+}
+EXPORT_SYMBOL(tcp_init_cgroup);
+
+void tcp_destroy_cgroup(struct cgroup *cgrp, struct cgroup_subsys *ss)
+{
+ struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
+ struct cg_proto *cg_proto;
+ struct tcp_memcontrol *tcp;
+ u64 val;
+
+ cg_proto = tcp_prot.proto_cgroup(memcg);
+ if (!cg_proto)
+ return;
+
+ tcp = tcp_from_cgproto(cg_proto);
+ percpu_counter_destroy(&tcp->tcp_sockets_allocated);
+
+ val = res_counter_read_u64(&tcp->tcp_memory_allocated, RES_USAGE);
+
+ if (val != RESOURCE_MAX)
+ jump_label_dec(&memcg_socket_limit_enabled);
+}
+EXPORT_SYMBOL(tcp_destroy_cgroup);
+
+static int tcp_update_limit(struct mem_cgroup *memcg, u64 val)
+{
+ struct net *net = current->nsproxy->net_ns;
+ struct tcp_memcontrol *tcp;
+ struct cg_proto *cg_proto;
+ u64 old_lim;
+ int i;
+ int ret;
+
+ cg_proto = tcp_prot.proto_cgroup(memcg);
+ if (!cg_proto)
+ return -EINVAL;
+
+ if (val > RESOURCE_MAX)
+ val = RESOURCE_MAX;
+
+ tcp = tcp_from_cgproto(cg_proto);
+
+ old_lim = res_counter_read_u64(&tcp->tcp_memory_allocated, RES_LIMIT);
+ ret = res_counter_set_limit(&tcp->tcp_memory_allocated, val);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < 3; i++)
+ tcp->tcp_prot_mem[i] = min_t(long, val >> PAGE_SHIFT,
+ net->ipv4.sysctl_tcp_mem[i]);
+
+ if (val == RESOURCE_MAX && old_lim != RESOURCE_MAX)
+ jump_label_dec(&memcg_socket_limit_enabled);
+ else if (old_lim == RESOURCE_MAX && val != RESOURCE_MAX)
+ jump_label_inc(&memcg_socket_limit_enabled);
+
+ return 0;
+}
+
+static int tcp_cgroup_write(struct cgroup *cont, struct cftype *cft,
+ const char *buffer)
+{
+ struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
+ unsigned long long val;
+ int ret = 0;
+
+ switch (cft->private) {
+ case RES_LIMIT:
+ /* see memcontrol.c */
+ ret = res_counter_memparse_write_strategy(buffer, &val);
+ if (ret)
+ break;
+ ret = tcp_update_limit(memcg, val);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+ return ret;
+}
+
+static u64 tcp_read_stat(struct mem_cgroup *memcg, int type, u64 default_val)
+{
+ struct tcp_memcontrol *tcp;
+ struct cg_proto *cg_proto;
+
+ cg_proto = tcp_prot.proto_cgroup(memcg);
+ if (!cg_proto)
+ return default_val;
+
+ tcp = tcp_from_cgproto(cg_proto);
+ return res_counter_read_u64(&tcp->tcp_memory_allocated, type);
+}
+
+static u64 tcp_read_usage(struct mem_cgroup *memcg)
+{
+ struct tcp_memcontrol *tcp;
+ struct cg_proto *cg_proto;
+
+ cg_proto = tcp_prot.proto_cgroup(memcg);
+ if (!cg_proto)
+ return atomic_long_read(&tcp_memory_allocated) << PAGE_SHIFT;
+
+ tcp = tcp_from_cgproto(cg_proto);
+ return res_counter_read_u64(&tcp->tcp_memory_allocated, RES_USAGE);
+}
+
+static u64 tcp_cgroup_read(struct cgroup *cont, struct cftype *cft)
+{
+ struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
+ u64 val;
+
+ switch (cft->private) {
+ case RES_LIMIT:
+ val = tcp_read_stat(memcg, RES_LIMIT, RESOURCE_MAX);
+ break;
+ case RES_USAGE:
+ val = tcp_read_usage(memcg);
+ break;
+ case RES_FAILCNT:
+ case RES_MAX_USAGE:
+ val = tcp_read_stat(memcg, cft->private, 0);
+ break;
+ default:
+ BUG();
+ }
+ return val;
+}
+
+static int tcp_cgroup_reset(struct cgroup *cont, unsigned int event)
+{
+ struct mem_cgroup *memcg;
+ struct tcp_memcontrol *tcp;
+ struct cg_proto *cg_proto;
+
+ memcg = mem_cgroup_from_cont(cont);
+ cg_proto = tcp_prot.proto_cgroup(memcg);
+ if (!cg_proto)
+ return 0;
+ tcp = tcp_from_cgproto(cg_proto);
+
+ switch (event) {
+ case RES_MAX_USAGE:
+ res_counter_reset_max(&tcp->tcp_memory_allocated);
+ break;
+ case RES_FAILCNT:
+ res_counter_reset_failcnt(&tcp->tcp_memory_allocated);
+ break;
+ }
+
+ return 0;
+}
+
+unsigned long long tcp_max_memory(const struct mem_cgroup *memcg)
+{
+ struct tcp_memcontrol *tcp;
+ struct cg_proto *cg_proto;
+
+ cg_proto = tcp_prot.proto_cgroup((struct mem_cgroup *)memcg);
+ if (!cg_proto)
+ return 0;
+
+ tcp = tcp_from_cgproto(cg_proto);
+ return res_counter_read_u64(&tcp->tcp_memory_allocated, RES_LIMIT);
+}
+
+void tcp_prot_mem(struct mem_cgroup *memcg, long val, int idx)
+{
+ struct tcp_memcontrol *tcp;
+ struct cg_proto *cg_proto;
+
+ cg_proto = tcp_prot.proto_cgroup(memcg);
+ if (!cg_proto)
+ return;
+
+ tcp = tcp_from_cgproto(cg_proto);
+
+ tcp->tcp_prot_mem[idx] = val;
+}
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 66363b689ad6..550e755747e0 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -336,15 +336,15 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
tcptw->tw_ts_recent = tp->rx_opt.ts_recent;
tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
if (tw->tw_family == PF_INET6) {
struct ipv6_pinfo *np = inet6_sk(sk);
struct inet6_timewait_sock *tw6;
tw->tw_ipv6_offset = inet6_tw_offset(sk->sk_prot);
tw6 = inet6_twsk((struct sock *)tw);
- ipv6_addr_copy(&tw6->tw_v6_daddr, &np->daddr);
- ipv6_addr_copy(&tw6->tw_v6_rcv_saddr, &np->rcv_saddr);
+ tw6->tw_v6_daddr = np->daddr;
+ tw6->tw_v6_rcv_saddr = np->rcv_saddr;
tw->tw_tclass = np->tclass;
tw->tw_ipv6only = np->ipv6only;
}
@@ -425,7 +425,7 @@ static inline void TCP_ECN_openreq_child(struct tcp_sock *tp,
*/
struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, struct sk_buff *skb)
{
- struct sock *newsk = inet_csk_clone(sk, req, GFP_ATOMIC);
+ struct sock *newsk = inet_csk_clone_lock(sk, req, GFP_ATOMIC);
if (newsk != NULL) {
const struct inet_request_sock *ireq = inet_rsk(req);
@@ -495,7 +495,9 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
newtp->frto_counter = 0;
newtp->frto_highmark = 0;
- newicsk->icsk_ca_ops = &tcp_init_congestion_ops;
+ if (newicsk->icsk_ca_ops != &tcp_init_congestion_ops &&
+ !try_module_get(newicsk->icsk_ca_ops->owner))
+ newicsk->icsk_ca_ops = &tcp_init_congestion_ops;
tcp_set_ca_state(newsk, TCP_CA_Open);
tcp_init_xmit_timers(newsk);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 980b98f6288c..8c8de2780c7a 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1093,6 +1093,13 @@ static void __pskb_trim_head(struct sk_buff *skb, int len)
{
int i, k, eat;
+ eat = min_t(int, len, skb_headlen(skb));
+ if (eat) {
+ __skb_pull(skb, eat);
+ len -= eat;
+ if (!len)
+ return;
+ }
eat = len;
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
@@ -1124,11 +1131,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
return -ENOMEM;
- /* If len == headlen, we avoid __skb_pull to preserve alignment. */
- if (unlikely(len < skb_headlen(skb)))
- __skb_pull(skb, len);
- else
- __pskb_trim_head(skb, len - skb_headlen(skb));
+ __pskb_trim_head(skb, len);
TCP_SKB_CB(skb)->seq += len;
skb->ip_summed = CHECKSUM_PARTIAL;
@@ -1382,7 +1385,7 @@ static inline int tcp_minshall_check(const struct tcp_sock *tp)
/* Return 0, if packet can be sent now without violation Nagle's rules:
* 1. It is full sized.
* 2. Or it contains FIN. (already checked by caller)
- * 3. Or TCP_NODELAY was set.
+ * 3. Or TCP_CORK is not set, and TCP_NODELAY is set.
* 4. Or TCP_CORK is not set, and all sent packets are ACKed.
* With Minshall's modification: all sent small packets are ACKed.
*/
@@ -1581,7 +1584,7 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
* frame, so if we have space for more than 3 frames
* then send now.
*/
- if (limit > tcp_max_burst(tp) * tp->mss_cache)
+ if (limit > tcp_max_tso_deferred_mss(tp) * tp->mss_cache)
goto send_now;
}
@@ -1919,7 +1922,7 @@ u32 __tcp_select_window(struct sock *sk)
if (free_space < (full_space >> 1)) {
icsk->icsk_ack.quick = 0;
- if (tcp_memory_pressure)
+ if (sk_under_memory_pressure(sk))
tp->rcv_ssthresh = min(tp->rcv_ssthresh,
4U * tp->advmss);
@@ -2147,7 +2150,15 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
*/
TCP_SKB_CB(skb)->when = tcp_time_stamp;
- err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
+ /* make sure skb->data is aligned on arches that require it */
+ if (unlikely(NET_IP_ALIGN && ((unsigned long)skb->data & 3))) {
+ struct sk_buff *nskb = __pskb_copy(skb, MAX_TCP_HEADER,
+ GFP_ATOMIC);
+ err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
+ -ENOBUFS;
+ } else {
+ err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
+ }
if (err == 0) {
/* Update global TCP statistics. */
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 2e0f0af76c19..a516d1e399df 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -171,13 +171,13 @@ static int tcp_write_timeout(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
int retry_until;
- bool do_reset, syn_set = 0;
+ bool do_reset, syn_set = false;
if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
if (icsk->icsk_retransmits)
dst_negative_advice(sk);
retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries;
- syn_set = 1;
+ syn_set = true;
} else {
if (retransmits_timed_out(sk, sysctl_tcp_retries1, 0, 0)) {
/* Black hole detection */
@@ -261,7 +261,7 @@ static void tcp_delack_timer(unsigned long data)
}
out:
- if (tcp_memory_pressure)
+ if (sk_under_memory_pressure(sk))
sk_mem_reclaim(sk);
out_unlock:
bh_unlock_sock(sk);
@@ -340,7 +340,7 @@ void tcp_retransmit_timer(struct sock *sk)
&inet->inet_daddr, ntohs(inet->inet_dport),
inet->inet_num, tp->snd_una, tp->snd_nxt);
}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
else if (sk->sk_family == AF_INET6) {
struct ipv6_pinfo *np = inet6_sk(sk);
LIMIT_NETDEBUG(KERN_DEBUG "TCP: Peer %pI6:%u/%u unexpectedly shrunk window %u:%u (repaired)\n",
diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c
index ac3b3ee4b07c..01775983b997 100644
--- a/net/ipv4/tunnel4.c
+++ b/net/ipv4/tunnel4.c
@@ -105,7 +105,7 @@ drop:
return 0;
}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
static int tunnel64_rcv(struct sk_buff *skb)
{
struct xfrm_tunnel *handler;
@@ -134,7 +134,7 @@ static void tunnel4_err(struct sk_buff *skb, u32 info)
break;
}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
static void tunnel64_err(struct sk_buff *skb, u32 info)
{
struct xfrm_tunnel *handler;
@@ -152,7 +152,7 @@ static const struct net_protocol tunnel4_protocol = {
.netns_ok = 1,
};
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
static const struct net_protocol tunnel64_protocol = {
.handler = tunnel64_rcv,
.err_handler = tunnel64_err,
@@ -167,7 +167,7 @@ static int __init tunnel4_init(void)
printk(KERN_ERR "tunnel4 init: can't add protocol\n");
return -EAGAIN;
}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
if (inet_add_protocol(&tunnel64_protocol, IPPROTO_IPV6)) {
printk(KERN_ERR "tunnel64 init: can't add protocol\n");
inet_del_protocol(&tunnel4_protocol, IPPROTO_IPIP);
@@ -179,7 +179,7 @@ static int __init tunnel4_init(void)
static void __exit tunnel4_fini(void)
{
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
if (inet_del_protocol(&tunnel64_protocol, IPPROTO_IPV6))
printk(KERN_ERR "tunnel64 close: can't remove protocol\n");
#endif
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index ab0966df1e2a..5d075b5f70fc 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -445,7 +445,7 @@ exact_match:
/* UDP is nearly always wildcards out the wazoo, it makes no sense to try
* harder than this. -DaveM
*/
-static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
+struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
__be16 sport, __be32 daddr, __be16 dport,
int dif, struct udp_table *udptable)
{
@@ -512,6 +512,7 @@ begin:
rcu_read_unlock();
return result;
}
+EXPORT_SYMBOL_GPL(__udp4_lib_lookup);
static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb,
__be16 sport, __be16 dport,
@@ -1164,7 +1165,7 @@ int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
struct inet_sock *inet = inet_sk(sk);
struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name;
struct sk_buff *skb;
- unsigned int ulen;
+ unsigned int ulen, copied;
int peeked;
int err;
int is_udplite = IS_UDPLITE(sk);
@@ -1186,9 +1187,10 @@ try_again:
goto out;
ulen = skb->len - sizeof(struct udphdr);
- if (len > ulen)
- len = ulen;
- else if (len < ulen)
+ copied = len;
+ if (copied > ulen)
+ copied = ulen;
+ else if (copied < ulen)
msg->msg_flags |= MSG_TRUNC;
/*
@@ -1197,14 +1199,14 @@ try_again:
* coverage checksum (UDP-Lite), do it before the copy.
*/
- if (len < ulen || UDP_SKB_CB(skb)->partial_cov) {
+ if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) {
if (udp_lib_checksum_complete(skb))
goto csum_copy_err;
}
if (skb_csum_unnecessary(skb))
err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr),
- msg->msg_iov, len);
+ msg->msg_iov, copied);
else {
err = skb_copy_and_csum_datagram_iovec(skb,
sizeof(struct udphdr),
@@ -1233,7 +1235,7 @@ try_again:
if (inet->cmsg_flags)
ip_cmsg_recv(msg, skb);
- err = len;
+ err = copied;
if (flags & MSG_TRUNC)
err = ulen;
@@ -1357,7 +1359,7 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
if (inet_sk(sk)->inet_daddr)
sock_rps_save_rxhash(sk, skb);
- rc = ip_queue_rcv_skb(sk, skb);
+ rc = sock_queue_rcv_skb(sk, skb);
if (rc < 0) {
int is_udplite = IS_UDPLITE(sk);
@@ -1473,6 +1475,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
rc = 0;
+ ipv4_pktinfo_prepare(skb);
bh_lock_sock(sk);
if (!sock_owned_by_user(sk))
rc = __udp_queue_rcv_skb(sk, skb);
@@ -2246,7 +2249,8 @@ int udp4_ufo_send_check(struct sk_buff *skb)
return 0;
}
-struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, u32 features)
+struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
+ netdev_features_t features)
{
struct sk_buff *segs = ERR_PTR(-EINVAL);
unsigned int mss;
diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c
new file mode 100644
index 000000000000..69f8a7ca63dd
--- /dev/null
+++ b/net/ipv4/udp_diag.c
@@ -0,0 +1,201 @@
+/*
+ * udp_diag.c Module for monitoring UDP transport protocols sockets.
+ *
+ * Authors: Pavel Emelyanov, <xemul@parallels.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+
+#include <linux/module.h>
+#include <linux/inet_diag.h>
+#include <linux/udp.h>
+#include <net/udp.h>
+#include <net/udplite.h>
+#include <linux/inet_diag.h>
+#include <linux/sock_diag.h>
+
+static int sk_diag_dump(struct sock *sk, struct sk_buff *skb,
+ struct netlink_callback *cb, struct inet_diag_req *req,
+ struct nlattr *bc)
+{
+ if (!inet_diag_bc_sk(bc, sk))
+ return 0;
+
+ return inet_sk_diag_fill(sk, NULL, skb, req, NETLINK_CB(cb->skb).pid,
+ cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh);
+}
+
+static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb,
+ const struct nlmsghdr *nlh, struct inet_diag_req *req)
+{
+ int err = -EINVAL;
+ struct sock *sk;
+ struct sk_buff *rep;
+
+ if (req->sdiag_family == AF_INET)
+ sk = __udp4_lib_lookup(&init_net,
+ req->id.idiag_src[0], req->id.idiag_sport,
+ req->id.idiag_dst[0], req->id.idiag_dport,
+ req->id.idiag_if, tbl);
+#if IS_ENABLED(CONFIG_IPV6)
+ else if (req->sdiag_family == AF_INET6)
+ sk = __udp6_lib_lookup(&init_net,
+ (struct in6_addr *)req->id.idiag_src,
+ req->id.idiag_sport,
+ (struct in6_addr *)req->id.idiag_dst,
+ req->id.idiag_dport,
+ req->id.idiag_if, tbl);
+#endif
+ else
+ goto out_nosk;
+
+ err = -ENOENT;
+ if (sk == NULL)
+ goto out_nosk;
+
+ err = sock_diag_check_cookie(sk, req->id.idiag_cookie);
+ if (err)
+ goto out;
+
+ err = -ENOMEM;
+ rep = alloc_skb(NLMSG_SPACE((sizeof(struct inet_diag_msg) +
+ sizeof(struct inet_diag_meminfo) +
+ 64)), GFP_KERNEL);
+ if (!rep)
+ goto out;
+
+ err = inet_sk_diag_fill(sk, NULL, rep, req,
+ NETLINK_CB(in_skb).pid,
+ nlh->nlmsg_seq, 0, nlh);
+ if (err < 0) {
+ WARN_ON(err == -EMSGSIZE);
+ kfree_skb(rep);
+ goto out;
+ }
+ err = netlink_unicast(sock_diag_nlsk, rep, NETLINK_CB(in_skb).pid,
+ MSG_DONTWAIT);
+ if (err > 0)
+ err = 0;
+out:
+ if (sk)
+ sock_put(sk);
+out_nosk:
+ return err;
+}
+
+static void udp_dump(struct udp_table *table, struct sk_buff *skb, struct netlink_callback *cb,
+ struct inet_diag_req *r, struct nlattr *bc)
+{
+ int num, s_num, slot, s_slot;
+
+ s_slot = cb->args[0];
+ num = s_num = cb->args[1];
+
+ for (slot = s_slot; slot <= table->mask; num = s_num = 0, slot++) {
+ struct sock *sk;
+ struct hlist_nulls_node *node;
+ struct udp_hslot *hslot = &table->hash[slot];
+
+ if (hlist_nulls_empty(&hslot->head))
+ continue;
+
+ spin_lock_bh(&hslot->lock);
+ sk_nulls_for_each(sk, node, &hslot->head) {
+ struct inet_sock *inet = inet_sk(sk);
+
+ if (num < s_num)
+ goto next;
+ if (!(r->idiag_states & (1 << sk->sk_state)))
+ goto next;
+ if (r->sdiag_family != AF_UNSPEC &&
+ sk->sk_family != r->sdiag_family)
+ goto next;
+ if (r->id.idiag_sport != inet->inet_sport &&
+ r->id.idiag_sport)
+ goto next;
+ if (r->id.idiag_dport != inet->inet_dport &&
+ r->id.idiag_dport)
+ goto next;
+
+ if (sk_diag_dump(sk, skb, cb, r, bc) < 0) {
+ spin_unlock_bh(&hslot->lock);
+ goto done;
+ }
+next:
+ num++;
+ }
+ spin_unlock_bh(&hslot->lock);
+ }
+done:
+ cb->args[0] = slot;
+ cb->args[1] = num;
+}
+
+static void udp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
+ struct inet_diag_req *r, struct nlattr *bc)
+{
+ udp_dump(&udp_table, skb, cb, r, bc);
+}
+
+static int udp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh,
+ struct inet_diag_req *req)
+{
+ return udp_dump_one(&udp_table, in_skb, nlh, req);
+}
+
+static const struct inet_diag_handler udp_diag_handler = {
+ .dump = udp_diag_dump,
+ .dump_one = udp_diag_dump_one,
+ .idiag_type = IPPROTO_UDP,
+};
+
+static void udplite_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
+ struct inet_diag_req *r, struct nlattr *bc)
+{
+ udp_dump(&udplite_table, skb, cb, r, bc);
+}
+
+static int udplite_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh,
+ struct inet_diag_req *req)
+{
+ return udp_dump_one(&udplite_table, in_skb, nlh, req);
+}
+
+static const struct inet_diag_handler udplite_diag_handler = {
+ .dump = udplite_diag_dump,
+ .dump_one = udplite_diag_dump_one,
+ .idiag_type = IPPROTO_UDPLITE,
+};
+
+static int __init udp_diag_init(void)
+{
+ int err;
+
+ err = inet_diag_register(&udp_diag_handler);
+ if (err)
+ goto out;
+ err = inet_diag_register(&udplite_diag_handler);
+ if (err)
+ goto out_lite;
+out:
+ return err;
+out_lite:
+ inet_diag_unregister(&udp_diag_handler);
+ goto out;
+}
+
+static void __exit udp_diag_exit(void)
+{
+ inet_diag_unregister(&udplite_diag_handler);
+ inet_diag_unregister(&udp_diag_handler);
+}
+
+module_init(udp_diag_init);
+module_exit(udp_diag_exit);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-17 /* AF_INET - IPPROTO_UDP */);
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-136 /* AF_INET - IPPROTO_UDPLITE */);
diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c
index 82806455e859..9247d9d70e9d 100644
--- a/net/ipv4/xfrm4_tunnel.c
+++ b/net/ipv4/xfrm4_tunnel.c
@@ -64,7 +64,7 @@ static struct xfrm_tunnel xfrm_tunnel_handler __read_mostly = {
.priority = 2,
};
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
static struct xfrm_tunnel xfrm64_tunnel_handler __read_mostly = {
.handler = xfrm_tunnel_rcv,
.err_handler = xfrm_tunnel_err,
@@ -84,7 +84,7 @@ static int __init ipip_init(void)
xfrm_unregister_type(&ipip_type, AF_INET);
return -EAGAIN;
}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
if (xfrm4_tunnel_register(&xfrm64_tunnel_handler, AF_INET6)) {
printk(KERN_INFO "ipip init: can't add xfrm handler for AF_INET6\n");
xfrm4_tunnel_deregister(&xfrm_tunnel_handler, AF_INET);
@@ -97,7 +97,7 @@ static int __init ipip_init(void)
static void __exit ipip_fini(void)
{
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
if (xfrm4_tunnel_deregister(&xfrm64_tunnel_handler, AF_INET6))
printk(KERN_INFO "ipip close: can't remove xfrm handler for AF_INET6\n");
#endif
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index cf88df82e2c2..647e6cba237d 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -630,13 +630,13 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
goto out;
}
- rt = addrconf_dst_alloc(idev, addr, 0);
+ rt = addrconf_dst_alloc(idev, addr, false);
if (IS_ERR(rt)) {
err = PTR_ERR(rt);
goto out;
}
- ipv6_addr_copy(&ifa->addr, addr);
+ ifa->addr = *addr;
spin_lock_init(&ifa->lock);
spin_lock_init(&ifa->state_lock);
@@ -650,16 +650,6 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
ifa->rt = rt;
- /*
- * part one of RFC 4429, section 3.3
- * We should not configure an address as
- * optimistic if we do not yet know the link
- * layer address of our nexhop router
- */
-
- if (dst_get_neighbour_raw(&rt->dst) == NULL)
- ifa->flags &= ~IFA_F_OPTIMISTIC;
-
ifa->idev = idev;
in6_dev_hold(idev);
/* For caller */
@@ -807,7 +797,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
ip6_del_rt(rt);
rt = NULL;
} else if (!(rt->rt6i_flags & RTF_EXPIRES)) {
- rt->rt6i_expires = expires;
+ rt->dst.expires = expires;
rt->rt6i_flags |= RTF_EXPIRES;
}
}
@@ -1228,7 +1218,7 @@ try_nextdev:
if (!hiscore->ifa)
return -EADDRNOTAVAIL;
- ipv6_addr_copy(saddr, &hiscore->ifa->addr);
+ *saddr = hiscore->ifa->addr;
in6_ifa_put(hiscore->ifa);
return 0;
}
@@ -1249,7 +1239,7 @@ int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
list_for_each_entry(ifp, &idev->addr_list, if_list) {
if (ifp->scope == IFA_LINK &&
!(ifp->flags & banned_flags)) {
- ipv6_addr_copy(addr, &ifp->addr);
+ *addr = ifp->addr;
err = 0;
break;
}
@@ -1700,7 +1690,7 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev,
.fc_protocol = RTPROT_KERNEL,
};
- ipv6_addr_copy(&cfg.fc_dst, pfx);
+ cfg.fc_dst = *pfx;
/* Prevent useless cloning on PtP SIT.
This thing is done here expecting that the whole
@@ -1733,7 +1723,7 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
if (!fn)
goto out;
for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
- if (rt->rt6i_dev->ifindex != dev->ifindex)
+ if (rt->dst.dev->ifindex != dev->ifindex)
continue;
if ((rt->rt6i_flags & flags) != flags)
continue;
@@ -1805,7 +1795,8 @@ static struct inet6_dev *addrconf_add_dev(struct net_device *dev)
return ERR_PTR(-EACCES);
/* Add default multicast route */
- addrconf_add_mroute(dev);
+ if (!(dev->flags & IFF_LOOPBACK))
+ addrconf_add_mroute(dev);
/* Add link local route */
addrconf_add_lroute(dev);
@@ -1890,11 +1881,11 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
rt = NULL;
} else if (addrconf_finite_timeout(rt_expires)) {
/* not infinity */
- rt->rt6i_expires = jiffies + rt_expires;
+ rt->dst.expires = jiffies + rt_expires;
rt->rt6i_flags |= RTF_EXPIRES;
} else {
rt->rt6i_flags &= ~RTF_EXPIRES;
- rt->rt6i_expires = 0;
+ rt->dst.expires = 0;
}
} else if (valid_lft) {
clock_t expires = 0;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index d27c797f9f05..273f48d1df2e 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -347,7 +347,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
*/
v4addr = LOOPBACK4_IPV6;
if (!(addr_type & IPV6_ADDR_MULTICAST)) {
- if (!inet->transparent &&
+ if (!(inet->freebind || inet->transparent) &&
!ipv6_chk_addr(net, &addr->sin6_addr,
dev, 0)) {
err = -EADDRNOTAVAIL;
@@ -361,10 +361,10 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
inet->inet_rcv_saddr = v4addr;
inet->inet_saddr = v4addr;
- ipv6_addr_copy(&np->rcv_saddr, &addr->sin6_addr);
+ np->rcv_saddr = addr->sin6_addr;
if (!(addr_type & IPV6_ADDR_MULTICAST))
- ipv6_addr_copy(&np->saddr, &addr->sin6_addr);
+ np->saddr = addr->sin6_addr;
/* Make sure we are allowed to bind here. */
if (sk->sk_prot->get_port(sk, snum)) {
@@ -458,14 +458,14 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
peer == 1)
return -ENOTCONN;
sin->sin6_port = inet->inet_dport;
- ipv6_addr_copy(&sin->sin6_addr, &np->daddr);
+ sin->sin6_addr = np->daddr;
if (np->sndflow)
sin->sin6_flowinfo = np->flow_label;
} else {
if (ipv6_addr_any(&np->rcv_saddr))
- ipv6_addr_copy(&sin->sin6_addr, &np->saddr);
+ sin->sin6_addr = np->saddr;
else
- ipv6_addr_copy(&sin->sin6_addr, &np->rcv_saddr);
+ sin->sin6_addr = np->rcv_saddr;
sin->sin6_port = inet->inet_sport;
}
@@ -660,8 +660,8 @@ int inet6_sk_rebuild_header(struct sock *sk)
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_proto = sk->sk_protocol;
- ipv6_addr_copy(&fl6.daddr, &np->daddr);
- ipv6_addr_copy(&fl6.saddr, &np->saddr);
+ fl6.daddr = np->daddr;
+ fl6.saddr = np->saddr;
fl6.flowlabel = np->flow_label;
fl6.flowi6_oif = sk->sk_bound_dev_if;
fl6.flowi6_mark = sk->sk_mark;
@@ -769,7 +769,8 @@ out:
return err;
}
-static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, u32 features)
+static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
+ netdev_features_t features)
{
struct sk_buff *segs = ERR_PTR(-EINVAL);
struct ipv6hdr *ipv6h;
@@ -985,9 +986,9 @@ static int __net_init ipv6_init_mibs(struct net *net)
sizeof(struct icmpv6_mib),
__alignof__(struct icmpv6_mib)) < 0)
goto err_icmp_mib;
- if (snmp_mib_init((void __percpu **)net->mib.icmpv6msg_statistics,
- sizeof(struct icmpv6msg_mib),
- __alignof__(struct icmpv6msg_mib)) < 0)
+ net->mib.icmpv6msg_statistics = kzalloc(sizeof(struct icmpv6msg_mib),
+ GFP_KERNEL);
+ if (!net->mib.icmpv6msg_statistics)
goto err_icmpmsg_mib;
return 0;
@@ -1008,7 +1009,7 @@ static void ipv6_cleanup_mibs(struct net *net)
snmp_mib_free((void __percpu **)net->mib.udplite_stats_in6);
snmp_mib_free((void __percpu **)net->mib.ipv6_statistics);
snmp_mib_free((void __percpu **)net->mib.icmpv6_statistics);
- snmp_mib_free((void __percpu **)net->mib.icmpv6msg_statistics);
+ kfree(net->mib.icmpv6msg_statistics);
}
static int __net_init inet6_net_init(struct net *net)
@@ -1115,6 +1116,8 @@ static int __init inet6_init(void)
if (err)
goto static_sysctl_fail;
#endif
+ tcpv6_prot.sysctl_mem = init_net.ipv4.sysctl_tcp_mem;
+
/*
* ipngwg API draft makes clear that the correct semantics
* for TCP and UDP is to consider one TCP and UDP instance
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 2195ae651923..2ae79dbeec2f 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -193,9 +193,9 @@ static void ipv6_rearrange_destopt(struct ipv6hdr *iph, struct ipv6_opt_hdr *des
printk(KERN_WARNING "destopt hao: invalid header length: %u\n", hao->length);
goto bad;
}
- ipv6_addr_copy(&final_addr, &hao->addr);
- ipv6_addr_copy(&hao->addr, &iph->saddr);
- ipv6_addr_copy(&iph->saddr, &final_addr);
+ final_addr = hao->addr;
+ hao->addr = iph->saddr;
+ iph->saddr = final_addr;
}
break;
}
@@ -241,13 +241,13 @@ static void ipv6_rearrange_rthdr(struct ipv6hdr *iph, struct ipv6_rt_hdr *rthdr)
segments = rthdr->hdrlen >> 1;
addrs = ((struct rt0_hdr *)rthdr)->addr;
- ipv6_addr_copy(&final_addr, addrs + segments - 1);
+ final_addr = addrs[segments - 1];
addrs += segments - segments_left;
memmove(addrs + 1, addrs, (segments_left - 1) * sizeof(*addrs));
- ipv6_addr_copy(addrs, &iph->daddr);
- ipv6_addr_copy(&iph->daddr, &final_addr);
+ addrs[0] = iph->daddr;
+ iph->daddr = final_addr;
}
static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len, int dir)
@@ -324,8 +324,6 @@ static void ah6_output_done(struct crypto_async_request *base, int err)
#endif
}
- err = ah->nexthdr;
-
kfree(AH_SKB_CB(skb)->tmp);
xfrm_output_resume(skb, err);
}
@@ -466,12 +464,12 @@ static void ah6_input_done(struct crypto_async_request *base, int err)
if (err)
goto out;
+ err = ah->nexthdr;
+
skb->network_header += ah_hlen;
memcpy(skb_network_header(skb), work_iph, hdr_len);
__skb_pull(skb, ah_hlen + hdr_len);
skb_set_transport_header(skb, -hdr_len);
-
- err = ah->nexthdr;
out:
kfree(AH_SKB_CB(skb)->tmp);
xfrm_input_resume(skb, err);
@@ -583,8 +581,6 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
if (err == -EINPROGRESS)
goto out;
- if (err == -EBUSY)
- err = NET_XMIT_DROP;
goto out_free;
}
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 674255f5e6b7..59402b4637f9 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -75,7 +75,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
if (pac == NULL)
return -ENOMEM;
pac->acl_next = NULL;
- ipv6_addr_copy(&pac->acl_addr, addr);
+ pac->acl_addr = *addr;
rcu_read_lock();
if (ifindex == 0) {
@@ -83,7 +83,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
rt = rt6_lookup(net, addr, NULL, 0, 0);
if (rt) {
- dev = rt->rt6i_dev;
+ dev = rt->dst.dev;
dst_release(&rt->dst);
} else if (ishost) {
err = -EADDRNOTAVAIL;
@@ -289,14 +289,14 @@ int ipv6_dev_ac_inc(struct net_device *dev, const struct in6_addr *addr)
goto out;
}
- rt = addrconf_dst_alloc(idev, addr, 1);
+ rt = addrconf_dst_alloc(idev, addr, true);
if (IS_ERR(rt)) {
kfree(aca);
err = PTR_ERR(rt);
goto out;
}
- ipv6_addr_copy(&aca->aca_addr, addr);
+ aca->aca_addr = *addr;
aca->aca_idev = idev;
aca->aca_rt = rt;
aca->aca_users = 1;
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index e2480691c220..ae08aee1773c 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -71,7 +71,7 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
if (flowlabel == NULL)
return -EINVAL;
- ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
+ usin->sin6_addr = flowlabel->dst;
}
}
@@ -143,7 +143,7 @@ ipv4_connected:
}
}
- ipv6_addr_copy(&np->daddr, daddr);
+ np->daddr = *daddr;
np->flow_label = fl6.flowlabel;
inet->inet_dport = usin->sin6_port;
@@ -154,8 +154,8 @@ ipv4_connected:
*/
fl6.flowi6_proto = sk->sk_protocol;
- ipv6_addr_copy(&fl6.daddr, &np->daddr);
- ipv6_addr_copy(&fl6.saddr, &np->saddr);
+ fl6.daddr = np->daddr;
+ fl6.saddr = np->saddr;
fl6.flowi6_oif = sk->sk_bound_dev_if;
fl6.flowi6_mark = sk->sk_mark;
fl6.fl6_dport = inet->inet_dport;
@@ -179,10 +179,10 @@ ipv4_connected:
/* source address lookup done in ip6_dst_lookup */
if (ipv6_addr_any(&np->saddr))
- ipv6_addr_copy(&np->saddr, &fl6.saddr);
+ np->saddr = fl6.saddr;
if (ipv6_addr_any(&np->rcv_saddr)) {
- ipv6_addr_copy(&np->rcv_saddr, &fl6.saddr);
+ np->rcv_saddr = fl6.saddr;
inet->inet_rcv_saddr = LOOPBACK4_IPV6;
if (sk->sk_prot->rehash)
sk->sk_prot->rehash(sk);
@@ -257,7 +257,7 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi6 *fl6, u32 info)
skb_put(skb, sizeof(struct ipv6hdr));
skb_reset_network_header(skb);
iph = ipv6_hdr(skb);
- ipv6_addr_copy(&iph->daddr, &fl6->daddr);
+ iph->daddr = fl6->daddr;
serr = SKB_EXT_ERR(skb);
serr->ee.ee_errno = err;
@@ -294,7 +294,7 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu)
skb_put(skb, sizeof(struct ipv6hdr));
skb_reset_network_header(skb);
iph = ipv6_hdr(skb);
- ipv6_addr_copy(&iph->daddr, &fl6->daddr);
+ iph->daddr = fl6->daddr;
mtu_info = IP6CBMTU(skb);
@@ -303,7 +303,7 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu)
mtu_info->ip6m_addr.sin6_port = 0;
mtu_info->ip6m_addr.sin6_flowinfo = 0;
mtu_info->ip6m_addr.sin6_scope_id = fl6->flowi6_oif;
- ipv6_addr_copy(&mtu_info->ip6m_addr.sin6_addr, &ipv6_hdr(skb)->daddr);
+ mtu_info->ip6m_addr.sin6_addr = ipv6_hdr(skb)->daddr;
__skb_pull(skb, skb_tail_pointer(skb) - skb->data);
skb_reset_transport_header(skb);
@@ -354,8 +354,8 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
sin->sin6_port = serr->port;
sin->sin6_scope_id = 0;
if (skb->protocol == htons(ETH_P_IPV6)) {
- ipv6_addr_copy(&sin->sin6_addr,
- (struct in6_addr *)(nh + serr->addr_offset));
+ sin->sin6_addr =
+ *(struct in6_addr *)(nh + serr->addr_offset);
if (np->sndflow)
sin->sin6_flowinfo =
(*(__be32 *)(nh + serr->addr_offset - 24) &
@@ -376,7 +376,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
sin->sin6_flowinfo = 0;
sin->sin6_scope_id = 0;
if (skb->protocol == htons(ETH_P_IPV6)) {
- ipv6_addr_copy(&sin->sin6_addr, &ipv6_hdr(skb)->saddr);
+ sin->sin6_addr = ipv6_hdr(skb)->saddr;
if (np->rxopt.all)
datagram_recv_ctl(sk, msg, skb);
if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL)
@@ -451,7 +451,7 @@ int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len)
sin->sin6_flowinfo = 0;
sin->sin6_port = 0;
sin->sin6_scope_id = mtu_info.ip6m_addr.sin6_scope_id;
- ipv6_addr_copy(&sin->sin6_addr, &mtu_info.ip6m_addr.sin6_addr);
+ sin->sin6_addr = mtu_info.ip6m_addr.sin6_addr;
}
put_cmsg(msg, SOL_IPV6, IPV6_PATHMTU, sizeof(mtu_info), &mtu_info);
@@ -475,7 +475,7 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
struct in6_pktinfo src_info;
src_info.ipi6_ifindex = opt->iif;
- ipv6_addr_copy(&src_info.ipi6_addr, &ipv6_hdr(skb)->daddr);
+ src_info.ipi6_addr = ipv6_hdr(skb)->daddr;
put_cmsg(msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info);
}
@@ -550,7 +550,7 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
struct in6_pktinfo src_info;
src_info.ipi6_ifindex = opt->iif;
- ipv6_addr_copy(&src_info.ipi6_addr, &ipv6_hdr(skb)->daddr);
+ src_info.ipi6_addr = ipv6_hdr(skb)->daddr;
put_cmsg(msg, SOL_IPV6, IPV6_2292PKTINFO, sizeof(src_info), &src_info);
}
if (np->rxopt.bits.rxohlim) {
@@ -584,7 +584,7 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
*/
sin6.sin6_family = AF_INET6;
- ipv6_addr_copy(&sin6.sin6_addr, &ipv6_hdr(skb)->daddr);
+ sin6.sin6_addr = ipv6_hdr(skb)->daddr;
sin6.sin6_port = ports[1];
sin6.sin6_flowinfo = 0;
sin6.sin6_scope_id = 0;
@@ -654,12 +654,12 @@ int datagram_send_ctl(struct net *net, struct sock *sk,
if (addr_type != IPV6_ADDR_ANY) {
int strict = __ipv6_addr_src_scope(addr_type) <= IPV6_ADDR_SCOPE_LINKLOCAL;
- if (!inet_sk(sk)->transparent &&
+ if (!(inet_sk(sk)->freebind || inet_sk(sk)->transparent) &&
!ipv6_chk_addr(net, &src_info->ipi6_addr,
strict ? dev : NULL, 0))
err = -EINVAL;
else
- ipv6_addr_copy(&fl6->saddr, &src_info->ipi6_addr);
+ fl6->saddr = src_info->ipi6_addr;
}
rcu_read_unlock();
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index bf22a225f422..3d641b6e9b09 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -243,9 +243,9 @@ static int ipv6_dest_hao(struct sk_buff *skb, int optoff)
if (skb->ip_summed == CHECKSUM_COMPLETE)
skb->ip_summed = CHECKSUM_NONE;
- ipv6_addr_copy(&tmp_addr, &ipv6h->saddr);
- ipv6_addr_copy(&ipv6h->saddr, &hao->addr);
- ipv6_addr_copy(&hao->addr, &tmp_addr);
+ tmp_addr = ipv6h->saddr;
+ ipv6h->saddr = hao->addr;
+ hao->addr = tmp_addr;
if (skb->tstamp.tv64 == 0)
__net_timestamp(skb);
@@ -461,9 +461,9 @@ looped_back:
return -1;
}
- ipv6_addr_copy(&daddr, addr);
- ipv6_addr_copy(addr, &ipv6_hdr(skb)->daddr);
- ipv6_addr_copy(&ipv6_hdr(skb)->daddr, &daddr);
+ daddr = *addr;
+ *addr = ipv6_hdr(skb)->daddr;
+ ipv6_hdr(skb)->daddr = daddr;
skb_dst_drop(skb);
ip6_route_input(skb);
@@ -690,7 +690,7 @@ static void ipv6_push_rthdr(struct sk_buff *skb, u8 *proto,
memcpy(phdr->addr, ihdr->addr + 1,
(hops - 1) * sizeof(struct in6_addr));
- ipv6_addr_copy(phdr->addr + (hops - 1), *addr_p);
+ phdr->addr[hops - 1] = **addr_p;
*addr_p = ihdr->addr;
phdr->rt_hdr.nexthdr = *proto;
@@ -888,8 +888,8 @@ struct in6_addr *fl6_update_dst(struct flowi6 *fl6,
if (!opt || !opt->srcrt)
return NULL;
- ipv6_addr_copy(orig, &fl6->daddr);
- ipv6_addr_copy(&fl6->daddr, ((struct rt0_hdr *)opt->srcrt)->addr);
+ *orig = fl6->daddr;
+ fl6->daddr = *((struct rt0_hdr *)opt->srcrt)->addr;
return orig;
}
diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c
index 37f548b7f6dc..72957f4a7c6c 100644
--- a/net/ipv6/exthdrs_core.c
+++ b/net/ipv6/exthdrs_core.c
@@ -57,6 +57,9 @@ int ipv6_ext_hdr(u8 nexthdr)
* it returns NULL.
* - First fragment header is skipped, not-first ones
* are considered as unparsable.
+ * - Reports the offset field of the final fragment header so it is
+ * possible to tell whether this is a first fragment, later fragment,
+ * or not fragmented.
* - ESP is unparsable for now and considered like
* normal payload protocol.
* - Note also special handling of AUTH header. Thanks to IPsec wizards.
@@ -64,10 +67,13 @@ int ipv6_ext_hdr(u8 nexthdr)
* --ANK (980726)
*/
-int ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp)
+int ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp,
+ __be16 *frag_offp)
{
u8 nexthdr = *nexthdrp;
+ *frag_offp = 0;
+
while (ipv6_ext_hdr(nexthdr)) {
struct ipv6_opt_hdr _hdr, *hp;
int hdrlen;
@@ -87,7 +93,8 @@ int ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp)
if (fp == NULL)
return -1;
- if (ntohs(*fp) & ~0x7)
+ *frag_offp = *fp;
+ if (ntohs(*frag_offp) & ~0x7)
break;
hdrlen = 8;
} else if (nexthdr == NEXTHDR_AUTH)
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 295571576f83..b6c573152067 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -96,7 +96,7 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
if (!ipv6_prefix_equal(&saddr, &r->src.addr,
r->src.plen))
goto again;
- ipv6_addr_copy(&flp6->saddr, &saddr);
+ flp6->saddr = saddr;
}
goto out;
}
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 90868fb42757..01d46bff63c3 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -135,11 +135,12 @@ static int is_ineligible(struct sk_buff *skb)
int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
int len = skb->len - ptr;
__u8 nexthdr = ipv6_hdr(skb)->nexthdr;
+ __be16 frag_off;
if (len < 0)
return 1;
- ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr);
+ ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
if (ptr < 0)
return 0;
if (nexthdr == IPPROTO_ICMPV6) {
@@ -290,9 +291,9 @@ static void mip6_addr_swap(struct sk_buff *skb)
if (likely(off >= 0)) {
hao = (struct ipv6_destopt_hao *)
(skb_network_header(skb) + off);
- ipv6_addr_copy(&tmp, &iph->saddr);
- ipv6_addr_copy(&iph->saddr, &hao->addr);
- ipv6_addr_copy(&hao->addr, &tmp);
+ tmp = iph->saddr;
+ iph->saddr = hao->addr;
+ hao->addr = tmp;
}
}
}
@@ -444,9 +445,9 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_proto = IPPROTO_ICMPV6;
- ipv6_addr_copy(&fl6.daddr, &hdr->saddr);
+ fl6.daddr = hdr->saddr;
if (saddr)
- ipv6_addr_copy(&fl6.saddr, saddr);
+ fl6.saddr = *saddr;
fl6.flowi6_oif = iif;
fl6.fl6_icmp_type = type;
fl6.fl6_icmp_code = code;
@@ -538,9 +539,9 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_proto = IPPROTO_ICMPV6;
- ipv6_addr_copy(&fl6.daddr, &ipv6_hdr(skb)->saddr);
+ fl6.daddr = ipv6_hdr(skb)->saddr;
if (saddr)
- ipv6_addr_copy(&fl6.saddr, saddr);
+ fl6.saddr = *saddr;
fl6.flowi6_oif = skb->dev->ifindex;
fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
@@ -596,6 +597,7 @@ static void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
int inner_offset;
int hash;
u8 nexthdr;
+ __be16 frag_off;
if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
return;
@@ -603,7 +605,8 @@ static void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
if (ipv6_ext_hdr(nexthdr)) {
/* now skip over extension headers */
- inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr);
+ inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
+ &nexthdr, &frag_off);
if (inner_offset<0)
return;
} else {
@@ -786,8 +789,8 @@ void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
int oif)
{
memset(fl6, 0, sizeof(*fl6));
- ipv6_addr_copy(&fl6->saddr, saddr);
- ipv6_addr_copy(&fl6->daddr, daddr);
+ fl6->saddr = *saddr;
+ fl6->daddr = *daddr;
fl6->flowi6_proto = IPPROTO_ICMPV6;
fl6->fl6_icmp_type = type;
fl6->fl6_icmp_code = 0;
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index fee46d5a2f12..02dd203d9eac 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -65,9 +65,9 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk,
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_proto = IPPROTO_TCP;
- ipv6_addr_copy(&fl6.daddr, &treq->rmt_addr);
+ fl6.daddr = treq->rmt_addr;
final_p = fl6_update_dst(&fl6, np->opt, &final);
- ipv6_addr_copy(&fl6.saddr, &treq->loc_addr);
+ fl6.saddr = treq->loc_addr;
fl6.flowi6_oif = sk->sk_bound_dev_if;
fl6.flowi6_mark = sk->sk_mark;
fl6.fl6_dport = inet_rsk(req)->rmt_port;
@@ -85,7 +85,7 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk,
* request_sock (formerly open request) hash tables.
*/
static u32 inet6_synq_hash(const struct in6_addr *raddr, const __be16 rport,
- const u32 rnd, const u16 synq_hsize)
+ const u32 rnd, const u32 synq_hsize)
{
u32 c;
@@ -157,7 +157,7 @@ void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
sin6->sin6_family = AF_INET6;
- ipv6_addr_copy(&sin6->sin6_addr, &np->daddr);
+ sin6->sin6_addr = np->daddr;
sin6->sin6_port = inet_sk(sk)->inet_dport;
/* We do not store received flowlabel for TCP */
sin6->sin6_flowinfo = 0;
@@ -215,8 +215,8 @@ int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused)
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_proto = sk->sk_protocol;
- ipv6_addr_copy(&fl6.daddr, &np->daddr);
- ipv6_addr_copy(&fl6.saddr, &np->saddr);
+ fl6.daddr = np->daddr;
+ fl6.saddr = np->saddr;
fl6.flowlabel = np->flow_label;
IP6_ECN_flow_xmit(sk, fl6.flowlabel);
fl6.flowi6_oif = sk->sk_bound_dev_if;
@@ -246,7 +246,7 @@ int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused)
skb_dst_set_noref(skb, dst);
/* Restore final destination back after routing done */
- ipv6_addr_copy(&fl6.daddr, &np->daddr);
+ fl6.daddr = np->daddr;
res = ip6_xmit(sk, skb, &fl6, np->opt, np->tclass);
rcu_read_unlock();
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 93718f3db79b..b82bcde53f7a 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -190,7 +190,7 @@ static struct fib6_table *fib6_alloc_table(struct net *net, u32 id)
struct fib6_table *table;
table = kzalloc(sizeof(*table), GFP_ATOMIC);
- if (table != NULL) {
+ if (table) {
table->tb6_id = id;
table->tb6_root.leaf = net->ipv6.ip6_null_entry;
table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
@@ -210,7 +210,7 @@ struct fib6_table *fib6_new_table(struct net *net, u32 id)
return tb;
tb = fib6_alloc_table(net, id);
- if (tb != NULL)
+ if (tb)
fib6_link_table(net, tb);
return tb;
@@ -367,7 +367,7 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
s_e = cb->args[1];
w = (void *)cb->args[2];
- if (w == NULL) {
+ if (!w) {
/* New dump:
*
* 1. hook callback destructor.
@@ -379,7 +379,7 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
* 2. allocate and initialize walker.
*/
w = kzalloc(sizeof(*w), GFP_ATOMIC);
- if (w == NULL)
+ if (!w)
return -ENOMEM;
w->func = fib6_dump_node;
cb->args[2] = (long)w;
@@ -425,7 +425,8 @@ out:
static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr,
int addrlen, int plen,
- int offset)
+ int offset, int allow_create,
+ int replace_required)
{
struct fib6_node *fn, *in, *ln;
struct fib6_node *pn = NULL;
@@ -447,8 +448,18 @@ static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr,
* Prefix match
*/
if (plen < fn->fn_bit ||
- !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit))
+ !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit)) {
+ if (!allow_create) {
+ if (replace_required) {
+ pr_warn("IPv6: Can't replace route, "
+ "no match found\n");
+ return ERR_PTR(-ENOENT);
+ }
+ pr_warn("IPv6: NLM_F_CREATE should be set "
+ "when creating new route\n");
+ }
goto insert_above;
+ }
/*
* Exact match ?
@@ -456,7 +467,7 @@ static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr,
if (plen == fn->fn_bit) {
/* clean up an intermediate node */
- if ((fn->fn_flags & RTN_RTINFO) == 0) {
+ if (!(fn->fn_flags & RTN_RTINFO)) {
rt6_release(fn->leaf);
fn->leaf = NULL;
}
@@ -477,6 +488,23 @@ static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr,
fn = dir ? fn->right: fn->left;
} while (fn);
+ if (!allow_create) {
+ /* We should not create new node because
+ * NLM_F_REPLACE was specified without NLM_F_CREATE
+ * I assume it is safe to require NLM_F_CREATE when
+ * REPLACE flag is used! Later we may want to remove the
+ * check for replace_required, because according
+ * to netlink specification, NLM_F_CREATE
+ * MUST be specified if new route is created.
+ * That would keep IPv6 consistent with IPv4
+ */
+ if (replace_required) {
+ pr_warn("IPv6: Can't replace route, no match found\n");
+ return ERR_PTR(-ENOENT);
+ }
+ pr_warn("IPv6: NLM_F_CREATE should be set "
+ "when creating new route\n");
+ }
/*
* We walked to the bottom of tree.
* Create new leaf node without children.
@@ -484,7 +512,7 @@ static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr,
ln = node_alloc();
- if (ln == NULL)
+ if (!ln)
return NULL;
ln->fn_bit = plen;
@@ -527,7 +555,7 @@ insert_above:
in = node_alloc();
ln = node_alloc();
- if (in == NULL || ln == NULL) {
+ if (!in || !ln) {
if (in)
node_free(in);
if (ln)
@@ -581,7 +609,7 @@ insert_above:
ln = node_alloc();
- if (ln == NULL)
+ if (!ln)
return NULL;
ln->fn_bit = plen;
@@ -614,10 +642,15 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
{
struct rt6_info *iter = NULL;
struct rt6_info **ins;
+ int replace = (info->nlh &&
+ (info->nlh->nlmsg_flags & NLM_F_REPLACE));
+ int add = (!info->nlh ||
+ (info->nlh->nlmsg_flags & NLM_F_CREATE));
+ int found = 0;
ins = &fn->leaf;
- for (iter = fn->leaf; iter; iter=iter->dst.rt6_next) {
+ for (iter = fn->leaf; iter; iter = iter->dst.rt6_next) {
/*
* Search for duplicates
*/
@@ -626,17 +659,24 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
/*
* Same priority level
*/
+ if (info->nlh &&
+ (info->nlh->nlmsg_flags & NLM_F_EXCL))
+ return -EEXIST;
+ if (replace) {
+ found++;
+ break;
+ }
- if (iter->rt6i_dev == rt->rt6i_dev &&
+ if (iter->dst.dev == rt->dst.dev &&
iter->rt6i_idev == rt->rt6i_idev &&
ipv6_addr_equal(&iter->rt6i_gateway,
&rt->rt6i_gateway)) {
- if (!(iter->rt6i_flags&RTF_EXPIRES))
+ if (!(iter->rt6i_flags & RTF_EXPIRES))
return -EEXIST;
- iter->rt6i_expires = rt->rt6i_expires;
- if (!(rt->rt6i_flags&RTF_EXPIRES)) {
+ iter->dst.expires = rt->dst.expires;
+ if (!(rt->rt6i_flags & RTF_EXPIRES)) {
iter->rt6i_flags &= ~RTF_EXPIRES;
- iter->rt6i_expires = 0;
+ iter->dst.expires = 0;
}
return -EEXIST;
}
@@ -655,17 +695,40 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
/*
* insert node
*/
+ if (!replace) {
+ if (!add)
+ pr_warn("IPv6: NLM_F_CREATE should be set when creating new route\n");
+
+add:
+ rt->dst.rt6_next = iter;
+ *ins = rt;
+ rt->rt6i_node = fn;
+ atomic_inc(&rt->rt6i_ref);
+ inet6_rt_notify(RTM_NEWROUTE, rt, info);
+ info->nl_net->ipv6.rt6_stats->fib_rt_entries++;
+
+ if (!(fn->fn_flags & RTN_RTINFO)) {
+ info->nl_net->ipv6.rt6_stats->fib_route_nodes++;
+ fn->fn_flags |= RTN_RTINFO;
+ }
- rt->dst.rt6_next = iter;
- *ins = rt;
- rt->rt6i_node = fn;
- atomic_inc(&rt->rt6i_ref);
- inet6_rt_notify(RTM_NEWROUTE, rt, info);
- info->nl_net->ipv6.rt6_stats->fib_rt_entries++;
-
- if ((fn->fn_flags & RTN_RTINFO) == 0) {
- info->nl_net->ipv6.rt6_stats->fib_route_nodes++;
- fn->fn_flags |= RTN_RTINFO;
+ } else {
+ if (!found) {
+ if (add)
+ goto add;
+ pr_warn("IPv6: NLM_F_REPLACE set, but no existing node found!\n");
+ return -ENOENT;
+ }
+ *ins = rt;
+ rt->rt6i_node = fn;
+ rt->dst.rt6_next = iter->dst.rt6_next;
+ atomic_inc(&rt->rt6i_ref);
+ inet6_rt_notify(RTM_NEWROUTE, rt, info);
+ rt6_release(iter);
+ if (!(fn->fn_flags & RTN_RTINFO)) {
+ info->nl_net->ipv6.rt6_stats->fib_route_nodes++;
+ fn->fn_flags |= RTN_RTINFO;
+ }
}
return 0;
@@ -674,7 +737,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
static __inline__ void fib6_start_gc(struct net *net, struct rt6_info *rt)
{
if (!timer_pending(&net->ipv6.ip6_fib_timer) &&
- (rt->rt6i_flags & (RTF_EXPIRES|RTF_CACHE)))
+ (rt->rt6i_flags & (RTF_EXPIRES | RTF_CACHE)))
mod_timer(&net->ipv6.ip6_fib_timer,
jiffies + net->ipv6.sysctl.ip6_rt_gc_interval);
}
@@ -696,11 +759,28 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info)
{
struct fib6_node *fn, *pn = NULL;
int err = -ENOMEM;
+ int allow_create = 1;
+ int replace_required = 0;
+
+ if (info->nlh) {
+ if (!(info->nlh->nlmsg_flags & NLM_F_CREATE))
+ allow_create = 0;
+ if (info->nlh->nlmsg_flags & NLM_F_REPLACE)
+ replace_required = 1;
+ }
+ if (!allow_create && !replace_required)
+ pr_warn("IPv6: RTM_NEWROUTE with no NLM_F_CREATE or NLM_F_REPLACE\n");
fn = fib6_add_1(root, &rt->rt6i_dst.addr, sizeof(struct in6_addr),
- rt->rt6i_dst.plen, offsetof(struct rt6_info, rt6i_dst));
+ rt->rt6i_dst.plen, offsetof(struct rt6_info, rt6i_dst),
+ allow_create, replace_required);
+
+ if (IS_ERR(fn)) {
+ err = PTR_ERR(fn);
+ fn = NULL;
+ }
- if (fn == NULL)
+ if (!fn)
goto out;
pn = fn;
@@ -709,7 +789,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info)
if (rt->rt6i_src.plen) {
struct fib6_node *sn;
- if (fn->subtree == NULL) {
+ if (!fn->subtree) {
struct fib6_node *sfn;
/*
@@ -724,7 +804,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info)
/* Create subtree root node */
sfn = node_alloc();
- if (sfn == NULL)
+ if (!sfn)
goto st_failure;
sfn->leaf = info->nl_net->ipv6.ip6_null_entry;
@@ -736,9 +816,10 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info)
sn = fib6_add_1(sfn, &rt->rt6i_src.addr,
sizeof(struct in6_addr), rt->rt6i_src.plen,
- offsetof(struct rt6_info, rt6i_src));
+ offsetof(struct rt6_info, rt6i_src),
+ allow_create, replace_required);
- if (sn == NULL) {
+ if (!sn) {
/* If it is failed, discard just allocated
root, and then (in st_failure) stale node
in main tree.
@@ -753,13 +834,18 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info)
} else {
sn = fib6_add_1(fn->subtree, &rt->rt6i_src.addr,
sizeof(struct in6_addr), rt->rt6i_src.plen,
- offsetof(struct rt6_info, rt6i_src));
+ offsetof(struct rt6_info, rt6i_src),
+ allow_create, replace_required);
- if (sn == NULL)
+ if (IS_ERR(sn)) {
+ err = PTR_ERR(sn);
+ sn = NULL;
+ }
+ if (!sn)
goto st_failure;
}
- if (fn->leaf == NULL) {
+ if (!fn->leaf) {
fn->leaf = rt;
atomic_inc(&rt->rt6i_ref);
}
@@ -768,10 +854,9 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info)
#endif
err = fib6_add_rt2node(fn, rt, info);
-
- if (err == 0) {
+ if (!err) {
fib6_start_gc(info->nl_net, rt);
- if (!(rt->rt6i_flags&RTF_CACHE))
+ if (!(rt->rt6i_flags & RTF_CACHE))
fib6_prune_clones(info->nl_net, pn, rt);
}
@@ -819,7 +904,7 @@ st_failure:
*/
struct lookup_args {
- int offset; /* key offset on rt6_info */
+ int offset; /* key offset on rt6_info */
const struct in6_addr *addr; /* search key */
};
@@ -849,11 +934,10 @@ static struct fib6_node * fib6_lookup_1(struct fib6_node *root,
fn = next;
continue;
}
-
break;
}
- while(fn) {
+ while (fn) {
if (FIB6_SUBTREE(fn) || fn->fn_flags & RTN_RTINFO) {
struct rt6key *key;
@@ -900,8 +984,7 @@ struct fib6_node * fib6_lookup(struct fib6_node *root, const struct in6_addr *da
};
fn = fib6_lookup_1(root, daddr ? args : args + 1);
-
- if (fn == NULL || fn->fn_flags & RTN_TL_ROOT)
+ if (!fn || fn->fn_flags & RTN_TL_ROOT)
fn = root;
return fn;
@@ -961,7 +1044,7 @@ struct fib6_node * fib6_locate(struct fib6_node *root,
}
#endif
- if (fn && fn->fn_flags&RTN_RTINFO)
+ if (fn && fn->fn_flags & RTN_RTINFO)
return fn;
return NULL;
@@ -975,14 +1058,13 @@ struct fib6_node * fib6_locate(struct fib6_node *root,
static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn)
{
- if (fn->fn_flags&RTN_ROOT)
+ if (fn->fn_flags & RTN_ROOT)
return net->ipv6.ip6_null_entry;
- while(fn) {
- if(fn->left)
+ while (fn) {
+ if (fn->left)
return fn->left->leaf;
-
- if(fn->right)
+ if (fn->right)
return fn->right->leaf;
fn = FIB6_SUBTREE(fn);
@@ -1020,12 +1102,12 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
if (children == 3 || FIB6_SUBTREE(fn)
#ifdef CONFIG_IPV6_SUBTREES
/* Subtree root (i.e. fn) may have one child */
- || (children && fn->fn_flags&RTN_ROOT)
+ || (children && fn->fn_flags & RTN_ROOT)
#endif
) {
fn->leaf = fib6_find_prefix(net, fn);
#if RT6_DEBUG >= 2
- if (fn->leaf==NULL) {
+ if (!fn->leaf) {
WARN_ON(!fn->leaf);
fn->leaf = net->ipv6.ip6_null_entry;
}
@@ -1058,7 +1140,7 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
read_lock(&fib6_walker_lock);
FOR_WALKERS(w) {
- if (child == NULL) {
+ if (!child) {
if (w->root == fn) {
w->root = w->node = NULL;
RT6_TRACE("W %p adjusted by delroot 1\n", w);
@@ -1087,7 +1169,7 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
read_unlock(&fib6_walker_lock);
node_free(fn);
- if (pn->fn_flags&RTN_RTINFO || FIB6_SUBTREE(pn))
+ if (pn->fn_flags & RTN_RTINFO || FIB6_SUBTREE(pn))
return pn;
rt6_release(pn->leaf);
@@ -1121,7 +1203,7 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
if (w->state == FWS_C && w->leaf == rt) {
RT6_TRACE("walker %p adjusted by delroute\n", w);
w->leaf = rt->dst.rt6_next;
- if (w->leaf == NULL)
+ if (!w->leaf)
w->state = FWS_U;
}
}
@@ -1130,7 +1212,7 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
rt->dst.rt6_next = NULL;
/* If it was last route, expunge its radix tree node */
- if (fn->leaf == NULL) {
+ if (!fn->leaf) {
fn->fn_flags &= ~RTN_RTINFO;
net->ipv6.rt6_stats->fib_route_nodes--;
fn = fib6_repair_tree(net, fn);
@@ -1144,7 +1226,7 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
* to still alive ones.
*/
while (fn) {
- if (!(fn->fn_flags&RTN_RTINFO) && fn->leaf == rt) {
+ if (!(fn->fn_flags & RTN_RTINFO) && fn->leaf == rt) {
fn->leaf = fib6_find_prefix(net, fn);
atomic_inc(&fn->leaf->rt6i_ref);
rt6_release(rt);
@@ -1171,17 +1253,17 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info)
return -ENOENT;
}
#endif
- if (fn == NULL || rt == net->ipv6.ip6_null_entry)
+ if (!fn || rt == net->ipv6.ip6_null_entry)
return -ENOENT;
WARN_ON(!(fn->fn_flags & RTN_RTINFO));
- if (!(rt->rt6i_flags&RTF_CACHE)) {
+ if (!(rt->rt6i_flags & RTF_CACHE)) {
struct fib6_node *pn = fn;
#ifdef CONFIG_IPV6_SUBTREES
/* clones of this route might be in another subtree */
if (rt->rt6i_src.plen) {
- while (!(pn->fn_flags&RTN_ROOT))
+ while (!(pn->fn_flags & RTN_ROOT))
pn = pn->parent;
pn = pn->parent;
}
@@ -1232,11 +1314,11 @@ static int fib6_walk_continue(struct fib6_walker_t *w)
for (;;) {
fn = w->node;
- if (fn == NULL)
+ if (!fn)
return 0;
if (w->prune && fn != w->root &&
- fn->fn_flags&RTN_RTINFO && w->state < FWS_C) {
+ fn->fn_flags & RTN_RTINFO && w->state < FWS_C) {
w->state = FWS_C;
w->leaf = fn->leaf;
}
@@ -1265,7 +1347,7 @@ static int fib6_walk_continue(struct fib6_walker_t *w)
w->state = FWS_C;
w->leaf = fn->leaf;
case FWS_C:
- if (w->leaf && fn->fn_flags&RTN_RTINFO) {
+ if (w->leaf && fn->fn_flags & RTN_RTINFO) {
int err;
if (w->count < w->skip) {
@@ -1380,6 +1462,26 @@ static void fib6_clean_tree(struct net *net, struct fib6_node *root,
fib6_walk(&c.w);
}
+void fib6_clean_all_ro(struct net *net, int (*func)(struct rt6_info *, void *arg),
+ int prune, void *arg)
+{
+ struct fib6_table *table;
+ struct hlist_node *node;
+ struct hlist_head *head;
+ unsigned int h;
+
+ rcu_read_lock();
+ for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
+ head = &net->ipv6.fib_table_hash[h];
+ hlist_for_each_entry_rcu(table, node, head, tb6_hlist) {
+ read_lock_bh(&table->tb6_lock);
+ fib6_clean_tree(net, &table->tb6_root,
+ func, prune, arg);
+ read_unlock_bh(&table->tb6_lock);
+ }
+ }
+ rcu_read_unlock();
+}
void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg),
int prune, void *arg)
{
@@ -1439,8 +1541,8 @@ static int fib6_age(struct rt6_info *rt, void *arg)
* only if they are not in use now.
*/
- if (rt->rt6i_flags&RTF_EXPIRES && rt->rt6i_expires) {
- if (time_after(now, rt->rt6i_expires)) {
+ if (rt->rt6i_flags & RTF_EXPIRES && rt->dst.expires) {
+ if (time_after(now, rt->dst.expires)) {
RT6_TRACE("expiring %p\n", rt);
return -1;
}
@@ -1451,7 +1553,7 @@ static int fib6_age(struct rt6_info *rt, void *arg)
RT6_TRACE("aging clone %p\n", rt);
return -1;
} else if ((rt->rt6i_flags & RTF_GATEWAY) &&
- (!(dst_get_neighbour_raw(&rt->dst)->flags & NTF_ROUTER))) {
+ (!(dst_get_neighbour_noref_raw(&rt->dst)->flags & NTF_ROUTER))) {
RT6_TRACE("purging route %p via non-router but gateway\n",
rt);
return -1;
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 4566dbd916d3..b7867a1215b1 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -386,7 +386,7 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq,
err = -EINVAL;
goto done;
}
- ipv6_addr_copy(&fl->dst, &freq->flr_dst);
+ fl->dst = freq->flr_dst;
atomic_set(&fl->users, 1);
switch (fl->share) {
case IPV6_FL_S_EXCL:
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 027c7ff6f1e5..1ca5d45a12e8 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -111,6 +111,14 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
ipv6_addr_loopback(&hdr->daddr))
goto err;
+ /*
+ * RFC4291 2.7
+ * Multicast addresses must not be used as source addresses in IPv6
+ * packets or appear in any Routing header.
+ */
+ if (ipv6_addr_is_multicast(&hdr->saddr))
+ goto err;
+
skb->transport_header = skb->network_header + sizeof(*hdr);
IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);
@@ -272,6 +280,7 @@ int ip6_mc_input(struct sk_buff *skb)
u8 *ptr = skb_network_header(skb) + opt->ra;
struct icmp6hdr *icmp6;
u8 nexthdr = hdr->nexthdr;
+ __be16 frag_off;
int offset;
/* Check if the value of Router Alert
@@ -285,7 +294,7 @@ int ip6_mc_input(struct sk_buff *skb)
goto out;
}
offset = ipv6_skip_exthdr(skb, sizeof(*hdr),
- &nexthdr);
+ &nexthdr, &frag_off);
if (offset < 0)
goto out;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 84d0bd5cac93..d97e07183ce9 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -136,7 +136,7 @@ static int ip6_finish_output2(struct sk_buff *skb)
}
rcu_read_lock();
- neigh = dst_get_neighbour(dst);
+ neigh = dst_get_neighbour_noref(dst);
if (neigh) {
int res = neigh_output(neigh, skb);
@@ -238,8 +238,8 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
hdr->nexthdr = proto;
hdr->hop_limit = hlimit;
- ipv6_addr_copy(&hdr->saddr, &fl6->saddr);
- ipv6_addr_copy(&hdr->daddr, first_hop);
+ hdr->saddr = fl6->saddr;
+ hdr->daddr = *first_hop;
skb->priority = sk->sk_priority;
skb->mark = sk->sk_mark;
@@ -290,8 +290,8 @@ int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
hdr->nexthdr = proto;
hdr->hop_limit = np->hop_limit;
- ipv6_addr_copy(&hdr->saddr, saddr);
- ipv6_addr_copy(&hdr->daddr, daddr);
+ hdr->saddr = *saddr;
+ hdr->daddr = *daddr;
return 0;
}
@@ -329,10 +329,11 @@ static int ip6_forward_proxy_check(struct sk_buff *skb)
{
struct ipv6hdr *hdr = ipv6_hdr(skb);
u8 nexthdr = hdr->nexthdr;
+ __be16 frag_off;
int offset;
if (ipv6_ext_hdr(nexthdr)) {
- offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
+ offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
if (offset < 0)
return 0;
} else
@@ -462,7 +463,7 @@ int ip6_forward(struct sk_buff *skb)
send redirects to source routed frames.
We don't send redirects to frames decapsulated from IPsec.
*/
- n = dst_get_neighbour(dst);
+ n = dst_get_neighbour_noref(dst);
if (skb->dev == dst->dev && n && opt->srcrt == 0 && !skb_sec_path(skb)) {
struct in6_addr *target = NULL;
struct rt6_info *rt;
@@ -603,7 +604,7 @@ void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
static atomic_t ipv6_fragmentation_id;
int old, new;
- if (rt) {
+ if (rt && !(rt->dst.flags & DST_NOPEER)) {
struct inet_peer *peer;
if (!rt->rt6i_peer)
@@ -631,6 +632,7 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
struct ipv6hdr *tmp_hdr;
struct frag_hdr *fh;
unsigned int mtu, hlen, left, len;
+ int hroom, troom;
__be32 frag_id = 0;
int ptr, offset = 0, err=0;
u8 *prevhdr, nexthdr = 0;
@@ -797,6 +799,8 @@ slow_path:
*/
*prevhdr = NEXTHDR_FRAGMENT;
+ hroom = LL_RESERVED_SPACE(rt->dst.dev);
+ troom = rt->dst.dev->needed_tailroom;
/*
* Keep copying data until we run out.
@@ -815,7 +819,8 @@ slow_path:
* Allocate buffer.
*/
- if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->dst.dev), GFP_ATOMIC)) == NULL) {
+ if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
+ hroom + troom, GFP_ATOMIC)) == NULL) {
NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
IPSTATS_MIB_FRAGFAILS);
@@ -828,7 +833,7 @@ slow_path:
*/
ip6_copy_metadata(frag, skb);
- skb_reserve(frag, LL_RESERVED_SPACE(rt->dst.dev));
+ skb_reserve(frag, hroom);
skb_put(frag, len + hlen + sizeof(struct frag_hdr));
skb_reset_network_header(frag);
fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
@@ -978,7 +983,7 @@ static int ip6_dst_lookup_tail(struct sock *sk,
* dst entry of the nexthop router
*/
rcu_read_lock();
- n = dst_get_neighbour(*dst);
+ n = dst_get_neighbour_noref(*dst);
if (n && !(n->nud_state & NUD_VALID)) {
struct inet6_ifaddr *ifp;
struct flowi6 fl_gw6;
@@ -1059,7 +1064,7 @@ struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
if (err)
return ERR_PTR(err);
if (final_dst)
- ipv6_addr_copy(&fl6->daddr, final_dst);
+ fl6->daddr = *final_dst;
if (can_sleep)
fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;
@@ -1095,7 +1100,7 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
if (err)
return ERR_PTR(err);
if (final_dst)
- ipv6_addr_copy(&fl6->daddr, final_dst);
+ fl6->daddr = *final_dst;
if (can_sleep)
fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;
@@ -1588,7 +1593,7 @@ int ip6_push_pending_frames(struct sock *sk)
if (np->pmtudisc < IPV6_PMTUDISC_DO)
skb->local_df = 1;
- ipv6_addr_copy(final_dst, &fl6->daddr);
+ *final_dst = fl6->daddr;
__skb_pull(skb, skb_network_header_len(skb));
if (opt && opt->opt_flen)
ipv6_push_frag_opts(skb, opt, &proto);
@@ -1604,8 +1609,8 @@ int ip6_push_pending_frames(struct sock *sk)
hdr->hop_limit = np->cork.hop_limit;
hdr->nexthdr = proto;
- ipv6_addr_copy(&hdr->saddr, &fl6->saddr);
- ipv6_addr_copy(&hdr->daddr, final_dst);
+ hdr->saddr = fl6->saddr;
+ hdr->daddr = *final_dst;
skb->priority = sk->sk_priority;
skb->mark = sk->sk_mark;
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index bdc15c9003d7..e1f7761815f3 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -93,7 +93,7 @@ struct pcpu_tstats {
unsigned long rx_bytes;
unsigned long tx_packets;
unsigned long tx_bytes;
-};
+} __attribute__((aligned(4*sizeof(unsigned long))));
static struct net_device_stats *ip6_get_stats(struct net_device *dev)
{
@@ -289,6 +289,8 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct ip6_tnl_parm *p)
if ((err = register_netdevice(dev)) < 0)
goto failed_free;
+ strcpy(t->parms.name, dev->name);
+
dev_hold(dev);
ip6_tnl_link(ip6n, t);
return t;
@@ -651,8 +653,8 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr,
NULL, 0, 0);
- if (rt && rt->rt6i_dev)
- skb2->dev = rt->rt6i_dev;
+ if (rt && rt->dst.dev)
+ skb2->dev = rt->dst.dev;
icmpv6_send(skb2, rel_type, rel_code, rel_info);
@@ -977,8 +979,8 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
ipv6_change_dsfield(ipv6h, ~INET_ECN_MASK, dsfield);
ipv6h->hop_limit = t->parms.hop_limit;
ipv6h->nexthdr = proto;
- ipv6_addr_copy(&ipv6h->saddr, &fl6->saddr);
- ipv6_addr_copy(&ipv6h->daddr, &fl6->daddr);
+ ipv6h->saddr = fl6->saddr;
+ ipv6h->daddr = fl6->daddr;
nf_reset(skb);
pkt_len = skb->len;
err = ip6_local_out(skb);
@@ -1153,8 +1155,8 @@ static void ip6_tnl_link_config(struct ip6_tnl *t)
memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr));
/* Set up flowi template */
- ipv6_addr_copy(&fl6->saddr, &p->laddr);
- ipv6_addr_copy(&fl6->daddr, &p->raddr);
+ fl6->saddr = p->laddr;
+ fl6->daddr = p->raddr;
fl6->flowi6_oif = p->link;
fl6->flowlabel = 0;
@@ -1183,11 +1185,11 @@ static void ip6_tnl_link_config(struct ip6_tnl *t)
if (rt == NULL)
return;
- if (rt->rt6i_dev) {
- dev->hard_header_len = rt->rt6i_dev->hard_header_len +
+ if (rt->dst.dev) {
+ dev->hard_header_len = rt->dst.dev->hard_header_len +
sizeof (struct ipv6hdr);
- dev->mtu = rt->rt6i_dev->mtu - sizeof (struct ipv6hdr);
+ dev->mtu = rt->dst.dev->mtu - sizeof (struct ipv6hdr);
if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
dev->mtu-=8;
@@ -1210,8 +1212,8 @@ static void ip6_tnl_link_config(struct ip6_tnl *t)
static int
ip6_tnl_change(struct ip6_tnl *t, struct ip6_tnl_parm *p)
{
- ipv6_addr_copy(&t->parms.laddr, &p->laddr);
- ipv6_addr_copy(&t->parms.raddr, &p->raddr);
+ t->parms.laddr = p->laddr;
+ t->parms.raddr = p->raddr;
t->parms.flags = p->flags;
t->parms.hop_limit = p->hop_limit;
t->parms.encap_limit = p->encap_limit;
@@ -1407,7 +1409,6 @@ ip6_tnl_dev_init_gen(struct net_device *dev)
struct ip6_tnl *t = netdev_priv(dev);
t->dev = dev;
- strcpy(t->parms.name, dev->name);
dev->tstats = alloc_percpu(struct pcpu_tstats);
if (!dev->tstats)
return -ENOMEM;
@@ -1487,6 +1488,7 @@ static void __net_exit ip6_tnl_destroy_tunnels(struct ip6_tnl_net *ip6n)
static int __net_init ip6_tnl_init_net(struct net *net)
{
struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
+ struct ip6_tnl *t = NULL;
int err;
ip6n->tnls[0] = ip6n->tnls_wc;
@@ -1507,6 +1509,10 @@ static int __net_init ip6_tnl_init_net(struct net *net)
err = register_netdev(ip6n->fb_tnl_dev);
if (err < 0)
goto err_register;
+
+ t = netdev_priv(ip6n->fb_tnl_dev);
+
+ strcpy(t->parms.name, ip6n->fb_tnl_dev->name);
return 0;
err_register:
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 449a9185b8f2..c7e95c8c579f 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -1105,8 +1105,8 @@ static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
msg->im6_msgtype = MRT6MSG_WHOLEPKT;
msg->im6_mif = mrt->mroute_reg_vif_num;
msg->im6_pad = 0;
- ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
- ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);
+ msg->im6_src = ipv6_hdr(pkt)->saddr;
+ msg->im6_dst = ipv6_hdr(pkt)->daddr;
skb->ip_summed = CHECKSUM_UNNECESSARY;
} else
@@ -1131,8 +1131,8 @@ static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
msg->im6_msgtype = assert;
msg->im6_mif = mifi;
msg->im6_pad = 0;
- ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
- ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);
+ msg->im6_src = ipv6_hdr(pkt)->saddr;
+ msg->im6_dst = ipv6_hdr(pkt)->daddr;
skb_dst_set(skb, dst_clone(skb_dst(pkt)));
skb->ip_summed = CHECKSUM_UNNECESSARY;
@@ -2181,8 +2181,8 @@ int ip6mr_get_route(struct net *net,
iph->payload_len = 0;
iph->nexthdr = IPPROTO_NONE;
iph->hop_limit = 0;
- ipv6_addr_copy(&iph->saddr, &rt->rt6i_src.addr);
- ipv6_addr_copy(&iph->daddr, &rt->rt6i_dst.addr);
+ iph->saddr = rt->rt6i_src.addr;
+ iph->daddr = rt->rt6i_dst.addr;
err = ip6mr_cache_unresolved(mrt, vif, skb2);
read_unlock(&mrt_lock);
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index c99e3ee9781f..18a2719003c3 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -435,7 +435,7 @@ sticky_done:
goto e_inval;
np->sticky_pktinfo.ipi6_ifindex = pkt.ipi6_ifindex;
- ipv6_addr_copy(&np->sticky_pktinfo.ipi6_addr, &pkt.ipi6_addr);
+ np->sticky_pktinfo.ipi6_addr = pkt.ipi6_addr;
retv = 0;
break;
}
@@ -503,7 +503,7 @@ done:
goto e_inval;
if (val > 255 || val < -1)
goto e_inval;
- np->mcast_hops = val;
+ np->mcast_hops = (val == -1 ? IPV6_DEFAULT_MCASTHOPS : val);
retv = 0;
break;
@@ -980,8 +980,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
struct in6_pktinfo src_info;
src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif :
np->sticky_pktinfo.ipi6_ifindex;
- np->mcast_oif? ipv6_addr_copy(&src_info.ipi6_addr, &np->daddr) :
- ipv6_addr_copy(&src_info.ipi6_addr, &(np->sticky_pktinfo.ipi6_addr));
+ src_info.ipi6_addr = np->mcast_oif ? np->daddr : np->sticky_pktinfo.ipi6_addr;
put_cmsg(&msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info);
}
if (np->rxopt.bits.rxhlim) {
@@ -992,8 +991,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
struct in6_pktinfo src_info;
src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif :
np->sticky_pktinfo.ipi6_ifindex;
- np->mcast_oif? ipv6_addr_copy(&src_info.ipi6_addr, &np->daddr) :
- ipv6_addr_copy(&src_info.ipi6_addr, &(np->sticky_pktinfo.ipi6_addr));
+ src_info.ipi6_addr = np->mcast_oif ? np->daddr : np->sticky_pktinfo.ipi6_addr;
put_cmsg(&msg, SOL_IPV6, IPV6_2292PKTINFO, sizeof(src_info), &src_info);
}
if (np->rxopt.bits.rxohlim) {
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index ee7839f4d6e3..b853f06cc148 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -155,14 +155,14 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
return -ENOMEM;
mc_lst->next = NULL;
- ipv6_addr_copy(&mc_lst->addr, addr);
+ mc_lst->addr = *addr;
rcu_read_lock();
if (ifindex == 0) {
struct rt6_info *rt;
rt = rt6_lookup(net, addr, NULL, 0, 0);
if (rt) {
- dev = rt->rt6i_dev;
+ dev = rt->dst.dev;
dst_release(&rt->dst);
}
} else
@@ -256,7 +256,7 @@ static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net,
struct rt6_info *rt = rt6_lookup(net, group, NULL, 0, 0);
if (rt) {
- dev = rt->rt6i_dev;
+ dev = rt->dst.dev;
dev_hold(dev);
dst_release(&rt->dst);
}
@@ -858,7 +858,7 @@ int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr)
setup_timer(&mc->mca_timer, igmp6_timer_handler, (unsigned long)mc);
- ipv6_addr_copy(&mc->mca_addr, addr);
+ mc->mca_addr = *addr;
mc->idev = idev; /* (reference taken) */
mc->mca_users = 1;
/* mca_stamp should be updated upon changes */
@@ -1343,13 +1343,15 @@ static struct sk_buff *mld_newpack(struct net_device *dev, int size)
struct mld2_report *pmr;
struct in6_addr addr_buf;
const struct in6_addr *saddr;
+ int hlen = LL_RESERVED_SPACE(dev);
+ int tlen = dev->needed_tailroom;
int err;
u8 ra[8] = { IPPROTO_ICMPV6, 0,
IPV6_TLV_ROUTERALERT, 2, 0, 0,
IPV6_TLV_PADN, 0 };
/* we assume size > sizeof(ra) here */
- size += LL_ALLOCATED_SPACE(dev);
+ size += hlen + tlen;
/* limit our allocations to order-0 page */
size = min_t(int, size, SKB_MAX_ORDER(0, 0));
skb = sock_alloc_send_skb(sk, size, 1, &err);
@@ -1357,7 +1359,7 @@ static struct sk_buff *mld_newpack(struct net_device *dev, int size)
if (!skb)
return NULL;
- skb_reserve(skb, LL_RESERVED_SPACE(dev));
+ skb_reserve(skb, hlen);
if (ipv6_get_lladdr(dev, &addr_buf, IFA_F_TENTATIVE)) {
/* <draft-ietf-magma-mld-source-05.txt>:
@@ -1408,18 +1410,11 @@ static void mld_sendpack(struct sk_buff *skb)
csum_partial(skb_transport_header(skb),
mldlen, 0));
- dst = icmp6_dst_alloc(skb->dev, NULL, &ipv6_hdr(skb)->daddr);
-
- if (!dst) {
- err = -ENOMEM;
- goto err_out;
- }
-
icmpv6_flow_init(net->ipv6.igmp_sk, &fl6, ICMPV6_MLD2_REPORT,
&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
skb->dev->ifindex);
+ dst = icmp6_dst_alloc(skb->dev, NULL, &fl6);
- dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
err = 0;
if (IS_ERR(dst)) {
err = PTR_ERR(dst);
@@ -1723,6 +1718,8 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
struct mld_msg *hdr;
const struct in6_addr *snd_addr, *saddr;
struct in6_addr addr_buf;
+ int hlen = LL_RESERVED_SPACE(dev);
+ int tlen = dev->needed_tailroom;
int err, len, payload_len, full_len;
u8 ra[8] = { IPPROTO_ICMPV6, 0,
IPV6_TLV_ROUTERALERT, 2, 0, 0,
@@ -1744,7 +1741,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
IPSTATS_MIB_OUT, full_len);
rcu_read_unlock();
- skb = sock_alloc_send_skb(sk, LL_ALLOCATED_SPACE(dev) + full_len, 1, &err);
+ skb = sock_alloc_send_skb(sk, hlen + tlen + full_len, 1, &err);
if (skb == NULL) {
rcu_read_lock();
@@ -1754,7 +1751,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
return;
}
- skb_reserve(skb, LL_RESERVED_SPACE(dev));
+ skb_reserve(skb, hlen);
if (ipv6_get_lladdr(dev, &addr_buf, IFA_F_TENTATIVE)) {
/* <draft-ietf-magma-mld-source-05.txt>:
@@ -1772,7 +1769,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
hdr = (struct mld_msg *) skb_put(skb, sizeof(struct mld_msg));
memset(hdr, 0, sizeof(struct mld_msg));
hdr->mld_type = type;
- ipv6_addr_copy(&hdr->mld_mca, addr);
+ hdr->mld_mca = *addr;
hdr->mld_cksum = csum_ipv6_magic(saddr, snd_addr, len,
IPPROTO_ICMPV6,
@@ -1781,17 +1778,10 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
rcu_read_lock();
idev = __in6_dev_get(skb->dev);
- dst = icmp6_dst_alloc(skb->dev, NULL, &ipv6_hdr(skb)->daddr);
- if (!dst) {
- err = -ENOMEM;
- goto err_out;
- }
-
icmpv6_flow_init(sk, &fl6, type,
&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
skb->dev->ifindex);
-
- dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
+ dst = icmp6_dst_alloc(skb->dev, NULL, &fl6);
if (IS_ERR(dst)) {
err = PTR_ERR(dst);
goto err_out;
@@ -1914,7 +1904,7 @@ static int ip6_mc_del_src(struct inet6_dev *idev, const struct in6_addr *pmca,
* Add multicast single-source filter to the interface list
*/
static int ip6_mc_add1_src(struct ifmcaddr6 *pmc, int sfmode,
- const struct in6_addr *psfsrc, int delta)
+ const struct in6_addr *psfsrc)
{
struct ip6_sf_list *psf, *psf_prev;
@@ -2045,7 +2035,7 @@ static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca,
pmc->mca_sfcount[sfmode]++;
err = 0;
for (i=0; i<sfcount; i++) {
- err = ip6_mc_add1_src(pmc, sfmode, &psfsrc[i], delta);
+ err = ip6_mc_add1_src(pmc, sfmode, &psfsrc[i]);
if (err)
break;
}
diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
index 43242e6e6103..7e1e0fbfef21 100644
--- a/net/ipv6/mip6.c
+++ b/net/ipv6/mip6.c
@@ -195,8 +195,8 @@ static inline int mip6_report_rl_allow(struct timeval *stamp,
mip6_report_rl.stamp.tv_sec = stamp->tv_sec;
mip6_report_rl.stamp.tv_usec = stamp->tv_usec;
mip6_report_rl.iif = iif;
- ipv6_addr_copy(&mip6_report_rl.src, src);
- ipv6_addr_copy(&mip6_report_rl.dst, dst);
+ mip6_report_rl.src = *src;
+ mip6_report_rl.dst = *dst;
allow = 1;
}
spin_unlock_bh(&mip6_report_rl.lock);
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 44e5b7f2a6c1..3b1fe4b3f3c6 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -93,7 +93,7 @@
static u32 ndisc_hash(const void *pkey,
const struct net_device *dev,
- __u32 rnd);
+ __u32 *hash_rnd);
static int ndisc_constructor(struct neighbour *neigh);
static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb);
static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb);
@@ -126,7 +126,6 @@ static const struct neigh_ops ndisc_direct_ops = {
struct neigh_table nd_tbl = {
.family = AF_INET6,
- .entry_size = sizeof(struct neighbour) + sizeof(struct in6_addr),
.key_len = sizeof(struct in6_addr),
.hash = ndisc_hash,
.constructor = ndisc_constructor,
@@ -141,7 +140,7 @@ struct neigh_table nd_tbl = {
.gc_staletime = 60 * HZ,
.reachable_time = ND_REACHABLE_TIME,
.delay_probe_time = 5 * HZ,
- .queue_len = 3,
+ .queue_len_bytes = 64*1024,
.ucast_probes = 3,
.mcast_probes = 3,
.anycast_delay = 1 * HZ,
@@ -350,16 +349,9 @@ EXPORT_SYMBOL(ndisc_mc_map);
static u32 ndisc_hash(const void *pkey,
const struct net_device *dev,
- __u32 hash_rnd)
+ __u32 *hash_rnd)
{
- const u32 *p32 = pkey;
- u32 addr_hash, i;
-
- addr_hash = 0;
- for (i = 0; i < (sizeof(struct in6_addr) / sizeof(u32)); i++)
- addr_hash ^= *p32++;
-
- return jhash_2words(addr_hash, dev->ifindex, hash_rnd);
+ return ndisc_hashfn(pkey, dev, hash_rnd);
}
static int ndisc_constructor(struct neighbour *neigh)
@@ -446,6 +438,8 @@ struct sk_buff *ndisc_build_skb(struct net_device *dev,
struct sock *sk = net->ipv6.ndisc_sk;
struct sk_buff *skb;
struct icmp6hdr *hdr;
+ int hlen = LL_RESERVED_SPACE(dev);
+ int tlen = dev->needed_tailroom;
int len;
int err;
u8 *opt;
@@ -459,7 +453,7 @@ struct sk_buff *ndisc_build_skb(struct net_device *dev,
skb = sock_alloc_send_skb(sk,
(MAX_HEADER + sizeof(struct ipv6hdr) +
- len + LL_ALLOCATED_SPACE(dev)),
+ len + hlen + tlen),
1, &err);
if (!skb) {
ND_PRINTK0(KERN_ERR
@@ -468,7 +462,7 @@ struct sk_buff *ndisc_build_skb(struct net_device *dev,
return NULL;
}
- skb_reserve(skb, LL_RESERVED_SPACE(dev));
+ skb_reserve(skb, hlen);
ip6_nd_hdr(sk, skb, dev, saddr, daddr, IPPROTO_ICMPV6, len);
skb->transport_header = skb->tail;
@@ -479,7 +473,7 @@ struct sk_buff *ndisc_build_skb(struct net_device *dev,
opt = skb_transport_header(skb) + sizeof(struct icmp6hdr);
if (target) {
- ipv6_addr_copy((struct in6_addr *)opt, target);
+ *(struct in6_addr *)opt = *target;
opt += sizeof(*target);
}
@@ -515,14 +509,7 @@ void ndisc_send_skb(struct sk_buff *skb,
type = icmp6h->icmp6_type;
icmpv6_flow_init(sk, &fl6, type, saddr, daddr, dev->ifindex);
-
- dst = icmp6_dst_alloc(dev, neigh, daddr);
- if (!dst) {
- kfree_skb(skb);
- return;
- }
-
- dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
+ dst = icmp6_dst_alloc(dev, neigh, &fl6);
if (IS_ERR(dst)) {
kfree_skb(skb);
return;
@@ -1237,7 +1224,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
rt = rt6_get_dflt_router(&ipv6_hdr(skb)->saddr, skb->dev);
if (rt)
- neigh = dst_get_neighbour(&rt->dst);
+ neigh = dst_get_neighbour_noref(&rt->dst);
if (rt && lifetime == 0) {
neigh_clone(neigh);
@@ -1257,7 +1244,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
return;
}
- neigh = dst_get_neighbour(&rt->dst);
+ neigh = dst_get_neighbour_noref(&rt->dst);
if (neigh == NULL) {
ND_PRINTK0(KERN_ERR
"ICMPv6 RA: %s() got default router without neighbour.\n",
@@ -1271,7 +1258,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
}
if (rt)
- rt->rt6i_expires = jiffies + (HZ * lifetime);
+ rt->dst.expires = jiffies + (HZ * lifetime);
if (ra_msg->icmph.icmp6_hop_limit) {
in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit;
@@ -1533,6 +1520,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
struct inet6_dev *idev;
struct flowi6 fl6;
u8 *opt;
+ int hlen, tlen;
int rd_len;
int err;
u8 ha_buf[MAX_ADDR_LEN], *ha = NULL;
@@ -1571,7 +1559,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
}
if (!rt->rt6i_peer)
rt6_bind_peer(rt, 1);
- if (inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ))
+ if (!inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ))
goto release;
if (dev->addr_len) {
@@ -1590,9 +1578,11 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
rd_len &= ~0x7;
len += rd_len;
+ hlen = LL_RESERVED_SPACE(dev);
+ tlen = dev->needed_tailroom;
buff = sock_alloc_send_skb(sk,
(MAX_HEADER + sizeof(struct ipv6hdr) +
- len + LL_ALLOCATED_SPACE(dev)),
+ len + hlen + tlen),
1, &err);
if (buff == NULL) {
ND_PRINTK0(KERN_ERR
@@ -1601,7 +1591,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
goto release;
}
- skb_reserve(buff, LL_RESERVED_SPACE(dev));
+ skb_reserve(buff, hlen);
ip6_nd_hdr(sk, buff, dev, &saddr_buf, &ipv6_hdr(skb)->saddr,
IPPROTO_ICMPV6, len);
@@ -1617,9 +1607,9 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
*/
addrp = (struct in6_addr *)(icmph + 1);
- ipv6_addr_copy(addrp, target);
+ *addrp = *target;
addrp++;
- ipv6_addr_copy(addrp, &ipv6_hdr(skb)->daddr);
+ *addrp = ipv6_hdr(skb)->daddr;
opt = (u8*) (addrp + 1);
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 448464844a25..9a68fb5b9e77 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -125,6 +125,16 @@ config IP6_NF_MATCH_MH
To compile it as a module, choose M here. If unsure, say N.
+config IP6_NF_MATCH_RPFILTER
+ tristate '"rpfilter" reverse path filter match support'
+ depends on NETFILTER_ADVANCED
+ ---help---
+ This option allows you to match packets whose replies would
+ go out via the interface the packet came in.
+
+ To compile it as a module, choose M here. If unsure, say N.
+ The module will be called ip6t_rpfilter.
+
config IP6_NF_MATCH_RT
tristate '"rt" Routing header match support'
depends on NETFILTER_ADVANCED
@@ -186,7 +196,6 @@ config IP6_NF_MANGLE
config IP6_NF_RAW
tristate 'raw table support (required for TRACE)'
- depends on NETFILTER_ADVANCED
help
This option adds a `raw' table to ip6tables. This table is the very
first in the netfilter framework and hooks in at the PREROUTING
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index abfee91ce816..2eaed96db02c 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -27,6 +27,7 @@ obj-$(CONFIG_IP6_NF_MATCH_FRAG) += ip6t_frag.o
obj-$(CONFIG_IP6_NF_MATCH_IPV6HEADER) += ip6t_ipv6header.o
obj-$(CONFIG_IP6_NF_MATCH_MH) += ip6t_mh.o
obj-$(CONFIG_IP6_NF_MATCH_OPTS) += ip6t_hbh.o
+obj-$(CONFIG_IP6_NF_MATCH_RPFILTER) += ip6t_rpfilter.o
obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o
# targets
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
index e63c3972a739..fb80a23c6640 100644
--- a/net/ipv6/netfilter/ip6_queue.c
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -405,6 +405,7 @@ __ipq_rcv_skb(struct sk_buff *skb)
int status, type, pid, flags;
unsigned int nlmsglen, skblen;
struct nlmsghdr *nlh;
+ bool enable_timestamp = false;
skblen = skb->len;
if (skblen < sizeof(*nlh))
@@ -442,11 +443,13 @@ __ipq_rcv_skb(struct sk_buff *skb)
RCV_SKB_FAIL(-EBUSY);
}
} else {
- net_enable_timestamp();
+ enable_timestamp = true;
peer_pid = pid;
}
spin_unlock_bh(&queue_lock);
+ if (enable_timestamp)
+ net_enable_timestamp();
status = ipq_receive_peer(NLMSG_DATA(nlh), type,
nlmsglen - NLMSG_LENGTH(0));
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index a5a4c5dd5396..aad2fa41cf46 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -49,6 +49,7 @@ static void send_reset(struct net *net, struct sk_buff *oldskb)
const __u8 tclass = DEFAULT_TOS_VALUE;
struct dst_entry *dst = NULL;
u8 proto;
+ __be16 frag_off;
struct flowi6 fl6;
if ((!(ipv6_addr_type(&oip6h->saddr) & IPV6_ADDR_UNICAST)) ||
@@ -58,7 +59,7 @@ static void send_reset(struct net *net, struct sk_buff *oldskb)
}
proto = oip6h->nexthdr;
- tcphoff = ipv6_skip_exthdr(oldskb, ((u8*)(oip6h+1) - oldskb->data), &proto);
+ tcphoff = ipv6_skip_exthdr(oldskb, ((u8*)(oip6h+1) - oldskb->data), &proto, &frag_off);
if ((tcphoff < 0) || (tcphoff > oldskb->len)) {
pr_debug("Cannot get TCP header.\n");
@@ -93,8 +94,8 @@ static void send_reset(struct net *net, struct sk_buff *oldskb)
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_proto = IPPROTO_TCP;
- ipv6_addr_copy(&fl6.saddr, &oip6h->daddr);
- ipv6_addr_copy(&fl6.daddr, &oip6h->saddr);
+ fl6.saddr = oip6h->daddr;
+ fl6.daddr = oip6h->saddr;
fl6.fl6_sport = otcph.dest;
fl6.fl6_dport = otcph.source;
security_skb_classify_flow(oldskb, flowi6_to_flowi(&fl6));
@@ -129,8 +130,8 @@ static void send_reset(struct net *net, struct sk_buff *oldskb)
*(__be32 *)ip6h = htonl(0x60000000 | (tclass << 20));
ip6h->hop_limit = ip6_dst_hoplimit(dst);
ip6h->nexthdr = IPPROTO_TCP;
- ipv6_addr_copy(&ip6h->saddr, &oip6h->daddr);
- ipv6_addr_copy(&ip6h->daddr, &oip6h->saddr);
+ ip6h->saddr = oip6h->daddr;
+ ip6h->daddr = oip6h->saddr;
tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr));
/* Truncate to length (no data) */
diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c
new file mode 100644
index 000000000000..5d1d8b04d694
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_rpfilter.c
@@ -0,0 +1,133 @@
+/*
+ * Copyright (c) 2011 Florian Westphal <fw@strlen.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/route.h>
+#include <net/ip6_fib.h>
+#include <net/ip6_route.h>
+
+#include <linux/netfilter/xt_rpfilter.h>
+#include <linux/netfilter/x_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Florian Westphal <fw@strlen.de>");
+MODULE_DESCRIPTION("Xtables: IPv6 reverse path filter match");
+
+static bool rpfilter_addr_unicast(const struct in6_addr *addr)
+{
+ int addr_type = ipv6_addr_type(addr);
+ return addr_type & IPV6_ADDR_UNICAST;
+}
+
+static bool rpfilter_lookup_reverse6(const struct sk_buff *skb,
+ const struct net_device *dev, u8 flags)
+{
+ struct rt6_info *rt;
+ struct ipv6hdr *iph = ipv6_hdr(skb);
+ bool ret = false;
+ struct flowi6 fl6 = {
+ .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
+ .flowi6_proto = iph->nexthdr,
+ .daddr = iph->saddr,
+ };
+ int lookup_flags;
+
+ if (rpfilter_addr_unicast(&iph->daddr)) {
+ memcpy(&fl6.saddr, &iph->daddr, sizeof(struct in6_addr));
+ lookup_flags = RT6_LOOKUP_F_HAS_SADDR;
+ } else {
+ lookup_flags = 0;
+ }
+
+ fl6.flowi6_mark = flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0;
+ if ((flags & XT_RPFILTER_LOOSE) == 0) {
+ fl6.flowi6_oif = dev->ifindex;
+ lookup_flags |= RT6_LOOKUP_F_IFACE;
+ }
+
+ rt = (void *) ip6_route_lookup(dev_net(dev), &fl6, lookup_flags);
+ if (rt->dst.error)
+ goto out;
+
+ if (rt->rt6i_flags & (RTF_REJECT|RTF_ANYCAST))
+ goto out;
+
+ if (rt->rt6i_flags & RTF_LOCAL) {
+ ret = flags & XT_RPFILTER_ACCEPT_LOCAL;
+ goto out;
+ }
+
+ if (rt->rt6i_idev->dev == dev || (flags & XT_RPFILTER_LOOSE))
+ ret = true;
+ out:
+ dst_release(&rt->dst);
+ return ret;
+}
+
+static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_rpfilter_info *info = par->matchinfo;
+ int saddrtype;
+ struct ipv6hdr *iph;
+ bool invert = info->flags & XT_RPFILTER_INVERT;
+
+ if (par->in->flags & IFF_LOOPBACK)
+ return true ^ invert;
+
+ iph = ipv6_hdr(skb);
+ saddrtype = ipv6_addr_type(&iph->saddr);
+ if (unlikely(saddrtype == IPV6_ADDR_ANY))
+ return true ^ invert; /* not routable: forward path will drop it */
+
+ return rpfilter_lookup_reverse6(skb, par->in, info->flags) ^ invert;
+}
+
+static int rpfilter_check(const struct xt_mtchk_param *par)
+{
+ const struct xt_rpfilter_info *info = par->matchinfo;
+ unsigned int options = ~XT_RPFILTER_OPTION_MASK;
+
+ if (info->flags & options) {
+ pr_info("unknown options encountered");
+ return -EINVAL;
+ }
+
+ if (strcmp(par->table, "mangle") != 0 &&
+ strcmp(par->table, "raw") != 0) {
+ pr_info("match only valid in the \'raw\' "
+ "or \'mangle\' tables, not \'%s\'.\n", par->table);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static struct xt_match rpfilter_mt_reg __read_mostly = {
+ .name = "rpfilter",
+ .family = NFPROTO_IPV6,
+ .checkentry = rpfilter_check,
+ .match = rpfilter_mt,
+ .matchsize = sizeof(struct xt_rpfilter_info),
+ .hooks = (1 << NF_INET_PRE_ROUTING),
+ .me = THIS_MODULE
+};
+
+static int __init rpfilter_mt_init(void)
+{
+ return xt_register_match(&rpfilter_mt_reg);
+}
+
+static void __exit rpfilter_mt_exit(void)
+{
+ xt_unregister_match(&rpfilter_mt_reg);
+}
+
+module_init(rpfilter_mt_init);
+module_exit(rpfilter_mt_exit);
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index c9e37c8fd62c..a8f6da97e3b2 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -44,7 +44,7 @@ ip6table_filter_hook(unsigned int hook, struct sk_buff *skb,
static struct nf_hook_ops *filter_ops __read_mostly;
/* Default to forward because I got too much mail already. */
-static int forward = NF_ACCEPT;
+static bool forward = NF_ACCEPT;
module_param(forward, bool, 0000);
static int __net_init ip6table_filter_net_init(struct net *net)
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index 1008ce94bc33..fdeb6d03da81 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -142,11 +142,7 @@ static const struct snmp_mib snmp6_udplite6_list[] = {
SNMP_MIB_SENTINEL
};
-/* can be called either with percpu mib (pcpumib != NULL),
- * or shared one (smib != NULL)
- */
-static void snmp6_seq_show_icmpv6msg(struct seq_file *seq, void __percpu **pcpumib,
- atomic_long_t *smib)
+static void snmp6_seq_show_icmpv6msg(struct seq_file *seq, atomic_long_t *smib)
{
char name[32];
int i;
@@ -163,14 +159,14 @@ static void snmp6_seq_show_icmpv6msg(struct seq_file *seq, void __percpu **pcpum
snprintf(name, sizeof(name), "Icmp6%s%s",
i & 0x100 ? "Out" : "In", p);
seq_printf(seq, "%-32s\t%lu\n", name,
- pcpumib ? snmp_fold_field(pcpumib, i) : atomic_long_read(smib + i));
+ atomic_long_read(smib + i));
}
/* print by number (nonzero only) - ICMPMsgStat format */
for (i = 0; i < ICMP6MSG_MIB_MAX; i++) {
unsigned long val;
- val = pcpumib ? snmp_fold_field(pcpumib, i) : atomic_long_read(smib + i);
+ val = atomic_long_read(smib + i);
if (!val)
continue;
snprintf(name, sizeof(name), "Icmp6%sType%u",
@@ -215,8 +211,7 @@ static int snmp6_seq_show(struct seq_file *seq, void *v)
snmp6_ipstats_list, offsetof(struct ipstats_mib, syncp));
snmp6_seq_show_item(seq, (void __percpu **)net->mib.icmpv6_statistics,
NULL, snmp6_icmp6_list);
- snmp6_seq_show_icmpv6msg(seq,
- (void __percpu **)net->mib.icmpv6msg_statistics, NULL);
+ snmp6_seq_show_icmpv6msg(seq, net->mib.icmpv6msg_statistics->mibs);
snmp6_seq_show_item(seq, (void __percpu **)net->mib.udp_stats_in6,
NULL, snmp6_udp6_list);
snmp6_seq_show_item(seq, (void __percpu **)net->mib.udplite_stats_in6,
@@ -246,7 +241,7 @@ static int snmp6_dev_seq_show(struct seq_file *seq, void *v)
snmp6_ipstats_list);
snmp6_seq_show_item(seq, NULL, idev->stats.icmpv6dev->mibs,
snmp6_icmp6_list);
- snmp6_seq_show_icmpv6msg(seq, NULL, idev->stats.icmpv6msgdev->mibs);
+ snmp6_seq_show_icmpv6msg(seq, idev->stats.icmpv6msgdev->mibs);
return 0;
}
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 331af3b882ac..a4894f4f1944 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -299,9 +299,9 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
}
inet->inet_rcv_saddr = inet->inet_saddr = v4addr;
- ipv6_addr_copy(&np->rcv_saddr, &addr->sin6_addr);
+ np->rcv_saddr = addr->sin6_addr;
if (!(addr_type & IPV6_ADDR_MULTICAST))
- ipv6_addr_copy(&np->saddr, &addr->sin6_addr);
+ np->saddr = addr->sin6_addr;
err = 0;
out_unlock:
rcu_read_unlock();
@@ -383,7 +383,8 @@ static inline int rawv6_rcv_skb(struct sock *sk, struct sk_buff *skb)
}
/* Charge it to the socket. */
- if (ip_queue_rcv_skb(sk, skb) < 0) {
+ skb_dst_drop(skb);
+ if (sock_queue_rcv_skb(sk, skb) < 0) {
kfree_skb(skb);
return NET_RX_DROP;
}
@@ -494,7 +495,7 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk,
if (sin6) {
sin6->sin6_family = AF_INET6;
sin6->sin6_port = 0;
- ipv6_addr_copy(&sin6->sin6_addr, &ipv6_hdr(skb)->saddr);
+ sin6->sin6_addr = ipv6_hdr(skb)->saddr;
sin6->sin6_flowinfo = 0;
sin6->sin6_scope_id = 0;
if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
@@ -610,6 +611,8 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length,
struct sk_buff *skb;
int err;
struct rt6_info *rt = (struct rt6_info *)*dstp;
+ int hlen = LL_RESERVED_SPACE(rt->dst.dev);
+ int tlen = rt->dst.dev->needed_tailroom;
if (length > rt->dst.dev->mtu) {
ipv6_local_error(sk, EMSGSIZE, fl6, rt->dst.dev->mtu);
@@ -619,11 +622,11 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length,
goto out;
skb = sock_alloc_send_skb(sk,
- length + LL_ALLOCATED_SPACE(rt->dst.dev) + 15,
+ length + hlen + tlen + 15,
flags & MSG_DONTWAIT, &err);
if (skb == NULL)
goto error;
- skb_reserve(skb, LL_RESERVED_SPACE(rt->dst.dev));
+ skb_reserve(skb, hlen);
skb->priority = sk->sk_priority;
skb->mark = sk->sk_mark;
@@ -843,11 +846,11 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
goto out;
if (!ipv6_addr_any(daddr))
- ipv6_addr_copy(&fl6.daddr, daddr);
+ fl6.daddr = *daddr;
else
fl6.daddr.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */
if (ipv6_addr_any(&fl6.saddr) && !ipv6_addr_any(&np->saddr))
- ipv6_addr_copy(&fl6.saddr, &np->saddr);
+ fl6.saddr = np->saddr;
final_p = fl6_update_dst(&fl6, opt, &final);
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index dfb164e9051a..b69fae76a6f1 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -153,8 +153,8 @@ void ip6_frag_init(struct inet_frag_queue *q, void *a)
fq->id = arg->id;
fq->user = arg->user;
- ipv6_addr_copy(&fq->saddr, arg->src);
- ipv6_addr_copy(&fq->daddr, arg->dst);
+ fq->saddr = *arg->src;
+ fq->daddr = *arg->dst;
}
EXPORT_SYMBOL(ip6_frag_init);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 8473016bba4a..07361dfa8085 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -62,22 +62,11 @@
#include <linux/sysctl.h>
#endif
-/* Set to 3 to get tracing. */
-#define RT6_DEBUG 2
-
-#if RT6_DEBUG >= 3
-#define RDBG(x) printk x
-#define RT6_TRACE(x...) printk(KERN_DEBUG x)
-#else
-#define RDBG(x)
-#define RT6_TRACE(x...) do { ; } while (0)
-#endif
-
static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
const struct in6_addr *dest);
static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
static unsigned int ip6_default_advmss(const struct dst_entry *dst);
-static unsigned int ip6_default_mtu(const struct dst_entry *dst);
+static unsigned int ip6_mtu(const struct dst_entry *dst);
static struct dst_entry *ip6_negative_advice(struct dst_entry *);
static void ip6_dst_destroy(struct dst_entry *);
static void ip6_dst_ifdown(struct dst_entry *,
@@ -134,7 +123,23 @@ static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
{
- return __neigh_lookup_errno(&nd_tbl, daddr, dst->dev);
+ struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
+ if (n)
+ return n;
+ return neigh_create(&nd_tbl, daddr, dst->dev);
+}
+
+static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
+{
+ struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
+ if (!n) {
+ n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
+ if (IS_ERR(n))
+ return PTR_ERR(n);
+ }
+ dst_set_neighbour(&rt->dst, n);
+
+ return 0;
}
static struct dst_ops ip6_dst_ops_template = {
@@ -144,7 +149,7 @@ static struct dst_ops ip6_dst_ops_template = {
.gc_thresh = 1024,
.check = ip6_dst_check,
.default_advmss = ip6_default_advmss,
- .default_mtu = ip6_default_mtu,
+ .mtu = ip6_mtu,
.cow_metrics = ipv6_cow_metrics,
.destroy = ip6_dst_destroy,
.ifdown = ip6_dst_ifdown,
@@ -155,9 +160,11 @@ static struct dst_ops ip6_dst_ops_template = {
.neigh_lookup = ip6_neigh_lookup,
};
-static unsigned int ip6_blackhole_default_mtu(const struct dst_entry *dst)
+static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
{
- return 0;
+ unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
+
+ return mtu ? : dst->dev->mtu;
}
static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
@@ -175,7 +182,7 @@ static struct dst_ops ip6_dst_blackhole_ops = {
.protocol = cpu_to_be16(ETH_P_IPV6),
.destroy = ip6_dst_destroy,
.check = ip6_dst_check,
- .default_mtu = ip6_blackhole_default_mtu,
+ .mtu = ip6_blackhole_mtu,
.default_advmss = ip6_default_advmss,
.update_pmtu = ip6_rt_blackhole_update_pmtu,
.cow_metrics = ip6_rt_blackhole_cow_metrics,
@@ -245,9 +252,9 @@ static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
{
struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
- if (rt != NULL)
+ if (rt)
memset(&rt->rt6i_table, 0,
- sizeof(*rt) - sizeof(struct dst_entry));
+ sizeof(*rt) - sizeof(struct dst_entry));
return rt;
}
@@ -261,7 +268,7 @@ static void ip6_dst_destroy(struct dst_entry *dst)
if (!(rt->dst.flags & DST_HOST))
dst_destroy_metrics_generic(dst);
- if (idev != NULL) {
+ if (idev) {
rt->rt6i_idev = NULL;
in6_dev_put(idev);
}
@@ -297,10 +304,10 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
struct net_device *loopback_dev =
dev_net(dev)->loopback_dev;
- if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
+ if (dev != loopback_dev && idev && idev->dev == dev) {
struct inet6_dev *loopback_idev =
in6_dev_get(loopback_dev);
- if (loopback_idev != NULL) {
+ if (loopback_idev) {
rt->rt6i_idev = loopback_idev;
in6_dev_put(idev);
}
@@ -310,7 +317,7 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
static __inline__ int rt6_check_expired(const struct rt6_info *rt)
{
return (rt->rt6i_flags & RTF_EXPIRES) &&
- time_after(jiffies, rt->rt6i_expires);
+ time_after(jiffies, rt->dst.expires);
}
static inline int rt6_need_strict(const struct in6_addr *daddr)
@@ -336,13 +343,13 @@ static inline struct rt6_info *rt6_device_match(struct net *net,
goto out;
for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
- struct net_device *dev = sprt->rt6i_dev;
+ struct net_device *dev = sprt->dst.dev;
if (oif) {
if (dev->ifindex == oif)
return sprt;
if (dev->flags & IFF_LOOPBACK) {
- if (sprt->rt6i_idev == NULL ||
+ if (!sprt->rt6i_idev ||
sprt->rt6i_idev->dev->ifindex != oif) {
if (flags & RT6_LOOKUP_F_IFACE && oif)
continue;
@@ -383,7 +390,7 @@ static void rt6_probe(struct rt6_info *rt)
* to no more than one per minute.
*/
rcu_read_lock();
- neigh = rt ? dst_get_neighbour(&rt->dst) : NULL;
+ neigh = rt ? dst_get_neighbour_noref(&rt->dst) : NULL;
if (!neigh || (neigh->nud_state & NUD_VALID))
goto out;
read_lock_bh(&neigh->lock);
@@ -397,7 +404,7 @@ static void rt6_probe(struct rt6_info *rt)
target = (struct in6_addr *)&neigh->primary_key;
addrconf_addr_solict_mult(target, &mcaddr);
- ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
+ ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
} else {
read_unlock_bh(&neigh->lock);
}
@@ -415,7 +422,7 @@ static inline void rt6_probe(struct rt6_info *rt)
*/
static inline int rt6_check_dev(struct rt6_info *rt, int oif)
{
- struct net_device *dev = rt->rt6i_dev;
+ struct net_device *dev = rt->dst.dev;
if (!oif || dev->ifindex == oif)
return 2;
if ((dev->flags & IFF_LOOPBACK) &&
@@ -430,7 +437,7 @@ static inline int rt6_check_neigh(struct rt6_info *rt)
int m;
rcu_read_lock();
- neigh = dst_get_neighbour(&rt->dst);
+ neigh = dst_get_neighbour_noref(&rt->dst);
if (rt->rt6i_flags & RTF_NONEXTHOP ||
!(rt->rt6i_flags & RTF_GATEWAY))
m = 1;
@@ -516,9 +523,6 @@ static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
struct rt6_info *match, *rt0;
struct net *net;
- RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
- __func__, fn->leaf, oif);
-
rt0 = fn->rr_ptr;
if (!rt0)
fn->rr_ptr = rt0 = fn->leaf;
@@ -537,10 +541,7 @@ static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
fn->rr_ptr = next;
}
- RT6_TRACE("%s() => %p\n",
- __func__, match);
-
- net = dev_net(rt0->rt6i_dev);
+ net = dev_net(rt0->dst.dev);
return match ? match : net->ipv6.ip6_null_entry;
}
@@ -609,7 +610,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
if (!addrconf_finite_timeout(lifetime)) {
rt->rt6i_flags &= ~RTF_EXPIRES;
} else {
- rt->rt6i_expires = jiffies + HZ * lifetime;
+ rt->dst.expires = jiffies + HZ * lifetime;
rt->rt6i_flags |= RTF_EXPIRES;
}
dst_release(&rt->dst);
@@ -634,7 +635,7 @@ do { \
goto restart; \
} \
} \
-} while(0)
+} while (0)
static struct rt6_info *ip6_pol_route_lookup(struct net *net,
struct fib6_table *table,
@@ -656,6 +657,13 @@ out:
}
+struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
+ int flags)
+{
+ return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
+}
+EXPORT_SYMBOL_GPL(ip6_route_lookup);
+
struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
const struct in6_addr *saddr, int oif, int strict)
{
@@ -704,7 +712,7 @@ static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
int ip6_ins_rt(struct rt6_info *rt)
{
struct nl_info info = {
- .nl_net = dev_net(rt->rt6i_dev),
+ .nl_net = dev_net(rt->dst.dev),
};
return __ip6_ins_rt(rt, &info);
}
@@ -722,29 +730,27 @@ static struct rt6_info *rt6_alloc_cow(const struct rt6_info *ort,
rt = ip6_rt_copy(ort, daddr);
if (rt) {
- struct neighbour *neigh;
int attempts = !in_softirq();
- if (!(rt->rt6i_flags&RTF_GATEWAY)) {
- if (rt->rt6i_dst.plen != 128 &&
+ if (!(rt->rt6i_flags & RTF_GATEWAY)) {
+ if (ort->rt6i_dst.plen != 128 &&
ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
rt->rt6i_flags |= RTF_ANYCAST;
- ipv6_addr_copy(&rt->rt6i_gateway, daddr);
+ rt->rt6i_gateway = *daddr;
}
rt->rt6i_flags |= RTF_CACHE;
#ifdef CONFIG_IPV6_SUBTREES
if (rt->rt6i_src.plen && saddr) {
- ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
+ rt->rt6i_src.addr = *saddr;
rt->rt6i_src.plen = 128;
}
#endif
retry:
- neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
- if (IS_ERR(neigh)) {
- struct net *net = dev_net(rt->rt6i_dev);
+ if (rt6_bind_neighbour(rt, rt->dst.dev)) {
+ struct net *net = dev_net(rt->dst.dev);
int saved_rt_min_interval =
net->ipv6.sysctl.ip6_rt_gc_min_interval;
int saved_rt_elasticity =
@@ -769,8 +775,6 @@ static struct rt6_info *rt6_alloc_cow(const struct rt6_info *ort,
dst_free(&rt->dst);
return NULL;
}
- dst_set_neighbour(&rt->dst, neigh);
-
}
return rt;
@@ -783,7 +787,7 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
if (rt) {
rt->rt6i_flags |= RTF_CACHE;
- dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_raw(&ort->dst)));
+ dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_noref_raw(&ort->dst)));
}
return rt;
}
@@ -817,7 +821,7 @@ restart:
dst_hold(&rt->dst);
read_unlock_bh(&table->tb6_lock);
- if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
+ if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
else if (!(rt->dst.flags & DST_HOST))
nrt = rt6_alloc_clone(rt, &fl6->daddr);
@@ -873,7 +877,7 @@ void ip6_route_input(struct sk_buff *skb)
.flowi6_iif = skb->dev->ifindex,
.daddr = iph->daddr,
.saddr = iph->saddr,
- .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
+ .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
.flowi6_mark = skb->mark,
.flowi6_proto = iph->nexthdr,
};
@@ -930,9 +934,9 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
rt->rt6i_idev = ort->rt6i_idev;
if (rt->rt6i_idev)
in6_dev_hold(rt->rt6i_idev);
- rt->rt6i_expires = 0;
+ rt->dst.expires = 0;
- ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
+ rt->rt6i_gateway = ort->rt6i_gateway;
rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
rt->rt6i_metric = 0;
@@ -995,7 +999,7 @@ static void ip6_link_failure(struct sk_buff *skb)
rt = (struct rt6_info *) skb_dst(skb);
if (rt) {
- if (rt->rt6i_flags&RTF_CACHE) {
+ if (rt->rt6i_flags & RTF_CACHE) {
dst_set_expires(&rt->dst, 0);
rt->rt6i_flags |= RTF_EXPIRES;
} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
@@ -1041,10 +1045,15 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst)
return mtu;
}
-static unsigned int ip6_default_mtu(const struct dst_entry *dst)
+static unsigned int ip6_mtu(const struct dst_entry *dst)
{
- unsigned int mtu = IPV6_MIN_MTU;
struct inet6_dev *idev;
+ unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
+
+ if (mtu)
+ return mtu;
+
+ mtu = IPV6_MIN_MTU;
rcu_read_lock();
idev = __in6_dev_get(dst->dev);
@@ -1060,34 +1069,38 @@ static DEFINE_SPINLOCK(icmp6_dst_lock);
struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
struct neighbour *neigh,
- const struct in6_addr *addr)
+ struct flowi6 *fl6)
{
+ struct dst_entry *dst;
struct rt6_info *rt;
struct inet6_dev *idev = in6_dev_get(dev);
struct net *net = dev_net(dev);
- if (unlikely(idev == NULL))
+ if (unlikely(!idev))
return NULL;
rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
- if (unlikely(rt == NULL)) {
+ if (unlikely(!rt)) {
in6_dev_put(idev);
+ dst = ERR_PTR(-ENOMEM);
goto out;
}
if (neigh)
neigh_hold(neigh);
else {
- neigh = ndisc_get_neigh(dev, addr);
- if (IS_ERR(neigh))
- neigh = NULL;
+ neigh = ip6_neigh_lookup(&rt->dst, &fl6->daddr);
+ if (IS_ERR(neigh)) {
+ dst_free(&rt->dst);
+ return ERR_CAST(neigh);
+ }
}
rt->dst.flags |= DST_HOST;
rt->dst.output = ip6_output;
dst_set_neighbour(&rt->dst, neigh);
atomic_set(&rt->dst.__refcnt, 1);
- ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
+ rt->rt6i_dst.addr = fl6->daddr;
rt->rt6i_dst.plen = 128;
rt->rt6i_idev = idev;
dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
@@ -1099,8 +1112,10 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
fib6_force_start_gc(net);
+ dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
+
out:
- return &rt->dst;
+ return dst;
}
int icmp6_dst_gc(void)
@@ -1230,21 +1245,30 @@ int ip6_route_add(struct fib6_config *cfg)
if (cfg->fc_metric == 0)
cfg->fc_metric = IP6_RT_PRIO_USER;
- table = fib6_new_table(net, cfg->fc_table);
- if (table == NULL) {
- err = -ENOBUFS;
- goto out;
+ err = -ENOBUFS;
+ if (cfg->fc_nlinfo.nlh &&
+ !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
+ table = fib6_get_table(net, cfg->fc_table);
+ if (!table) {
+ printk(KERN_WARNING "IPv6: NLM_F_CREATE should be specified when creating new route\n");
+ table = fib6_new_table(net, cfg->fc_table);
+ }
+ } else {
+ table = fib6_new_table(net, cfg->fc_table);
}
+ if (!table)
+ goto out;
+
rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT);
- if (rt == NULL) {
+ if (!rt) {
err = -ENOMEM;
goto out;
}
rt->dst.obsolete = -1;
- rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
+ rt->dst.expires = (cfg->fc_flags & RTF_EXPIRES) ?
jiffies + clock_t_to_jiffies(cfg->fc_expires) :
0;
@@ -1287,8 +1311,9 @@ int ip6_route_add(struct fib6_config *cfg)
they would result in kernel looping; promote them to reject routes
*/
if ((cfg->fc_flags & RTF_REJECT) ||
- (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
- && !(cfg->fc_flags&RTF_LOCAL))) {
+ (dev && (dev->flags & IFF_LOOPBACK) &&
+ !(addr_type & IPV6_ADDR_LOOPBACK) &&
+ !(cfg->fc_flags & RTF_LOCAL))) {
/* hold loopback dev/idev if we haven't done so. */
if (dev != net->loopback_dev) {
if (dev) {
@@ -1315,7 +1340,7 @@ int ip6_route_add(struct fib6_config *cfg)
int gwa_type;
gw_addr = &cfg->fc_gateway;
- ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
+ rt->rt6i_gateway = *gw_addr;
gwa_type = ipv6_addr_type(gw_addr);
if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
@@ -1329,26 +1354,26 @@ int ip6_route_add(struct fib6_config *cfg)
some exceptions. --ANK
*/
err = -EINVAL;
- if (!(gwa_type&IPV6_ADDR_UNICAST))
+ if (!(gwa_type & IPV6_ADDR_UNICAST))
goto out;
grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
err = -EHOSTUNREACH;
- if (grt == NULL)
+ if (!grt)
goto out;
if (dev) {
- if (dev != grt->rt6i_dev) {
+ if (dev != grt->dst.dev) {
dst_release(&grt->dst);
goto out;
}
} else {
- dev = grt->rt6i_dev;
+ dev = grt->dst.dev;
idev = grt->rt6i_idev;
dev_hold(dev);
in6_dev_hold(grt->rt6i_idev);
}
- if (!(grt->rt6i_flags&RTF_GATEWAY))
+ if (!(grt->rt6i_flags & RTF_GATEWAY))
err = 0;
dst_release(&grt->dst);
@@ -1356,12 +1381,12 @@ int ip6_route_add(struct fib6_config *cfg)
goto out;
}
err = -EINVAL;
- if (dev == NULL || (dev->flags&IFF_LOOPBACK))
+ if (!dev || (dev->flags & IFF_LOOPBACK))
goto out;
}
err = -ENODEV;
- if (dev == NULL)
+ if (!dev)
goto out;
if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
@@ -1369,18 +1394,15 @@ int ip6_route_add(struct fib6_config *cfg)
err = -EINVAL;
goto out;
}
- ipv6_addr_copy(&rt->rt6i_prefsrc.addr, &cfg->fc_prefsrc);
+ rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
rt->rt6i_prefsrc.plen = 128;
} else
rt->rt6i_prefsrc.plen = 0;
if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
- struct neighbour *n = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
- if (IS_ERR(n)) {
- err = PTR_ERR(n);
+ err = rt6_bind_neighbour(rt, dev);
+ if (err)
goto out;
- }
- dst_set_neighbour(&rt->dst, n);
}
rt->rt6i_flags = cfg->fc_flags;
@@ -1426,7 +1448,7 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
{
int err;
struct fib6_table *table;
- struct net *net = dev_net(rt->rt6i_dev);
+ struct net *net = dev_net(rt->dst.dev);
if (rt == net->ipv6.ip6_null_entry)
return -ENOENT;
@@ -1445,7 +1467,7 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
int ip6_del_rt(struct rt6_info *rt)
{
struct nl_info info = {
- .nl_net = dev_net(rt->rt6i_dev),
+ .nl_net = dev_net(rt->dst.dev),
};
return __ip6_del_rt(rt, &info);
}
@@ -1458,7 +1480,7 @@ static int ip6_route_del(struct fib6_config *cfg)
int err = -ESRCH;
table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
- if (table == NULL)
+ if (!table)
return err;
read_lock_bh(&table->tb6_lock);
@@ -1470,8 +1492,8 @@ static int ip6_route_del(struct fib6_config *cfg)
if (fn) {
for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
if (cfg->fc_ifindex &&
- (rt->rt6i_dev == NULL ||
- rt->rt6i_dev->ifindex != cfg->fc_ifindex))
+ (!rt->dst.dev ||
+ rt->dst.dev->ifindex != cfg->fc_ifindex))
continue;
if (cfg->fc_flags & RTF_GATEWAY &&
!ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
@@ -1533,7 +1555,7 @@ restart:
continue;
if (!(rt->rt6i_flags & RTF_GATEWAY))
continue;
- if (fl6->flowi6_oif != rt->rt6i_dev->ifindex)
+ if (fl6->flowi6_oif != rt->dst.dev->ifindex)
continue;
if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
continue;
@@ -1566,7 +1588,7 @@ static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
},
};
- ipv6_addr_copy(&rdfl.gateway, gateway);
+ rdfl.gateway = *gateway;
if (rt6_need_strict(dest))
flags |= RT6_LOOKUP_F_IFACE;
@@ -1611,18 +1633,18 @@ void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
dst_confirm(&rt->dst);
/* Duplicate redirect: silently ignore. */
- if (neigh == dst_get_neighbour_raw(&rt->dst))
+ if (neigh == dst_get_neighbour_noref_raw(&rt->dst))
goto out;
nrt = ip6_rt_copy(rt, dest);
- if (nrt == NULL)
+ if (!nrt)
goto out;
nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
if (on_link)
nrt->rt6i_flags &= ~RTF_GATEWAY;
- ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
+ nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
if (ip6_ins_rt(nrt))
@@ -1632,7 +1654,7 @@ void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
netevent.new = &nrt->dst;
call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
- if (rt->rt6i_flags&RTF_CACHE) {
+ if (rt->rt6i_flags & RTF_CACHE) {
ip6_del_rt(rt);
return;
}
@@ -1653,7 +1675,7 @@ static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr
int allfrag = 0;
again:
rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
- if (rt == NULL)
+ if (!rt)
return;
if (rt6_check_expired(rt)) {
@@ -1703,7 +1725,7 @@ again:
1. It is connected route. Action: COW
2. It is gatewayed route or NONEXTHOP route. Action: clone it.
*/
- if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
+ if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
nrt = rt6_alloc_cow(rt, daddr, saddr);
else
nrt = rt6_alloc_clone(rt, daddr);
@@ -1759,7 +1781,7 @@ void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *sad
static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
const struct in6_addr *dest)
{
- struct net *net = dev_net(ort->rt6i_dev);
+ struct net *net = dev_net(ort->dst.dev);
struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
ort->dst.dev, 0);
@@ -1768,7 +1790,7 @@ static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
rt->dst.output = ort->dst.output;
rt->dst.flags |= DST_HOST;
- ipv6_addr_copy(&rt->rt6i_dst.addr, dest);
+ rt->rt6i_dst.addr = *dest;
rt->rt6i_dst.plen = 128;
dst_copy_metrics(&rt->dst, &ort->dst);
rt->dst.error = ort->dst.error;
@@ -1776,9 +1798,9 @@ static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
if (rt->rt6i_idev)
in6_dev_hold(rt->rt6i_idev);
rt->dst.lastuse = jiffies;
- rt->rt6i_expires = 0;
+ rt->dst.expires = 0;
- ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
+ rt->rt6i_gateway = ort->rt6i_gateway;
rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
rt->rt6i_metric = 0;
@@ -1801,7 +1823,7 @@ static struct rt6_info *rt6_get_route_info(struct net *net,
struct fib6_table *table;
table = fib6_get_table(net, RT6_TABLE_INFO);
- if (table == NULL)
+ if (!table)
return NULL;
write_lock_bh(&table->tb6_lock);
@@ -1810,7 +1832,7 @@ static struct rt6_info *rt6_get_route_info(struct net *net,
goto out;
for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
- if (rt->rt6i_dev->ifindex != ifindex)
+ if (rt->dst.dev->ifindex != ifindex)
continue;
if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
continue;
@@ -1841,8 +1863,8 @@ static struct rt6_info *rt6_add_route_info(struct net *net,
.fc_nlinfo.nl_net = net,
};
- ipv6_addr_copy(&cfg.fc_dst, prefix);
- ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
+ cfg.fc_dst = *prefix;
+ cfg.fc_gateway = *gwaddr;
/* We should treat it as a default route if prefix length is 0. */
if (!prefixlen)
@@ -1860,12 +1882,12 @@ struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_dev
struct fib6_table *table;
table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
- if (table == NULL)
+ if (!table)
return NULL;
write_lock_bh(&table->tb6_lock);
for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
- if (dev == rt->rt6i_dev &&
+ if (dev == rt->dst.dev &&
((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
ipv6_addr_equal(&rt->rt6i_gateway, addr))
break;
@@ -1891,7 +1913,7 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
.fc_nlinfo.nl_net = dev_net(dev),
};
- ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
+ cfg.fc_gateway = *gwaddr;
ip6_route_add(&cfg);
@@ -1905,7 +1927,7 @@ void rt6_purge_dflt_routers(struct net *net)
/* NOTE: Keep consistent with rt6_get_dflt_router */
table = fib6_get_table(net, RT6_TABLE_DFLT);
- if (table == NULL)
+ if (!table)
return;
restart:
@@ -1937,9 +1959,9 @@ static void rtmsg_to_fib6_config(struct net *net,
cfg->fc_nlinfo.nl_net = net;
- ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
- ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
- ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
+ cfg->fc_dst = rtmsg->rtmsg_dst;
+ cfg->fc_src = rtmsg->rtmsg_src;
+ cfg->fc_gateway = rtmsg->rtmsg_gateway;
}
int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
@@ -2038,14 +2060,14 @@ static int ip6_pkt_prohibit_out(struct sk_buff *skb)
struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
const struct in6_addr *addr,
- int anycast)
+ bool anycast)
{
struct net *net = dev_net(idev->dev);
struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
net->loopback_dev, 0);
- struct neighbour *neigh;
+ int err;
- if (rt == NULL) {
+ if (!rt) {
if (net_ratelimit())
pr_warning("IPv6: Maximum number of routes reached,"
" consider increasing route/max_size.\n");
@@ -2065,15 +2087,13 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
rt->rt6i_flags |= RTF_ANYCAST;
else
rt->rt6i_flags |= RTF_LOCAL;
- neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
- if (IS_ERR(neigh)) {
+ err = rt6_bind_neighbour(rt, rt->dst.dev);
+ if (err) {
dst_free(&rt->dst);
-
- return ERR_CAST(neigh);
+ return ERR_PTR(err);
}
- dst_set_neighbour(&rt->dst, neigh);
- ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
+ rt->rt6i_dst.addr = *addr;
rt->rt6i_dst.plen = 128;
rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
@@ -2091,7 +2111,7 @@ int ip6_route_get_saddr(struct net *net,
struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
int err = 0;
if (rt->rt6i_prefsrc.plen)
- ipv6_addr_copy(saddr, &rt->rt6i_prefsrc.addr);
+ *saddr = rt->rt6i_prefsrc.addr;
else
err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
daddr, prefs, saddr);
@@ -2111,7 +2131,7 @@ static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
struct net *net = ((struct arg_dev_net_ip *)arg)->net;
struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
- if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
+ if (((void *)rt->dst.dev == dev || !dev) &&
rt != net->ipv6.ip6_null_entry &&
ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
/* remove prefsrc entry */
@@ -2141,11 +2161,10 @@ static int fib6_ifdown(struct rt6_info *rt, void *arg)
const struct arg_dev_net *adn = arg;
const struct net_device *dev = adn->dev;
- if ((rt->rt6i_dev == dev || dev == NULL) &&
- rt != adn->net->ipv6.ip6_null_entry) {
- RT6_TRACE("deleted by ifdown %p\n", rt);
+ if ((rt->dst.dev == dev || !dev) &&
+ rt != adn->net->ipv6.ip6_null_entry)
return -1;
- }
+
return 0;
}
@@ -2178,7 +2197,7 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
*/
idev = __in6_dev_get(arg->dev);
- if (idev == NULL)
+ if (!idev)
return 0;
/* For administrative MTU increase, there is no way to discover
@@ -2195,7 +2214,7 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
also have the lowest MTU, TOO BIG MESSAGE will be lead to
PMTU discouvery.
*/
- if (rt->rt6i_dev == arg->dev &&
+ if (rt->dst.dev == arg->dev &&
!dst_metric_locked(&rt->dst, RTAX_MTU) &&
(dst_mtu(&rt->dst) >= arg->mtu ||
(dst_mtu(&rt->dst) < arg->mtu &&
@@ -2344,11 +2363,13 @@ static int rt6_fill_node(struct net *net,
int iif, int type, u32 pid, u32 seq,
int prefix, int nowait, unsigned int flags)
{
+ const struct inet_peer *peer;
struct rtmsg *rtm;
struct nlmsghdr *nlh;
long expires;
u32 table;
struct neighbour *n;
+ u32 ts, tsage;
if (prefix) { /* user wants prefix routes only */
if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
@@ -2358,7 +2379,7 @@ static int rt6_fill_node(struct net *net,
}
nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
- if (nlh == NULL)
+ if (!nlh)
return -EMSGSIZE;
rtm = nlmsg_data(nlh);
@@ -2372,25 +2393,25 @@ static int rt6_fill_node(struct net *net,
table = RT6_TABLE_UNSPEC;
rtm->rtm_table = table;
NLA_PUT_U32(skb, RTA_TABLE, table);
- if (rt->rt6i_flags&RTF_REJECT)
+ if (rt->rt6i_flags & RTF_REJECT)
rtm->rtm_type = RTN_UNREACHABLE;
- else if (rt->rt6i_flags&RTF_LOCAL)
+ else if (rt->rt6i_flags & RTF_LOCAL)
rtm->rtm_type = RTN_LOCAL;
- else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
+ else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
rtm->rtm_type = RTN_LOCAL;
else
rtm->rtm_type = RTN_UNICAST;
rtm->rtm_flags = 0;
rtm->rtm_scope = RT_SCOPE_UNIVERSE;
rtm->rtm_protocol = rt->rt6i_protocol;
- if (rt->rt6i_flags&RTF_DYNAMIC)
+ if (rt->rt6i_flags & RTF_DYNAMIC)
rtm->rtm_protocol = RTPROT_REDIRECT;
else if (rt->rt6i_flags & RTF_ADDRCONF)
rtm->rtm_protocol = RTPROT_KERNEL;
- else if (rt->rt6i_flags&RTF_DEFAULT)
+ else if (rt->rt6i_flags & RTF_DEFAULT)
rtm->rtm_protocol = RTPROT_RA;
- if (rt->rt6i_flags&RTF_CACHE)
+ if (rt->rt6i_flags & RTF_CACHE)
rtm->rtm_flags |= RTM_F_CLONED;
if (dst) {
@@ -2430,7 +2451,7 @@ static int rt6_fill_node(struct net *net,
if (rt->rt6i_prefsrc.plen) {
struct in6_addr saddr_buf;
- ipv6_addr_copy(&saddr_buf, &rt->rt6i_prefsrc.addr);
+ saddr_buf = rt->rt6i_prefsrc.addr;
NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
}
@@ -2438,24 +2459,31 @@ static int rt6_fill_node(struct net *net,
goto nla_put_failure;
rcu_read_lock();
- n = dst_get_neighbour(&rt->dst);
+ n = dst_get_neighbour_noref(&rt->dst);
if (n)
NLA_PUT(skb, RTA_GATEWAY, 16, &n->primary_key);
rcu_read_unlock();
if (rt->dst.dev)
- NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
+ NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex);
NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
if (!(rt->rt6i_flags & RTF_EXPIRES))
expires = 0;
- else if (rt->rt6i_expires - jiffies < INT_MAX)
- expires = rt->rt6i_expires - jiffies;
+ else if (rt->dst.expires - jiffies < INT_MAX)
+ expires = rt->dst.expires - jiffies;
else
expires = INT_MAX;
- if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
+ peer = rt->rt6i_peer;
+ ts = tsage = 0;
+ if (peer && peer->tcp_ts_stamp) {
+ ts = peer->tcp_ts;
+ tsage = get_seconds() - peer->tcp_ts_stamp;
+ }
+
+ if (rtnl_put_cacheinfo(skb, &rt->dst, 0, ts, tsage,
expires, rt->dst.error) < 0)
goto nla_put_failure;
@@ -2504,14 +2532,14 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
goto errout;
- ipv6_addr_copy(&fl6.saddr, nla_data(tb[RTA_SRC]));
+ fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
}
if (tb[RTA_DST]) {
if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
goto errout;
- ipv6_addr_copy(&fl6.daddr, nla_data(tb[RTA_DST]));
+ fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
}
if (tb[RTA_IIF])
@@ -2530,7 +2558,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
}
skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
- if (skb == NULL) {
+ if (!skb) {
err = -ENOBUFS;
goto errout;
}
@@ -2565,10 +2593,10 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
int err;
err = -ENOBUFS;
- seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
+ seq = info->nlh ? info->nlh->nlmsg_seq : 0;
skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
- if (skb == NULL)
+ if (!skb)
goto errout;
err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
@@ -2635,7 +2663,7 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg)
seq_puts(m, "00000000000000000000000000000000 00 ");
#endif
rcu_read_lock();
- n = dst_get_neighbour(&rt->dst);
+ n = dst_get_neighbour_noref(&rt->dst);
if (n) {
seq_printf(m, "%pi6", n->primary_key);
} else {
@@ -2645,14 +2673,14 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg)
seq_printf(m, " %08x %08x %08x %08x %8s\n",
rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
rt->dst.__use, rt->rt6i_flags,
- rt->rt6i_dev ? rt->rt6i_dev->name : "");
+ rt->dst.dev ? rt->dst.dev->name : "");
return 0;
}
static int ipv6_route_show(struct seq_file *m, void *v)
{
struct net *net = (struct net *)m->private;
- fib6_clean_all(net, rt6_info_route, 0, m);
+ fib6_clean_all_ro(net, rt6_info_route, 0, m);
return 0;
}
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index a7a18602a046..3b6dac956bb0 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -91,7 +91,7 @@ struct pcpu_tstats {
unsigned long rx_bytes;
unsigned long tx_packets;
unsigned long tx_bytes;
-};
+} __attribute__((aligned(4*sizeof(unsigned long))));
static struct net_device_stats *ipip6_get_stats(struct net_device *dev)
{
@@ -263,6 +263,8 @@ static struct ip_tunnel *ipip6_tunnel_locate(struct net *net,
if (register_netdevice(dev) < 0)
goto failed_free;
+ strcpy(nt->parms.name, dev->name);
+
dev_hold(dev);
ipip6_tunnel_link(sitn, nt);
@@ -680,7 +682,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
struct neighbour *neigh = NULL;
if (skb_dst(skb))
- neigh = dst_get_neighbour(skb_dst(skb));
+ neigh = dst_get_neighbour_noref(skb_dst(skb));
if (neigh == NULL) {
if (net_ratelimit())
@@ -705,7 +707,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
struct neighbour *neigh = NULL;
if (skb_dst(skb))
- neigh = dst_get_neighbour(skb_dst(skb));
+ neigh = dst_get_neighbour_noref(skb_dst(skb));
if (neigh == NULL) {
if (net_ratelimit())
@@ -914,7 +916,7 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
goto done;
#ifdef CONFIG_IPV6_SIT_6RD
} else {
- ipv6_addr_copy(&ip6rd.prefix, &t->ip6rd.prefix);
+ ip6rd.prefix = t->ip6rd.prefix;
ip6rd.relay_prefix = t->ip6rd.relay_prefix;
ip6rd.prefixlen = t->ip6rd.prefixlen;
ip6rd.relay_prefixlen = t->ip6rd.relay_prefixlen;
@@ -1082,7 +1084,7 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
if (relay_prefix != ip6rd.relay_prefix)
goto done;
- ipv6_addr_copy(&t->ip6rd.prefix, &prefix);
+ t->ip6rd.prefix = prefix;
t->ip6rd.relay_prefix = relay_prefix;
t->ip6rd.prefixlen = ip6rd.prefixlen;
t->ip6rd.relay_prefixlen = ip6rd.relay_prefixlen;
@@ -1144,7 +1146,6 @@ static int ipip6_tunnel_init(struct net_device *dev)
struct ip_tunnel *tunnel = netdev_priv(dev);
tunnel->dev = dev;
- strcpy(tunnel->parms.name, dev->name);
memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
@@ -1207,6 +1208,7 @@ static void __net_exit sit_destroy_tunnels(struct sit_net *sitn, struct list_hea
static int __net_init sit_init_net(struct net *net)
{
struct sit_net *sitn = net_generic(net, sit_net_id);
+ struct ip_tunnel *t;
int err;
sitn->tunnels[0] = sitn->tunnels_wc;
@@ -1231,6 +1233,9 @@ static int __net_init sit_init_net(struct net *net)
if ((err = register_netdev(sitn->fb_tunnel_dev)))
goto err_reg_dev;
+ t = netdev_priv(sitn->fb_tunnel_dev);
+
+ strcpy(t->parms.name, sitn->fb_tunnel_dev->name);
return 0;
err_reg_dev:
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 5a0d6648bbbc..8e951d8d3b81 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -200,8 +200,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
req->mss = mss;
ireq->rmt_port = th->source;
ireq->loc_port = th->dest;
- ipv6_addr_copy(&ireq6->rmt_addr, &ipv6_hdr(skb)->saddr);
- ipv6_addr_copy(&ireq6->loc_addr, &ipv6_hdr(skb)->daddr);
+ ireq6->rmt_addr = ipv6_hdr(skb)->saddr;
+ ireq6->loc_addr = ipv6_hdr(skb)->daddr;
if (ipv6_opt_accepted(sk, skb) ||
np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
@@ -237,9 +237,9 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
struct flowi6 fl6;
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_proto = IPPROTO_TCP;
- ipv6_addr_copy(&fl6.daddr, &ireq6->rmt_addr);
+ fl6.daddr = ireq6->rmt_addr;
final_p = fl6_update_dst(&fl6, np->opt, &final);
- ipv6_addr_copy(&fl6.saddr, &ireq6->loc_addr);
+ fl6.saddr = ireq6->loc_addr;
fl6.flowi6_oif = sk->sk_bound_dev_if;
fl6.flowi6_mark = sk->sk_mark;
fl6.fl6_dport = inet_rsk(req)->rmt_port;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 36131d122a6f..906c7ca43542 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -62,6 +62,7 @@
#include <net/netdma.h>
#include <net/inet_common.h>
#include <net/secure_seq.h>
+#include <net/tcp_memcontrol.h>
#include <asm/uaccess.h>
@@ -153,7 +154,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
if (flowlabel == NULL)
return -EINVAL;
- ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
+ usin->sin6_addr = flowlabel->dst;
fl6_sock_release(flowlabel);
}
}
@@ -195,7 +196,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
tp->write_seq = 0;
}
- ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
+ np->daddr = usin->sin6_addr;
np->flow_label = fl6.flowlabel;
/*
@@ -244,9 +245,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
saddr = &np->rcv_saddr;
fl6.flowi6_proto = IPPROTO_TCP;
- ipv6_addr_copy(&fl6.daddr, &np->daddr);
- ipv6_addr_copy(&fl6.saddr,
- (saddr ? saddr : &np->saddr));
+ fl6.daddr = np->daddr;
+ fl6.saddr = saddr ? *saddr : np->saddr;
fl6.flowi6_oif = sk->sk_bound_dev_if;
fl6.flowi6_mark = sk->sk_mark;
fl6.fl6_dport = usin->sin6_port;
@@ -264,11 +264,11 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
if (saddr == NULL) {
saddr = &fl6.saddr;
- ipv6_addr_copy(&np->rcv_saddr, saddr);
+ np->rcv_saddr = *saddr;
}
/* set the source address */
- ipv6_addr_copy(&np->saddr, saddr);
+ np->saddr = *saddr;
inet->inet_rcv_saddr = LOOPBACK4_IPV6;
sk->sk_gso_type = SKB_GSO_TCPV6;
@@ -398,8 +398,8 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
*/
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_proto = IPPROTO_TCP;
- ipv6_addr_copy(&fl6.daddr, &np->daddr);
- ipv6_addr_copy(&fl6.saddr, &np->saddr);
+ fl6.daddr = np->daddr;
+ fl6.saddr = np->saddr;
fl6.flowi6_oif = sk->sk_bound_dev_if;
fl6.flowi6_mark = sk->sk_mark;
fl6.fl6_dport = inet->inet_dport;
@@ -489,8 +489,8 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_proto = IPPROTO_TCP;
- ipv6_addr_copy(&fl6.daddr, &treq->rmt_addr);
- ipv6_addr_copy(&fl6.saddr, &treq->loc_addr);
+ fl6.daddr = treq->rmt_addr;
+ fl6.saddr = treq->loc_addr;
fl6.flowlabel = 0;
fl6.flowi6_oif = treq->iif;
fl6.flowi6_mark = sk->sk_mark;
@@ -512,7 +512,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
if (skb) {
__tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr);
- ipv6_addr_copy(&fl6.daddr, &treq->rmt_addr);
+ fl6.daddr = treq->rmt_addr;
err = ip6_xmit(sk, skb, &fl6, opt, np->tclass);
err = net_xmit_eval(err);
}
@@ -617,8 +617,7 @@ static int tcp_v6_md5_do_add(struct sock *sk, const struct in6_addr *peer,
tp->md5sig_info->alloced6++;
}
- ipv6_addr_copy(&tp->md5sig_info->keys6[tp->md5sig_info->entries6].addr,
- peer);
+ tp->md5sig_info->keys6[tp->md5sig_info->entries6].addr = *peer;
tp->md5sig_info->keys6[tp->md5sig_info->entries6].base.key = newkey;
tp->md5sig_info->keys6[tp->md5sig_info->entries6].base.keylen = newkeylen;
@@ -750,8 +749,8 @@ static int tcp_v6_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
bp = &hp->md5_blk.ip6;
/* 1. TCP pseudo-header (RFC2460) */
- ipv6_addr_copy(&bp->saddr, saddr);
- ipv6_addr_copy(&bp->daddr, daddr);
+ bp->saddr = *saddr;
+ bp->daddr = *daddr;
bp->protocol = cpu_to_be32(IPPROTO_TCP);
bp->len = cpu_to_be32(nbytes);
@@ -1039,8 +1038,8 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
#endif
memset(&fl6, 0, sizeof(fl6));
- ipv6_addr_copy(&fl6.daddr, &ipv6_hdr(skb)->saddr);
- ipv6_addr_copy(&fl6.saddr, &ipv6_hdr(skb)->daddr);
+ fl6.daddr = ipv6_hdr(skb)->saddr;
+ fl6.saddr = ipv6_hdr(skb)->daddr;
buff->ip_summed = CHECKSUM_PARTIAL;
buff->csum = 0;
@@ -1250,11 +1249,18 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
tcp_openreq_init(req, &tmp_opt, skb);
treq = inet6_rsk(req);
- ipv6_addr_copy(&treq->rmt_addr, &ipv6_hdr(skb)->saddr);
- ipv6_addr_copy(&treq->loc_addr, &ipv6_hdr(skb)->daddr);
+ treq->rmt_addr = ipv6_hdr(skb)->saddr;
+ treq->loc_addr = ipv6_hdr(skb)->daddr;
if (!want_cookie || tmp_opt.tstamp_ok)
TCP_ECN_create_request(req, tcp_hdr(skb));
+ treq->iif = sk->sk_bound_dev_if;
+
+ /* So that link locals have meaning */
+ if (!sk->sk_bound_dev_if &&
+ ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
+ treq->iif = inet6_iif(skb);
+
if (!isn) {
struct inet_peer *peer = NULL;
@@ -1264,12 +1270,6 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
atomic_inc(&skb->users);
treq->pktopts = skb;
}
- treq->iif = sk->sk_bound_dev_if;
-
- /* So that link locals have meaning */
- if (!sk->sk_bound_dev_if &&
- ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
- treq->iif = inet6_iif(skb);
if (want_cookie) {
isn = cookie_v6_init_sequence(sk, skb, &req->mss);
@@ -1380,7 +1380,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
ipv6_addr_set_v4mapped(newinet->inet_saddr, &newnp->saddr);
- ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);
+ newnp->rcv_saddr = newnp->saddr;
inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
newsk->sk_backlog_rcv = tcp_v4_do_rcv;
@@ -1444,9 +1444,9 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
memcpy(newnp, np, sizeof(struct ipv6_pinfo));
- ipv6_addr_copy(&newnp->daddr, &treq->rmt_addr);
- ipv6_addr_copy(&newnp->saddr, &treq->loc_addr);
- ipv6_addr_copy(&newnp->rcv_saddr, &treq->loc_addr);
+ newnp->daddr = treq->rmt_addr;
+ newnp->saddr = treq->loc_addr;
+ newnp->rcv_saddr = treq->loc_addr;
newsk->sk_bound_dev_if = treq->iif;
/* Now IPv6 options...
@@ -1995,7 +1995,8 @@ static int tcp_v6_init_sock(struct sock *sk)
sk->sk_rcvbuf = sysctl_tcp_rmem[1];
local_bh_disable();
- percpu_counter_inc(&tcp_sockets_allocated);
+ sock_update_memcg(sk);
+ sk_sockets_allocated_inc(sk);
local_bh_enable();
return 0;
@@ -2214,7 +2215,6 @@ struct proto tcpv6_prot = {
.memory_allocated = &tcp_memory_allocated,
.memory_pressure = &tcp_memory_pressure,
.orphan_count = &tcp_orphan_count,
- .sysctl_mem = sysctl_tcp_mem,
.sysctl_wmem = sysctl_tcp_wmem,
.sysctl_rmem = sysctl_tcp_rmem,
.max_header = MAX_TCP_HEADER,
@@ -2228,6 +2228,9 @@ struct proto tcpv6_prot = {
.compat_setsockopt = compat_tcp_setsockopt,
.compat_getsockopt = compat_tcp_getsockopt,
#endif
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+ .proto_cgroup = tcp_proto_cgroup,
+#endif
};
static const struct inet6_protocol tcpv6_protocol = {
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 846f4757eb8d..4f96b5c63685 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -238,7 +238,7 @@ exact_match:
return result;
}
-static struct sock *__udp6_lib_lookup(struct net *net,
+struct sock *__udp6_lib_lookup(struct net *net,
const struct in6_addr *saddr, __be16 sport,
const struct in6_addr *daddr, __be16 dport,
int dif, struct udp_table *udptable)
@@ -305,6 +305,7 @@ begin:
rcu_read_unlock();
return result;
}
+EXPORT_SYMBOL_GPL(__udp6_lib_lookup);
static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb,
__be16 sport, __be16 dport,
@@ -340,7 +341,7 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk,
struct ipv6_pinfo *np = inet6_sk(sk);
struct inet_sock *inet = inet_sk(sk);
struct sk_buff *skb;
- unsigned int ulen;
+ unsigned int ulen, copied;
int peeked;
int err;
int is_udplite = IS_UDPLITE(sk);
@@ -363,9 +364,10 @@ try_again:
goto out;
ulen = skb->len - sizeof(struct udphdr);
- if (len > ulen)
- len = ulen;
- else if (len < ulen)
+ copied = len;
+ if (copied > ulen)
+ copied = ulen;
+ else if (copied < ulen)
msg->msg_flags |= MSG_TRUNC;
is_udp4 = (skb->protocol == htons(ETH_P_IP));
@@ -376,14 +378,14 @@ try_again:
* coverage checksum (UDP-Lite), do it before the copy.
*/
- if (len < ulen || UDP_SKB_CB(skb)->partial_cov) {
+ if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) {
if (udp_lib_checksum_complete(skb))
goto csum_copy_err;
}
if (skb_csum_unnecessary(skb))
err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr),
- msg->msg_iov,len);
+ msg->msg_iov, copied );
else {
err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov);
if (err == -EINVAL)
@@ -417,8 +419,7 @@ try_again:
ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr,
&sin6->sin6_addr);
else {
- ipv6_addr_copy(&sin6->sin6_addr,
- &ipv6_hdr(skb)->saddr);
+ sin6->sin6_addr = ipv6_hdr(skb)->saddr;
if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
sin6->sin6_scope_id = IP6CB(skb)->iif;
}
@@ -432,7 +433,7 @@ try_again:
datagram_recv_ctl(sk, msg, skb);
}
- err = len;
+ err = copied;
if (flags & MSG_TRUNC)
err = ulen;
@@ -538,7 +539,9 @@ int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
goto drop;
}
- if ((rc = ip_queue_rcv_skb(sk, skb)) < 0) {
+ skb_dst_drop(skb);
+ rc = sock_queue_rcv_skb(sk, skb);
+ if (rc < 0) {
/* Note that an ENOMEM error is charged twice */
if (rc == -ENOMEM)
UDP6_INC_STATS_BH(sock_net(sk),
@@ -1113,11 +1116,11 @@ do_udp_sendmsg:
fl6.flowi6_proto = sk->sk_protocol;
if (!ipv6_addr_any(daddr))
- ipv6_addr_copy(&fl6.daddr, daddr);
+ fl6.daddr = *daddr;
else
fl6.daddr.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */
if (ipv6_addr_any(&fl6.saddr) && !ipv6_addr_any(&np->saddr))
- ipv6_addr_copy(&fl6.saddr, &np->saddr);
+ fl6.saddr = np->saddr;
fl6.fl6_sport = inet->inet_sport;
final_p = fl6_update_dst(&fl6, opt, &final);
@@ -1298,7 +1301,8 @@ static int udp6_ufo_send_check(struct sk_buff *skb)
return 0;
}
-static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, u32 features)
+static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
+ netdev_features_t features)
{
struct sk_buff *segs = ERR_PTR(-EINVAL);
unsigned int mss;
diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c
index 3437d7d4eed6..a81ce9450750 100644
--- a/net/ipv6/xfrm6_mode_beet.c
+++ b/net/ipv6/xfrm6_mode_beet.c
@@ -72,8 +72,8 @@ static int xfrm6_beet_output(struct xfrm_state *x, struct sk_buff *skb)
top_iph->nexthdr = IPPROTO_BEETPH;
}
- ipv6_addr_copy(&top_iph->saddr, (struct in6_addr *)&x->props.saddr);
- ipv6_addr_copy(&top_iph->daddr, (struct in6_addr *)&x->id.daddr);
+ top_iph->saddr = *(struct in6_addr *)&x->props.saddr;
+ top_iph->daddr = *(struct in6_addr *)&x->id.daddr;
return 0;
}
@@ -99,8 +99,8 @@ static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb)
ip6h = ipv6_hdr(skb);
ip6h->payload_len = htons(skb->len - size);
- ipv6_addr_copy(&ip6h->daddr, (struct in6_addr *) &x->sel.daddr.a6);
- ipv6_addr_copy(&ip6h->saddr, (struct in6_addr *) &x->sel.saddr.a6);
+ ip6h->daddr = *(struct in6_addr *)&x->sel.daddr.a6;
+ ip6h->saddr = *(struct in6_addr *)&x->sel.saddr.a6;
err = 0;
out:
return err;
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index 4d6edff0498f..261e6e6f487e 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -55,8 +55,8 @@ static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
dsfield &= ~INET_ECN_MASK;
ipv6_change_dsfield(top_iph, 0, dsfield);
top_iph->hop_limit = ip6_dst_hoplimit(dst->child);
- ipv6_addr_copy(&top_iph->saddr, (const struct in6_addr *)&x->props.saddr);
- ipv6_addr_copy(&top_iph->daddr, (const struct in6_addr *)&x->id.daddr);
+ top_iph->saddr = *(struct in6_addr *)&x->props.saddr;
+ top_iph->daddr = *(struct in6_addr *)&x->id.daddr;
return 0;
}
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index faae41737fca..4eeff89c1aaa 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -49,7 +49,7 @@ static void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu)
struct sock *sk = skb->sk;
fl6.flowi6_oif = sk->sk_bound_dev_if;
- ipv6_addr_copy(&fl6.daddr, &ipv6_hdr(skb)->daddr);
+ fl6.daddr = ipv6_hdr(skb)->daddr;
ipv6_local_rxpmtu(sk, &fl6, mtu);
}
@@ -60,7 +60,7 @@ static void xfrm6_local_error(struct sk_buff *skb, u32 mtu)
struct sock *sk = skb->sk;
fl6.fl6_dport = inet_sk(sk)->inet_dport;
- ipv6_addr_copy(&fl6.daddr, &ipv6_hdr(skb)->daddr);
+ fl6.daddr = ipv6_hdr(skb)->daddr;
ipv6_local_error(sk, EMSGSIZE, &fl6, mtu);
}
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index d879f7efbd10..8ea65e032733 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -132,8 +132,8 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
memset(fl6, 0, sizeof(struct flowi6));
fl6->flowi6_mark = skb->mark;
- ipv6_addr_copy(&fl6->daddr, reverse ? &hdr->saddr : &hdr->daddr);
- ipv6_addr_copy(&fl6->saddr, reverse ? &hdr->daddr : &hdr->saddr);
+ fl6->daddr = reverse ? hdr->saddr : hdr->daddr;
+ fl6->saddr = reverse ? hdr->daddr : hdr->saddr;
while (nh + offset + 1 < skb->data ||
pskb_may_pull(skb, nh + offset + 1 - skb->data)) {
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
index f2d72b8a3faa..3f2f7c4ab721 100644
--- a/net/ipv6/xfrm6_state.c
+++ b/net/ipv6/xfrm6_state.c
@@ -27,8 +27,8 @@ __xfrm6_init_tempsel(struct xfrm_selector *sel, const struct flowi *fl)
/* Initialize temporary selector matching only
* to current session. */
- ipv6_addr_copy((struct in6_addr *)&sel->daddr, &fl6->daddr);
- ipv6_addr_copy((struct in6_addr *)&sel->saddr, &fl6->saddr);
+ *(struct in6_addr *)&sel->daddr = fl6->daddr;
+ *(struct in6_addr *)&sel->saddr = fl6->saddr;
sel->dport = xfrm_flowi_dport(fl, &fl6->uli);
sel->dport_mask = htons(0xffff);
sel->sport = xfrm_flowi_sport(fl, &fl6->uli);
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index c24f25ab67d3..bb14c3477680 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -2558,8 +2558,8 @@ bed:
self->errno = 0;
setup_timer(&self->watchdog, irda_discovery_timeout,
(unsigned long)self);
- self->watchdog.expires = jiffies + (val * HZ/1000);
- add_timer(&(self->watchdog));
+ mod_timer(&self->watchdog,
+ jiffies + msecs_to_jiffies(val));
/* Wait for IR-LMP to call us back */
__wait_event_interruptible(self->query_wait,
diff --git a/net/irda/irlan/irlan_common.c b/net/irda/irlan/irlan_common.c
index 779117636270..579617cca125 100644
--- a/net/irda/irlan/irlan_common.c
+++ b/net/irda/irlan/irlan_common.c
@@ -67,7 +67,7 @@ static void *ckey;
static void *skey;
/* Module parameters */
-static int eth; /* Use "eth" or "irlan" name for devices */
+static bool eth; /* Use "eth" or "irlan" name for devices */
static int access = ACCESS_PEER; /* PEER, DIRECT or HOSTED */
#ifdef CONFIG_PROC_FS
diff --git a/net/irda/irttp.c b/net/irda/irttp.c
index 32e3bb026110..5c93f2952b08 100644
--- a/net/irda/irttp.c
+++ b/net/irda/irttp.c
@@ -1461,14 +1461,12 @@ struct tsap_cb *irttp_dup(struct tsap_cb *orig, void *instance)
}
/* Allocate a new instance */
- new = kmalloc(sizeof(struct tsap_cb), GFP_ATOMIC);
+ new = kmemdup(orig, sizeof(struct tsap_cb), GFP_ATOMIC);
if (!new) {
IRDA_DEBUG(0, "%s(), unable to kmalloc\n", __func__);
spin_unlock_irqrestore(&irttp->tsaps->hb_spinlock, flags);
return NULL;
}
- /* Dup */
- memcpy(new, orig, sizeof(struct tsap_cb));
spin_lock_init(&new->lock);
/* We don't need the old instance any more */
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 274d150320c0..d5c5b8fd1d01 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -130,6 +130,17 @@ static inline void low_nmcpy(unsigned char *dst, char *src)
memcpy(&dst[8], src, 8);
}
+static void iucv_skb_queue_purge(struct sk_buff_head *list)
+{
+ struct sk_buff *skb;
+
+ while ((skb = skb_dequeue(list)) != NULL) {
+ if (skb->dev)
+ dev_put(skb->dev);
+ kfree_skb(skb);
+ }
+}
+
static int afiucv_pm_prepare(struct device *dev)
{
#ifdef CONFIG_PM_DEBUG
@@ -164,10 +175,9 @@ static int afiucv_pm_freeze(struct device *dev)
read_lock(&iucv_sk_list.lock);
sk_for_each(sk, node, &iucv_sk_list.head) {
iucv = iucv_sk(sk);
- skb_queue_purge(&iucv->send_skb_q);
+ iucv_skb_queue_purge(&iucv->send_skb_q);
skb_queue_purge(&iucv->backlog_skb_q);
switch (sk->sk_state) {
- case IUCV_SEVERED:
case IUCV_DISCONN:
case IUCV_CLOSING:
case IUCV_CONNECTED:
@@ -212,7 +222,6 @@ static int afiucv_pm_restore_thaw(struct device *dev)
sk->sk_state_change(sk);
break;
case IUCV_DISCONN:
- case IUCV_SEVERED:
case IUCV_CLOSING:
case IUCV_LISTEN:
case IUCV_BOUND:
@@ -366,9 +375,7 @@ static int afiucv_hs_send(struct iucv_message *imsg, struct sock *sock,
if (imsg)
memcpy(&phs_hdr->iucv_hdr, imsg, sizeof(struct iucv_message));
- rcu_read_lock();
- skb->dev = dev_get_by_index_rcu(net, sock->sk_bound_dev_if);
- rcu_read_unlock();
+ skb->dev = dev_get_by_index(net, sock->sk_bound_dev_if);
if (!skb->dev)
return -ENODEV;
if (!(skb->dev->flags & IFF_UP))
@@ -388,6 +395,7 @@ static int afiucv_hs_send(struct iucv_message *imsg, struct sock *sock,
err = dev_queue_xmit(skb);
if (err) {
skb_unlink(nskb, &iucv->send_skb_q);
+ dev_put(nskb->dev);
kfree_skb(nskb);
} else {
atomic_sub(confirm_recv, &iucv->msg_recv);
@@ -396,25 +404,6 @@ static int afiucv_hs_send(struct iucv_message *imsg, struct sock *sock,
return err;
}
-/* Timers */
-static void iucv_sock_timeout(unsigned long arg)
-{
- struct sock *sk = (struct sock *)arg;
-
- bh_lock_sock(sk);
- sk->sk_err = ETIMEDOUT;
- sk->sk_state_change(sk);
- bh_unlock_sock(sk);
-
- iucv_sock_kill(sk);
- sock_put(sk);
-}
-
-static void iucv_sock_clear_timer(struct sock *sk)
-{
- sk_stop_timer(sk, &sk->sk_timer);
-}
-
static struct sock *__iucv_get_sock_by_name(char *nm)
{
struct sock *sk;
@@ -467,7 +456,6 @@ static void iucv_sock_close(struct sock *sk)
int err, blen;
struct sk_buff *skb;
- iucv_sock_clear_timer(sk);
lock_sock(sk);
switch (sk->sk_state) {
@@ -481,16 +469,14 @@ static void iucv_sock_close(struct sock *sk)
blen = sizeof(struct af_iucv_trans_hdr) + ETH_HLEN;
skb = sock_alloc_send_skb(sk, blen, 1, &err);
if (skb) {
- skb_reserve(skb,
- sizeof(struct af_iucv_trans_hdr) +
- ETH_HLEN);
+ skb_reserve(skb, blen);
err = afiucv_hs_send(NULL, sk, skb,
AF_IUCV_FLAG_FIN);
}
sk->sk_state = IUCV_DISCONN;
sk->sk_state_change(sk);
}
- case IUCV_DISCONN:
+ case IUCV_DISCONN: /* fall through */
sk->sk_state = IUCV_CLOSING;
sk->sk_state_change(sk);
@@ -520,7 +506,7 @@ static void iucv_sock_close(struct sock *sk)
sk->sk_err = ECONNRESET;
sk->sk_state_change(sk);
- skb_queue_purge(&iucv->send_skb_q);
+ iucv_skb_queue_purge(&iucv->send_skb_q);
skb_queue_purge(&iucv->backlog_skb_q);
break;
@@ -581,8 +567,6 @@ static struct sock *iucv_sock_alloc(struct socket *sock, int proto, gfp_t prio)
sk->sk_protocol = proto;
sk->sk_state = IUCV_OPEN;
- setup_timer(&sk->sk_timer, iucv_sock_timeout, (unsigned long)sk);
-
iucv_sock_link(&iucv_sk_list, sk);
return sk;
}
@@ -675,16 +659,12 @@ struct sock *iucv_accept_dequeue(struct sock *parent, struct socket *newsock)
}
if (sk->sk_state == IUCV_CONNECTED ||
- sk->sk_state == IUCV_SEVERED ||
- sk->sk_state == IUCV_DISCONN || /* due to PM restore */
+ sk->sk_state == IUCV_DISCONN ||
!newsock) {
iucv_accept_unlink(sk);
if (newsock)
sock_graft(sk, newsock);
- if (sk->sk_state == IUCV_SEVERED)
- sk->sk_state = IUCV_DISCONN;
-
release_sock(sk);
return sk;
}
@@ -739,7 +719,7 @@ static int iucv_sock_bind(struct socket *sock, struct sockaddr *addr,
if (!memcmp(dev->perm_addr, uid, 8)) {
memcpy(iucv->src_name, sa->siucv_name, 8);
memcpy(iucv->src_user_id, sa->siucv_user_id, 8);
- sock->sk->sk_bound_dev_if = dev->ifindex;
+ sk->sk_bound_dev_if = dev->ifindex;
sk->sk_state = IUCV_BOUND;
iucv->transport = AF_IUCV_TRANS_HIPER;
if (!iucv->msglimit)
@@ -774,16 +754,13 @@ done:
static int iucv_sock_autobind(struct sock *sk)
{
struct iucv_sock *iucv = iucv_sk(sk);
- char query_buffer[80];
char name[12];
int err = 0;
- /* Set the userid and name */
- cpcmd("QUERY USERID", query_buffer, sizeof(query_buffer), &err);
- if (unlikely(err))
+ if (unlikely(!pr_iucv))
return -EPROTO;
- memcpy(iucv->src_user_id, query_buffer, 8);
+ memcpy(iucv->src_user_id, iucv_userid, 8);
write_lock_bh(&iucv_sk_list.lock);
@@ -1225,6 +1202,8 @@ release:
return len;
fail:
+ if (skb->dev)
+ dev_put(skb->dev);
kfree_skb(skb);
out:
release_sock(sk);
@@ -1357,7 +1336,7 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
int blen;
int err = 0;
- if ((sk->sk_state == IUCV_DISCONN || sk->sk_state == IUCV_SEVERED) &&
+ if ((sk->sk_state == IUCV_DISCONN) &&
skb_queue_empty(&iucv->backlog_skb_q) &&
skb_queue_empty(&sk->sk_receive_queue) &&
list_empty(&iucv->message_q.list))
@@ -1441,9 +1420,7 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
ETH_HLEN;
sskb = sock_alloc_send_skb(sk, blen, 1, &err);
if (sskb) {
- skb_reserve(sskb,
- sizeof(struct af_iucv_trans_hdr)
- + ETH_HLEN);
+ skb_reserve(sskb, blen);
err = afiucv_hs_send(NULL, sk, sskb,
AF_IUCV_FLAG_WIN);
}
@@ -1506,7 +1483,7 @@ unsigned int iucv_sock_poll(struct file *file, struct socket *sock,
if (sk->sk_state == IUCV_CLOSED)
mask |= POLLHUP;
- if (sk->sk_state == IUCV_DISCONN || sk->sk_state == IUCV_SEVERED)
+ if (sk->sk_state == IUCV_DISCONN)
mask |= POLLIN;
if (sock_writeable(sk))
@@ -1533,7 +1510,6 @@ static int iucv_sock_shutdown(struct socket *sock, int how)
switch (sk->sk_state) {
case IUCV_DISCONN:
case IUCV_CLOSING:
- case IUCV_SEVERED:
case IUCV_CLOSED:
err = -ENOTCONN;
goto fail;
@@ -1888,10 +1864,7 @@ static void iucv_callback_connrej(struct iucv_path *path, u8 ipuser[16])
{
struct sock *sk = path->private;
- if (!list_empty(&iucv_sk(sk)->accept_q))
- sk->sk_state = IUCV_SEVERED;
- else
- sk->sk_state = IUCV_DISCONN;
+ sk->sk_state = IUCV_DISCONN;
sk->sk_state_change(sk);
}
@@ -2051,10 +2024,7 @@ static int afiucv_hs_callback_fin(struct sock *sk, struct sk_buff *skb)
/* other end of connection closed */
if (iucv) {
bh_lock_sock(sk);
- if (!list_empty(&iucv->accept_q))
- sk->sk_state = IUCV_SEVERED;
- else
- sk->sk_state = IUCV_DISCONN;
+ sk->sk_state = IUCV_DISCONN;
sk->sk_state_change(sk);
bh_unlock_sock(sk);
}
@@ -2209,6 +2179,8 @@ static int afiucv_hs_rcv(struct sk_buff *skb, struct net_device *dev,
break;
case 0:
/* plain data frame */
+ memcpy(CB_TRGCLS(skb), &trans_hdr->iucv_hdr.class,
+ CB_TRGCLS_LEN);
err = afiucv_hs_callback_rx(sk, skb);
break;
default:
@@ -2259,6 +2231,7 @@ static void afiucv_hs_callback_txnotify(struct sk_buff *skb,
case TX_NOTIFY_OK:
__skb_unlink(this, list);
iucv_sock_wake_msglim(sk);
+ dev_put(this->dev);
kfree_skb(this);
break;
case TX_NOTIFY_PENDING:
@@ -2269,6 +2242,7 @@ static void afiucv_hs_callback_txnotify(struct sk_buff *skb,
atomic_dec(&iucv->pendings);
if (atomic_read(&iucv->pendings) <= 0)
iucv_sock_wake_msglim(sk);
+ dev_put(this->dev);
kfree_skb(this);
break;
case TX_NOTIFY_UNREACHABLE:
@@ -2277,11 +2251,9 @@ static void afiucv_hs_callback_txnotify(struct sk_buff *skb,
case TX_NOTIFY_GENERALERROR:
case TX_NOTIFY_DELAYED_GENERALERROR:
__skb_unlink(this, list);
+ dev_put(this->dev);
kfree_skb(this);
- if (!list_empty(&iucv->accept_q))
- sk->sk_state = IUCV_SEVERED;
- else
- sk->sk_state = IUCV_DISCONN;
+ sk->sk_state = IUCV_DISCONN;
sk->sk_state_change(sk);
break;
}
@@ -2291,6 +2263,13 @@ static void afiucv_hs_callback_txnotify(struct sk_buff *skb,
}
spin_unlock_irqrestore(&list->lock, flags);
+ if (sk->sk_state == IUCV_CLOSING) {
+ if (skb_queue_empty(&iucv_sk(sk)->send_skb_q)) {
+ sk->sk_state = IUCV_CLOSED;
+ sk->sk_state_change(sk);
+ }
+ }
+
out_unlock:
bh_unlock_sock(sk);
}
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 1e733e9073d0..11dbb2255ccb 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -375,7 +375,7 @@ static int verify_address_len(const void *p)
const struct sadb_address *sp = p;
const struct sockaddr *addr = (const struct sockaddr *)(sp + 1);
const struct sockaddr_in *sin;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
const struct sockaddr_in6 *sin6;
#endif
int len;
@@ -387,7 +387,7 @@ static int verify_address_len(const void *p)
sp->sadb_address_prefixlen > 32)
return -EINVAL;
break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
len = DIV_ROUND_UP(sizeof(*sp) + sizeof(*sin6), sizeof(uint64_t));
if (sp->sadb_address_len != len ||
@@ -469,7 +469,7 @@ static int present_and_same_family(const struct sadb_address *src,
if (s_addr->sa_family != d_addr->sa_family)
return 0;
if (s_addr->sa_family != AF_INET
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
&& s_addr->sa_family != AF_INET6
#endif
)
@@ -579,7 +579,7 @@ static inline int pfkey_sockaddr_len(sa_family_t family)
switch (family) {
case AF_INET:
return sizeof(struct sockaddr_in);
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
return sizeof(struct sockaddr_in6);
#endif
@@ -595,7 +595,7 @@ int pfkey_sockaddr_extract(const struct sockaddr *sa, xfrm_address_t *xaddr)
xaddr->a4 =
((struct sockaddr_in *)sa)->sin_addr.s_addr;
return AF_INET;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
memcpy(xaddr->a6,
&((struct sockaddr_in6 *)sa)->sin6_addr,
@@ -639,7 +639,7 @@ static struct xfrm_state *pfkey_xfrm_state_lookup(struct net *net, const struct
case AF_INET:
xaddr = (xfrm_address_t *)&((const struct sockaddr_in *)(addr + 1))->sin_addr;
break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
xaddr = (xfrm_address_t *)&((const struct sockaddr_in6 *)(addr + 1))->sin6_addr;
break;
@@ -705,14 +705,14 @@ static unsigned int pfkey_sockaddr_fill(const xfrm_address_t *xaddr, __be16 port
memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
return 32;
}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
{
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa;
sin6->sin6_family = AF_INET6;
sin6->sin6_port = port;
sin6->sin6_flowinfo = 0;
- ipv6_addr_copy(&sin6->sin6_addr, (const struct in6_addr *)xaddr->a6);
+ sin6->sin6_addr = *(struct in6_addr *)xaddr->a6;
sin6->sin6_scope_id = 0;
return 128;
}
@@ -1311,7 +1311,7 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, const struct sadb_
xdaddr = (xfrm_address_t *)&((struct sockaddr_in *)(daddr + 1))->sin_addr.s_addr;
xsaddr = (xfrm_address_t *)&((struct sockaddr_in *)(saddr + 1))->sin_addr.s_addr;
break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
xdaddr = (xfrm_address_t *)&((struct sockaddr_in6 *)(daddr + 1))->sin6_addr;
xsaddr = (xfrm_address_t *)&((struct sockaddr_in6 *)(saddr + 1))->sin6_addr;
@@ -3146,7 +3146,7 @@ static struct xfrm_policy *pfkey_compile_policy(struct sock *sk, int opt,
return NULL;
}
break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
if (opt != IPV6_IPSEC_POLICY) {
*dir = -EOPNOTSUPP;
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index bf8d50c67931..89ff8c67943e 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -756,9 +756,6 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb,
goto error;
}
- /* Point to L2TP header */
- optr = ptr = skb->data;
-
/* Trace packet contents, if enabled */
if (tunnel->debug & L2TP_MSG_DATA) {
length = min(32u, skb->len);
@@ -769,12 +766,15 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb,
offset = 0;
do {
- printk(" %02X", ptr[offset]);
+ printk(" %02X", skb->data[offset]);
} while (++offset < length);
printk("\n");
}
+ /* Point to L2TP header */
+ optr = ptr = skb->data;
+
/* Get L2TP header flags */
hdrflags = ntohs(*(__be16 *) ptr);
@@ -1072,7 +1072,7 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len
/* Get routing info from the tunnel socket */
skb_dst_drop(skb);
- skb_dst_set(skb, dst_clone(__sk_dst_get(sk)));
+ skb_dst_set(skb, dst_clone(__sk_dst_check(sk, 0)));
inet = inet_sk(sk);
fl = &inet->cork.fl;
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index dfd3a648a551..a18e6c3d36e3 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -833,15 +833,15 @@ static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock,
copied += used;
len -= used;
+ /* For non stream protcols we get one packet per recvmsg call */
+ if (sk->sk_type != SOCK_STREAM)
+ goto copy_uaddr;
+
if (!(flags & MSG_PEEK)) {
sk_eat_skb(sk, skb, 0);
*seq = 0;
}
- /* For non stream protcols we get one packet per recvmsg call */
- if (sk->sk_type != SOCK_STREAM)
- goto copy_uaddr;
-
/* Partial read */
if (used + offset < skb->len)
continue;
@@ -857,6 +857,12 @@ copy_uaddr:
}
if (llc_sk(sk)->cmsg_flags)
llc_cmsg_rcv(msg, skb);
+
+ if (!(flags & MSG_PEEK)) {
+ sk_eat_skb(sk, skb, 0);
+ *seq = 0;
+ }
+
goto out;
}
diff --git a/net/mac80211/rc80211_pid_algo.c b/net/mac80211/rc80211_pid_algo.c
index aeda65466f3e..502d3ecc4a79 100644
--- a/net/mac80211/rc80211_pid_algo.c
+++ b/net/mac80211/rc80211_pid_algo.c
@@ -318,7 +318,7 @@ rate_control_pid_rate_init(void *priv, struct ieee80211_supported_band *sband,
rinfo[i].diff = i * pinfo->norm_offset;
}
for (i = 1; i < sband->n_bitrates; i++) {
- s = 0;
+ s = false;
for (j = 0; j < sband->n_bitrates - i; j++)
if (unlikely(sband->bitrates[rinfo[j].index].bitrate >
sband->bitrates[rinfo[j + 1].index].bitrate)) {
@@ -327,7 +327,7 @@ rate_control_pid_rate_init(void *priv, struct ieee80211_supported_band *sband,
rinfo[j + 1].index = tmp;
rinfo[rinfo[j].index].rev_index = j;
rinfo[rinfo[j + 1].index].rev_index = j + 1;
- s = 1;
+ s = true;
}
if (!s)
break;
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 2c5041cc71f8..2c9b493af249 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -106,7 +106,7 @@ ieee80211_bss_info_update(struct ieee80211_local *local,
/* save the ERP value so that it is available at association time */
if (elems->erp_info && elems->erp_info_len >= 1) {
bss->erp_value = elems->erp_info[0];
- bss->has_erp_value = 1;
+ bss->has_erp_value = true;
}
if (elems->tim) {
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 8260b13d93c9..f8ac4ef0b794 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -4,6 +4,14 @@ menu "Core Netfilter Configuration"
config NETFILTER_NETLINK
tristate
+config NETFILTER_NETLINK_ACCT
+tristate "Netfilter NFACCT over NFNETLINK interface"
+ depends on NETFILTER_ADVANCED
+ select NETFILTER_NETLINK
+ help
+ If this option is enabled, the kernel will include support
+ for extended accounting via NFNETLINK.
+
config NETFILTER_NETLINK_QUEUE
tristate "Netfilter NFQUEUE over NFNETLINK interface"
depends on NETFILTER_ADVANCED
@@ -75,6 +83,16 @@ config NF_CONNTRACK_ZONES
If unsure, say `N'.
+config NF_CONNTRACK_PROCFS
+ bool "Supply CT list in procfs (OBSOLETE)"
+ default y
+ depends on PROC_FS
+ ---help---
+ This option enables for the list of known conntrack entries
+ to be shown in procfs under net/netfilter/nf_conntrack. This
+ is considered obsolete in favor of using the conntrack(8)
+ tool which uses Netlink.
+
config NF_CONNTRACK_EVENTS
bool "Connection tracking events"
depends on NETFILTER_ADVANCED
@@ -201,7 +219,6 @@ config NF_CONNTRACK_BROADCAST
config NF_CONNTRACK_NETBIOS_NS
tristate "NetBIOS name service protocol support"
- depends on NETFILTER_ADVANCED
select NF_CONNTRACK_BROADCAST
help
NetBIOS name service requests are sent as broadcast messages from an
@@ -542,7 +559,6 @@ config NETFILTER_XT_TARGET_NOTRACK
tristate '"NOTRACK" target support'
depends on IP_NF_RAW || IP6_NF_RAW
depends on NF_CONNTRACK
- depends on NETFILTER_ADVANCED
help
The NOTRACK target allows a select rule to specify
which packets *not* to enter the conntrack/NAT
@@ -772,6 +788,15 @@ config NETFILTER_XT_MATCH_DSCP
To compile it as a module, choose M here. If unsure, say N.
+config NETFILTER_XT_MATCH_ECN
+ tristate '"ecn" match support'
+ depends on NETFILTER_ADVANCED
+ ---help---
+ This option adds an "ECN" match, which allows you to match against
+ the IPv4 and TCP header ECN fields.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
config NETFILTER_XT_MATCH_ESP
tristate '"esp" match support'
depends on NETFILTER_ADVANCED
@@ -881,6 +906,16 @@ config NETFILTER_XT_MATCH_MULTIPORT
To compile it as a module, choose M here. If unsure, say N.
+config NETFILTER_XT_MATCH_NFACCT
+ tristate '"nfacct" match support'
+ depends on NETFILTER_ADVANCED
+ select NETFILTER_NETLINK_ACCT
+ help
+ This option allows you to use the extended accounting through
+ nfnetlink_acct.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
config NETFILTER_XT_MATCH_OSF
tristate '"osf" Passive OS fingerprint match'
depends on NETFILTER_ADVANCED && NETFILTER_NETLINK
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 1a02853df863..40f4c3d636c5 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -7,6 +7,7 @@ nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o
obj-$(CONFIG_NETFILTER) = netfilter.o
obj-$(CONFIG_NETFILTER_NETLINK) += nfnetlink.o
+obj-$(CONFIG_NETFILTER_NETLINK_ACCT) += nfnetlink_acct.o
obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += nfnetlink_queue.o
obj-$(CONFIG_NETFILTER_NETLINK_LOG) += nfnetlink_log.o
@@ -80,6 +81,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_CPU) += xt_cpu.o
obj-$(CONFIG_NETFILTER_XT_MATCH_DCCP) += xt_dccp.o
obj-$(CONFIG_NETFILTER_XT_MATCH_DEVGROUP) += xt_devgroup.o
obj-$(CONFIG_NETFILTER_XT_MATCH_DSCP) += xt_dscp.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_ECN) += xt_ecn.o
obj-$(CONFIG_NETFILTER_XT_MATCH_ESP) += xt_esp.o
obj-$(CONFIG_NETFILTER_XT_MATCH_HASHLIMIT) += xt_hashlimit.o
obj-$(CONFIG_NETFILTER_XT_MATCH_HELPER) += xt_helper.o
@@ -90,6 +92,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_LENGTH) += xt_length.o
obj-$(CONFIG_NETFILTER_XT_MATCH_LIMIT) += xt_limit.o
obj-$(CONFIG_NETFILTER_XT_MATCH_MAC) += xt_mac.o
obj-$(CONFIG_NETFILTER_XT_MATCH_MULTIPORT) += xt_multiport.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_NFACCT) += xt_nfacct.o
obj-$(CONFIG_NETFILTER_XT_MATCH_OSF) += xt_osf.o
obj-$(CONFIG_NETFILTER_XT_MATCH_OWNER) += xt_owner.o
obj-$(CONFIG_NETFILTER_XT_MATCH_PHYSDEV) += xt_physdev.o
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index afca6c78948c..b4e8ff05b301 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -54,6 +54,12 @@ EXPORT_SYMBOL_GPL(nf_unregister_afinfo);
struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS] __read_mostly;
EXPORT_SYMBOL(nf_hooks);
+
+#if defined(CONFIG_JUMP_LABEL)
+struct jump_label_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
+EXPORT_SYMBOL(nf_hooks_needed);
+#endif
+
static DEFINE_MUTEX(nf_hook_mutex);
int nf_register_hook(struct nf_hook_ops *reg)
@@ -70,6 +76,9 @@ int nf_register_hook(struct nf_hook_ops *reg)
}
list_add_rcu(&reg->list, elem->list.prev);
mutex_unlock(&nf_hook_mutex);
+#if defined(CONFIG_JUMP_LABEL)
+ jump_label_inc(&nf_hooks_needed[reg->pf][reg->hooknum]);
+#endif
return 0;
}
EXPORT_SYMBOL(nf_register_hook);
@@ -79,7 +88,9 @@ void nf_unregister_hook(struct nf_hook_ops *reg)
mutex_lock(&nf_hook_mutex);
list_del_rcu(&reg->list);
mutex_unlock(&nf_hook_mutex);
-
+#if defined(CONFIG_JUMP_LABEL)
+ jump_label_dec(&nf_hooks_needed[reg->pf][reg->hooknum]);
+#endif
synchronize_net();
}
EXPORT_SYMBOL(nf_unregister_hook);
@@ -218,7 +229,7 @@ int skb_make_writable(struct sk_buff *skb, unsigned int writable_len)
}
EXPORT_SYMBOL(skb_make_writable);
-#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
/* This does not belong here, but locally generated errors need it if connection
tracking in use: without this, connection may not be in hash table, and hence
manufactured ICMP or RST packets will not be associated with it. */
diff --git a/net/netfilter/ipset/ip_set_getport.c b/net/netfilter/ipset/ip_set_getport.c
index 052579fe389a..1f03556666f4 100644
--- a/net/netfilter/ipset/ip_set_getport.c
+++ b/net/netfilter/ipset/ip_set_getport.c
@@ -109,16 +109,18 @@ ip_set_get_ip4_port(const struct sk_buff *skb, bool src,
}
EXPORT_SYMBOL_GPL(ip_set_get_ip4_port);
-#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
bool
ip_set_get_ip6_port(const struct sk_buff *skb, bool src,
__be16 *port, u8 *proto)
{
int protoff;
u8 nexthdr;
+ __be16 frag_off;
nexthdr = ipv6_hdr(skb)->nexthdr;
- protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr);
+ protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr,
+ &frag_off);
if (protoff < 0)
return false;
diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c
index f2d576e6b769..4015fcaf87bc 100644
--- a/net/netfilter/ipset/ip_set_hash_ip.c
+++ b/net/netfilter/ipset/ip_set_hash_ip.c
@@ -241,7 +241,7 @@ hash_ip6_data_isnull(const struct hash_ip6_elem *elem)
static inline void
hash_ip6_data_copy(struct hash_ip6_elem *dst, const struct hash_ip6_elem *src)
{
- ipv6_addr_copy(&dst->ip.in6, &src->ip.in6);
+ dst->ip.in6 = src->ip.in6;
}
static inline void
diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c
index 6ee10f5d59bd..37d667e3f6f8 100644
--- a/net/netfilter/ipset/ip_set_hash_ipport.c
+++ b/net/netfilter/ipset/ip_set_hash_ipport.c
@@ -158,7 +158,7 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
const struct ip_set_hash *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ipport4_elem data = { };
- u32 ip, ip_to, p = 0, port, port_to;
+ u32 ip, ip_to = 0, p = 0, port, port_to;
u32 timeout = h->timeout;
bool with_ports = false;
int ret;
diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c
index fb90e344e907..e69e2718fbe1 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportip.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportip.c
@@ -162,7 +162,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
const struct ip_set_hash *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ipportip4_elem data = { };
- u32 ip, ip_to, p = 0, port, port_to;
+ u32 ip, ip_to = 0, p = 0, port, port_to;
u32 timeout = h->timeout;
bool with_ports = false;
int ret;
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
index deb3e3dfa5fc..64199b4e93c9 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -184,7 +184,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
const struct ip_set_hash *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ipportnet4_elem data = { .cidr = HOST_MASK };
- u32 ip, ip_to, p = 0, port, port_to;
+ u32 ip, ip_to = 0, p = 0, port, port_to;
u32 ip2_from = 0, ip2_to, ip2_last, ip2;
u32 timeout = h->timeout;
bool with_ports = false;
diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c
index 60d016541c58..28988196775e 100644
--- a/net/netfilter/ipset/ip_set_hash_net.c
+++ b/net/netfilter/ipset/ip_set_hash_net.c
@@ -267,7 +267,7 @@ static inline void
hash_net6_data_copy(struct hash_net6_elem *dst,
const struct hash_net6_elem *src)
{
- ipv6_addr_copy(&dst->ip.in6, &src->ip.in6);
+ dst->ip.in6 = src->ip.in6;
dst->cidr = src->cidr;
}
diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig
index 70bd1d0774c6..af4c0b8c5275 100644
--- a/net/netfilter/ipvs/Kconfig
+++ b/net/netfilter/ipvs/Kconfig
@@ -232,6 +232,21 @@ config IP_VS_NQ
If you want to compile it in kernel, say Y. To compile it as a
module, choose M here. If unsure, say N.
+comment 'IPVS SH scheduler'
+
+config IP_VS_SH_TAB_BITS
+ int "IPVS source hashing table size (the Nth power of 2)"
+ range 4 20
+ default 8
+ ---help---
+ The source hashing scheduler maps source IPs to destinations
+ stored in a hash table. This table is tiled by each destination
+ until all slots in the table are filled. When using weights to
+ allow destinations to receive more connections, the table is
+ tiled an amount proportional to the weights specified. The table
+ needs to be large enough to effectively fit all the destinations
+ multiplied by their respective weights.
+
comment 'IPVS application helper'
config IP_VS_FTP
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 12571fb2881c..29fa5badde75 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -616,7 +616,7 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp)
if ((cp) && (!cp->dest)) {
dest = ip_vs_find_dest(ip_vs_conn_net(cp), cp->af, &cp->daddr,
cp->dport, &cp->vaddr, cp->vport,
- cp->protocol, cp->fwmark);
+ cp->protocol, cp->fwmark, cp->flags);
ip_vs_bind_dest(cp, dest);
return dest;
} else
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 093cc327020f..611c3359b94d 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -983,7 +983,7 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related,
if (!cp)
return NF_ACCEPT;
- ipv6_addr_copy(&snet.in6, &iph->saddr);
+ snet.in6 = iph->saddr;
return handle_response_icmp(AF_INET6, skb, &snet, cih->nexthdr, cp,
pp, offset, sizeof(struct ipv6hdr));
}
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 008bf97cc91a..b3afe189af61 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -85,7 +85,7 @@ static int __ip_vs_addr_is_local_v6(struct net *net,
};
rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
- if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
+ if (rt && rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
return 1;
return 0;
@@ -619,15 +619,21 @@ struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af,
const union nf_inet_addr *daddr,
__be16 dport,
const union nf_inet_addr *vaddr,
- __be16 vport, __u16 protocol, __u32 fwmark)
+ __be16 vport, __u16 protocol, __u32 fwmark,
+ __u32 flags)
{
struct ip_vs_dest *dest;
struct ip_vs_service *svc;
+ __be16 port = dport;
svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport);
if (!svc)
return NULL;
- dest = ip_vs_lookup_dest(svc, daddr, dport);
+ if (fwmark && (flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ)
+ port = 0;
+ dest = ip_vs_lookup_dest(svc, daddr, port);
+ if (!dest)
+ dest = ip_vs_lookup_dest(svc, daddr, port ^ dport);
if (dest)
atomic_inc(&dest->refcnt);
ip_vs_service_put(svc);
diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c
index 13d607ae9c52..1aa5cac748c4 100644
--- a/net/netfilter/ipvs/ip_vs_pe_sip.c
+++ b/net/netfilter/ipvs/ip_vs_pe_sip.c
@@ -108,7 +108,7 @@ static bool ip_vs_sip_ct_match(const struct ip_vs_conn_param *p,
struct ip_vs_conn *ct)
{
- bool ret = 0;
+ bool ret = false;
if (ct->af == p->af &&
ip_vs_addr_equal(p->af, p->caddr, &ct->caddr) &&
@@ -121,7 +121,7 @@ static bool ip_vs_sip_ct_match(const struct ip_vs_conn_param *p,
ct->protocol == p->protocol &&
ct->pe_data && ct->pe_data_len == p->pe_data_len &&
!memcmp(ct->pe_data, p->pe_data, p->pe_data_len))
- ret = 1;
+ ret = true;
IP_VS_DBG_BUF(9, "SIP template match %s %s->%s:%d %s\n",
ip_vs_proto_name(p->protocol),
diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c
index 33815f4fb451..069e8d4d5c01 100644
--- a/net/netfilter/ipvs/ip_vs_sh.c
+++ b/net/netfilter/ipvs/ip_vs_sh.c
@@ -30,6 +30,11 @@
* server is dead or overloaded, the load balancer can bypass the cache
* server and send requests to the original server directly.
*
+ * The weight destination attribute can be used to control the
+ * distribution of connections to the destinations in servernode. The
+ * greater the weight, the more connections the destination
+ * will receive.
+ *
*/
#define KMSG_COMPONENT "IPVS"
@@ -99,9 +104,11 @@ ip_vs_sh_assign(struct ip_vs_sh_bucket *tbl, struct ip_vs_service *svc)
struct ip_vs_sh_bucket *b;
struct list_head *p;
struct ip_vs_dest *dest;
+ int d_count;
b = tbl;
p = &svc->destinations;
+ d_count = 0;
for (i=0; i<IP_VS_SH_TAB_SIZE; i++) {
if (list_empty(p)) {
b->dest = NULL;
@@ -113,7 +120,16 @@ ip_vs_sh_assign(struct ip_vs_sh_bucket *tbl, struct ip_vs_service *svc)
atomic_inc(&dest->refcnt);
b->dest = dest;
- p = p->next;
+ IP_VS_DBG_BUF(6, "assigned i: %d dest: %s weight: %d\n",
+ i, IP_VS_DBG_ADDR(svc->af, &dest->addr),
+ atomic_read(&dest->weight));
+
+ /* Don't move to next dest until filling weight */
+ if (++d_count >= atomic_read(&dest->weight)) {
+ p = p->next;
+ d_count = 0;
+ }
+
}
b++;
}
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 3cdd479f9b5d..8a0d6d6889f0 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -603,9 +603,9 @@ sloop:
#ifdef CONFIG_IP_VS_IPV6
if (cp->af == AF_INET6) {
p += sizeof(struct ip_vs_sync_v6);
- ipv6_addr_copy(&s->v6.caddr, &cp->caddr.in6);
- ipv6_addr_copy(&s->v6.vaddr, &cp->vaddr.in6);
- ipv6_addr_copy(&s->v6.daddr, &cp->daddr.in6);
+ s->v6.caddr = cp->caddr.in6;
+ s->v6.vaddr = cp->vaddr.in6;
+ s->v6.daddr = cp->daddr.in6;
} else
#endif
{
@@ -740,7 +740,7 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
* but still handled.
*/
dest = ip_vs_find_dest(net, type, daddr, dport, param->vaddr,
- param->vport, protocol, fwmark);
+ param->vport, protocol, fwmark, flags);
/* Set the approprite ativity flag */
if (protocol == IPPROTO_TCP) {
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index aa2d7206ee8a..7fd66dec859d 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -207,7 +207,7 @@ __ip_vs_reroute_locally(struct sk_buff *skb)
static inline int __ip_vs_is_local_route6(struct rt6_info *rt)
{
- return rt->rt6i_dev && rt->rt6i_dev->flags & IFF_LOOPBACK;
+ return rt->dst.dev && rt->dst.dev->flags & IFF_LOOPBACK;
}
static struct dst_entry *
@@ -235,7 +235,7 @@ __ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr,
goto out_err;
}
}
- ipv6_addr_copy(ret_saddr, &fl6.saddr);
+ *ret_saddr = fl6.saddr;
return dst;
out_err:
@@ -279,7 +279,7 @@ __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,
atomic_read(&rt->dst.__refcnt));
}
if (ret_saddr)
- ipv6_addr_copy(ret_saddr, &dest->dst_saddr.in6);
+ *ret_saddr = dest->dst_saddr.in6;
spin_unlock(&dest->dst_lock);
} else {
dst = __ip_vs_route_output_v6(net, daddr, ret_saddr, do_xfrm);
@@ -541,7 +541,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
* Avoid duplicate tuple in reply direction for NAT traffic
* to local address when connection is sync-ed
*/
-#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
if (cp->flags & IP_VS_CONN_F_SYNC && local) {
enum ip_conntrack_info ctinfo;
struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);
@@ -658,7 +658,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
* Avoid duplicate tuple in reply direction for NAT traffic
* to local address when connection is sync-ed
*/
-#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
if (cp->flags & IP_VS_CONN_F_SYNC && local) {
enum ip_conntrack_info ctinfo;
struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);
@@ -705,7 +705,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
/* mangle the packet */
if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
goto tx_error;
- ipv6_addr_copy(&ipv6_hdr(skb)->daddr, &cp->daddr.in6);
+ ipv6_hdr(skb)->daddr = cp->daddr.in6;
if (!local || !skb->dev) {
/* drop the old route when skb is not shared */
@@ -967,8 +967,8 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
be16_add_cpu(&iph->payload_len, sizeof(*old_iph));
iph->priority = old_iph->priority;
memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl));
- ipv6_addr_copy(&iph->daddr, &cp->daddr.in6);
- ipv6_addr_copy(&iph->saddr, &saddr);
+ iph->daddr = cp->daddr.in6;
+ iph->saddr = saddr;
iph->hop_limit = old_iph->hop_limit;
/* Another hack: avoid icmp_send in ip_fragment */
@@ -1173,7 +1173,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
* Avoid duplicate tuple in reply direction for NAT traffic
* to local address when connection is sync-ed
*/
-#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
if (cp->flags & IP_VS_CONN_F_SYNC && local) {
enum ip_conntrack_info ctinfo;
struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);
@@ -1293,7 +1293,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
* Avoid duplicate tuple in reply direction for NAT traffic
* to local address when connection is sync-ed
*/
-#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
if (cp->flags & IP_VS_CONN_F_SYNC && local) {
enum ip_conntrack_info ctinfo;
struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);
diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c
index 369df3f08d42..f4f8cda05986 100644
--- a/net/netfilter/nf_conntrack_acct.c
+++ b/net/netfilter/nf_conntrack_acct.c
@@ -18,7 +18,7 @@
#include <net/netfilter/nf_conntrack_extend.h>
#include <net/netfilter/nf_conntrack_acct.h>
-static int nf_ct_acct __read_mostly;
+static bool nf_ct_acct __read_mostly;
module_param_named(acct, nf_ct_acct, bool, 0644);
MODULE_PARM_DESC(acct, "Enable connection tracking flow accounting.");
@@ -46,8 +46,8 @@ seq_print_acct(struct seq_file *s, const struct nf_conn *ct, int dir)
return 0;
return seq_printf(s, "packets=%llu bytes=%llu ",
- (unsigned long long)acct[dir].packets,
- (unsigned long long)acct[dir].bytes);
+ (unsigned long long)atomic64_read(&acct[dir].packets),
+ (unsigned long long)atomic64_read(&acct[dir].bytes));
};
EXPORT_SYMBOL_GPL(seq_print_acct);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 7202b0631cd6..e875f8902db3 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -67,6 +67,7 @@ DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked);
EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked);
unsigned int nf_conntrack_hash_rnd __read_mostly;
+EXPORT_SYMBOL_GPL(nf_conntrack_hash_rnd);
static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple, u16 zone)
{
@@ -1044,10 +1045,8 @@ acct:
acct = nf_conn_acct_find(ct);
if (acct) {
- spin_lock_bh(&ct->lock);
- acct[CTINFO2DIR(ctinfo)].packets++;
- acct[CTINFO2DIR(ctinfo)].bytes += skb->len;
- spin_unlock_bh(&ct->lock);
+ atomic64_inc(&acct[CTINFO2DIR(ctinfo)].packets);
+ atomic64_add(skb->len, &acct[CTINFO2DIR(ctinfo)].bytes);
}
}
}
@@ -1063,11 +1062,9 @@ bool __nf_ct_kill_acct(struct nf_conn *ct,
acct = nf_conn_acct_find(ct);
if (acct) {
- spin_lock_bh(&ct->lock);
- acct[CTINFO2DIR(ctinfo)].packets++;
- acct[CTINFO2DIR(ctinfo)].bytes +=
- skb->len - skb_network_offset(skb);
- spin_unlock_bh(&ct->lock);
+ atomic64_inc(&acct[CTINFO2DIR(ctinfo)].packets);
+ atomic64_add(skb->len - skb_network_offset(skb),
+ &acct[CTINFO2DIR(ctinfo)].bytes);
}
}
@@ -1087,7 +1084,7 @@ static struct nf_ct_ext_type nf_ct_zone_extend __read_mostly = {
};
#endif
-#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_conntrack.h>
@@ -1342,8 +1339,7 @@ void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls)
get_order(sz));
if (!hash) {
printk(KERN_WARNING "nf_conntrack: falling back to vmalloc.\n");
- hash = __vmalloc(sz, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
- PAGE_KERNEL);
+ hash = vzalloc(sz);
}
if (hash && nulls)
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index 6b368be937c6..b62c4148b921 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -27,22 +27,17 @@
static DEFINE_MUTEX(nf_ct_ecache_mutex);
-struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb __read_mostly;
-EXPORT_SYMBOL_GPL(nf_conntrack_event_cb);
-
-struct nf_exp_event_notifier __rcu *nf_expect_event_cb __read_mostly;
-EXPORT_SYMBOL_GPL(nf_expect_event_cb);
-
/* deliver cached events and clear cache entry - must be called with locally
* disabled softirqs */
void nf_ct_deliver_cached_events(struct nf_conn *ct)
{
+ struct net *net = nf_ct_net(ct);
unsigned long events;
struct nf_ct_event_notifier *notify;
struct nf_conntrack_ecache *e;
rcu_read_lock();
- notify = rcu_dereference(nf_conntrack_event_cb);
+ notify = rcu_dereference(net->ct.nf_conntrack_event_cb);
if (notify == NULL)
goto out_unlock;
@@ -83,19 +78,20 @@ out_unlock:
}
EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events);
-int nf_conntrack_register_notifier(struct nf_ct_event_notifier *new)
+int nf_conntrack_register_notifier(struct net *net,
+ struct nf_ct_event_notifier *new)
{
int ret = 0;
struct nf_ct_event_notifier *notify;
mutex_lock(&nf_ct_ecache_mutex);
- notify = rcu_dereference_protected(nf_conntrack_event_cb,
+ notify = rcu_dereference_protected(net->ct.nf_conntrack_event_cb,
lockdep_is_held(&nf_ct_ecache_mutex));
if (notify != NULL) {
ret = -EBUSY;
goto out_unlock;
}
- RCU_INIT_POINTER(nf_conntrack_event_cb, new);
+ RCU_INIT_POINTER(net->ct.nf_conntrack_event_cb, new);
mutex_unlock(&nf_ct_ecache_mutex);
return ret;
@@ -105,32 +101,34 @@ out_unlock:
}
EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier);
-void nf_conntrack_unregister_notifier(struct nf_ct_event_notifier *new)
+void nf_conntrack_unregister_notifier(struct net *net,
+ struct nf_ct_event_notifier *new)
{
struct nf_ct_event_notifier *notify;
mutex_lock(&nf_ct_ecache_mutex);
- notify = rcu_dereference_protected(nf_conntrack_event_cb,
+ notify = rcu_dereference_protected(net->ct.nf_conntrack_event_cb,
lockdep_is_held(&nf_ct_ecache_mutex));
BUG_ON(notify != new);
- RCU_INIT_POINTER(nf_conntrack_event_cb, NULL);
+ RCU_INIT_POINTER(net->ct.nf_conntrack_event_cb, NULL);
mutex_unlock(&nf_ct_ecache_mutex);
}
EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier);
-int nf_ct_expect_register_notifier(struct nf_exp_event_notifier *new)
+int nf_ct_expect_register_notifier(struct net *net,
+ struct nf_exp_event_notifier *new)
{
int ret = 0;
struct nf_exp_event_notifier *notify;
mutex_lock(&nf_ct_ecache_mutex);
- notify = rcu_dereference_protected(nf_expect_event_cb,
+ notify = rcu_dereference_protected(net->ct.nf_expect_event_cb,
lockdep_is_held(&nf_ct_ecache_mutex));
if (notify != NULL) {
ret = -EBUSY;
goto out_unlock;
}
- RCU_INIT_POINTER(nf_expect_event_cb, new);
+ RCU_INIT_POINTER(net->ct.nf_expect_event_cb, new);
mutex_unlock(&nf_ct_ecache_mutex);
return ret;
@@ -140,15 +138,16 @@ out_unlock:
}
EXPORT_SYMBOL_GPL(nf_ct_expect_register_notifier);
-void nf_ct_expect_unregister_notifier(struct nf_exp_event_notifier *new)
+void nf_ct_expect_unregister_notifier(struct net *net,
+ struct nf_exp_event_notifier *new)
{
struct nf_exp_event_notifier *notify;
mutex_lock(&nf_ct_ecache_mutex);
- notify = rcu_dereference_protected(nf_expect_event_cb,
+ notify = rcu_dereference_protected(net->ct.nf_expect_event_cb,
lockdep_is_held(&nf_ct_ecache_mutex));
BUG_ON(notify != new);
- RCU_INIT_POINTER(nf_expect_event_cb, NULL);
+ RCU_INIT_POINTER(net->ct.nf_expect_event_cb, NULL);
mutex_unlock(&nf_ct_ecache_mutex);
}
EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier);
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 340c80d968d4..4147ba3f653c 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -38,8 +38,6 @@ unsigned int nf_ct_expect_max __read_mostly;
static struct kmem_cache *nf_ct_expect_cachep __read_mostly;
-static HLIST_HEAD(nf_ct_userspace_expect_list);
-
/* nf_conntrack_expect helper functions */
void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
u32 pid, int report)
@@ -47,14 +45,14 @@ void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
struct nf_conn_help *master_help = nfct_help(exp->master);
struct net *net = nf_ct_exp_net(exp);
+ NF_CT_ASSERT(master_help);
NF_CT_ASSERT(!timer_pending(&exp->timeout));
hlist_del_rcu(&exp->hnode);
net->ct.expect_count--;
hlist_del(&exp->lnode);
- if (!(exp->flags & NF_CT_EXPECT_USERSPACE))
- master_help->expecting[exp->class]--;
+ master_help->expecting[exp->class]--;
nf_ct_expect_event_report(IPEXP_DESTROY, exp, pid, report);
nf_ct_expect_put(exp);
@@ -314,37 +312,34 @@ void nf_ct_expect_put(struct nf_conntrack_expect *exp)
}
EXPORT_SYMBOL_GPL(nf_ct_expect_put);
-static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
+static int nf_ct_expect_insert(struct nf_conntrack_expect *exp)
{
struct nf_conn_help *master_help = nfct_help(exp->master);
+ struct nf_conntrack_helper *helper;
struct net *net = nf_ct_exp_net(exp);
- const struct nf_conntrack_expect_policy *p;
unsigned int h = nf_ct_expect_dst_hash(&exp->tuple);
/* two references : one for hash insert, one for the timer */
atomic_add(2, &exp->use);
- if (master_help) {
- hlist_add_head(&exp->lnode, &master_help->expectations);
- master_help->expecting[exp->class]++;
- } else if (exp->flags & NF_CT_EXPECT_USERSPACE)
- hlist_add_head(&exp->lnode, &nf_ct_userspace_expect_list);
+ hlist_add_head(&exp->lnode, &master_help->expectations);
+ master_help->expecting[exp->class]++;
hlist_add_head_rcu(&exp->hnode, &net->ct.expect_hash[h]);
net->ct.expect_count++;
setup_timer(&exp->timeout, nf_ct_expectation_timed_out,
(unsigned long)exp);
- if (master_help) {
- p = &rcu_dereference_protected(
- master_help->helper,
- lockdep_is_held(&nf_conntrack_lock)
- )->expect_policy[exp->class];
- exp->timeout.expires = jiffies + p->timeout * HZ;
+ helper = rcu_dereference_protected(master_help->helper,
+ lockdep_is_held(&nf_conntrack_lock));
+ if (helper) {
+ exp->timeout.expires = jiffies +
+ helper->expect_policy[exp->class].timeout * HZ;
}
add_timer(&exp->timeout);
NF_CT_STAT_INC(net, expect_create);
+ return 0;
}
/* Race with expectations being used means we could have none to find; OK. */
@@ -389,14 +384,13 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
struct nf_conntrack_expect *i;
struct nf_conn *master = expect->master;
struct nf_conn_help *master_help = nfct_help(master);
+ struct nf_conntrack_helper *helper;
struct net *net = nf_ct_exp_net(expect);
struct hlist_node *n;
unsigned int h;
int ret = 1;
- /* Don't allow expectations created from kernel-space with no helper */
- if (!(expect->flags & NF_CT_EXPECT_USERSPACE) &&
- (!master_help || (master_help && !master_help->helper))) {
+ if (!master_help) {
ret = -ESHUTDOWN;
goto out;
}
@@ -414,11 +408,10 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
}
}
/* Will be over limit? */
- if (master_help) {
- p = &rcu_dereference_protected(
- master_help->helper,
- lockdep_is_held(&nf_conntrack_lock)
- )->expect_policy[expect->class];
+ helper = rcu_dereference_protected(master_help->helper,
+ lockdep_is_held(&nf_conntrack_lock));
+ if (helper) {
+ p = &helper->expect_policy[expect->class];
if (p->max_expected &&
master_help->expecting[expect->class] >= p->max_expected) {
evict_oldest_expect(master, expect);
@@ -450,8 +443,9 @@ int nf_ct_expect_related_report(struct nf_conntrack_expect *expect,
if (ret <= 0)
goto out;
- ret = 0;
- nf_ct_expect_insert(expect);
+ ret = nf_ct_expect_insert(expect);
+ if (ret < 0)
+ goto out;
spin_unlock_bh(&nf_conntrack_lock);
nf_ct_expect_event_report(IPEXP_NEW, expect, pid, report);
return ret;
@@ -461,22 +455,7 @@ out:
}
EXPORT_SYMBOL_GPL(nf_ct_expect_related_report);
-void nf_ct_remove_userspace_expectations(void)
-{
- struct nf_conntrack_expect *exp;
- struct hlist_node *n, *next;
-
- hlist_for_each_entry_safe(exp, n, next,
- &nf_ct_userspace_expect_list, lnode) {
- if (del_timer(&exp->timeout)) {
- nf_ct_unlink_expect(exp);
- nf_ct_expect_put(exp);
- }
- }
-}
-EXPORT_SYMBOL_GPL(nf_ct_remove_userspace_expectations);
-
-#ifdef CONFIG_PROC_FS
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
struct ct_expect_iter_state {
struct seq_net_private p;
unsigned int bucket;
@@ -604,25 +583,25 @@ static const struct file_operations exp_file_ops = {
.llseek = seq_lseek,
.release = seq_release_net,
};
-#endif /* CONFIG_PROC_FS */
+#endif /* CONFIG_NF_CONNTRACK_PROCFS */
static int exp_proc_init(struct net *net)
{
-#ifdef CONFIG_PROC_FS
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
struct proc_dir_entry *proc;
proc = proc_net_fops_create(net, "nf_conntrack_expect", 0440, &exp_file_ops);
if (!proc)
return -ENOMEM;
-#endif /* CONFIG_PROC_FS */
+#endif /* CONFIG_NF_CONNTRACK_PROCFS */
return 0;
}
static void exp_proc_remove(struct net *net)
{
-#ifdef CONFIG_PROC_FS
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
proc_net_remove(net, "nf_conntrack_expect");
-#endif /* CONFIG_PROC_FS */
+#endif /* CONFIG_NF_CONNTRACK_PROCFS */
}
module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0400);
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index 6f5801eac999..8c5c95c6d34f 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -42,7 +42,7 @@ static u_int16_t ports[MAX_PORTS];
static unsigned int ports_c;
module_param_array(ports, ushort, &ports_c, 0400);
-static int loose;
+static bool loose;
module_param(loose, bool, 0600);
unsigned int (*nf_nat_ftp_hook)(struct sk_buff *skb,
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index f03c2d4539f6..722291f8af72 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -42,7 +42,7 @@ static int gkrouted_only __read_mostly = 1;
module_param(gkrouted_only, int, 0600);
MODULE_PARM_DESC(gkrouted_only, "only accept calls from gatekeeper");
-static int callforward_filter __read_mostly = 1;
+static bool callforward_filter __read_mostly = true;
module_param(callforward_filter, bool, 0600);
MODULE_PARM_DESC(callforward_filter, "only create call forwarding expectations "
"if both endpoints are on different sides "
@@ -743,17 +743,16 @@ static int callforward_do_filter(const union nf_inet_addr *src,
}
break;
}
-#if defined(CONFIG_NF_CONNTRACK_IPV6) || \
- defined(CONFIG_NF_CONNTRACK_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV6)
case AF_INET6: {
struct flowi6 fl1, fl2;
struct rt6_info *rt1, *rt2;
memset(&fl1, 0, sizeof(fl1));
- ipv6_addr_copy(&fl1.daddr, &src->in6);
+ fl1.daddr = src->in6;
memset(&fl2, 0, sizeof(fl2));
- ipv6_addr_copy(&fl2.daddr, &dst->in6);
+ fl2.daddr = dst->in6;
if (!afinfo->route(&init_net, (struct dst_entry **)&rt1,
flowi6_to_flowi(&fl1), false)) {
if (!afinfo->route(&init_net, (struct dst_entry **)&rt2,
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 93c4bdbfc1ae..c9e0de08aa87 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -121,6 +121,18 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl,
int ret = 0;
if (tmpl != NULL) {
+ /* we've got a userspace helper. */
+ if (tmpl->status & IPS_USERSPACE_HELPER) {
+ help = nf_ct_helper_ext_add(ct, flags);
+ if (help == NULL) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ rcu_assign_pointer(help->helper, NULL);
+ __set_bit(IPS_USERSPACE_HELPER_BIT, &ct->status);
+ ret = 0;
+ goto out;
+ }
help = nfct_help(tmpl);
if (help != NULL)
helper = help->helper;
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index e58aa9b1fe8a..e07dc3ae930e 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -4,7 +4,7 @@
* (C) 2001 by Jay Schulist <jschlst@samba.org>
* (C) 2002-2006 by Harald Welte <laforge@gnumonks.org>
* (C) 2003 by Patrick Mchardy <kaber@trash.net>
- * (C) 2005-2008 by Pablo Neira Ayuso <pablo@netfilter.org>
+ * (C) 2005-2011 by Pablo Neira Ayuso <pablo@netfilter.org>
*
* Initial connection tracking via netlink development funded and
* generally made possible by Network Robots, Inc. (www.networkrobots.com)
@@ -135,7 +135,7 @@ nla_put_failure:
static inline int
ctnetlink_dump_timeout(struct sk_buff *skb, const struct nf_conn *ct)
{
- long timeout = (ct->timeout.expires - jiffies) / HZ;
+ long timeout = ((long)ct->timeout.expires - (long)jiffies) / HZ;
if (timeout < 0)
timeout = 0;
@@ -203,25 +203,18 @@ nla_put_failure:
}
static int
-ctnetlink_dump_counters(struct sk_buff *skb, const struct nf_conn *ct,
- enum ip_conntrack_dir dir)
+dump_counters(struct sk_buff *skb, u64 pkts, u64 bytes,
+ enum ip_conntrack_dir dir)
{
enum ctattr_type type = dir ? CTA_COUNTERS_REPLY: CTA_COUNTERS_ORIG;
struct nlattr *nest_count;
- const struct nf_conn_counter *acct;
-
- acct = nf_conn_acct_find(ct);
- if (!acct)
- return 0;
nest_count = nla_nest_start(skb, type | NLA_F_NESTED);
if (!nest_count)
goto nla_put_failure;
- NLA_PUT_BE64(skb, CTA_COUNTERS_PACKETS,
- cpu_to_be64(acct[dir].packets));
- NLA_PUT_BE64(skb, CTA_COUNTERS_BYTES,
- cpu_to_be64(acct[dir].bytes));
+ NLA_PUT_BE64(skb, CTA_COUNTERS_PACKETS, cpu_to_be64(pkts));
+ NLA_PUT_BE64(skb, CTA_COUNTERS_BYTES, cpu_to_be64(bytes));
nla_nest_end(skb, nest_count);
@@ -232,6 +225,27 @@ nla_put_failure:
}
static int
+ctnetlink_dump_counters(struct sk_buff *skb, const struct nf_conn *ct,
+ enum ip_conntrack_dir dir, int type)
+{
+ struct nf_conn_counter *acct;
+ u64 pkts, bytes;
+
+ acct = nf_conn_acct_find(ct);
+ if (!acct)
+ return 0;
+
+ if (type == IPCTNL_MSG_CT_GET_CTRZERO) {
+ pkts = atomic64_xchg(&acct[dir].packets, 0);
+ bytes = atomic64_xchg(&acct[dir].bytes, 0);
+ } else {
+ pkts = atomic64_read(&acct[dir].packets);
+ bytes = atomic64_read(&acct[dir].bytes);
+ }
+ return dump_counters(skb, pkts, bytes, dir);
+}
+
+static int
ctnetlink_dump_timestamp(struct sk_buff *skb, const struct nf_conn *ct)
{
struct nlattr *nest_count;
@@ -393,15 +407,15 @@ nla_put_failure:
}
static int
-ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
- int event, struct nf_conn *ct)
+ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq, u32 type,
+ struct nf_conn *ct)
{
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
struct nlattr *nest_parms;
- unsigned int flags = pid ? NLM_F_MULTI : 0;
+ unsigned int flags = pid ? NLM_F_MULTI : 0, event;
- event |= NFNL_SUBSYS_CTNETLINK << 8;
+ event = (NFNL_SUBSYS_CTNETLINK << 8 | IPCTNL_MSG_CT_NEW);
nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags);
if (nlh == NULL)
goto nlmsg_failure;
@@ -430,8 +444,8 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
if (ctnetlink_dump_status(skb, ct) < 0 ||
ctnetlink_dump_timeout(skb, ct) < 0 ||
- ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
- ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0 ||
+ ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL, type) < 0 ||
+ ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY, type) < 0 ||
ctnetlink_dump_timestamp(skb, ct) < 0 ||
ctnetlink_dump_protoinfo(skb, ct) < 0 ||
ctnetlink_dump_helpinfo(skb, ct) < 0 ||
@@ -612,8 +626,10 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
goto nla_put_failure;
if (events & (1 << IPCT_DESTROY)) {
- if (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
- ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0 ||
+ if (ctnetlink_dump_counters(skb, ct,
+ IP_CT_DIR_ORIGINAL, type) < 0 ||
+ ctnetlink_dump_counters(skb, ct,
+ IP_CT_DIR_REPLY, type) < 0 ||
ctnetlink_dump_timestamp(skb, ct) < 0)
goto nla_put_failure;
} else {
@@ -709,20 +725,13 @@ restart:
}
if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid,
cb->nlh->nlmsg_seq,
- IPCTNL_MSG_CT_NEW, ct) < 0) {
+ NFNL_MSG_TYPE(
+ cb->nlh->nlmsg_type),
+ ct) < 0) {
nf_conntrack_get(&ct->ct_general);
cb->args[1] = (unsigned long)ct;
goto out;
}
-
- if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) ==
- IPCTNL_MSG_CT_GET_CTRZERO) {
- struct nf_conn_counter *acct;
-
- acct = nf_conn_acct_find(ct);
- if (acct)
- memset(acct, 0, sizeof(struct nf_conn_counter[IP_CT_DIR_MAX]));
- }
}
if (cb->args[1]) {
cb->args[1] = 0;
@@ -1001,7 +1010,7 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
rcu_read_lock();
err = ctnetlink_fill_info(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq,
- IPCTNL_MSG_CT_NEW, ct);
+ NFNL_MSG_TYPE(nlh->nlmsg_type), ct);
rcu_read_unlock();
nf_ct_put(ct);
if (err <= 0)
@@ -1087,14 +1096,14 @@ ctnetlink_change_nat(struct nf_conn *ct, const struct nlattr * const cda[])
if (cda[CTA_NAT_DST]) {
ret = ctnetlink_parse_nat_setup(ct,
- IP_NAT_MANIP_DST,
+ NF_NAT_MANIP_DST,
cda[CTA_NAT_DST]);
if (ret < 0)
return ret;
}
if (cda[CTA_NAT_SRC]) {
ret = ctnetlink_parse_nat_setup(ct,
- IP_NAT_MANIP_SRC,
+ NF_NAT_MANIP_SRC,
cda[CTA_NAT_SRC]);
if (ret < 0)
return ret;
@@ -1358,12 +1367,15 @@ ctnetlink_create_conntrack(struct net *net, u16 zone,
nf_ct_protonum(ct));
if (helper == NULL) {
rcu_read_unlock();
+ spin_unlock_bh(&nf_conntrack_lock);
#ifdef CONFIG_MODULES
if (request_module("nfct-helper-%s", helpname) < 0) {
+ spin_lock_bh(&nf_conntrack_lock);
err = -EOPNOTSUPP;
goto err1;
}
+ spin_lock_bh(&nf_conntrack_lock);
rcu_read_lock();
helper = __nf_conntrack_helper_find(helpname,
nf_ct_l3num(ct),
@@ -1638,7 +1650,7 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb,
const struct nf_conntrack_expect *exp)
{
struct nf_conn *master = exp->master;
- long timeout = (exp->timeout.expires - jiffies) / HZ;
+ long timeout = ((long)exp->timeout.expires - (long)jiffies) / HZ;
struct nf_conn_help *help;
if (timeout < 0)
@@ -1847,7 +1859,9 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
if (err < 0)
return err;
- if (cda[CTA_EXPECT_MASTER])
+ if (cda[CTA_EXPECT_TUPLE])
+ err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3);
+ else if (cda[CTA_EXPECT_MASTER])
err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER, u3);
else
return -EINVAL;
@@ -1869,25 +1883,30 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
err = -ENOMEM;
skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (skb2 == NULL)
+ if (skb2 == NULL) {
+ nf_ct_expect_put(exp);
goto out;
+ }
rcu_read_lock();
err = ctnetlink_exp_fill_info(skb2, NETLINK_CB(skb).pid,
nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW, exp);
rcu_read_unlock();
+ nf_ct_expect_put(exp);
if (err <= 0)
goto free;
- nf_ct_expect_put(exp);
+ err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
+ if (err < 0)
+ goto out;
- return netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
+ return 0;
free:
kfree_skb(skb2);
out:
- nf_ct_expect_put(exp);
- return err;
+ /* this avoids a loop in nfnetlink. */
+ return err == -EAGAIN ? -ENOBUFS : err;
}
static int
@@ -2023,6 +2042,10 @@ ctnetlink_create_expect(struct net *net, u16 zone,
}
help = nfct_help(ct);
if (!help) {
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+ if (test_bit(IPS_USERSPACE_HELPER_BIT, &ct->status)) {
if (!cda[CTA_EXPECT_TIMEOUT]) {
err = -EINVAL;
goto out;
@@ -2163,6 +2186,54 @@ MODULE_ALIAS("ip_conntrack_netlink");
MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK);
MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK_EXP);
+static int __net_init ctnetlink_net_init(struct net *net)
+{
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+ int ret;
+
+ ret = nf_conntrack_register_notifier(net, &ctnl_notifier);
+ if (ret < 0) {
+ pr_err("ctnetlink_init: cannot register notifier.\n");
+ goto err_out;
+ }
+
+ ret = nf_ct_expect_register_notifier(net, &ctnl_notifier_exp);
+ if (ret < 0) {
+ pr_err("ctnetlink_init: cannot expect register notifier.\n");
+ goto err_unreg_notifier;
+ }
+#endif
+ return 0;
+
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+err_unreg_notifier:
+ nf_conntrack_unregister_notifier(net, &ctnl_notifier);
+err_out:
+ return ret;
+#endif
+}
+
+static void ctnetlink_net_exit(struct net *net)
+{
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+ nf_ct_expect_unregister_notifier(net, &ctnl_notifier_exp);
+ nf_conntrack_unregister_notifier(net, &ctnl_notifier);
+#endif
+}
+
+static void __net_exit ctnetlink_net_exit_batch(struct list_head *net_exit_list)
+{
+ struct net *net;
+
+ list_for_each_entry(net, net_exit_list, exit_list)
+ ctnetlink_net_exit(net);
+}
+
+static struct pernet_operations ctnetlink_net_ops = {
+ .init = ctnetlink_net_init,
+ .exit_batch = ctnetlink_net_exit_batch,
+};
+
static int __init ctnetlink_init(void)
{
int ret;
@@ -2180,28 +2251,15 @@ static int __init ctnetlink_init(void)
goto err_unreg_subsys;
}
-#ifdef CONFIG_NF_CONNTRACK_EVENTS
- ret = nf_conntrack_register_notifier(&ctnl_notifier);
- if (ret < 0) {
- pr_err("ctnetlink_init: cannot register notifier.\n");
+ if (register_pernet_subsys(&ctnetlink_net_ops)) {
+ pr_err("ctnetlink_init: cannot register pernet operations\n");
goto err_unreg_exp_subsys;
}
- ret = nf_ct_expect_register_notifier(&ctnl_notifier_exp);
- if (ret < 0) {
- pr_err("ctnetlink_init: cannot expect register notifier.\n");
- goto err_unreg_notifier;
- }
-#endif
-
return 0;
-#ifdef CONFIG_NF_CONNTRACK_EVENTS
-err_unreg_notifier:
- nf_conntrack_unregister_notifier(&ctnl_notifier);
err_unreg_exp_subsys:
nfnetlink_subsys_unregister(&ctnl_exp_subsys);
-#endif
err_unreg_subsys:
nfnetlink_subsys_unregister(&ctnl_subsys);
err_out:
@@ -2212,12 +2270,7 @@ static void __exit ctnetlink_exit(void)
{
pr_info("ctnetlink: unregistering from nfnetlink.\n");
- nf_ct_remove_userspace_expectations();
-#ifdef CONFIG_NF_CONNTRACK_EVENTS
- nf_ct_expect_unregister_notifier(&ctnl_notifier_exp);
- nf_conntrack_unregister_notifier(&ctnl_notifier);
-#endif
-
+ unregister_pernet_subsys(&ctnetlink_net_ops);
nfnetlink_subsys_unregister(&ctnl_exp_subsys);
nfnetlink_subsys_unregister(&ctnl_subsys);
}
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index 2e664a69d7db..d6dde6dc09e6 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -629,7 +629,7 @@ static int dccp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
return seq_printf(s, "%s ", dccp_state_names[ct->proto.dccp.state]);
}
-#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
static int dccp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
struct nf_conn *ct)
{
@@ -770,7 +770,7 @@ static struct nf_conntrack_l4proto dccp_proto4 __read_mostly = {
.error = dccp_error,
.print_tuple = dccp_print_tuple,
.print_conntrack = dccp_print_conntrack,
-#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.to_nlattr = dccp_to_nlattr,
.nlattr_size = dccp_nlattr_size,
.from_nlattr = nlattr_to_dccp,
@@ -792,7 +792,7 @@ static struct nf_conntrack_l4proto dccp_proto6 __read_mostly = {
.error = dccp_error,
.print_tuple = dccp_print_tuple,
.print_conntrack = dccp_print_conntrack,
-#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.to_nlattr = dccp_to_nlattr,
.nlattr_size = dccp_nlattr_size,
.from_nlattr = nlattr_to_dccp,
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index d69facdd9a7a..f0338791b822 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -291,7 +291,7 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 __read_mostly = {
.new = gre_new,
.destroy = gre_destroy,
.me = THIS_MODULE,
-#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_tuple_size = nf_ct_port_nlattr_tuple_size,
.nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 6772b1154654..afa69136061a 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -461,7 +461,7 @@ static bool sctp_new(struct nf_conn *ct, const struct sk_buff *skb,
return true;
}
-#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_conntrack.h>
@@ -666,7 +666,7 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = {
.packet = sctp_packet,
.new = sctp_new,
.me = THIS_MODULE,
-#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.to_nlattr = sctp_to_nlattr,
.nlattr_size = sctp_nlattr_size,
.from_nlattr = nlattr_to_sctp,
@@ -696,7 +696,7 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = {
.packet = sctp_packet,
.new = sctp_new,
.me = THIS_MODULE,
-#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.to_nlattr = sctp_to_nlattr,
.nlattr_size = sctp_nlattr_size,
.from_nlattr = nlattr_to_sctp,
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 8235b86b4e87..97b9f3ebf28c 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -1126,7 +1126,7 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
return true;
}
-#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_conntrack.h>
@@ -1447,7 +1447,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly =
.packet = tcp_packet,
.new = tcp_new,
.error = tcp_error,
-#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.to_nlattr = tcp_to_nlattr,
.nlattr_size = tcp_nlattr_size,
.from_nlattr = nlattr_to_tcp,
@@ -1479,7 +1479,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly =
.packet = tcp_packet,
.new = tcp_new,
.error = tcp_error,
-#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.to_nlattr = tcp_to_nlattr,
.nlattr_size = tcp_nlattr_size,
.from_nlattr = nlattr_to_tcp,
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index 8289088b8218..5f35757fbff0 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -188,7 +188,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 __read_mostly =
.packet = udp_packet,
.new = udp_new,
.error = udp_error,
-#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
.nlattr_tuple_size = nf_ct_port_nlattr_tuple_size,
@@ -216,7 +216,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 __read_mostly =
.packet = udp_packet,
.new = udp_new,
.error = udp_error,
-#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
.nlattr_tuple_size = nf_ct_port_nlattr_tuple_size,
diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c
index 263b5a72588d..f52ca1181013 100644
--- a/net/netfilter/nf_conntrack_proto_udplite.c
+++ b/net/netfilter/nf_conntrack_proto_udplite.c
@@ -174,7 +174,7 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly =
.packet = udplite_packet,
.new = udplite_new,
.error = udplite_error,
-#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_tuple_size = nf_ct_port_nlattr_tuple_size,
.nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
@@ -198,7 +198,7 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly =
.packet = udplite_packet,
.new = udplite_new,
.error = udplite_error,
-#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_tuple_size = nf_ct_port_nlattr_tuple_size,
.nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 05e9feb101c3..885f5ab9bc28 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -34,7 +34,7 @@
MODULE_LICENSE("GPL");
-#ifdef CONFIG_PROC_FS
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
int
print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_l3proto *l3proto,
@@ -396,7 +396,7 @@ static int nf_conntrack_standalone_init_proc(struct net *net)
static void nf_conntrack_standalone_fini_proc(struct net *net)
{
}
-#endif /* CONFIG_PROC_FS */
+#endif /* CONFIG_NF_CONNTRACK_PROCFS */
/* Sysctl support */
diff --git a/net/netfilter/nf_conntrack_timestamp.c b/net/netfilter/nf_conntrack_timestamp.c
index af7dd31af0a1..e8d27afbbdb9 100644
--- a/net/netfilter/nf_conntrack_timestamp.c
+++ b/net/netfilter/nf_conntrack_timestamp.c
@@ -15,7 +15,7 @@
#include <net/netfilter/nf_conntrack_extend.h>
#include <net/netfilter/nf_conntrack_timestamp.h>
-static int nf_ct_tstamp __read_mostly;
+static bool nf_ct_tstamp __read_mostly;
module_param_named(tstamp, nf_ct_tstamp, bool, 0644);
MODULE_PARM_DESC(tstamp, "Enable connection tracking flow timestamping.");
diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
new file mode 100644
index 000000000000..11ba013e47f6
--- /dev/null
+++ b/net/netfilter/nfnetlink_acct.c
@@ -0,0 +1,361 @@
+/*
+ * (C) 2011 Pablo Neira Ayuso <pablo@netfilter.org>
+ * (C) 2011 Intra2net AG <http://www.intra2net.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation (or any later at your option).
+ */
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/netlink.h>
+#include <linux/rculist.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <net/netlink.h>
+#include <net/sock.h>
+#include <asm/atomic.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_acct.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_DESCRIPTION("nfacct: Extended Netfilter accounting infrastructure");
+
+static LIST_HEAD(nfnl_acct_list);
+
+struct nf_acct {
+ atomic64_t pkts;
+ atomic64_t bytes;
+ struct list_head head;
+ atomic_t refcnt;
+ char name[NFACCT_NAME_MAX];
+ struct rcu_head rcu_head;
+};
+
+static int
+nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb,
+ const struct nlmsghdr *nlh, const struct nlattr * const tb[])
+{
+ struct nf_acct *nfacct, *matching = NULL;
+ char *acct_name;
+
+ if (!tb[NFACCT_NAME])
+ return -EINVAL;
+
+ acct_name = nla_data(tb[NFACCT_NAME]);
+
+ list_for_each_entry(nfacct, &nfnl_acct_list, head) {
+ if (strncmp(nfacct->name, acct_name, NFACCT_NAME_MAX) != 0)
+ continue;
+
+ if (nlh->nlmsg_flags & NLM_F_EXCL)
+ return -EEXIST;
+
+ matching = nfacct;
+ break;
+ }
+
+ if (matching) {
+ if (nlh->nlmsg_flags & NLM_F_REPLACE) {
+ /* reset counters if you request a replacement. */
+ atomic64_set(&matching->pkts, 0);
+ atomic64_set(&matching->bytes, 0);
+ return 0;
+ }
+ return -EBUSY;
+ }
+
+ nfacct = kzalloc(sizeof(struct nf_acct), GFP_KERNEL);
+ if (nfacct == NULL)
+ return -ENOMEM;
+
+ strncpy(nfacct->name, nla_data(tb[NFACCT_NAME]), NFACCT_NAME_MAX);
+
+ if (tb[NFACCT_BYTES]) {
+ atomic64_set(&nfacct->bytes,
+ be64_to_cpu(nla_get_u64(tb[NFACCT_BYTES])));
+ }
+ if (tb[NFACCT_PKTS]) {
+ atomic64_set(&nfacct->pkts,
+ be64_to_cpu(nla_get_u64(tb[NFACCT_PKTS])));
+ }
+ atomic_set(&nfacct->refcnt, 1);
+ list_add_tail_rcu(&nfacct->head, &nfnl_acct_list);
+ return 0;
+}
+
+static int
+nfnl_acct_fill_info(struct sk_buff *skb, u32 pid, u32 seq, u32 type,
+ int event, struct nf_acct *acct)
+{
+ struct nlmsghdr *nlh;
+ struct nfgenmsg *nfmsg;
+ unsigned int flags = pid ? NLM_F_MULTI : 0;
+ u64 pkts, bytes;
+
+ event |= NFNL_SUBSYS_ACCT << 8;
+ nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags);
+ if (nlh == NULL)
+ goto nlmsg_failure;
+
+ nfmsg = nlmsg_data(nlh);
+ nfmsg->nfgen_family = AF_UNSPEC;
+ nfmsg->version = NFNETLINK_V0;
+ nfmsg->res_id = 0;
+
+ NLA_PUT_STRING(skb, NFACCT_NAME, acct->name);
+
+ if (type == NFNL_MSG_ACCT_GET_CTRZERO) {
+ pkts = atomic64_xchg(&acct->pkts, 0);
+ bytes = atomic64_xchg(&acct->bytes, 0);
+ } else {
+ pkts = atomic64_read(&acct->pkts);
+ bytes = atomic64_read(&acct->bytes);
+ }
+ NLA_PUT_BE64(skb, NFACCT_PKTS, cpu_to_be64(pkts));
+ NLA_PUT_BE64(skb, NFACCT_BYTES, cpu_to_be64(bytes));
+ NLA_PUT_BE32(skb, NFACCT_USE, htonl(atomic_read(&acct->refcnt)));
+
+ nlmsg_end(skb, nlh);
+ return skb->len;
+
+nlmsg_failure:
+nla_put_failure:
+ nlmsg_cancel(skb, nlh);
+ return -1;
+}
+
+static int
+nfnl_acct_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct nf_acct *cur, *last;
+
+ if (cb->args[2])
+ return 0;
+
+ last = (struct nf_acct *)cb->args[1];
+ if (cb->args[1])
+ cb->args[1] = 0;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(cur, &nfnl_acct_list, head) {
+ if (last && cur != last)
+ continue;
+
+ if (nfnl_acct_fill_info(skb, NETLINK_CB(cb->skb).pid,
+ cb->nlh->nlmsg_seq,
+ NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
+ NFNL_MSG_ACCT_NEW, cur) < 0) {
+ cb->args[1] = (unsigned long)cur;
+ break;
+ }
+ }
+ if (!cb->args[1])
+ cb->args[2] = 1;
+ rcu_read_unlock();
+ return skb->len;
+}
+
+static int
+nfnl_acct_get(struct sock *nfnl, struct sk_buff *skb,
+ const struct nlmsghdr *nlh, const struct nlattr * const tb[])
+{
+ int ret = -ENOENT;
+ struct nf_acct *cur;
+ char *acct_name;
+
+ if (nlh->nlmsg_flags & NLM_F_DUMP) {
+ return netlink_dump_start(nfnl, skb, nlh, nfnl_acct_dump,
+ NULL, 0);
+ }
+
+ if (!tb[NFACCT_NAME])
+ return -EINVAL;
+ acct_name = nla_data(tb[NFACCT_NAME]);
+
+ list_for_each_entry(cur, &nfnl_acct_list, head) {
+ struct sk_buff *skb2;
+
+ if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX)!= 0)
+ continue;
+
+ skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (skb2 == NULL) {
+ ret = -ENOMEM;
+ break;
+ }
+
+ ret = nfnl_acct_fill_info(skb2, NETLINK_CB(skb).pid,
+ nlh->nlmsg_seq,
+ NFNL_MSG_TYPE(nlh->nlmsg_type),
+ NFNL_MSG_ACCT_NEW, cur);
+ if (ret <= 0) {
+ kfree_skb(skb2);
+ break;
+ }
+ ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).pid,
+ MSG_DONTWAIT);
+ if (ret > 0)
+ ret = 0;
+
+ /* this avoids a loop in nfnetlink. */
+ return ret == -EAGAIN ? -ENOBUFS : ret;
+ }
+ return ret;
+}
+
+/* try to delete object, fail if it is still in use. */
+static int nfnl_acct_try_del(struct nf_acct *cur)
+{
+ int ret = 0;
+
+ /* we want to avoid races with nfnl_acct_find_get. */
+ if (atomic_dec_and_test(&cur->refcnt)) {
+ /* We are protected by nfnl mutex. */
+ list_del_rcu(&cur->head);
+ kfree_rcu(cur, rcu_head);
+ } else {
+ /* still in use, restore reference counter. */
+ atomic_inc(&cur->refcnt);
+ ret = -EBUSY;
+ }
+ return ret;
+}
+
+static int
+nfnl_acct_del(struct sock *nfnl, struct sk_buff *skb,
+ const struct nlmsghdr *nlh, const struct nlattr * const tb[])
+{
+ char *acct_name;
+ struct nf_acct *cur;
+ int ret = -ENOENT;
+
+ if (!tb[NFACCT_NAME]) {
+ list_for_each_entry(cur, &nfnl_acct_list, head)
+ nfnl_acct_try_del(cur);
+
+ return 0;
+ }
+ acct_name = nla_data(tb[NFACCT_NAME]);
+
+ list_for_each_entry(cur, &nfnl_acct_list, head) {
+ if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX) != 0)
+ continue;
+
+ ret = nfnl_acct_try_del(cur);
+ if (ret < 0)
+ return ret;
+
+ break;
+ }
+ return ret;
+}
+
+static const struct nla_policy nfnl_acct_policy[NFACCT_MAX+1] = {
+ [NFACCT_NAME] = { .type = NLA_NUL_STRING, .len = NFACCT_NAME_MAX-1 },
+ [NFACCT_BYTES] = { .type = NLA_U64 },
+ [NFACCT_PKTS] = { .type = NLA_U64 },
+};
+
+static const struct nfnl_callback nfnl_acct_cb[NFNL_MSG_ACCT_MAX] = {
+ [NFNL_MSG_ACCT_NEW] = { .call = nfnl_acct_new,
+ .attr_count = NFACCT_MAX,
+ .policy = nfnl_acct_policy },
+ [NFNL_MSG_ACCT_GET] = { .call = nfnl_acct_get,
+ .attr_count = NFACCT_MAX,
+ .policy = nfnl_acct_policy },
+ [NFNL_MSG_ACCT_GET_CTRZERO] = { .call = nfnl_acct_get,
+ .attr_count = NFACCT_MAX,
+ .policy = nfnl_acct_policy },
+ [NFNL_MSG_ACCT_DEL] = { .call = nfnl_acct_del,
+ .attr_count = NFACCT_MAX,
+ .policy = nfnl_acct_policy },
+};
+
+static const struct nfnetlink_subsystem nfnl_acct_subsys = {
+ .name = "acct",
+ .subsys_id = NFNL_SUBSYS_ACCT,
+ .cb_count = NFNL_MSG_ACCT_MAX,
+ .cb = nfnl_acct_cb,
+};
+
+MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_ACCT);
+
+struct nf_acct *nfnl_acct_find_get(const char *acct_name)
+{
+ struct nf_acct *cur, *acct = NULL;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(cur, &nfnl_acct_list, head) {
+ if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX)!= 0)
+ continue;
+
+ if (!try_module_get(THIS_MODULE))
+ goto err;
+
+ if (!atomic_inc_not_zero(&cur->refcnt)) {
+ module_put(THIS_MODULE);
+ goto err;
+ }
+
+ acct = cur;
+ break;
+ }
+err:
+ rcu_read_unlock();
+ return acct;
+}
+EXPORT_SYMBOL_GPL(nfnl_acct_find_get);
+
+void nfnl_acct_put(struct nf_acct *acct)
+{
+ atomic_dec(&acct->refcnt);
+ module_put(THIS_MODULE);
+}
+EXPORT_SYMBOL_GPL(nfnl_acct_put);
+
+void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct)
+{
+ atomic64_inc(&nfacct->pkts);
+ atomic64_add(skb->len, &nfacct->bytes);
+}
+EXPORT_SYMBOL_GPL(nfnl_acct_update);
+
+static int __init nfnl_acct_init(void)
+{
+ int ret;
+
+ pr_info("nfnl_acct: registering with nfnetlink.\n");
+ ret = nfnetlink_subsys_register(&nfnl_acct_subsys);
+ if (ret < 0) {
+ pr_err("nfnl_acct_init: cannot register with nfnetlink.\n");
+ goto err_out;
+ }
+ return 0;
+err_out:
+ return ret;
+}
+
+static void __exit nfnl_acct_exit(void)
+{
+ struct nf_acct *cur, *tmp;
+
+ pr_info("nfnl_acct: unregistering from nfnetlink.\n");
+ nfnetlink_subsys_unregister(&nfnl_acct_subsys);
+
+ list_for_each_entry_safe(cur, tmp, &nfnl_acct_list, head) {
+ list_del_rcu(&cur->head);
+ /* We are sure that our objects have no clients at this point,
+ * it's safe to release them all without checking refcnt. */
+ kfree_rcu(cur, rcu_head);
+ }
+}
+
+module_init(nfnl_acct_init);
+module_exit(nfnl_acct_exit);
diff --git a/net/netfilter/xt_AUDIT.c b/net/netfilter/xt_AUDIT.c
index 4bca15a0c385..ba92824086f3 100644
--- a/net/netfilter/xt_AUDIT.c
+++ b/net/netfilter/xt_AUDIT.c
@@ -98,6 +98,7 @@ static void audit_ip6(struct audit_buffer *ab, struct sk_buff *skb)
struct ipv6hdr _ip6h;
const struct ipv6hdr *ih;
u8 nexthdr;
+ __be16 frag_off;
int offset;
ih = skb_header_pointer(skb, skb_network_offset(skb), sizeof(_ip6h), &_ip6h);
@@ -108,7 +109,7 @@ static void audit_ip6(struct audit_buffer *ab, struct sk_buff *skb)
nexthdr = ih->nexthdr;
offset = ipv6_skip_exthdr(skb, skb_network_offset(skb) + sizeof(_ip6h),
- &nexthdr);
+ &nexthdr, &frag_off);
audit_log_format(ab, " saddr=%pI6c daddr=%pI6c proto=%hhu",
&ih->saddr, &ih->daddr, nexthdr);
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index 0221d10de75a..8e87123f1373 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -62,8 +62,8 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par)
int ret = 0;
u8 proto;
- if (info->flags & ~XT_CT_NOTRACK)
- return -EINVAL;
+ if (info->flags & ~(XT_CT_NOTRACK | XT_CT_USERSPACE_HELPER))
+ return -EOPNOTSUPP;
if (info->flags & XT_CT_NOTRACK) {
ct = nf_ct_untracked_get();
@@ -92,7 +92,9 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par)
GFP_KERNEL))
goto err3;
- if (info->helper[0]) {
+ if (info->flags & XT_CT_USERSPACE_HELPER) {
+ __set_bit(IPS_USERSPACE_HELPER_BIT, &ct->status);
+ } else if (info->helper[0]) {
ret = -ENOENT;
proto = xt_ct_find_proto(par);
if (!proto) {
diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c
index d4f4b5d66b20..95237c89607a 100644
--- a/net/netfilter/xt_NFQUEUE.c
+++ b/net/netfilter/xt_NFQUEUE.c
@@ -49,7 +49,7 @@ static u32 hash_v4(const struct sk_buff *skb)
return jhash_2words((__force u32)ipaddr, iph->protocol, jhash_initval);
}
-#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
static u32 hash_v6(const struct sk_buff *skb)
{
const struct ipv6hdr *ip6h = ipv6_hdr(skb);
@@ -74,7 +74,7 @@ nfqueue_tg_v1(struct sk_buff *skb, const struct xt_action_param *par)
if (par->family == NFPROTO_IPV4)
queue = (((u64) hash_v4(skb) * info->queues_total) >>
32) + queue;
-#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
else if (par->family == NFPROTO_IPV6)
queue = (((u64) hash_v6(skb) * info->queues_total) >>
32) + queue;
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index 9e63b43faeed..190ad37c5cf8 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -161,7 +161,7 @@ static u_int32_t tcpmss_reverse_mtu(const struct sk_buff *skb,
struct flowi6 *fl6 = &fl.u.ip6;
memset(fl6, 0, sizeof(*fl6));
- ipv6_addr_copy(&fl6->daddr, &ipv6_hdr(skb)->saddr);
+ fl6->daddr = ipv6_hdr(skb)->saddr;
}
rcu_read_lock();
ai = nf_get_afinfo(family);
@@ -198,17 +198,18 @@ tcpmss_tg4(struct sk_buff *skb, const struct xt_action_param *par)
return XT_CONTINUE;
}
-#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
static unsigned int
tcpmss_tg6(struct sk_buff *skb, const struct xt_action_param *par)
{
struct ipv6hdr *ipv6h = ipv6_hdr(skb);
u8 nexthdr;
+ __be16 frag_off;
int tcphoff;
int ret;
nexthdr = ipv6h->nexthdr;
- tcphoff = ipv6_skip_exthdr(skb, sizeof(*ipv6h), &nexthdr);
+ tcphoff = ipv6_skip_exthdr(skb, sizeof(*ipv6h), &nexthdr, &frag_off);
if (tcphoff < 0)
return NF_DROP;
ret = tcpmss_mangle_packet(skb, par->targinfo,
@@ -259,7 +260,7 @@ static int tcpmss_tg4_check(const struct xt_tgchk_param *par)
return -EINVAL;
}
-#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
static int tcpmss_tg6_check(const struct xt_tgchk_param *par)
{
const struct xt_tcpmss_info *info = par->targinfo;
@@ -292,7 +293,7 @@ static struct xt_target tcpmss_tg_reg[] __read_mostly = {
.proto = IPPROTO_TCP,
.me = THIS_MODULE,
},
-#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
{
.family = NFPROTO_IPV6,
.name = "TCPMSS",
diff --git a/net/netfilter/xt_TCPOPTSTRIP.c b/net/netfilter/xt_TCPOPTSTRIP.c
index 9dc9ecfdd546..25fd1c4e1eec 100644
--- a/net/netfilter/xt_TCPOPTSTRIP.c
+++ b/net/netfilter/xt_TCPOPTSTRIP.c
@@ -80,16 +80,17 @@ tcpoptstrip_tg4(struct sk_buff *skb, const struct xt_action_param *par)
sizeof(struct iphdr) + sizeof(struct tcphdr));
}
-#if defined(CONFIG_IP6_NF_MANGLE) || defined(CONFIG_IP6_NF_MANGLE_MODULE)
+#if IS_ENABLED(CONFIG_IP6_NF_MANGLE)
static unsigned int
tcpoptstrip_tg6(struct sk_buff *skb, const struct xt_action_param *par)
{
struct ipv6hdr *ipv6h = ipv6_hdr(skb);
int tcphoff;
u_int8_t nexthdr;
+ __be16 frag_off;
nexthdr = ipv6h->nexthdr;
- tcphoff = ipv6_skip_exthdr(skb, sizeof(*ipv6h), &nexthdr);
+ tcphoff = ipv6_skip_exthdr(skb, sizeof(*ipv6h), &nexthdr, &frag_off);
if (tcphoff < 0)
return NF_DROP;
@@ -108,7 +109,7 @@ static struct xt_target tcpoptstrip_tg_reg[] __read_mostly = {
.targetsize = sizeof(struct xt_tcpoptstrip_target_info),
.me = THIS_MODULE,
},
-#if defined(CONFIG_IP6_NF_MANGLE) || defined(CONFIG_IP6_NF_MANGLE_MODULE)
+#if IS_ENABLED(CONFIG_IP6_NF_MANGLE)
{
.name = "TCPOPTSTRIP",
.family = NFPROTO_IPV6,
diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c
index 5f054a0dbbb1..3aae66facf9f 100644
--- a/net/netfilter/xt_TEE.c
+++ b/net/netfilter/xt_TEE.c
@@ -25,13 +25,10 @@
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter/xt_TEE.h>
-#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
# define WITH_CONNTRACK 1
# include <net/netfilter/nf_conntrack.h>
#endif
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-# define WITH_IPV6 1
-#endif
struct xt_tee_priv {
struct notifier_block notifier;
@@ -136,7 +133,7 @@ tee_tg4(struct sk_buff *skb, const struct xt_action_param *par)
return XT_CONTINUE;
}
-#ifdef WITH_IPV6
+#if IS_ENABLED(CONFIG_IPV6)
static bool
tee_tg_route6(struct sk_buff *skb, const struct xt_tee_tginfo *info)
{
@@ -196,7 +193,7 @@ tee_tg6(struct sk_buff *skb, const struct xt_action_param *par)
}
return XT_CONTINUE;
}
-#endif /* WITH_IPV6 */
+#endif
static int tee_netdev_event(struct notifier_block *this, unsigned long event,
void *ptr)
@@ -276,7 +273,7 @@ static struct xt_target tee_tg_reg[] __read_mostly = {
.destroy = tee_tg_destroy,
.me = THIS_MODULE,
},
-#ifdef WITH_IPV6
+#if IS_ENABLED(CONFIG_IPV6)
{
.name = "TEE",
.revision = 1,
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index dcfd57eb9d02..35a959a096e0 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -22,7 +22,7 @@
#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
-#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
#define XT_TPROXY_HAVE_IPV6 1
#include <net/if_inet6.h>
#include <net/addrconf.h>
diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c
index b77d383cec78..49c5ff7f6dd6 100644
--- a/net/netfilter/xt_addrtype.c
+++ b/net/netfilter/xt_addrtype.c
@@ -16,7 +16,7 @@
#include <linux/ip.h>
#include <net/route.h>
-#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <net/ip6_fib.h>
@@ -31,7 +31,7 @@ MODULE_DESCRIPTION("Xtables: address type match");
MODULE_ALIAS("ipt_addrtype");
MODULE_ALIAS("ip6t_addrtype");
-#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
static u32 match_lookup_rt6(struct net *net, const struct net_device *dev,
const struct in6_addr *addr)
{
@@ -42,7 +42,7 @@ static u32 match_lookup_rt6(struct net *net, const struct net_device *dev,
int route_err;
memset(&flow, 0, sizeof(flow));
- ipv6_addr_copy(&flow.daddr, addr);
+ flow.daddr = *addr;
if (dev)
flow.flowi6_oif = dev->ifindex;
@@ -149,7 +149,7 @@ addrtype_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
else if (info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT)
dev = par->out;
-#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
if (par->family == NFPROTO_IPV6)
return addrtype_mt6(net, dev, skb, info);
#endif
@@ -190,7 +190,7 @@ static int addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par)
return -EINVAL;
}
-#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
if (par->family == NFPROTO_IPV6) {
if ((info->source | info->dest) & XT_ADDRTYPE_BLACKHOLE) {
pr_err("ipv6 BLACKHOLE matching not supported\n");
diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c
index 5b138506690e..e595e07a759b 100644
--- a/net/netfilter/xt_connbytes.c
+++ b/net/netfilter/xt_connbytes.c
@@ -40,46 +40,46 @@ connbytes_mt(const struct sk_buff *skb, struct xt_action_param *par)
case XT_CONNBYTES_PKTS:
switch (sinfo->direction) {
case XT_CONNBYTES_DIR_ORIGINAL:
- what = counters[IP_CT_DIR_ORIGINAL].packets;
+ what = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].packets);
break;
case XT_CONNBYTES_DIR_REPLY:
- what = counters[IP_CT_DIR_REPLY].packets;
+ what = atomic64_read(&counters[IP_CT_DIR_REPLY].packets);
break;
case XT_CONNBYTES_DIR_BOTH:
- what = counters[IP_CT_DIR_ORIGINAL].packets;
- what += counters[IP_CT_DIR_REPLY].packets;
+ what = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].packets);
+ what += atomic64_read(&counters[IP_CT_DIR_REPLY].packets);
break;
}
break;
case XT_CONNBYTES_BYTES:
switch (sinfo->direction) {
case XT_CONNBYTES_DIR_ORIGINAL:
- what = counters[IP_CT_DIR_ORIGINAL].bytes;
+ what = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].bytes);
break;
case XT_CONNBYTES_DIR_REPLY:
- what = counters[IP_CT_DIR_REPLY].bytes;
+ what = atomic64_read(&counters[IP_CT_DIR_REPLY].bytes);
break;
case XT_CONNBYTES_DIR_BOTH:
- what = counters[IP_CT_DIR_ORIGINAL].bytes;
- what += counters[IP_CT_DIR_REPLY].bytes;
+ what = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].bytes);
+ what += atomic64_read(&counters[IP_CT_DIR_REPLY].bytes);
break;
}
break;
case XT_CONNBYTES_AVGPKT:
switch (sinfo->direction) {
case XT_CONNBYTES_DIR_ORIGINAL:
- bytes = counters[IP_CT_DIR_ORIGINAL].bytes;
- pkts = counters[IP_CT_DIR_ORIGINAL].packets;
+ bytes = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].bytes);
+ pkts = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].packets);
break;
case XT_CONNBYTES_DIR_REPLY:
- bytes = counters[IP_CT_DIR_REPLY].bytes;
- pkts = counters[IP_CT_DIR_REPLY].packets;
+ bytes = atomic64_read(&counters[IP_CT_DIR_REPLY].bytes);
+ pkts = atomic64_read(&counters[IP_CT_DIR_REPLY].packets);
break;
case XT_CONNBYTES_DIR_BOTH:
- bytes = counters[IP_CT_DIR_ORIGINAL].bytes +
- counters[IP_CT_DIR_REPLY].bytes;
- pkts = counters[IP_CT_DIR_ORIGINAL].packets +
- counters[IP_CT_DIR_REPLY].packets;
+ bytes = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].bytes) +
+ atomic64_read(&counters[IP_CT_DIR_REPLY].bytes);
+ pkts = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].packets) +
+ atomic64_read(&counters[IP_CT_DIR_REPLY].packets);
break;
}
if (pkts != 0)
@@ -87,10 +87,10 @@ connbytes_mt(const struct sk_buff *skb, struct xt_action_param *par)
break;
}
- if (sinfo->count.to)
+ if (sinfo->count.to >= sinfo->count.from)
return what <= sinfo->count.to && what >= sinfo->count.from;
- else
- return what >= sinfo->count.from;
+ else /* inverted */
+ return what < sinfo->count.to || what > sinfo->count.from;
}
static int connbytes_mt_check(const struct xt_mtchk_param *par)
diff --git a/net/netfilter/xt_ecn.c b/net/netfilter/xt_ecn.c
new file mode 100644
index 000000000000..3c831a8efebc
--- /dev/null
+++ b/net/netfilter/xt_ecn.c
@@ -0,0 +1,179 @@
+/*
+ * Xtables module for matching the value of the IPv4/IPv6 and TCP ECN bits
+ *
+ * (C) 2002 by Harald Welte <laforge@gnumonks.org>
+ * (C) 2011 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <net/ip.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/tcp.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_ecn.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+
+MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
+MODULE_DESCRIPTION("Xtables: Explicit Congestion Notification (ECN) flag match");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_ecn");
+MODULE_ALIAS("ip6t_ecn");
+
+static bool match_tcp(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_ecn_info *einfo = par->matchinfo;
+ struct tcphdr _tcph;
+ const struct tcphdr *th;
+
+ /* In practice, TCP match does this, so can't fail. But let's
+ * be good citizens.
+ */
+ th = skb_header_pointer(skb, par->thoff, sizeof(_tcph), &_tcph);
+ if (th == NULL)
+ return false;
+
+ if (einfo->operation & XT_ECN_OP_MATCH_ECE) {
+ if (einfo->invert & XT_ECN_OP_MATCH_ECE) {
+ if (th->ece == 1)
+ return false;
+ } else {
+ if (th->ece == 0)
+ return false;
+ }
+ }
+
+ if (einfo->operation & XT_ECN_OP_MATCH_CWR) {
+ if (einfo->invert & XT_ECN_OP_MATCH_CWR) {
+ if (th->cwr == 1)
+ return false;
+ } else {
+ if (th->cwr == 0)
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static inline bool match_ip(const struct sk_buff *skb,
+ const struct xt_ecn_info *einfo)
+{
+ return ((ip_hdr(skb)->tos & XT_ECN_IP_MASK) == einfo->ip_ect) ^
+ !!(einfo->invert & XT_ECN_OP_MATCH_IP);
+}
+
+static bool ecn_mt4(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_ecn_info *info = par->matchinfo;
+
+ if (info->operation & XT_ECN_OP_MATCH_IP && !match_ip(skb, info))
+ return false;
+
+ if (info->operation & (XT_ECN_OP_MATCH_ECE | XT_ECN_OP_MATCH_CWR) &&
+ !match_tcp(skb, par))
+ return false;
+
+ return true;
+}
+
+static int ecn_mt_check4(const struct xt_mtchk_param *par)
+{
+ const struct xt_ecn_info *info = par->matchinfo;
+ const struct ipt_ip *ip = par->entryinfo;
+
+ if (info->operation & XT_ECN_OP_MATCH_MASK)
+ return -EINVAL;
+
+ if (info->invert & XT_ECN_OP_MATCH_MASK)
+ return -EINVAL;
+
+ if (info->operation & (XT_ECN_OP_MATCH_ECE | XT_ECN_OP_MATCH_CWR) &&
+ (ip->proto != IPPROTO_TCP || ip->invflags & IPT_INV_PROTO)) {
+ pr_info("cannot match TCP bits in rule for non-tcp packets\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static inline bool match_ipv6(const struct sk_buff *skb,
+ const struct xt_ecn_info *einfo)
+{
+ return (((ipv6_hdr(skb)->flow_lbl[0] >> 4) & XT_ECN_IP_MASK) ==
+ einfo->ip_ect) ^
+ !!(einfo->invert & XT_ECN_OP_MATCH_IP);
+}
+
+static bool ecn_mt6(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_ecn_info *info = par->matchinfo;
+
+ if (info->operation & XT_ECN_OP_MATCH_IP && !match_ipv6(skb, info))
+ return false;
+
+ if (info->operation & (XT_ECN_OP_MATCH_ECE | XT_ECN_OP_MATCH_CWR) &&
+ !match_tcp(skb, par))
+ return false;
+
+ return true;
+}
+
+static int ecn_mt_check6(const struct xt_mtchk_param *par)
+{
+ const struct xt_ecn_info *info = par->matchinfo;
+ const struct ip6t_ip6 *ip = par->entryinfo;
+
+ if (info->operation & XT_ECN_OP_MATCH_MASK)
+ return -EINVAL;
+
+ if (info->invert & XT_ECN_OP_MATCH_MASK)
+ return -EINVAL;
+
+ if (info->operation & (XT_ECN_OP_MATCH_ECE | XT_ECN_OP_MATCH_CWR) &&
+ (ip->proto != IPPROTO_TCP || ip->invflags & IP6T_INV_PROTO)) {
+ pr_info("cannot match TCP bits in rule for non-tcp packets\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static struct xt_match ecn_mt_reg[] __read_mostly = {
+ {
+ .name = "ecn",
+ .family = NFPROTO_IPV4,
+ .match = ecn_mt4,
+ .matchsize = sizeof(struct xt_ecn_info),
+ .checkentry = ecn_mt_check4,
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "ecn",
+ .family = NFPROTO_IPV6,
+ .match = ecn_mt6,
+ .matchsize = sizeof(struct xt_ecn_info),
+ .checkentry = ecn_mt_check6,
+ .me = THIS_MODULE,
+ },
+};
+
+static int __init ecn_mt_init(void)
+{
+ return xt_register_matches(ecn_mt_reg, ARRAY_SIZE(ecn_mt_reg));
+}
+
+static void __exit ecn_mt_exit(void)
+{
+ xt_unregister_matches(ecn_mt_reg, ARRAY_SIZE(ecn_mt_reg));
+}
+
+module_init(ecn_mt_init);
+module_exit(ecn_mt_exit);
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index dfd52bad1523..8e4992101875 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -21,7 +21,7 @@
#include <linux/mm.h>
#include <linux/in.h>
#include <linux/ip.h>
-#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
#include <linux/ipv6.h>
#include <net/ipv6.h>
#endif
@@ -64,7 +64,7 @@ struct dsthash_dst {
__be32 src;
__be32 dst;
} ip;
-#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
struct {
__be32 src[4];
__be32 dst[4];
@@ -413,7 +413,7 @@ static inline __be32 maskl(__be32 a, unsigned int l)
return l ? htonl(ntohl(a) & ~0 << (32 - l)) : 0;
}
-#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
static void hashlimit_ipv6_mask(__be32 *i, unsigned int p)
{
switch (p) {
@@ -445,6 +445,7 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo,
{
__be16 _ports[2], *ports;
u8 nexthdr;
+ __be16 frag_off;
int poff;
memset(dst, 0, sizeof(*dst));
@@ -463,7 +464,7 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo,
return 0;
nexthdr = ip_hdr(skb)->protocol;
break;
-#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
case NFPROTO_IPV6:
if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_DIP) {
memcpy(&dst->ip6.dst, &ipv6_hdr(skb)->daddr,
@@ -480,7 +481,7 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo,
(XT_HASHLIMIT_HASH_DPT | XT_HASHLIMIT_HASH_SPT)))
return 0;
nexthdr = ipv6_hdr(skb)->nexthdr;
- protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr);
+ protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr, &frag_off);
if ((int)protoff < 0)
return -1;
break;
@@ -615,7 +616,7 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = {
.destroy = hashlimit_mt_destroy,
.me = THIS_MODULE,
},
-#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
{
.name = "hashlimit",
.revision = 1,
@@ -692,7 +693,7 @@ static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family,
ent->rateinfo.credit, ent->rateinfo.credit_cap,
ent->rateinfo.cost);
break;
-#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
case NFPROTO_IPV6:
res = seq_printf(s, "%ld %pI6:%u->%pI6:%u %u %u %u\n",
(long)(ent->expires - jiffies)/HZ,
@@ -760,7 +761,7 @@ static int __net_init hashlimit_proc_net_init(struct net *net)
hashlimit_net->ipt_hashlimit = proc_mkdir("ipt_hashlimit", net->proc_net);
if (!hashlimit_net->ipt_hashlimit)
return -ENOMEM;
-#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
hashlimit_net->ip6t_hashlimit = proc_mkdir("ip6t_hashlimit", net->proc_net);
if (!hashlimit_net->ip6t_hashlimit) {
proc_net_remove(net, "ipt_hashlimit");
@@ -773,7 +774,7 @@ static int __net_init hashlimit_proc_net_init(struct net *net)
static void __net_exit hashlimit_proc_net_exit(struct net *net)
{
proc_net_remove(net, "ipt_hashlimit");
-#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
proc_net_remove(net, "ip6t_hashlimit");
#endif
}
diff --git a/net/netfilter/xt_nfacct.c b/net/netfilter/xt_nfacct.c
new file mode 100644
index 000000000000..b3be0ef21f19
--- /dev/null
+++ b/net/netfilter/xt_nfacct.c
@@ -0,0 +1,76 @@
+/*
+ * (C) 2011 Pablo Neira Ayuso <pablo@netfilter.org>
+ * (C) 2011 Intra2net AG <http://www.intra2net.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 (or any
+ * later at your option) as published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/skbuff.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/nfnetlink_acct.h>
+#include <linux/netfilter/xt_nfacct.h>
+
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_DESCRIPTION("Xtables: match for the extended accounting infrastructure");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_nfacct");
+MODULE_ALIAS("ip6t_nfacct");
+
+static bool nfacct_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_nfacct_match_info *info = par->targinfo;
+
+ nfnl_acct_update(skb, info->nfacct);
+
+ return true;
+}
+
+static int
+nfacct_mt_checkentry(const struct xt_mtchk_param *par)
+{
+ struct xt_nfacct_match_info *info = par->matchinfo;
+ struct nf_acct *nfacct;
+
+ nfacct = nfnl_acct_find_get(info->name);
+ if (nfacct == NULL) {
+ pr_info("xt_nfacct: accounting object with name `%s' "
+ "does not exists\n", info->name);
+ return -ENOENT;
+ }
+ info->nfacct = nfacct;
+ return 0;
+}
+
+static void
+nfacct_mt_destroy(const struct xt_mtdtor_param *par)
+{
+ const struct xt_nfacct_match_info *info = par->matchinfo;
+
+ nfnl_acct_put(info->nfacct);
+}
+
+static struct xt_match nfacct_mt_reg __read_mostly = {
+ .name = "nfacct",
+ .family = NFPROTO_UNSPEC,
+ .checkentry = nfacct_mt_checkentry,
+ .match = nfacct_mt,
+ .destroy = nfacct_mt_destroy,
+ .matchsize = sizeof(struct xt_nfacct_match_info),
+ .me = THIS_MODULE,
+};
+
+static int __init nfacct_mt_init(void)
+{
+ return xt_register_match(&nfacct_mt_reg);
+}
+
+static void __exit nfacct_mt_exit(void)
+{
+ xt_unregister_match(&nfacct_mt_reg);
+}
+
+module_init(nfacct_mt_init);
+module_exit(nfacct_mt_exit);
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index fe39f7e913df..72bb07f57f97 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -22,7 +22,7 @@
#include <net/netfilter/nf_tproxy_core.h>
#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
-#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
#define XT_SOCKET_HAVE_IPV6 1
#include <linux/netfilter_ipv6/ip6_tables.h>
#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
@@ -30,7 +30,7 @@
#include <linux/netfilter/xt_socket.h>
-#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
#define XT_SOCKET_HAVE_CONNTRACK 1
#include <net/netfilter/nf_conntrack.h>
#endif
@@ -214,6 +214,7 @@ extract_icmp6_fields(const struct sk_buff *skb,
struct icmp6hdr *icmph, _icmph;
__be16 *ports, _ports[2];
u8 inside_nexthdr;
+ __be16 inside_fragoff;
int inside_hdrlen;
icmph = skb_header_pointer(skb, outside_hdrlen,
@@ -229,7 +230,8 @@ extract_icmp6_fields(const struct sk_buff *skb,
return 1;
inside_nexthdr = inside_iph->nexthdr;
- inside_hdrlen = ipv6_skip_exthdr(skb, outside_hdrlen + sizeof(_icmph) + sizeof(_inside_iph), &inside_nexthdr);
+ inside_hdrlen = ipv6_skip_exthdr(skb, outside_hdrlen + sizeof(_icmph) + sizeof(_inside_iph),
+ &inside_nexthdr, &inside_fragoff);
if (inside_hdrlen < 0)
return 1; /* hjm: Packet has no/incomplete transport layer headers. */
diff --git a/net/netlabel/netlabel_addrlist.c b/net/netlabel/netlabel_addrlist.c
index 96b749dacc34..6f1701322fb6 100644
--- a/net/netlabel/netlabel_addrlist.c
+++ b/net/netlabel/netlabel_addrlist.c
@@ -96,7 +96,7 @@ struct netlbl_af4list *netlbl_af4list_search_exact(__be32 addr,
}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
/**
* netlbl_af6list_search - Search for a matching IPv6 address entry
* @addr: IPv6 address
@@ -185,7 +185,7 @@ int netlbl_af4list_add(struct netlbl_af4list *entry, struct list_head *head)
return 0;
}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
/**
* netlbl_af6list_add - Add a new IPv6 address entry to a list
* @entry: address entry
@@ -263,7 +263,7 @@ struct netlbl_af4list *netlbl_af4list_remove(__be32 addr, __be32 mask,
return entry;
}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
/**
* netlbl_af6list_remove_entry - Remove an IPv6 address entry
* @entry: address entry
@@ -342,7 +342,7 @@ void netlbl_af4list_audit_addr(struct audit_buffer *audit_buf,
}
}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
/**
* netlbl_af6list_audit_addr - Audit an IPv6 address
* @audit_buf: audit buffer
diff --git a/net/netlabel/netlabel_addrlist.h b/net/netlabel/netlabel_addrlist.h
index fdbc1d2c7352..a1287ce18130 100644
--- a/net/netlabel/netlabel_addrlist.h
+++ b/net/netlabel/netlabel_addrlist.h
@@ -133,7 +133,7 @@ static inline void netlbl_af4list_audit_addr(struct audit_buffer *audit_buf,
}
#endif
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
#define __af6list_entry(ptr) container_of(ptr, struct netlbl_af6list, list)
diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c
index 3f905e5370c2..38204112b9f4 100644
--- a/net/netlabel/netlabel_domainhash.c
+++ b/net/netlabel/netlabel_domainhash.c
@@ -78,7 +78,7 @@ static void netlbl_domhsh_free_entry(struct rcu_head *entry)
struct netlbl_dom_map *ptr;
struct netlbl_af4list *iter4;
struct netlbl_af4list *tmp4;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
struct netlbl_af6list *iter6;
struct netlbl_af6list *tmp6;
#endif /* IPv6 */
@@ -90,7 +90,7 @@ static void netlbl_domhsh_free_entry(struct rcu_head *entry)
netlbl_af4list_remove_entry(iter4);
kfree(netlbl_domhsh_addr4_entry(iter4));
}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
netlbl_af6list_foreach_safe(iter6, tmp6,
&ptr->type_def.addrsel->list6) {
netlbl_af6list_remove_entry(iter6);
@@ -217,7 +217,7 @@ static void netlbl_domhsh_audit_add(struct netlbl_dom_map *entry,
cipsov4 = map4->type_def.cipsov4;
netlbl_af4list_audit_addr(audit_buf, 0, NULL,
addr4->addr, addr4->mask);
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
} else if (addr6 != NULL) {
struct netlbl_domaddr6_map *map6;
map6 = netlbl_domhsh_addr6_entry(addr6);
@@ -306,7 +306,7 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry,
struct netlbl_dom_map *entry_old;
struct netlbl_af4list *iter4;
struct netlbl_af4list *tmp4;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
struct netlbl_af6list *iter6;
struct netlbl_af6list *tmp6;
#endif /* IPv6 */
@@ -338,7 +338,7 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry,
&entry->type_def.addrsel->list4)
netlbl_domhsh_audit_add(entry, iter4, NULL,
ret_val, audit_info);
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
netlbl_af6list_foreach_rcu(iter6,
&entry->type_def.addrsel->list6)
netlbl_domhsh_audit_add(entry, NULL, iter6,
@@ -365,7 +365,7 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry,
ret_val = -EEXIST;
goto add_return;
}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
netlbl_af6list_foreach_rcu(iter6,
&entry->type_def.addrsel->list6)
if (netlbl_af6list_search_exact(&iter6->addr,
@@ -386,7 +386,7 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry,
if (ret_val != 0)
goto add_return;
}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
netlbl_af6list_foreach_safe(iter6, tmp6,
&entry->type_def.addrsel->list6) {
netlbl_af6list_remove_entry(iter6);
@@ -510,7 +510,7 @@ int netlbl_domhsh_remove_af4(const char *domain,
struct netlbl_dom_map *entry_map;
struct netlbl_af4list *entry_addr;
struct netlbl_af4list *iter4;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
struct netlbl_af6list *iter6;
#endif /* IPv6 */
struct netlbl_domaddr4_map *entry;
@@ -533,7 +533,7 @@ int netlbl_domhsh_remove_af4(const char *domain,
goto remove_af4_failure;
netlbl_af4list_foreach_rcu(iter4, &entry_map->type_def.addrsel->list4)
goto remove_af4_single_addr;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
netlbl_af6list_foreach_rcu(iter6, &entry_map->type_def.addrsel->list6)
goto remove_af4_single_addr;
#endif /* IPv6 */
@@ -644,7 +644,7 @@ struct netlbl_domaddr4_map *netlbl_domhsh_getentry_af4(const char *domain,
return netlbl_domhsh_addr4_entry(addr_iter);
}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
/**
* netlbl_domhsh_getentry_af6 - Get an entry from the domain hash table
* @domain: the domain name to search for
diff --git a/net/netlabel/netlabel_domainhash.h b/net/netlabel/netlabel_domainhash.h
index bfcc0f7024c5..90872c4ca30f 100644
--- a/net/netlabel/netlabel_domainhash.h
+++ b/net/netlabel/netlabel_domainhash.h
@@ -104,7 +104,7 @@ int netlbl_domhsh_walk(u32 *skip_bkt,
int (*callback) (struct netlbl_dom_map *entry, void *arg),
void *cb_arg);
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
struct netlbl_domaddr6_map *netlbl_domhsh_getentry_af6(const char *domain,
const struct in6_addr *addr);
#endif /* IPv6 */
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index 9c24de10a657..2560e7b441c6 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -111,8 +111,6 @@ int netlbl_cfg_unlbl_map_add(const char *domain,
struct netlbl_domaddr_map *addrmap = NULL;
struct netlbl_domaddr4_map *map4 = NULL;
struct netlbl_domaddr6_map *map6 = NULL;
- const struct in_addr *addr4, *mask4;
- const struct in6_addr *addr6, *mask6;
entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
if (entry == NULL)
@@ -133,9 +131,9 @@ int netlbl_cfg_unlbl_map_add(const char *domain,
INIT_LIST_HEAD(&addrmap->list6);
switch (family) {
- case AF_INET:
- addr4 = addr;
- mask4 = mask;
+ case AF_INET: {
+ const struct in_addr *addr4 = addr;
+ const struct in_addr *mask4 = mask;
map4 = kzalloc(sizeof(*map4), GFP_ATOMIC);
if (map4 == NULL)
goto cfg_unlbl_map_add_failure;
@@ -148,25 +146,29 @@ int netlbl_cfg_unlbl_map_add(const char *domain,
if (ret_val != 0)
goto cfg_unlbl_map_add_failure;
break;
- case AF_INET6:
- addr6 = addr;
- mask6 = mask;
+ }
+#if IS_ENABLED(CONFIG_IPV6)
+ case AF_INET6: {
+ const struct in6_addr *addr6 = addr;
+ const struct in6_addr *mask6 = mask;
map6 = kzalloc(sizeof(*map6), GFP_ATOMIC);
if (map6 == NULL)
goto cfg_unlbl_map_add_failure;
map6->type = NETLBL_NLTYPE_UNLABELED;
- ipv6_addr_copy(&map6->list.addr, addr6);
+ map6->list.addr = *addr6;
map6->list.addr.s6_addr32[0] &= mask6->s6_addr32[0];
map6->list.addr.s6_addr32[1] &= mask6->s6_addr32[1];
map6->list.addr.s6_addr32[2] &= mask6->s6_addr32[2];
map6->list.addr.s6_addr32[3] &= mask6->s6_addr32[3];
- ipv6_addr_copy(&map6->list.mask, mask6);
+ map6->list.mask = *mask6;
map6->list.valid = 1;
- ret_val = netlbl_af4list_add(&map4->list,
- &addrmap->list4);
+ ret_val = netlbl_af6list_add(&map6->list,
+ &addrmap->list6);
if (ret_val != 0)
goto cfg_unlbl_map_add_failure;
break;
+ }
+#endif /* IPv6 */
default:
goto cfg_unlbl_map_add_failure;
break;
@@ -225,9 +227,11 @@ int netlbl_cfg_unlbl_static_add(struct net *net,
case AF_INET:
addr_len = sizeof(struct in_addr);
break;
+#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
addr_len = sizeof(struct in6_addr);
break;
+#endif /* IPv6 */
default:
return -EPFNOSUPPORT;
}
@@ -266,9 +270,11 @@ int netlbl_cfg_unlbl_static_del(struct net *net,
case AF_INET:
addr_len = sizeof(struct in_addr);
break;
+#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
addr_len = sizeof(struct in6_addr);
break;
+#endif /* IPv6 */
default:
return -EPFNOSUPPORT;
}
@@ -667,7 +673,7 @@ int netlbl_sock_setattr(struct sock *sk,
ret_val = -ENOENT;
}
break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
/* since we don't support any IPv6 labeling protocols right
* now we can optimize everything away until we do */
@@ -718,7 +724,7 @@ int netlbl_sock_getattr(struct sock *sk,
case AF_INET:
ret_val = cipso_v4_sock_getattr(sk, secattr);
break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
ret_val = -ENOMSG;
break;
@@ -776,7 +782,7 @@ int netlbl_conn_setattr(struct sock *sk,
ret_val = -ENOENT;
}
break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
/* since we don't support any IPv6 labeling protocols right
* now we can optimize everything away until we do */
@@ -847,7 +853,7 @@ int netlbl_req_setattr(struct request_sock *req,
ret_val = -ENOENT;
}
break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
/* since we don't support any IPv6 labeling protocols right
* now we can optimize everything away until we do */
@@ -920,7 +926,7 @@ int netlbl_skbuff_setattr(struct sk_buff *skb,
ret_val = -ENOENT;
}
break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
/* since we don't support any IPv6 labeling protocols right
* now we can optimize everything away until we do */
@@ -959,7 +965,7 @@ int netlbl_skbuff_getattr(const struct sk_buff *skb,
cipso_v4_skbuff_getattr(skb, secattr) == 0)
return 0;
break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
break;
#endif /* IPv6 */
diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c
index bfa555869775..4809e2e48b02 100644
--- a/net/netlabel/netlabel_mgmt.c
+++ b/net/netlabel/netlabel_mgmt.c
@@ -184,7 +184,7 @@ static int netlbl_mgmt_add_common(struct genl_info *info,
entry->type = NETLBL_NLTYPE_ADDRSELECT;
entry->type_def.addrsel = addrmap;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
} else if (info->attrs[NLBL_MGMT_A_IPV6ADDR]) {
struct in6_addr *addr;
struct in6_addr *mask;
@@ -216,12 +216,12 @@ static int netlbl_mgmt_add_common(struct genl_info *info,
ret_val = -ENOMEM;
goto add_failure;
}
- ipv6_addr_copy(&map->list.addr, addr);
+ map->list.addr = *addr;
map->list.addr.s6_addr32[0] &= mask->s6_addr32[0];
map->list.addr.s6_addr32[1] &= mask->s6_addr32[1];
map->list.addr.s6_addr32[2] &= mask->s6_addr32[2];
map->list.addr.s6_addr32[3] &= mask->s6_addr32[3];
- ipv6_addr_copy(&map->list.mask, mask);
+ map->list.mask = *mask;
map->list.valid = 1;
map->type = entry->type;
@@ -270,7 +270,7 @@ static int netlbl_mgmt_listentry(struct sk_buff *skb,
struct nlattr *nla_a;
struct nlattr *nla_b;
struct netlbl_af4list *iter4;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
struct netlbl_af6list *iter6;
#endif
@@ -324,7 +324,7 @@ static int netlbl_mgmt_listentry(struct sk_buff *skb,
nla_nest_end(skb, nla_b);
}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
netlbl_af6list_foreach_rcu(iter6,
&entry->type_def.addrsel->list6) {
struct netlbl_domaddr6_map *map6;
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index e251c2c88521..4b5fa0fe78fd 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -170,7 +170,7 @@ static void netlbl_unlhsh_free_iface(struct rcu_head *entry)
struct netlbl_unlhsh_iface *iface;
struct netlbl_af4list *iter4;
struct netlbl_af4list *tmp4;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
struct netlbl_af6list *iter6;
struct netlbl_af6list *tmp6;
#endif /* IPv6 */
@@ -184,7 +184,7 @@ static void netlbl_unlhsh_free_iface(struct rcu_head *entry)
netlbl_af4list_remove_entry(iter4);
kfree(netlbl_unlhsh_addr4_entry(iter4));
}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
netlbl_af6list_foreach_safe(iter6, tmp6, &iface->addr6_list) {
netlbl_af6list_remove_entry(iter6);
kfree(netlbl_unlhsh_addr6_entry(iter6));
@@ -274,7 +274,7 @@ static int netlbl_unlhsh_add_addr4(struct netlbl_unlhsh_iface *iface,
return ret_val;
}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
/**
* netlbl_unlhsh_add_addr6 - Add a new IPv6 address entry to the hash table
* @iface: the associated interface entry
@@ -300,12 +300,12 @@ static int netlbl_unlhsh_add_addr6(struct netlbl_unlhsh_iface *iface,
if (entry == NULL)
return -ENOMEM;
- ipv6_addr_copy(&entry->list.addr, addr);
+ entry->list.addr = *addr;
entry->list.addr.s6_addr32[0] &= mask->s6_addr32[0];
entry->list.addr.s6_addr32[1] &= mask->s6_addr32[1];
entry->list.addr.s6_addr32[2] &= mask->s6_addr32[2];
entry->list.addr.s6_addr32[3] &= mask->s6_addr32[3];
- ipv6_addr_copy(&entry->list.mask, mask);
+ entry->list.mask = *mask;
entry->list.valid = 1;
entry->secid = secid;
@@ -436,7 +436,7 @@ int netlbl_unlhsh_add(struct net *net,
mask4->s_addr);
break;
}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
case sizeof(struct in6_addr): {
const struct in6_addr *addr6 = addr;
const struct in6_addr *mask6 = mask;
@@ -531,7 +531,7 @@ static int netlbl_unlhsh_remove_addr4(struct net *net,
return 0;
}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
/**
* netlbl_unlhsh_remove_addr6 - Remove an IPv6 address entry
* @net: network namespace
@@ -606,14 +606,14 @@ static int netlbl_unlhsh_remove_addr6(struct net *net,
static void netlbl_unlhsh_condremove_iface(struct netlbl_unlhsh_iface *iface)
{
struct netlbl_af4list *iter4;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
struct netlbl_af6list *iter6;
#endif /* IPv6 */
spin_lock(&netlbl_unlhsh_lock);
netlbl_af4list_foreach_rcu(iter4, &iface->addr4_list)
goto unlhsh_condremove_failure;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
netlbl_af6list_foreach_rcu(iter6, &iface->addr6_list)
goto unlhsh_condremove_failure;
#endif /* IPv6 */
@@ -680,7 +680,7 @@ int netlbl_unlhsh_remove(struct net *net,
iface, addr, mask,
audit_info);
break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
case sizeof(struct in6_addr):
ret_val = netlbl_unlhsh_remove_addr6(net,
iface, addr, mask,
@@ -1196,7 +1196,7 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb,
struct netlbl_unlhsh_iface *iface;
struct list_head *iter_list;
struct netlbl_af4list *addr4;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
struct netlbl_af6list *addr6;
#endif
@@ -1228,7 +1228,7 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb,
goto unlabel_staticlist_return;
}
}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
netlbl_af6list_foreach_rcu(addr6,
&iface->addr6_list) {
if (iter_addr6++ < skip_addr6)
@@ -1277,7 +1277,7 @@ static int netlbl_unlabel_staticlistdef(struct sk_buff *skb,
u32 skip_addr6 = cb->args[1];
u32 iter_addr4 = 0;
struct netlbl_af4list *addr4;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
u32 iter_addr6 = 0;
struct netlbl_af6list *addr6;
#endif
@@ -1303,7 +1303,7 @@ static int netlbl_unlabel_staticlistdef(struct sk_buff *skb,
goto unlabel_staticlistdef_return;
}
}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
netlbl_af6list_foreach_rcu(addr6, &iface->addr6_list) {
if (iter_addr6++ < skip_addr6)
continue;
@@ -1494,7 +1494,7 @@ int netlbl_unlabel_getattr(const struct sk_buff *skb,
secattr->attr.secid = netlbl_unlhsh_addr4_entry(addr4)->secid;
break;
}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
case PF_INET6: {
struct ipv6hdr *hdr6;
struct netlbl_af6list *addr6;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 1201b6d4183d..629b06182f3f 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -139,12 +139,12 @@ static atomic_t nl_table_users = ATOMIC_INIT(0);
static ATOMIC_NOTIFIER_HEAD(netlink_chain);
-static u32 netlink_group_mask(u32 group)
+static inline u32 netlink_group_mask(u32 group)
{
return group ? 1 << (group - 1) : 0;
}
-static struct hlist_head *nl_pid_hashfn(struct nl_pid_hash *hash, u32 pid)
+static inline struct hlist_head *nl_pid_hashfn(struct nl_pid_hash *hash, u32 pid)
{
return &hash->table[jhash_1word(pid, hash->rnd) & hash->mask];
}
@@ -226,8 +226,7 @@ netlink_unlock_table(void)
wake_up(&nl_table_wait);
}
-static inline struct sock *netlink_lookup(struct net *net, int protocol,
- u32 pid)
+static struct sock *netlink_lookup(struct net *net, int protocol, u32 pid)
{
struct nl_pid_hash *hash = &nl_table[protocol].hash;
struct hlist_head *head;
@@ -248,7 +247,7 @@ found:
return sk;
}
-static inline struct hlist_head *nl_pid_hash_zalloc(size_t size)
+static struct hlist_head *nl_pid_hash_zalloc(size_t size)
{
if (size <= PAGE_SIZE)
return kzalloc(size, GFP_ATOMIC);
@@ -258,7 +257,7 @@ static inline struct hlist_head *nl_pid_hash_zalloc(size_t size)
get_order(size));
}
-static inline void nl_pid_hash_free(struct hlist_head *table, size_t size)
+static void nl_pid_hash_free(struct hlist_head *table, size_t size)
{
if (size <= PAGE_SIZE)
kfree(table);
@@ -578,7 +577,7 @@ retry:
return err;
}
-static inline int netlink_capable(struct socket *sock, unsigned int flag)
+static inline int netlink_capable(const struct socket *sock, unsigned int flag)
{
return (nl_table[sock->sk->sk_protocol].nl_nonroot & flag) ||
capable(CAP_NET_ADMIN);
@@ -846,8 +845,7 @@ void netlink_detachskb(struct sock *sk, struct sk_buff *skb)
sock_put(sk);
}
-static inline struct sk_buff *netlink_trim(struct sk_buff *skb,
- gfp_t allocation)
+static struct sk_buff *netlink_trim(struct sk_buff *skb, gfp_t allocation)
{
int delta;
@@ -871,7 +869,7 @@ static inline struct sk_buff *netlink_trim(struct sk_buff *skb,
return skb;
}
-static inline void netlink_rcv_wake(struct sock *sk)
+static void netlink_rcv_wake(struct sock *sk)
{
struct netlink_sock *nlk = nlk_sk(sk);
@@ -881,7 +879,7 @@ static inline void netlink_rcv_wake(struct sock *sk)
wake_up_interruptible(&nlk->wait);
}
-static inline int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb)
+static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb)
{
int ret;
struct netlink_sock *nlk = nlk_sk(sk);
@@ -952,8 +950,7 @@ int netlink_has_listeners(struct sock *sk, unsigned int group)
}
EXPORT_SYMBOL_GPL(netlink_has_listeners);
-static inline int netlink_broadcast_deliver(struct sock *sk,
- struct sk_buff *skb)
+static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb)
{
struct netlink_sock *nlk = nlk_sk(sk);
@@ -962,7 +959,7 @@ static inline int netlink_broadcast_deliver(struct sock *sk,
skb_set_owner_r(skb, sk);
skb_queue_tail(&sk->sk_receive_queue, skb);
sk->sk_data_ready(sk, skb->len);
- return atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf;
+ return atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1);
}
return -1;
}
@@ -982,7 +979,7 @@ struct netlink_broadcast_data {
void *tx_data;
};
-static inline int do_one_broadcast(struct sock *sk,
+static int do_one_broadcast(struct sock *sk,
struct netlink_broadcast_data *p)
{
struct netlink_sock *nlk = nlk_sk(sk);
@@ -1110,8 +1107,7 @@ struct netlink_set_err_data {
int code;
};
-static inline int do_one_set_err(struct sock *sk,
- struct netlink_set_err_data *p)
+static int do_one_set_err(struct sock *sk, struct netlink_set_err_data *p)
{
struct netlink_sock *nlk = nlk_sk(sk);
int ret = 0;
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 482fa571b4ee..a403b618faa5 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -33,6 +33,14 @@ void genl_unlock(void)
}
EXPORT_SYMBOL(genl_unlock);
+#ifdef CONFIG_PROVE_LOCKING
+int lockdep_genl_is_held(void)
+{
+ return lockdep_is_held(&genl_mutex);
+}
+EXPORT_SYMBOL(lockdep_genl_is_held);
+#endif
+
#define GENL_FAM_TAB_SIZE 16
#define GENL_FAM_TAB_MASK (GENL_FAM_TAB_SIZE - 1)
@@ -98,7 +106,7 @@ static struct genl_ops *genl_get_cmd(u8 cmd, struct genl_family *family)
/* Of course we are going to have problems once we hit
* 2^16 alive types, but that can only happen by year 2K
*/
-static inline u16 genl_generate_id(void)
+static u16 genl_generate_id(void)
{
static u16 id_gen_idx = GENL_MIN_ID;
int i;
@@ -784,6 +792,15 @@ static int ctrl_getfamily(struct sk_buff *skb, struct genl_info *info)
name = nla_data(info->attrs[CTRL_ATTR_FAMILY_NAME]);
res = genl_family_find_byname(name);
+#ifdef CONFIG_MODULES
+ if (res == NULL) {
+ genl_unlock();
+ request_module("net-pf-%d-proto-%d-type-%s",
+ PF_NETLINK, NETLINK_GENERIC, name);
+ genl_lock();
+ res = genl_family_find_byname(name);
+ }
+#endif
err = -ENOENT;
}
@@ -946,3 +963,16 @@ int genlmsg_multicast_allns(struct sk_buff *skb, u32 pid, unsigned int group,
return genlmsg_mcast(skb, pid, group, flags);
}
EXPORT_SYMBOL(genlmsg_multicast_allns);
+
+void genl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group,
+ struct nlmsghdr *nlh, gfp_t flags)
+{
+ struct sock *sk = net->genl_sock;
+ int report = 0;
+
+ if (nlh)
+ report = nlmsg_report(nlh);
+
+ nlmsg_notify(sk, skb, pid, group, report, flags);
+}
+EXPORT_SYMBOL(genl_notify);
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index 732152f718e0..7dab229bfbcc 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -306,26 +306,26 @@ static int nr_setsockopt(struct socket *sock, int level, int optname,
{
struct sock *sk = sock->sk;
struct nr_sock *nr = nr_sk(sk);
- int opt;
+ unsigned long opt;
if (level != SOL_NETROM)
return -ENOPROTOOPT;
- if (optlen < sizeof(int))
+ if (optlen < sizeof(unsigned int))
return -EINVAL;
- if (get_user(opt, (int __user *)optval))
+ if (get_user(opt, (unsigned int __user *)optval))
return -EFAULT;
switch (optname) {
case NETROM_T1:
- if (opt < 1)
+ if (opt < 1 || opt > ULONG_MAX / HZ)
return -EINVAL;
nr->t1 = opt * HZ;
return 0;
case NETROM_T2:
- if (opt < 1)
+ if (opt < 1 || opt > ULONG_MAX / HZ)
return -EINVAL;
nr->t2 = opt * HZ;
return 0;
@@ -337,13 +337,13 @@ static int nr_setsockopt(struct socket *sock, int level, int optname,
return 0;
case NETROM_T4:
- if (opt < 1)
+ if (opt < 1 || opt > ULONG_MAX / HZ)
return -EINVAL;
nr->t4 = opt * HZ;
return 0;
case NETROM_IDLE:
- if (opt < 0)
+ if (opt > ULONG_MAX / (60 * HZ))
return -EINVAL;
nr->idle = opt * 60 * HZ;
return 0;
@@ -1244,7 +1244,8 @@ static int nr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
case SIOCADDRT:
case SIOCDELRT:
case SIOCNRDECOBS:
- if (!capable(CAP_NET_ADMIN)) return -EPERM;
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
return nr_rt_ioctl(cmd, argp);
default:
diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c
index 915a87ba23e1..2cf330162d7e 100644
--- a/net/netrom/nr_route.c
+++ b/net/netrom/nr_route.c
@@ -670,14 +670,17 @@ int nr_rt_ioctl(unsigned int cmd, void __user *arg)
case SIOCADDRT:
if (copy_from_user(&nr_route, arg, sizeof(struct nr_route_struct)))
return -EFAULT;
- if ((dev = nr_ax25_dev_get(nr_route.device)) == NULL)
+ if (nr_route.ndigis > AX25_MAX_DIGIS)
return -EINVAL;
- if (nr_route.ndigis < 0 || nr_route.ndigis > AX25_MAX_DIGIS) {
- dev_put(dev);
+ if ((dev = nr_ax25_dev_get(nr_route.device)) == NULL)
return -EINVAL;
- }
switch (nr_route.type) {
case NETROM_NODE:
+ if (strnlen(nr_route.mnemonic, 7) == 7) {
+ ret = -EINVAL;
+ break;
+ }
+
ret = nr_add_node(&nr_route.callsign,
nr_route.mnemonic,
&nr_route.neighbour,
diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig
new file mode 100644
index 000000000000..d9ea33c361be
--- /dev/null
+++ b/net/openvswitch/Kconfig
@@ -0,0 +1,28 @@
+#
+# Open vSwitch
+#
+
+config OPENVSWITCH
+ tristate "Open vSwitch"
+ ---help---
+ Open vSwitch is a multilayer Ethernet switch targeted at virtualized
+ environments. In addition to supporting a variety of features
+ expected in a traditional hardware switch, it enables fine-grained
+ programmatic extension and flow-based control of the network. This
+ control is useful in a wide variety of applications but is
+ particularly important in multi-server virtualization deployments,
+ which are often characterized by highly dynamic endpoints and the
+ need to maintain logical abstractions for multiple tenants.
+
+ The Open vSwitch datapath provides an in-kernel fast path for packet
+ forwarding. It is complemented by a userspace daemon, ovs-vswitchd,
+ which is able to accept configuration from a variety of sources and
+ translate it into packet processing rules.
+
+ See http://openvswitch.org for more information and userspace
+ utilities.
+
+ To compile this code as a module, choose M here: the module will be
+ called openvswitch.
+
+ If unsure, say N.
diff --git a/net/openvswitch/Makefile b/net/openvswitch/Makefile
new file mode 100644
index 000000000000..15e7384745c1
--- /dev/null
+++ b/net/openvswitch/Makefile
@@ -0,0 +1,14 @@
+#
+# Makefile for Open vSwitch.
+#
+
+obj-$(CONFIG_OPENVSWITCH) += openvswitch.o
+
+openvswitch-y := \
+ actions.o \
+ datapath.o \
+ dp_notify.o \
+ flow.o \
+ vport.o \
+ vport-internal_dev.o \
+ vport-netdev.o \
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
new file mode 100644
index 000000000000..2725d1bdf291
--- /dev/null
+++ b/net/openvswitch/actions.c
@@ -0,0 +1,415 @@
+/*
+ * Copyright (c) 2007-2011 Nicira Networks.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/skbuff.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/openvswitch.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/in6.h>
+#include <linux/if_arp.h>
+#include <linux/if_vlan.h>
+#include <net/ip.h>
+#include <net/checksum.h>
+#include <net/dsfield.h>
+
+#include "datapath.h"
+#include "vport.h"
+
+static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
+ const struct nlattr *attr, int len, bool keep_skb);
+
+static int make_writable(struct sk_buff *skb, int write_len)
+{
+ if (!skb_cloned(skb) || skb_clone_writable(skb, write_len))
+ return 0;
+
+ return pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
+}
+
+/* remove VLAN header from packet and update csum accrodingly. */
+static int __pop_vlan_tci(struct sk_buff *skb, __be16 *current_tci)
+{
+ struct vlan_hdr *vhdr;
+ int err;
+
+ err = make_writable(skb, VLAN_ETH_HLEN);
+ if (unlikely(err))
+ return err;
+
+ if (skb->ip_summed == CHECKSUM_COMPLETE)
+ skb->csum = csum_sub(skb->csum, csum_partial(skb->data
+ + ETH_HLEN, VLAN_HLEN, 0));
+
+ vhdr = (struct vlan_hdr *)(skb->data + ETH_HLEN);
+ *current_tci = vhdr->h_vlan_TCI;
+
+ memmove(skb->data + VLAN_HLEN, skb->data, 2 * ETH_ALEN);
+ __skb_pull(skb, VLAN_HLEN);
+
+ vlan_set_encap_proto(skb, vhdr);
+ skb->mac_header += VLAN_HLEN;
+ skb_reset_mac_len(skb);
+
+ return 0;
+}
+
+static int pop_vlan(struct sk_buff *skb)
+{
+ __be16 tci;
+ int err;
+
+ if (likely(vlan_tx_tag_present(skb))) {
+ skb->vlan_tci = 0;
+ } else {
+ if (unlikely(skb->protocol != htons(ETH_P_8021Q) ||
+ skb->len < VLAN_ETH_HLEN))
+ return 0;
+
+ err = __pop_vlan_tci(skb, &tci);
+ if (err)
+ return err;
+ }
+ /* move next vlan tag to hw accel tag */
+ if (likely(skb->protocol != htons(ETH_P_8021Q) ||
+ skb->len < VLAN_ETH_HLEN))
+ return 0;
+
+ err = __pop_vlan_tci(skb, &tci);
+ if (unlikely(err))
+ return err;
+
+ __vlan_hwaccel_put_tag(skb, ntohs(tci));
+ return 0;
+}
+
+static int push_vlan(struct sk_buff *skb, const struct ovs_action_push_vlan *vlan)
+{
+ if (unlikely(vlan_tx_tag_present(skb))) {
+ u16 current_tag;
+
+ /* push down current VLAN tag */
+ current_tag = vlan_tx_tag_get(skb);
+
+ if (!__vlan_put_tag(skb, current_tag))
+ return -ENOMEM;
+
+ if (skb->ip_summed == CHECKSUM_COMPLETE)
+ skb->csum = csum_add(skb->csum, csum_partial(skb->data
+ + ETH_HLEN, VLAN_HLEN, 0));
+
+ }
+ __vlan_hwaccel_put_tag(skb, ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT);
+ return 0;
+}
+
+static int set_eth_addr(struct sk_buff *skb,
+ const struct ovs_key_ethernet *eth_key)
+{
+ int err;
+ err = make_writable(skb, ETH_HLEN);
+ if (unlikely(err))
+ return err;
+
+ memcpy(eth_hdr(skb)->h_source, eth_key->eth_src, ETH_ALEN);
+ memcpy(eth_hdr(skb)->h_dest, eth_key->eth_dst, ETH_ALEN);
+
+ return 0;
+}
+
+static void set_ip_addr(struct sk_buff *skb, struct iphdr *nh,
+ __be32 *addr, __be32 new_addr)
+{
+ int transport_len = skb->len - skb_transport_offset(skb);
+
+ if (nh->protocol == IPPROTO_TCP) {
+ if (likely(transport_len >= sizeof(struct tcphdr)))
+ inet_proto_csum_replace4(&tcp_hdr(skb)->check, skb,
+ *addr, new_addr, 1);
+ } else if (nh->protocol == IPPROTO_UDP) {
+ if (likely(transport_len >= sizeof(struct udphdr)))
+ inet_proto_csum_replace4(&udp_hdr(skb)->check, skb,
+ *addr, new_addr, 1);
+ }
+
+ csum_replace4(&nh->check, *addr, new_addr);
+ skb->rxhash = 0;
+ *addr = new_addr;
+}
+
+static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl)
+{
+ csum_replace2(&nh->check, htons(nh->ttl << 8), htons(new_ttl << 8));
+ nh->ttl = new_ttl;
+}
+
+static int set_ipv4(struct sk_buff *skb, const struct ovs_key_ipv4 *ipv4_key)
+{
+ struct iphdr *nh;
+ int err;
+
+ err = make_writable(skb, skb_network_offset(skb) +
+ sizeof(struct iphdr));
+ if (unlikely(err))
+ return err;
+
+ nh = ip_hdr(skb);
+
+ if (ipv4_key->ipv4_src != nh->saddr)
+ set_ip_addr(skb, nh, &nh->saddr, ipv4_key->ipv4_src);
+
+ if (ipv4_key->ipv4_dst != nh->daddr)
+ set_ip_addr(skb, nh, &nh->daddr, ipv4_key->ipv4_dst);
+
+ if (ipv4_key->ipv4_tos != nh->tos)
+ ipv4_change_dsfield(nh, 0, ipv4_key->ipv4_tos);
+
+ if (ipv4_key->ipv4_ttl != nh->ttl)
+ set_ip_ttl(skb, nh, ipv4_key->ipv4_ttl);
+
+ return 0;
+}
+
+/* Must follow make_writable() since that can move the skb data. */
+static void set_tp_port(struct sk_buff *skb, __be16 *port,
+ __be16 new_port, __sum16 *check)
+{
+ inet_proto_csum_replace2(check, skb, *port, new_port, 0);
+ *port = new_port;
+ skb->rxhash = 0;
+}
+
+static int set_udp_port(struct sk_buff *skb,
+ const struct ovs_key_udp *udp_port_key)
+{
+ struct udphdr *uh;
+ int err;
+
+ err = make_writable(skb, skb_transport_offset(skb) +
+ sizeof(struct udphdr));
+ if (unlikely(err))
+ return err;
+
+ uh = udp_hdr(skb);
+ if (udp_port_key->udp_src != uh->source)
+ set_tp_port(skb, &uh->source, udp_port_key->udp_src, &uh->check);
+
+ if (udp_port_key->udp_dst != uh->dest)
+ set_tp_port(skb, &uh->dest, udp_port_key->udp_dst, &uh->check);
+
+ return 0;
+}
+
+static int set_tcp_port(struct sk_buff *skb,
+ const struct ovs_key_tcp *tcp_port_key)
+{
+ struct tcphdr *th;
+ int err;
+
+ err = make_writable(skb, skb_transport_offset(skb) +
+ sizeof(struct tcphdr));
+ if (unlikely(err))
+ return err;
+
+ th = tcp_hdr(skb);
+ if (tcp_port_key->tcp_src != th->source)
+ set_tp_port(skb, &th->source, tcp_port_key->tcp_src, &th->check);
+
+ if (tcp_port_key->tcp_dst != th->dest)
+ set_tp_port(skb, &th->dest, tcp_port_key->tcp_dst, &th->check);
+
+ return 0;
+}
+
+static int do_output(struct datapath *dp, struct sk_buff *skb, int out_port)
+{
+ struct vport *vport;
+
+ if (unlikely(!skb))
+ return -ENOMEM;
+
+ vport = rcu_dereference(dp->ports[out_port]);
+ if (unlikely(!vport)) {
+ kfree_skb(skb);
+ return -ENODEV;
+ }
+
+ ovs_vport_send(vport, skb);
+ return 0;
+}
+
+static int output_userspace(struct datapath *dp, struct sk_buff *skb,
+ const struct nlattr *attr)
+{
+ struct dp_upcall_info upcall;
+ const struct nlattr *a;
+ int rem;
+
+ upcall.cmd = OVS_PACKET_CMD_ACTION;
+ upcall.key = &OVS_CB(skb)->flow->key;
+ upcall.userdata = NULL;
+ upcall.pid = 0;
+
+ for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
+ a = nla_next(a, &rem)) {
+ switch (nla_type(a)) {
+ case OVS_USERSPACE_ATTR_USERDATA:
+ upcall.userdata = a;
+ break;
+
+ case OVS_USERSPACE_ATTR_PID:
+ upcall.pid = nla_get_u32(a);
+ break;
+ }
+ }
+
+ return ovs_dp_upcall(dp, skb, &upcall);
+}
+
+static int sample(struct datapath *dp, struct sk_buff *skb,
+ const struct nlattr *attr)
+{
+ const struct nlattr *acts_list = NULL;
+ const struct nlattr *a;
+ int rem;
+
+ for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
+ a = nla_next(a, &rem)) {
+ switch (nla_type(a)) {
+ case OVS_SAMPLE_ATTR_PROBABILITY:
+ if (net_random() >= nla_get_u32(a))
+ return 0;
+ break;
+
+ case OVS_SAMPLE_ATTR_ACTIONS:
+ acts_list = a;
+ break;
+ }
+ }
+
+ return do_execute_actions(dp, skb, nla_data(acts_list),
+ nla_len(acts_list), true);
+}
+
+static int execute_set_action(struct sk_buff *skb,
+ const struct nlattr *nested_attr)
+{
+ int err = 0;
+
+ switch (nla_type(nested_attr)) {
+ case OVS_KEY_ATTR_PRIORITY:
+ skb->priority = nla_get_u32(nested_attr);
+ break;
+
+ case OVS_KEY_ATTR_ETHERNET:
+ err = set_eth_addr(skb, nla_data(nested_attr));
+ break;
+
+ case OVS_KEY_ATTR_IPV4:
+ err = set_ipv4(skb, nla_data(nested_attr));
+ break;
+
+ case OVS_KEY_ATTR_TCP:
+ err = set_tcp_port(skb, nla_data(nested_attr));
+ break;
+
+ case OVS_KEY_ATTR_UDP:
+ err = set_udp_port(skb, nla_data(nested_attr));
+ break;
+ }
+
+ return err;
+}
+
+/* Execute a list of actions against 'skb'. */
+static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
+ const struct nlattr *attr, int len, bool keep_skb)
+{
+ /* Every output action needs a separate clone of 'skb', but the common
+ * case is just a single output action, so that doing a clone and
+ * then freeing the original skbuff is wasteful. So the following code
+ * is slightly obscure just to avoid that. */
+ int prev_port = -1;
+ const struct nlattr *a;
+ int rem;
+
+ for (a = attr, rem = len; rem > 0;
+ a = nla_next(a, &rem)) {
+ int err = 0;
+
+ if (prev_port != -1) {
+ do_output(dp, skb_clone(skb, GFP_ATOMIC), prev_port);
+ prev_port = -1;
+ }
+
+ switch (nla_type(a)) {
+ case OVS_ACTION_ATTR_OUTPUT:
+ prev_port = nla_get_u32(a);
+ break;
+
+ case OVS_ACTION_ATTR_USERSPACE:
+ output_userspace(dp, skb, a);
+ break;
+
+ case OVS_ACTION_ATTR_PUSH_VLAN:
+ err = push_vlan(skb, nla_data(a));
+ if (unlikely(err)) /* skb already freed. */
+ return err;
+ break;
+
+ case OVS_ACTION_ATTR_POP_VLAN:
+ err = pop_vlan(skb);
+ break;
+
+ case OVS_ACTION_ATTR_SET:
+ err = execute_set_action(skb, nla_data(a));
+ break;
+
+ case OVS_ACTION_ATTR_SAMPLE:
+ err = sample(dp, skb, a);
+ break;
+ }
+
+ if (unlikely(err)) {
+ kfree_skb(skb);
+ return err;
+ }
+ }
+
+ if (prev_port != -1) {
+ if (keep_skb)
+ skb = skb_clone(skb, GFP_ATOMIC);
+
+ do_output(dp, skb, prev_port);
+ } else if (!keep_skb)
+ consume_skb(skb);
+
+ return 0;
+}
+
+/* Execute a list of actions against 'skb'. */
+int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb)
+{
+ struct sw_flow_actions *acts = rcu_dereference(OVS_CB(skb)->flow->sf_acts);
+
+ return do_execute_actions(dp, skb, acts->actions,
+ acts->actions_len, false);
+}
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
new file mode 100644
index 000000000000..9a2725114e99
--- /dev/null
+++ b/net/openvswitch/datapath.c
@@ -0,0 +1,1912 @@
+/*
+ * Copyright (c) 2007-2011 Nicira Networks.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/if_arp.h>
+#include <linux/if_vlan.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/jhash.h>
+#include <linux/delay.h>
+#include <linux/time.h>
+#include <linux/etherdevice.h>
+#include <linux/genetlink.h>
+#include <linux/kernel.h>
+#include <linux/kthread.h>
+#include <linux/mutex.h>
+#include <linux/percpu.h>
+#include <linux/rcupdate.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/version.h>
+#include <linux/ethtool.h>
+#include <linux/wait.h>
+#include <asm/system.h>
+#include <asm/div64.h>
+#include <linux/highmem.h>
+#include <linux/netfilter_bridge.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/inetdevice.h>
+#include <linux/list.h>
+#include <linux/openvswitch.h>
+#include <linux/rculist.h>
+#include <linux/dmi.h>
+#include <linux/workqueue.h>
+#include <net/genetlink.h>
+
+#include "datapath.h"
+#include "flow.h"
+#include "vport-internal_dev.h"
+
+/**
+ * DOC: Locking:
+ *
+ * Writes to device state (add/remove datapath, port, set operations on vports,
+ * etc.) are protected by RTNL.
+ *
+ * Writes to other state (flow table modifications, set miscellaneous datapath
+ * parameters, etc.) are protected by genl_mutex. The RTNL lock nests inside
+ * genl_mutex.
+ *
+ * Reads are protected by RCU.
+ *
+ * There are a few special cases (mostly stats) that have their own
+ * synchronization but they nest under all of above and don't interact with
+ * each other.
+ */
+
+/* Global list of datapaths to enable dumping them all out.
+ * Protected by genl_mutex.
+ */
+static LIST_HEAD(dps);
+
+#define REHASH_FLOW_INTERVAL (10 * 60 * HZ)
+static void rehash_flow_table(struct work_struct *work);
+static DECLARE_DELAYED_WORK(rehash_flow_wq, rehash_flow_table);
+
+static struct vport *new_vport(const struct vport_parms *);
+static int queue_gso_packets(int dp_ifindex, struct sk_buff *,
+ const struct dp_upcall_info *);
+static int queue_userspace_packet(int dp_ifindex, struct sk_buff *,
+ const struct dp_upcall_info *);
+
+/* Must be called with rcu_read_lock, genl_mutex, or RTNL lock. */
+static struct datapath *get_dp(int dp_ifindex)
+{
+ struct datapath *dp = NULL;
+ struct net_device *dev;
+
+ rcu_read_lock();
+ dev = dev_get_by_index_rcu(&init_net, dp_ifindex);
+ if (dev) {
+ struct vport *vport = ovs_internal_dev_get_vport(dev);
+ if (vport)
+ dp = vport->dp;
+ }
+ rcu_read_unlock();
+
+ return dp;
+}
+
+/* Must be called with rcu_read_lock or RTNL lock. */
+const char *ovs_dp_name(const struct datapath *dp)
+{
+ struct vport *vport = rcu_dereference_rtnl(dp->ports[OVSP_LOCAL]);
+ return vport->ops->get_name(vport);
+}
+
+static int get_dpifindex(struct datapath *dp)
+{
+ struct vport *local;
+ int ifindex;
+
+ rcu_read_lock();
+
+ local = rcu_dereference(dp->ports[OVSP_LOCAL]);
+ if (local)
+ ifindex = local->ops->get_ifindex(local);
+ else
+ ifindex = 0;
+
+ rcu_read_unlock();
+
+ return ifindex;
+}
+
+static void destroy_dp_rcu(struct rcu_head *rcu)
+{
+ struct datapath *dp = container_of(rcu, struct datapath, rcu);
+
+ ovs_flow_tbl_destroy((__force struct flow_table *)dp->table);
+ free_percpu(dp->stats_percpu);
+ kfree(dp);
+}
+
+/* Called with RTNL lock and genl_lock. */
+static struct vport *new_vport(const struct vport_parms *parms)
+{
+ struct vport *vport;
+
+ vport = ovs_vport_add(parms);
+ if (!IS_ERR(vport)) {
+ struct datapath *dp = parms->dp;
+
+ rcu_assign_pointer(dp->ports[parms->port_no], vport);
+ list_add(&vport->node, &dp->port_list);
+ }
+
+ return vport;
+}
+
+/* Called with RTNL lock. */
+void ovs_dp_detach_port(struct vport *p)
+{
+ ASSERT_RTNL();
+
+ /* First drop references to device. */
+ list_del(&p->node);
+ rcu_assign_pointer(p->dp->ports[p->port_no], NULL);
+
+ /* Then destroy it. */
+ ovs_vport_del(p);
+}
+
+/* Must be called with rcu_read_lock. */
+void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb)
+{
+ struct datapath *dp = p->dp;
+ struct sw_flow *flow;
+ struct dp_stats_percpu *stats;
+ struct sw_flow_key key;
+ u64 *stats_counter;
+ int error;
+ int key_len;
+
+ stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());
+
+ /* Extract flow from 'skb' into 'key'. */
+ error = ovs_flow_extract(skb, p->port_no, &key, &key_len);
+ if (unlikely(error)) {
+ kfree_skb(skb);
+ return;
+ }
+
+ /* Look up flow. */
+ flow = ovs_flow_tbl_lookup(rcu_dereference(dp->table), &key, key_len);
+ if (unlikely(!flow)) {
+ struct dp_upcall_info upcall;
+
+ upcall.cmd = OVS_PACKET_CMD_MISS;
+ upcall.key = &key;
+ upcall.userdata = NULL;
+ upcall.pid = p->upcall_pid;
+ ovs_dp_upcall(dp, skb, &upcall);
+ consume_skb(skb);
+ stats_counter = &stats->n_missed;
+ goto out;
+ }
+
+ OVS_CB(skb)->flow = flow;
+
+ stats_counter = &stats->n_hit;
+ ovs_flow_used(OVS_CB(skb)->flow, skb);
+ ovs_execute_actions(dp, skb);
+
+out:
+ /* Update datapath statistics. */
+ u64_stats_update_begin(&stats->sync);
+ (*stats_counter)++;
+ u64_stats_update_end(&stats->sync);
+}
+
+static struct genl_family dp_packet_genl_family = {
+ .id = GENL_ID_GENERATE,
+ .hdrsize = sizeof(struct ovs_header),
+ .name = OVS_PACKET_FAMILY,
+ .version = OVS_PACKET_VERSION,
+ .maxattr = OVS_PACKET_ATTR_MAX
+};
+
+int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
+ const struct dp_upcall_info *upcall_info)
+{
+ struct dp_stats_percpu *stats;
+ int dp_ifindex;
+ int err;
+
+ if (upcall_info->pid == 0) {
+ err = -ENOTCONN;
+ goto err;
+ }
+
+ dp_ifindex = get_dpifindex(dp);
+ if (!dp_ifindex) {
+ err = -ENODEV;
+ goto err;
+ }
+
+ if (!skb_is_gso(skb))
+ err = queue_userspace_packet(dp_ifindex, skb, upcall_info);
+ else
+ err = queue_gso_packets(dp_ifindex, skb, upcall_info);
+ if (err)
+ goto err;
+
+ return 0;
+
+err:
+ stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());
+
+ u64_stats_update_begin(&stats->sync);
+ stats->n_lost++;
+ u64_stats_update_end(&stats->sync);
+
+ return err;
+}
+
+static int queue_gso_packets(int dp_ifindex, struct sk_buff *skb,
+ const struct dp_upcall_info *upcall_info)
+{
+ struct dp_upcall_info later_info;
+ struct sw_flow_key later_key;
+ struct sk_buff *segs, *nskb;
+ int err;
+
+ segs = skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM);
+ if (IS_ERR(skb))
+ return PTR_ERR(skb);
+
+ /* Queue all of the segments. */
+ skb = segs;
+ do {
+ err = queue_userspace_packet(dp_ifindex, skb, upcall_info);
+ if (err)
+ break;
+
+ if (skb == segs && skb_shinfo(skb)->gso_type & SKB_GSO_UDP) {
+ /* The initial flow key extracted by ovs_flow_extract()
+ * in this case is for a first fragment, so we need to
+ * properly mark later fragments.
+ */
+ later_key = *upcall_info->key;
+ later_key.ip.frag = OVS_FRAG_TYPE_LATER;
+
+ later_info = *upcall_info;
+ later_info.key = &later_key;
+ upcall_info = &later_info;
+ }
+ } while ((skb = skb->next));
+
+ /* Free all of the segments. */
+ skb = segs;
+ do {
+ nskb = skb->next;
+ if (err)
+ kfree_skb(skb);
+ else
+ consume_skb(skb);
+ } while ((skb = nskb));
+ return err;
+}
+
+static int queue_userspace_packet(int dp_ifindex, struct sk_buff *skb,
+ const struct dp_upcall_info *upcall_info)
+{
+ struct ovs_header *upcall;
+ struct sk_buff *nskb = NULL;
+ struct sk_buff *user_skb; /* to be queued to userspace */
+ struct nlattr *nla;
+ unsigned int len;
+ int err;
+
+ if (vlan_tx_tag_present(skb)) {
+ nskb = skb_clone(skb, GFP_ATOMIC);
+ if (!nskb)
+ return -ENOMEM;
+
+ nskb = __vlan_put_tag(nskb, vlan_tx_tag_get(nskb));
+ if (!skb)
+ return -ENOMEM;
+
+ nskb->vlan_tci = 0;
+ skb = nskb;
+ }
+
+ if (nla_attr_size(skb->len) > USHRT_MAX) {
+ err = -EFBIG;
+ goto out;
+ }
+
+ len = sizeof(struct ovs_header);
+ len += nla_total_size(skb->len);
+ len += nla_total_size(FLOW_BUFSIZE);
+ if (upcall_info->cmd == OVS_PACKET_CMD_ACTION)
+ len += nla_total_size(8);
+
+ user_skb = genlmsg_new(len, GFP_ATOMIC);
+ if (!user_skb) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family,
+ 0, upcall_info->cmd);
+ upcall->dp_ifindex = dp_ifindex;
+
+ nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY);
+ ovs_flow_to_nlattrs(upcall_info->key, user_skb);
+ nla_nest_end(user_skb, nla);
+
+ if (upcall_info->userdata)
+ nla_put_u64(user_skb, OVS_PACKET_ATTR_USERDATA,
+ nla_get_u64(upcall_info->userdata));
+
+ nla = __nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, skb->len);
+
+ skb_copy_and_csum_dev(skb, nla_data(nla));
+
+ err = genlmsg_unicast(&init_net, user_skb, upcall_info->pid);
+
+out:
+ kfree_skb(nskb);
+ return err;
+}
+
+/* Called with genl_mutex. */
+static int flush_flows(int dp_ifindex)
+{
+ struct flow_table *old_table;
+ struct flow_table *new_table;
+ struct datapath *dp;
+
+ dp = get_dp(dp_ifindex);
+ if (!dp)
+ return -ENODEV;
+
+ old_table = genl_dereference(dp->table);
+ new_table = ovs_flow_tbl_alloc(TBL_MIN_BUCKETS);
+ if (!new_table)
+ return -ENOMEM;
+
+ rcu_assign_pointer(dp->table, new_table);
+
+ ovs_flow_tbl_deferred_destroy(old_table);
+ return 0;
+}
+
+static int validate_actions(const struct nlattr *attr,
+ const struct sw_flow_key *key, int depth);
+
+static int validate_sample(const struct nlattr *attr,
+ const struct sw_flow_key *key, int depth)
+{
+ const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
+ const struct nlattr *probability, *actions;
+ const struct nlattr *a;
+ int rem;
+
+ memset(attrs, 0, sizeof(attrs));
+ nla_for_each_nested(a, attr, rem) {
+ int type = nla_type(a);
+ if (!type || type > OVS_SAMPLE_ATTR_MAX || attrs[type])
+ return -EINVAL;
+ attrs[type] = a;
+ }
+ if (rem)
+ return -EINVAL;
+
+ probability = attrs[OVS_SAMPLE_ATTR_PROBABILITY];
+ if (!probability || nla_len(probability) != sizeof(u32))
+ return -EINVAL;
+
+ actions = attrs[OVS_SAMPLE_ATTR_ACTIONS];
+ if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN))
+ return -EINVAL;
+ return validate_actions(actions, key, depth + 1);
+}
+
+static int validate_set(const struct nlattr *a,
+ const struct sw_flow_key *flow_key)
+{
+ const struct nlattr *ovs_key = nla_data(a);
+ int key_type = nla_type(ovs_key);
+
+ /* There can be only one key in a action */
+ if (nla_total_size(nla_len(ovs_key)) != nla_len(a))
+ return -EINVAL;
+
+ if (key_type > OVS_KEY_ATTR_MAX ||
+ nla_len(ovs_key) != ovs_key_lens[key_type])
+ return -EINVAL;
+
+ switch (key_type) {
+ const struct ovs_key_ipv4 *ipv4_key;
+
+ case OVS_KEY_ATTR_PRIORITY:
+ case OVS_KEY_ATTR_ETHERNET:
+ break;
+
+ case OVS_KEY_ATTR_IPV4:
+ if (flow_key->eth.type != htons(ETH_P_IP))
+ return -EINVAL;
+
+ if (!flow_key->ipv4.addr.src || !flow_key->ipv4.addr.dst)
+ return -EINVAL;
+
+ ipv4_key = nla_data(ovs_key);
+ if (ipv4_key->ipv4_proto != flow_key->ip.proto)
+ return -EINVAL;
+
+ if (ipv4_key->ipv4_frag != flow_key->ip.frag)
+ return -EINVAL;
+
+ break;
+
+ case OVS_KEY_ATTR_TCP:
+ if (flow_key->ip.proto != IPPROTO_TCP)
+ return -EINVAL;
+
+ if (!flow_key->ipv4.tp.src || !flow_key->ipv4.tp.dst)
+ return -EINVAL;
+
+ break;
+
+ case OVS_KEY_ATTR_UDP:
+ if (flow_key->ip.proto != IPPROTO_UDP)
+ return -EINVAL;
+
+ if (!flow_key->ipv4.tp.src || !flow_key->ipv4.tp.dst)
+ return -EINVAL;
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int validate_userspace(const struct nlattr *attr)
+{
+ static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = {
+ [OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 },
+ [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_U64 },
+ };
+ struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1];
+ int error;
+
+ error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX,
+ attr, userspace_policy);
+ if (error)
+ return error;
+
+ if (!a[OVS_USERSPACE_ATTR_PID] ||
+ !nla_get_u32(a[OVS_USERSPACE_ATTR_PID]))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int validate_actions(const struct nlattr *attr,
+ const struct sw_flow_key *key, int depth)
+{
+ const struct nlattr *a;
+ int rem, err;
+
+ if (depth >= SAMPLE_ACTION_DEPTH)
+ return -EOVERFLOW;
+
+ nla_for_each_nested(a, attr, rem) {
+ /* Expected argument lengths, (u32)-1 for variable length. */
+ static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
+ [OVS_ACTION_ATTR_OUTPUT] = sizeof(u32),
+ [OVS_ACTION_ATTR_USERSPACE] = (u32)-1,
+ [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan),
+ [OVS_ACTION_ATTR_POP_VLAN] = 0,
+ [OVS_ACTION_ATTR_SET] = (u32)-1,
+ [OVS_ACTION_ATTR_SAMPLE] = (u32)-1
+ };
+ const struct ovs_action_push_vlan *vlan;
+ int type = nla_type(a);
+
+ if (type > OVS_ACTION_ATTR_MAX ||
+ (action_lens[type] != nla_len(a) &&
+ action_lens[type] != (u32)-1))
+ return -EINVAL;
+
+ switch (type) {
+ case OVS_ACTION_ATTR_UNSPEC:
+ return -EINVAL;
+
+ case OVS_ACTION_ATTR_USERSPACE:
+ err = validate_userspace(a);
+ if (err)
+ return err;
+ break;
+
+ case OVS_ACTION_ATTR_OUTPUT:
+ if (nla_get_u32(a) >= DP_MAX_PORTS)
+ return -EINVAL;
+ break;
+
+
+ case OVS_ACTION_ATTR_POP_VLAN:
+ break;
+
+ case OVS_ACTION_ATTR_PUSH_VLAN:
+ vlan = nla_data(a);
+ if (vlan->vlan_tpid != htons(ETH_P_8021Q))
+ return -EINVAL;
+ if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT)))
+ return -EINVAL;
+ break;
+
+ case OVS_ACTION_ATTR_SET:
+ err = validate_set(a, key);
+ if (err)
+ return err;
+ break;
+
+ case OVS_ACTION_ATTR_SAMPLE:
+ err = validate_sample(a, key, depth);
+ if (err)
+ return err;
+ break;
+
+ default:
+ return -EINVAL;
+ }
+ }
+
+ if (rem > 0)
+ return -EINVAL;
+
+ return 0;
+}
+
+static void clear_stats(struct sw_flow *flow)
+{
+ flow->used = 0;
+ flow->tcp_flags = 0;
+ flow->packet_count = 0;
+ flow->byte_count = 0;
+}
+
+static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
+{
+ struct ovs_header *ovs_header = info->userhdr;
+ struct nlattr **a = info->attrs;
+ struct sw_flow_actions *acts;
+ struct sk_buff *packet;
+ struct sw_flow *flow;
+ struct datapath *dp;
+ struct ethhdr *eth;
+ int len;
+ int err;
+ int key_len;
+
+ err = -EINVAL;
+ if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
+ !a[OVS_PACKET_ATTR_ACTIONS] ||
+ nla_len(a[OVS_PACKET_ATTR_PACKET]) < ETH_HLEN)
+ goto err;
+
+ len = nla_len(a[OVS_PACKET_ATTR_PACKET]);
+ packet = __dev_alloc_skb(NET_IP_ALIGN + len, GFP_KERNEL);
+ err = -ENOMEM;
+ if (!packet)
+ goto err;
+ skb_reserve(packet, NET_IP_ALIGN);
+
+ memcpy(__skb_put(packet, len), nla_data(a[OVS_PACKET_ATTR_PACKET]), len);
+
+ skb_reset_mac_header(packet);
+ eth = eth_hdr(packet);
+
+ /* Normally, setting the skb 'protocol' field would be handled by a
+ * call to eth_type_trans(), but it assumes there's a sending
+ * device, which we may not have. */
+ if (ntohs(eth->h_proto) >= 1536)
+ packet->protocol = eth->h_proto;
+ else
+ packet->protocol = htons(ETH_P_802_2);
+
+ /* Build an sw_flow for sending this packet. */
+ flow = ovs_flow_alloc();
+ err = PTR_ERR(flow);
+ if (IS_ERR(flow))
+ goto err_kfree_skb;
+
+ err = ovs_flow_extract(packet, -1, &flow->key, &key_len);
+ if (err)
+ goto err_flow_free;
+
+ err = ovs_flow_metadata_from_nlattrs(&flow->key.phy.priority,
+ &flow->key.phy.in_port,
+ a[OVS_PACKET_ATTR_KEY]);
+ if (err)
+ goto err_flow_free;
+
+ err = validate_actions(a[OVS_PACKET_ATTR_ACTIONS], &flow->key, 0);
+ if (err)
+ goto err_flow_free;
+
+ flow->hash = ovs_flow_hash(&flow->key, key_len);
+
+ acts = ovs_flow_actions_alloc(a[OVS_PACKET_ATTR_ACTIONS]);
+ err = PTR_ERR(acts);
+ if (IS_ERR(acts))
+ goto err_flow_free;
+ rcu_assign_pointer(flow->sf_acts, acts);
+
+ OVS_CB(packet)->flow = flow;
+ packet->priority = flow->key.phy.priority;
+
+ rcu_read_lock();
+ dp = get_dp(ovs_header->dp_ifindex);
+ err = -ENODEV;
+ if (!dp)
+ goto err_unlock;
+
+ local_bh_disable();
+ err = ovs_execute_actions(dp, packet);
+ local_bh_enable();
+ rcu_read_unlock();
+
+ ovs_flow_free(flow);
+ return err;
+
+err_unlock:
+ rcu_read_unlock();
+err_flow_free:
+ ovs_flow_free(flow);
+err_kfree_skb:
+ kfree_skb(packet);
+err:
+ return err;
+}
+
+static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
+ [OVS_PACKET_ATTR_PACKET] = { .type = NLA_UNSPEC },
+ [OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
+ [OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
+};
+
+static struct genl_ops dp_packet_genl_ops[] = {
+ { .cmd = OVS_PACKET_CMD_EXECUTE,
+ .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+ .policy = packet_policy,
+ .doit = ovs_packet_cmd_execute
+ }
+};
+
+static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats)
+{
+ int i;
+ struct flow_table *table = genl_dereference(dp->table);
+
+ stats->n_flows = ovs_flow_tbl_count(table);
+
+ stats->n_hit = stats->n_missed = stats->n_lost = 0;
+ for_each_possible_cpu(i) {
+ const struct dp_stats_percpu *percpu_stats;
+ struct dp_stats_percpu local_stats;
+ unsigned int start;
+
+ percpu_stats = per_cpu_ptr(dp->stats_percpu, i);
+
+ do {
+ start = u64_stats_fetch_begin_bh(&percpu_stats->sync);
+ local_stats = *percpu_stats;
+ } while (u64_stats_fetch_retry_bh(&percpu_stats->sync, start));
+
+ stats->n_hit += local_stats.n_hit;
+ stats->n_missed += local_stats.n_missed;
+ stats->n_lost += local_stats.n_lost;
+ }
+}
+
+static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
+ [OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
+ [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
+ [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
+};
+
+static struct genl_family dp_flow_genl_family = {
+ .id = GENL_ID_GENERATE,
+ .hdrsize = sizeof(struct ovs_header),
+ .name = OVS_FLOW_FAMILY,
+ .version = OVS_FLOW_VERSION,
+ .maxattr = OVS_FLOW_ATTR_MAX
+};
+
+static struct genl_multicast_group ovs_dp_flow_multicast_group = {
+ .name = OVS_FLOW_MCGROUP
+};
+
+/* Called with genl_lock. */
+static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
+ struct sk_buff *skb, u32 pid,
+ u32 seq, u32 flags, u8 cmd)
+{
+ const int skb_orig_len = skb->len;
+ const struct sw_flow_actions *sf_acts;
+ struct ovs_flow_stats stats;
+ struct ovs_header *ovs_header;
+ struct nlattr *nla;
+ unsigned long used;
+ u8 tcp_flags;
+ int err;
+
+ sf_acts = rcu_dereference_protected(flow->sf_acts,
+ lockdep_genl_is_held());
+
+ ovs_header = genlmsg_put(skb, pid, seq, &dp_flow_genl_family, flags, cmd);
+ if (!ovs_header)
+ return -EMSGSIZE;
+
+ ovs_header->dp_ifindex = get_dpifindex(dp);
+
+ nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY);
+ if (!nla)
+ goto nla_put_failure;
+ err = ovs_flow_to_nlattrs(&flow->key, skb);
+ if (err)
+ goto error;
+ nla_nest_end(skb, nla);
+
+ spin_lock_bh(&flow->lock);
+ used = flow->used;
+ stats.n_packets = flow->packet_count;
+ stats.n_bytes = flow->byte_count;
+ tcp_flags = flow->tcp_flags;
+ spin_unlock_bh(&flow->lock);
+
+ if (used)
+ NLA_PUT_U64(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used));
+
+ if (stats.n_packets)
+ NLA_PUT(skb, OVS_FLOW_ATTR_STATS,
+ sizeof(struct ovs_flow_stats), &stats);
+
+ if (tcp_flags)
+ NLA_PUT_U8(skb, OVS_FLOW_ATTR_TCP_FLAGS, tcp_flags);
+
+ /* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
+ * this is the first flow to be dumped into 'skb'. This is unusual for
+ * Netlink but individual action lists can be longer than
+ * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this.
+ * The userspace caller can always fetch the actions separately if it
+ * really wants them. (Most userspace callers in fact don't care.)
+ *
+ * This can only fail for dump operations because the skb is always
+ * properly sized for single flows.
+ */
+ err = nla_put(skb, OVS_FLOW_ATTR_ACTIONS, sf_acts->actions_len,
+ sf_acts->actions);
+ if (err < 0 && skb_orig_len)
+ goto error;
+
+ return genlmsg_end(skb, ovs_header);
+
+nla_put_failure:
+ err = -EMSGSIZE;
+error:
+ genlmsg_cancel(skb, ovs_header);
+ return err;
+}
+
+static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow)
+{
+ const struct sw_flow_actions *sf_acts;
+ int len;
+
+ sf_acts = rcu_dereference_protected(flow->sf_acts,
+ lockdep_genl_is_held());
+
+ /* OVS_FLOW_ATTR_KEY */
+ len = nla_total_size(FLOW_BUFSIZE);
+ /* OVS_FLOW_ATTR_ACTIONS */
+ len += nla_total_size(sf_acts->actions_len);
+ /* OVS_FLOW_ATTR_STATS */
+ len += nla_total_size(sizeof(struct ovs_flow_stats));
+ /* OVS_FLOW_ATTR_TCP_FLAGS */
+ len += nla_total_size(1);
+ /* OVS_FLOW_ATTR_USED */
+ len += nla_total_size(8);
+
+ len += NLMSG_ALIGN(sizeof(struct ovs_header));
+
+ return genlmsg_new(len, GFP_KERNEL);
+}
+
+static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow,
+ struct datapath *dp,
+ u32 pid, u32 seq, u8 cmd)
+{
+ struct sk_buff *skb;
+ int retval;
+
+ skb = ovs_flow_cmd_alloc_info(flow);
+ if (!skb)
+ return ERR_PTR(-ENOMEM);
+
+ retval = ovs_flow_cmd_fill_info(flow, dp, skb, pid, seq, 0, cmd);
+ BUG_ON(retval < 0);
+ return skb;
+}
+
+static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
+{
+ struct nlattr **a = info->attrs;
+ struct ovs_header *ovs_header = info->userhdr;
+ struct sw_flow_key key;
+ struct sw_flow *flow;
+ struct sk_buff *reply;
+ struct datapath *dp;
+ struct flow_table *table;
+ int error;
+ int key_len;
+
+ /* Extract key. */
+ error = -EINVAL;
+ if (!a[OVS_FLOW_ATTR_KEY])
+ goto error;
+ error = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
+ if (error)
+ goto error;
+
+ /* Validate actions. */
+ if (a[OVS_FLOW_ATTR_ACTIONS]) {
+ error = validate_actions(a[OVS_FLOW_ATTR_ACTIONS], &key, 0);
+ if (error)
+ goto error;
+ } else if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW) {
+ error = -EINVAL;
+ goto error;
+ }
+
+ dp = get_dp(ovs_header->dp_ifindex);
+ error = -ENODEV;
+ if (!dp)
+ goto error;
+
+ table = genl_dereference(dp->table);
+ flow = ovs_flow_tbl_lookup(table, &key, key_len);
+ if (!flow) {
+ struct sw_flow_actions *acts;
+
+ /* Bail out if we're not allowed to create a new flow. */
+ error = -ENOENT;
+ if (info->genlhdr->cmd == OVS_FLOW_CMD_SET)
+ goto error;
+
+ /* Expand table, if necessary, to make room. */
+ if (ovs_flow_tbl_need_to_expand(table)) {
+ struct flow_table *new_table;
+
+ new_table = ovs_flow_tbl_expand(table);
+ if (!IS_ERR(new_table)) {
+ rcu_assign_pointer(dp->table, new_table);
+ ovs_flow_tbl_deferred_destroy(table);
+ table = genl_dereference(dp->table);
+ }
+ }
+
+ /* Allocate flow. */
+ flow = ovs_flow_alloc();
+ if (IS_ERR(flow)) {
+ error = PTR_ERR(flow);
+ goto error;
+ }
+ flow->key = key;
+ clear_stats(flow);
+
+ /* Obtain actions. */
+ acts = ovs_flow_actions_alloc(a[OVS_FLOW_ATTR_ACTIONS]);
+ error = PTR_ERR(acts);
+ if (IS_ERR(acts))
+ goto error_free_flow;
+ rcu_assign_pointer(flow->sf_acts, acts);
+
+ /* Put flow in bucket. */
+ flow->hash = ovs_flow_hash(&key, key_len);
+ ovs_flow_tbl_insert(table, flow);
+
+ reply = ovs_flow_cmd_build_info(flow, dp, info->snd_pid,
+ info->snd_seq,
+ OVS_FLOW_CMD_NEW);
+ } else {
+ /* We found a matching flow. */
+ struct sw_flow_actions *old_acts;
+ struct nlattr *acts_attrs;
+
+ /* Bail out if we're not allowed to modify an existing flow.
+ * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
+ * because Generic Netlink treats the latter as a dump
+ * request. We also accept NLM_F_EXCL in case that bug ever
+ * gets fixed.
+ */
+ error = -EEXIST;
+ if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW &&
+ info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL))
+ goto error;
+
+ /* Update actions. */
+ old_acts = rcu_dereference_protected(flow->sf_acts,
+ lockdep_genl_is_held());
+ acts_attrs = a[OVS_FLOW_ATTR_ACTIONS];
+ if (acts_attrs &&
+ (old_acts->actions_len != nla_len(acts_attrs) ||
+ memcmp(old_acts->actions, nla_data(acts_attrs),
+ old_acts->actions_len))) {
+ struct sw_flow_actions *new_acts;
+
+ new_acts = ovs_flow_actions_alloc(acts_attrs);
+ error = PTR_ERR(new_acts);
+ if (IS_ERR(new_acts))
+ goto error;
+
+ rcu_assign_pointer(flow->sf_acts, new_acts);
+ ovs_flow_deferred_free_acts(old_acts);
+ }
+
+ reply = ovs_flow_cmd_build_info(flow, dp, info->snd_pid,
+ info->snd_seq, OVS_FLOW_CMD_NEW);
+
+ /* Clear stats. */
+ if (a[OVS_FLOW_ATTR_CLEAR]) {
+ spin_lock_bh(&flow->lock);
+ clear_stats(flow);
+ spin_unlock_bh(&flow->lock);
+ }
+ }
+
+ if (!IS_ERR(reply))
+ genl_notify(reply, genl_info_net(info), info->snd_pid,
+ ovs_dp_flow_multicast_group.id, info->nlhdr,
+ GFP_KERNEL);
+ else
+ netlink_set_err(init_net.genl_sock, 0,
+ ovs_dp_flow_multicast_group.id, PTR_ERR(reply));
+ return 0;
+
+error_free_flow:
+ ovs_flow_free(flow);
+error:
+ return error;
+}
+
+static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
+{
+ struct nlattr **a = info->attrs;
+ struct ovs_header *ovs_header = info->userhdr;
+ struct sw_flow_key key;
+ struct sk_buff *reply;
+ struct sw_flow *flow;
+ struct datapath *dp;
+ struct flow_table *table;
+ int err;
+ int key_len;
+
+ if (!a[OVS_FLOW_ATTR_KEY])
+ return -EINVAL;
+ err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
+ if (err)
+ return err;
+
+ dp = get_dp(ovs_header->dp_ifindex);
+ if (!dp)
+ return -ENODEV;
+
+ table = genl_dereference(dp->table);
+ flow = ovs_flow_tbl_lookup(table, &key, key_len);
+ if (!flow)
+ return -ENOENT;
+
+ reply = ovs_flow_cmd_build_info(flow, dp, info->snd_pid,
+ info->snd_seq, OVS_FLOW_CMD_NEW);
+ if (IS_ERR(reply))
+ return PTR_ERR(reply);
+
+ return genlmsg_reply(reply, info);
+}
+
+static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
+{
+ struct nlattr **a = info->attrs;
+ struct ovs_header *ovs_header = info->userhdr;
+ struct sw_flow_key key;
+ struct sk_buff *reply;
+ struct sw_flow *flow;
+ struct datapath *dp;
+ struct flow_table *table;
+ int err;
+ int key_len;
+
+ if (!a[OVS_FLOW_ATTR_KEY])
+ return flush_flows(ovs_header->dp_ifindex);
+ err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
+ if (err)
+ return err;
+
+ dp = get_dp(ovs_header->dp_ifindex);
+ if (!dp)
+ return -ENODEV;
+
+ table = genl_dereference(dp->table);
+ flow = ovs_flow_tbl_lookup(table, &key, key_len);
+ if (!flow)
+ return -ENOENT;
+
+ reply = ovs_flow_cmd_alloc_info(flow);
+ if (!reply)
+ return -ENOMEM;
+
+ ovs_flow_tbl_remove(table, flow);
+
+ err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_pid,
+ info->snd_seq, 0, OVS_FLOW_CMD_DEL);
+ BUG_ON(err < 0);
+
+ ovs_flow_deferred_free(flow);
+
+ genl_notify(reply, genl_info_net(info), info->snd_pid,
+ ovs_dp_flow_multicast_group.id, info->nlhdr, GFP_KERNEL);
+ return 0;
+}
+
+static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
+ struct datapath *dp;
+ struct flow_table *table;
+
+ dp = get_dp(ovs_header->dp_ifindex);
+ if (!dp)
+ return -ENODEV;
+
+ table = genl_dereference(dp->table);
+
+ for (;;) {
+ struct sw_flow *flow;
+ u32 bucket, obj;
+
+ bucket = cb->args[0];
+ obj = cb->args[1];
+ flow = ovs_flow_tbl_next(table, &bucket, &obj);
+ if (!flow)
+ break;
+
+ if (ovs_flow_cmd_fill_info(flow, dp, skb,
+ NETLINK_CB(cb->skb).pid,
+ cb->nlh->nlmsg_seq, NLM_F_MULTI,
+ OVS_FLOW_CMD_NEW) < 0)
+ break;
+
+ cb->args[0] = bucket;
+ cb->args[1] = obj;
+ }
+ return skb->len;
+}
+
+static struct genl_ops dp_flow_genl_ops[] = {
+ { .cmd = OVS_FLOW_CMD_NEW,
+ .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+ .policy = flow_policy,
+ .doit = ovs_flow_cmd_new_or_set
+ },
+ { .cmd = OVS_FLOW_CMD_DEL,
+ .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+ .policy = flow_policy,
+ .doit = ovs_flow_cmd_del
+ },
+ { .cmd = OVS_FLOW_CMD_GET,
+ .flags = 0, /* OK for unprivileged users. */
+ .policy = flow_policy,
+ .doit = ovs_flow_cmd_get,
+ .dumpit = ovs_flow_cmd_dump
+ },
+ { .cmd = OVS_FLOW_CMD_SET,
+ .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+ .policy = flow_policy,
+ .doit = ovs_flow_cmd_new_or_set,
+ },
+};
+
+static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
+ [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
+ [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
+};
+
+static struct genl_family dp_datapath_genl_family = {
+ .id = GENL_ID_GENERATE,
+ .hdrsize = sizeof(struct ovs_header),
+ .name = OVS_DATAPATH_FAMILY,
+ .version = OVS_DATAPATH_VERSION,
+ .maxattr = OVS_DP_ATTR_MAX
+};
+
+static struct genl_multicast_group ovs_dp_datapath_multicast_group = {
+ .name = OVS_DATAPATH_MCGROUP
+};
+
+static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
+ u32 pid, u32 seq, u32 flags, u8 cmd)
+{
+ struct ovs_header *ovs_header;
+ struct ovs_dp_stats dp_stats;
+ int err;
+
+ ovs_header = genlmsg_put(skb, pid, seq, &dp_datapath_genl_family,
+ flags, cmd);
+ if (!ovs_header)
+ goto error;
+
+ ovs_header->dp_ifindex = get_dpifindex(dp);
+
+ rcu_read_lock();
+ err = nla_put_string(skb, OVS_DP_ATTR_NAME, ovs_dp_name(dp));
+ rcu_read_unlock();
+ if (err)
+ goto nla_put_failure;
+
+ get_dp_stats(dp, &dp_stats);
+ NLA_PUT(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats), &dp_stats);
+
+ return genlmsg_end(skb, ovs_header);
+
+nla_put_failure:
+ genlmsg_cancel(skb, ovs_header);
+error:
+ return -EMSGSIZE;
+}
+
+static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 pid,
+ u32 seq, u8 cmd)
+{
+ struct sk_buff *skb;
+ int retval;
+
+ skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!skb)
+ return ERR_PTR(-ENOMEM);
+
+ retval = ovs_dp_cmd_fill_info(dp, skb, pid, seq, 0, cmd);
+ if (retval < 0) {
+ kfree_skb(skb);
+ return ERR_PTR(retval);
+ }
+ return skb;
+}
+
+/* Called with genl_mutex and optionally with RTNL lock also. */
+static struct datapath *lookup_datapath(struct ovs_header *ovs_header,
+ struct nlattr *a[OVS_DP_ATTR_MAX + 1])
+{
+ struct datapath *dp;
+
+ if (!a[OVS_DP_ATTR_NAME])
+ dp = get_dp(ovs_header->dp_ifindex);
+ else {
+ struct vport *vport;
+
+ rcu_read_lock();
+ vport = ovs_vport_locate(nla_data(a[OVS_DP_ATTR_NAME]));
+ dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL;
+ rcu_read_unlock();
+ }
+ return dp ? dp : ERR_PTR(-ENODEV);
+}
+
+static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
+{
+ struct nlattr **a = info->attrs;
+ struct vport_parms parms;
+ struct sk_buff *reply;
+ struct datapath *dp;
+ struct vport *vport;
+ int err;
+
+ err = -EINVAL;
+ if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
+ goto err;
+
+ rtnl_lock();
+ err = -ENODEV;
+ if (!try_module_get(THIS_MODULE))
+ goto err_unlock_rtnl;
+
+ err = -ENOMEM;
+ dp = kzalloc(sizeof(*dp), GFP_KERNEL);
+ if (dp == NULL)
+ goto err_put_module;
+ INIT_LIST_HEAD(&dp->port_list);
+
+ /* Allocate table. */
+ err = -ENOMEM;
+ rcu_assign_pointer(dp->table, ovs_flow_tbl_alloc(TBL_MIN_BUCKETS));
+ if (!dp->table)
+ goto err_free_dp;
+
+ dp->stats_percpu = alloc_percpu(struct dp_stats_percpu);
+ if (!dp->stats_percpu) {
+ err = -ENOMEM;
+ goto err_destroy_table;
+ }
+
+ /* Set up our datapath device. */
+ parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
+ parms.type = OVS_VPORT_TYPE_INTERNAL;
+ parms.options = NULL;
+ parms.dp = dp;
+ parms.port_no = OVSP_LOCAL;
+ parms.upcall_pid = nla_get_u32(a[OVS_DP_ATTR_UPCALL_PID]);
+
+ vport = new_vport(&parms);
+ if (IS_ERR(vport)) {
+ err = PTR_ERR(vport);
+ if (err == -EBUSY)
+ err = -EEXIST;
+
+ goto err_destroy_percpu;
+ }
+
+ reply = ovs_dp_cmd_build_info(dp, info->snd_pid,
+ info->snd_seq, OVS_DP_CMD_NEW);
+ err = PTR_ERR(reply);
+ if (IS_ERR(reply))
+ goto err_destroy_local_port;
+
+ list_add_tail(&dp->list_node, &dps);
+ rtnl_unlock();
+
+ genl_notify(reply, genl_info_net(info), info->snd_pid,
+ ovs_dp_datapath_multicast_group.id, info->nlhdr,
+ GFP_KERNEL);
+ return 0;
+
+err_destroy_local_port:
+ ovs_dp_detach_port(rtnl_dereference(dp->ports[OVSP_LOCAL]));
+err_destroy_percpu:
+ free_percpu(dp->stats_percpu);
+err_destroy_table:
+ ovs_flow_tbl_destroy(genl_dereference(dp->table));
+err_free_dp:
+ kfree(dp);
+err_put_module:
+ module_put(THIS_MODULE);
+err_unlock_rtnl:
+ rtnl_unlock();
+err:
+ return err;
+}
+
+static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
+{
+ struct vport *vport, *next_vport;
+ struct sk_buff *reply;
+ struct datapath *dp;
+ int err;
+
+ rtnl_lock();
+ dp = lookup_datapath(info->userhdr, info->attrs);
+ err = PTR_ERR(dp);
+ if (IS_ERR(dp))
+ goto exit_unlock;
+
+ reply = ovs_dp_cmd_build_info(dp, info->snd_pid,
+ info->snd_seq, OVS_DP_CMD_DEL);
+ err = PTR_ERR(reply);
+ if (IS_ERR(reply))
+ goto exit_unlock;
+
+ list_for_each_entry_safe(vport, next_vport, &dp->port_list, node)
+ if (vport->port_no != OVSP_LOCAL)
+ ovs_dp_detach_port(vport);
+
+ list_del(&dp->list_node);
+ ovs_dp_detach_port(rtnl_dereference(dp->ports[OVSP_LOCAL]));
+
+ /* rtnl_unlock() will wait until all the references to devices that
+ * are pending unregistration have been dropped. We do it here to
+ * ensure that any internal devices (which contain DP pointers) are
+ * fully destroyed before freeing the datapath.
+ */
+ rtnl_unlock();
+
+ call_rcu(&dp->rcu, destroy_dp_rcu);
+ module_put(THIS_MODULE);
+
+ genl_notify(reply, genl_info_net(info), info->snd_pid,
+ ovs_dp_datapath_multicast_group.id, info->nlhdr,
+ GFP_KERNEL);
+
+ return 0;
+
+exit_unlock:
+ rtnl_unlock();
+ return err;
+}
+
+static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
+{
+ struct sk_buff *reply;
+ struct datapath *dp;
+ int err;
+
+ dp = lookup_datapath(info->userhdr, info->attrs);
+ if (IS_ERR(dp))
+ return PTR_ERR(dp);
+
+ reply = ovs_dp_cmd_build_info(dp, info->snd_pid,
+ info->snd_seq, OVS_DP_CMD_NEW);
+ if (IS_ERR(reply)) {
+ err = PTR_ERR(reply);
+ netlink_set_err(init_net.genl_sock, 0,
+ ovs_dp_datapath_multicast_group.id, err);
+ return 0;
+ }
+
+ genl_notify(reply, genl_info_net(info), info->snd_pid,
+ ovs_dp_datapath_multicast_group.id, info->nlhdr,
+ GFP_KERNEL);
+
+ return 0;
+}
+
+static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
+{
+ struct sk_buff *reply;
+ struct datapath *dp;
+
+ dp = lookup_datapath(info->userhdr, info->attrs);
+ if (IS_ERR(dp))
+ return PTR_ERR(dp);
+
+ reply = ovs_dp_cmd_build_info(dp, info->snd_pid,
+ info->snd_seq, OVS_DP_CMD_NEW);
+ if (IS_ERR(reply))
+ return PTR_ERR(reply);
+
+ return genlmsg_reply(reply, info);
+}
+
+static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct datapath *dp;
+ int skip = cb->args[0];
+ int i = 0;
+
+ list_for_each_entry(dp, &dps, list_node) {
+ if (i < skip)
+ continue;
+ if (ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).pid,
+ cb->nlh->nlmsg_seq, NLM_F_MULTI,
+ OVS_DP_CMD_NEW) < 0)
+ break;
+ i++;
+ }
+
+ cb->args[0] = i;
+
+ return skb->len;
+}
+
+static struct genl_ops dp_datapath_genl_ops[] = {
+ { .cmd = OVS_DP_CMD_NEW,
+ .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+ .policy = datapath_policy,
+ .doit = ovs_dp_cmd_new
+ },
+ { .cmd = OVS_DP_CMD_DEL,
+ .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+ .policy = datapath_policy,
+ .doit = ovs_dp_cmd_del
+ },
+ { .cmd = OVS_DP_CMD_GET,
+ .flags = 0, /* OK for unprivileged users. */
+ .policy = datapath_policy,
+ .doit = ovs_dp_cmd_get,
+ .dumpit = ovs_dp_cmd_dump
+ },
+ { .cmd = OVS_DP_CMD_SET,
+ .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+ .policy = datapath_policy,
+ .doit = ovs_dp_cmd_set,
+ },
+};
+
+static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
+ [OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
+ [OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) },
+ [OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
+ [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
+ [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 },
+ [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
+};
+
+static struct genl_family dp_vport_genl_family = {
+ .id = GENL_ID_GENERATE,
+ .hdrsize = sizeof(struct ovs_header),
+ .name = OVS_VPORT_FAMILY,
+ .version = OVS_VPORT_VERSION,
+ .maxattr = OVS_VPORT_ATTR_MAX
+};
+
+struct genl_multicast_group ovs_dp_vport_multicast_group = {
+ .name = OVS_VPORT_MCGROUP
+};
+
+/* Called with RTNL lock or RCU read lock. */
+static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
+ u32 pid, u32 seq, u32 flags, u8 cmd)
+{
+ struct ovs_header *ovs_header;
+ struct ovs_vport_stats vport_stats;
+ int err;
+
+ ovs_header = genlmsg_put(skb, pid, seq, &dp_vport_genl_family,
+ flags, cmd);
+ if (!ovs_header)
+ return -EMSGSIZE;
+
+ ovs_header->dp_ifindex = get_dpifindex(vport->dp);
+
+ NLA_PUT_U32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no);
+ NLA_PUT_U32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type);
+ NLA_PUT_STRING(skb, OVS_VPORT_ATTR_NAME, vport->ops->get_name(vport));
+ NLA_PUT_U32(skb, OVS_VPORT_ATTR_UPCALL_PID, vport->upcall_pid);
+
+ ovs_vport_get_stats(vport, &vport_stats);
+ NLA_PUT(skb, OVS_VPORT_ATTR_STATS, sizeof(struct ovs_vport_stats),
+ &vport_stats);
+
+ err = ovs_vport_get_options(vport, skb);
+ if (err == -EMSGSIZE)
+ goto error;
+
+ return genlmsg_end(skb, ovs_header);
+
+nla_put_failure:
+ err = -EMSGSIZE;
+error:
+ genlmsg_cancel(skb, ovs_header);
+ return err;
+}
+
+/* Called with RTNL lock or RCU read lock. */
+struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 pid,
+ u32 seq, u8 cmd)
+{
+ struct sk_buff *skb;
+ int retval;
+
+ skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
+ if (!skb)
+ return ERR_PTR(-ENOMEM);
+
+ retval = ovs_vport_cmd_fill_info(vport, skb, pid, seq, 0, cmd);
+ if (retval < 0) {
+ kfree_skb(skb);
+ return ERR_PTR(retval);
+ }
+ return skb;
+}
+
+/* Called with RTNL lock or RCU read lock. */
+static struct vport *lookup_vport(struct ovs_header *ovs_header,
+ struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
+{
+ struct datapath *dp;
+ struct vport *vport;
+
+ if (a[OVS_VPORT_ATTR_NAME]) {
+ vport = ovs_vport_locate(nla_data(a[OVS_VPORT_ATTR_NAME]));
+ if (!vport)
+ return ERR_PTR(-ENODEV);
+ return vport;
+ } else if (a[OVS_VPORT_ATTR_PORT_NO]) {
+ u32 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
+
+ if (port_no >= DP_MAX_PORTS)
+ return ERR_PTR(-EFBIG);
+
+ dp = get_dp(ovs_header->dp_ifindex);
+ if (!dp)
+ return ERR_PTR(-ENODEV);
+
+ vport = rcu_dereference_rtnl(dp->ports[port_no]);
+ if (!vport)
+ return ERR_PTR(-ENOENT);
+ return vport;
+ } else
+ return ERR_PTR(-EINVAL);
+}
+
+static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
+{
+ struct nlattr **a = info->attrs;
+ struct ovs_header *ovs_header = info->userhdr;
+ struct vport_parms parms;
+ struct sk_buff *reply;
+ struct vport *vport;
+ struct datapath *dp;
+ u32 port_no;
+ int err;
+
+ err = -EINVAL;
+ if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] ||
+ !a[OVS_VPORT_ATTR_UPCALL_PID])
+ goto exit;
+
+ rtnl_lock();
+ dp = get_dp(ovs_header->dp_ifindex);
+ err = -ENODEV;
+ if (!dp)
+ goto exit_unlock;
+
+ if (a[OVS_VPORT_ATTR_PORT_NO]) {
+ port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
+
+ err = -EFBIG;
+ if (port_no >= DP_MAX_PORTS)
+ goto exit_unlock;
+
+ vport = rtnl_dereference(dp->ports[port_no]);
+ err = -EBUSY;
+ if (vport)
+ goto exit_unlock;
+ } else {
+ for (port_no = 1; ; port_no++) {
+ if (port_no >= DP_MAX_PORTS) {
+ err = -EFBIG;
+ goto exit_unlock;
+ }
+ vport = rtnl_dereference(dp->ports[port_no]);
+ if (!vport)
+ break;
+ }
+ }
+
+ parms.name = nla_data(a[OVS_VPORT_ATTR_NAME]);
+ parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]);
+ parms.options = a[OVS_VPORT_ATTR_OPTIONS];
+ parms.dp = dp;
+ parms.port_no = port_no;
+ parms.upcall_pid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);
+
+ vport = new_vport(&parms);
+ err = PTR_ERR(vport);
+ if (IS_ERR(vport))
+ goto exit_unlock;
+
+ reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq,
+ OVS_VPORT_CMD_NEW);
+ if (IS_ERR(reply)) {
+ err = PTR_ERR(reply);
+ ovs_dp_detach_port(vport);
+ goto exit_unlock;
+ }
+ genl_notify(reply, genl_info_net(info), info->snd_pid,
+ ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);
+
+exit_unlock:
+ rtnl_unlock();
+exit:
+ return err;
+}
+
+static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
+{
+ struct nlattr **a = info->attrs;
+ struct sk_buff *reply;
+ struct vport *vport;
+ int err;
+
+ rtnl_lock();
+ vport = lookup_vport(info->userhdr, a);
+ err = PTR_ERR(vport);
+ if (IS_ERR(vport))
+ goto exit_unlock;
+
+ err = 0;
+ if (a[OVS_VPORT_ATTR_TYPE] &&
+ nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type)
+ err = -EINVAL;
+
+ if (!err && a[OVS_VPORT_ATTR_OPTIONS])
+ err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]);
+ if (!err && a[OVS_VPORT_ATTR_UPCALL_PID])
+ vport->upcall_pid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);
+
+ reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq,
+ OVS_VPORT_CMD_NEW);
+ if (IS_ERR(reply)) {
+ err = PTR_ERR(reply);
+ netlink_set_err(init_net.genl_sock, 0,
+ ovs_dp_vport_multicast_group.id, err);
+ return 0;
+ }
+
+ genl_notify(reply, genl_info_net(info), info->snd_pid,
+ ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);
+
+exit_unlock:
+ rtnl_unlock();
+ return err;
+}
+
+static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
+{
+ struct nlattr **a = info->attrs;
+ struct sk_buff *reply;
+ struct vport *vport;
+ int err;
+
+ rtnl_lock();
+ vport = lookup_vport(info->userhdr, a);
+ err = PTR_ERR(vport);
+ if (IS_ERR(vport))
+ goto exit_unlock;
+
+ if (vport->port_no == OVSP_LOCAL) {
+ err = -EINVAL;
+ goto exit_unlock;
+ }
+
+ reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq,
+ OVS_VPORT_CMD_DEL);
+ err = PTR_ERR(reply);
+ if (IS_ERR(reply))
+ goto exit_unlock;
+
+ ovs_dp_detach_port(vport);
+
+ genl_notify(reply, genl_info_net(info), info->snd_pid,
+ ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);
+
+exit_unlock:
+ rtnl_unlock();
+ return err;
+}
+
+static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
+{
+ struct nlattr **a = info->attrs;
+ struct ovs_header *ovs_header = info->userhdr;
+ struct sk_buff *reply;
+ struct vport *vport;
+ int err;
+
+ rcu_read_lock();
+ vport = lookup_vport(ovs_header, a);
+ err = PTR_ERR(vport);
+ if (IS_ERR(vport))
+ goto exit_unlock;
+
+ reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq,
+ OVS_VPORT_CMD_NEW);
+ err = PTR_ERR(reply);
+ if (IS_ERR(reply))
+ goto exit_unlock;
+
+ rcu_read_unlock();
+
+ return genlmsg_reply(reply, info);
+
+exit_unlock:
+ rcu_read_unlock();
+ return err;
+}
+
+static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
+ struct datapath *dp;
+ u32 port_no;
+ int retval;
+
+ dp = get_dp(ovs_header->dp_ifindex);
+ if (!dp)
+ return -ENODEV;
+
+ rcu_read_lock();
+ for (port_no = cb->args[0]; port_no < DP_MAX_PORTS; port_no++) {
+ struct vport *vport;
+
+ vport = rcu_dereference(dp->ports[port_no]);
+ if (!vport)
+ continue;
+
+ if (ovs_vport_cmd_fill_info(vport, skb, NETLINK_CB(cb->skb).pid,
+ cb->nlh->nlmsg_seq, NLM_F_MULTI,
+ OVS_VPORT_CMD_NEW) < 0)
+ break;
+ }
+ rcu_read_unlock();
+
+ cb->args[0] = port_no;
+ retval = skb->len;
+
+ return retval;
+}
+
+static void rehash_flow_table(struct work_struct *work)
+{
+ struct datapath *dp;
+
+ genl_lock();
+
+ list_for_each_entry(dp, &dps, list_node) {
+ struct flow_table *old_table = genl_dereference(dp->table);
+ struct flow_table *new_table;
+
+ new_table = ovs_flow_tbl_rehash(old_table);
+ if (!IS_ERR(new_table)) {
+ rcu_assign_pointer(dp->table, new_table);
+ ovs_flow_tbl_deferred_destroy(old_table);
+ }
+ }
+
+ genl_unlock();
+
+ schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL);
+}
+
+static struct genl_ops dp_vport_genl_ops[] = {
+ { .cmd = OVS_VPORT_CMD_NEW,
+ .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+ .policy = vport_policy,
+ .doit = ovs_vport_cmd_new
+ },
+ { .cmd = OVS_VPORT_CMD_DEL,
+ .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+ .policy = vport_policy,
+ .doit = ovs_vport_cmd_del
+ },
+ { .cmd = OVS_VPORT_CMD_GET,
+ .flags = 0, /* OK for unprivileged users. */
+ .policy = vport_policy,
+ .doit = ovs_vport_cmd_get,
+ .dumpit = ovs_vport_cmd_dump
+ },
+ { .cmd = OVS_VPORT_CMD_SET,
+ .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+ .policy = vport_policy,
+ .doit = ovs_vport_cmd_set,
+ },
+};
+
+struct genl_family_and_ops {
+ struct genl_family *family;
+ struct genl_ops *ops;
+ int n_ops;
+ struct genl_multicast_group *group;
+};
+
+static const struct genl_family_and_ops dp_genl_families[] = {
+ { &dp_datapath_genl_family,
+ dp_datapath_genl_ops, ARRAY_SIZE(dp_datapath_genl_ops),
+ &ovs_dp_datapath_multicast_group },
+ { &dp_vport_genl_family,
+ dp_vport_genl_ops, ARRAY_SIZE(dp_vport_genl_ops),
+ &ovs_dp_vport_multicast_group },
+ { &dp_flow_genl_family,
+ dp_flow_genl_ops, ARRAY_SIZE(dp_flow_genl_ops),
+ &ovs_dp_flow_multicast_group },
+ { &dp_packet_genl_family,
+ dp_packet_genl_ops, ARRAY_SIZE(dp_packet_genl_ops),
+ NULL },
+};
+
+static void dp_unregister_genl(int n_families)
+{
+ int i;
+
+ for (i = 0; i < n_families; i++)
+ genl_unregister_family(dp_genl_families[i].family);
+}
+
+static int dp_register_genl(void)
+{
+ int n_registered;
+ int err;
+ int i;
+
+ n_registered = 0;
+ for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) {
+ const struct genl_family_and_ops *f = &dp_genl_families[i];
+
+ err = genl_register_family_with_ops(f->family, f->ops,
+ f->n_ops);
+ if (err)
+ goto error;
+ n_registered++;
+
+ if (f->group) {
+ err = genl_register_mc_group(f->family, f->group);
+ if (err)
+ goto error;
+ }
+ }
+
+ return 0;
+
+error:
+ dp_unregister_genl(n_registered);
+ return err;
+}
+
+static int __init dp_init(void)
+{
+ struct sk_buff *dummy_skb;
+ int err;
+
+ BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > sizeof(dummy_skb->cb));
+
+ pr_info("Open vSwitch switching datapath\n");
+
+ err = ovs_flow_init();
+ if (err)
+ goto error;
+
+ err = ovs_vport_init();
+ if (err)
+ goto error_flow_exit;
+
+ err = register_netdevice_notifier(&ovs_dp_device_notifier);
+ if (err)
+ goto error_vport_exit;
+
+ err = dp_register_genl();
+ if (err < 0)
+ goto error_unreg_notifier;
+
+ schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL);
+
+ return 0;
+
+error_unreg_notifier:
+ unregister_netdevice_notifier(&ovs_dp_device_notifier);
+error_vport_exit:
+ ovs_vport_exit();
+error_flow_exit:
+ ovs_flow_exit();
+error:
+ return err;
+}
+
+static void dp_cleanup(void)
+{
+ cancel_delayed_work_sync(&rehash_flow_wq);
+ rcu_barrier();
+ dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
+ unregister_netdevice_notifier(&ovs_dp_device_notifier);
+ ovs_vport_exit();
+ ovs_flow_exit();
+}
+
+module_init(dp_init);
+module_exit(dp_cleanup);
+
+MODULE_DESCRIPTION("Open vSwitch switching datapath");
+MODULE_LICENSE("GPL");
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
new file mode 100644
index 000000000000..5b9f884b7055
--- /dev/null
+++ b/net/openvswitch/datapath.h
@@ -0,0 +1,125 @@
+/*
+ * Copyright (c) 2007-2011 Nicira Networks.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#ifndef DATAPATH_H
+#define DATAPATH_H 1
+
+#include <asm/page.h>
+#include <linux/kernel.h>
+#include <linux/mutex.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/u64_stats_sync.h>
+#include <linux/version.h>
+
+#include "flow.h"
+
+struct vport;
+
+#define DP_MAX_PORTS 1024
+#define SAMPLE_ACTION_DEPTH 3
+
+/**
+ * struct dp_stats_percpu - per-cpu packet processing statistics for a given
+ * datapath.
+ * @n_hit: Number of received packets for which a matching flow was found in
+ * the flow table.
+ * @n_miss: Number of received packets that had no matching flow in the flow
+ * table. The sum of @n_hit and @n_miss is the number of packets that have
+ * been received by the datapath.
+ * @n_lost: Number of received packets that had no matching flow in the flow
+ * table that could not be sent to userspace (normally due to an overflow in
+ * one of the datapath's queues).
+ */
+struct dp_stats_percpu {
+ u64 n_hit;
+ u64 n_missed;
+ u64 n_lost;
+ struct u64_stats_sync sync;
+};
+
+/**
+ * struct datapath - datapath for flow-based packet switching
+ * @rcu: RCU callback head for deferred destruction.
+ * @list_node: Element in global 'dps' list.
+ * @n_flows: Number of flows currently in flow table.
+ * @table: Current flow table. Protected by genl_lock and RCU.
+ * @ports: Map from port number to &struct vport. %OVSP_LOCAL port
+ * always exists, other ports may be %NULL. Protected by RTNL and RCU.
+ * @port_list: List of all ports in @ports in arbitrary order. RTNL required
+ * to iterate or modify.
+ * @stats_percpu: Per-CPU datapath statistics.
+ *
+ * Context: See the comment on locking at the top of datapath.c for additional
+ * locking information.
+ */
+struct datapath {
+ struct rcu_head rcu;
+ struct list_head list_node;
+
+ /* Flow table. */
+ struct flow_table __rcu *table;
+
+ /* Switch ports. */
+ struct vport __rcu *ports[DP_MAX_PORTS];
+ struct list_head port_list;
+
+ /* Stats. */
+ struct dp_stats_percpu __percpu *stats_percpu;
+};
+
+/**
+ * struct ovs_skb_cb - OVS data in skb CB
+ * @flow: The flow associated with this packet. May be %NULL if no flow.
+ */
+struct ovs_skb_cb {
+ struct sw_flow *flow;
+};
+#define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb)
+
+/**
+ * struct dp_upcall - metadata to include with a packet to send to userspace
+ * @cmd: One of %OVS_PACKET_CMD_*.
+ * @key: Becomes %OVS_PACKET_ATTR_KEY. Must be nonnull.
+ * @userdata: If nonnull, its u64 value is extracted and passed to userspace as
+ * %OVS_PACKET_ATTR_USERDATA.
+ * @pid: Netlink PID to which packet should be sent. If @pid is 0 then no
+ * packet is sent and the packet is accounted in the datapath's @n_lost
+ * counter.
+ */
+struct dp_upcall_info {
+ u8 cmd;
+ const struct sw_flow_key *key;
+ const struct nlattr *userdata;
+ u32 pid;
+};
+
+extern struct notifier_block ovs_dp_device_notifier;
+extern struct genl_multicast_group ovs_dp_vport_multicast_group;
+
+void ovs_dp_process_received_packet(struct vport *, struct sk_buff *);
+void ovs_dp_detach_port(struct vport *);
+int ovs_dp_upcall(struct datapath *, struct sk_buff *,
+ const struct dp_upcall_info *);
+
+const char *ovs_dp_name(const struct datapath *dp);
+struct sk_buff *ovs_vport_cmd_build_info(struct vport *, u32 pid, u32 seq,
+ u8 cmd);
+
+int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb);
+#endif /* datapath.h */
diff --git a/net/openvswitch/dp_notify.c b/net/openvswitch/dp_notify.c
new file mode 100644
index 000000000000..46736518c453
--- /dev/null
+++ b/net/openvswitch/dp_notify.c
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2007-2011 Nicira Networks.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#include <linux/netdevice.h>
+#include <net/genetlink.h>
+
+#include "datapath.h"
+#include "vport-internal_dev.h"
+#include "vport-netdev.h"
+
+static int dp_device_event(struct notifier_block *unused, unsigned long event,
+ void *ptr)
+{
+ struct net_device *dev = ptr;
+ struct vport *vport;
+
+ if (ovs_is_internal_dev(dev))
+ vport = ovs_internal_dev_get_vport(dev);
+ else
+ vport = ovs_netdev_get_vport(dev);
+
+ if (!vport)
+ return NOTIFY_DONE;
+
+ switch (event) {
+ case NETDEV_UNREGISTER:
+ if (!ovs_is_internal_dev(dev)) {
+ struct sk_buff *notify;
+
+ notify = ovs_vport_cmd_build_info(vport, 0, 0,
+ OVS_VPORT_CMD_DEL);
+ ovs_dp_detach_port(vport);
+ if (IS_ERR(notify)) {
+ netlink_set_err(init_net.genl_sock, 0,
+ ovs_dp_vport_multicast_group.id,
+ PTR_ERR(notify));
+ break;
+ }
+
+ genlmsg_multicast(notify, 0, ovs_dp_vport_multicast_group.id,
+ GFP_KERNEL);
+ }
+ break;
+ }
+
+ return NOTIFY_DONE;
+}
+
+struct notifier_block ovs_dp_device_notifier = {
+ .notifier_call = dp_device_event
+};
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
new file mode 100644
index 000000000000..fe7f020a843e
--- /dev/null
+++ b/net/openvswitch/flow.c
@@ -0,0 +1,1346 @@
+/*
+ * Copyright (c) 2007-2011 Nicira Networks.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#include "flow.h"
+#include "datapath.h"
+#include <linux/uaccess.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <net/llc_pdu.h>
+#include <linux/kernel.h>
+#include <linux/jhash.h>
+#include <linux/jiffies.h>
+#include <linux/llc.h>
+#include <linux/module.h>
+#include <linux/in.h>
+#include <linux/rcupdate.h>
+#include <linux/if_arp.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/icmp.h>
+#include <linux/icmpv6.h>
+#include <linux/rculist.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/ndisc.h>
+
+static struct kmem_cache *flow_cache;
+
+static int check_header(struct sk_buff *skb, int len)
+{
+ if (unlikely(skb->len < len))
+ return -EINVAL;
+ if (unlikely(!pskb_may_pull(skb, len)))
+ return -ENOMEM;
+ return 0;
+}
+
+static bool arphdr_ok(struct sk_buff *skb)
+{
+ return pskb_may_pull(skb, skb_network_offset(skb) +
+ sizeof(struct arp_eth_header));
+}
+
+static int check_iphdr(struct sk_buff *skb)
+{
+ unsigned int nh_ofs = skb_network_offset(skb);
+ unsigned int ip_len;
+ int err;
+
+ err = check_header(skb, nh_ofs + sizeof(struct iphdr));
+ if (unlikely(err))
+ return err;
+
+ ip_len = ip_hdrlen(skb);
+ if (unlikely(ip_len < sizeof(struct iphdr) ||
+ skb->len < nh_ofs + ip_len))
+ return -EINVAL;
+
+ skb_set_transport_header(skb, nh_ofs + ip_len);
+ return 0;
+}
+
+static bool tcphdr_ok(struct sk_buff *skb)
+{
+ int th_ofs = skb_transport_offset(skb);
+ int tcp_len;
+
+ if (unlikely(!pskb_may_pull(skb, th_ofs + sizeof(struct tcphdr))))
+ return false;
+
+ tcp_len = tcp_hdrlen(skb);
+ if (unlikely(tcp_len < sizeof(struct tcphdr) ||
+ skb->len < th_ofs + tcp_len))
+ return false;
+
+ return true;
+}
+
+static bool udphdr_ok(struct sk_buff *skb)
+{
+ return pskb_may_pull(skb, skb_transport_offset(skb) +
+ sizeof(struct udphdr));
+}
+
+static bool icmphdr_ok(struct sk_buff *skb)
+{
+ return pskb_may_pull(skb, skb_transport_offset(skb) +
+ sizeof(struct icmphdr));
+}
+
+u64 ovs_flow_used_time(unsigned long flow_jiffies)
+{
+ struct timespec cur_ts;
+ u64 cur_ms, idle_ms;
+
+ ktime_get_ts(&cur_ts);
+ idle_ms = jiffies_to_msecs(jiffies - flow_jiffies);
+ cur_ms = (u64)cur_ts.tv_sec * MSEC_PER_SEC +
+ cur_ts.tv_nsec / NSEC_PER_MSEC;
+
+ return cur_ms - idle_ms;
+}
+
+#define SW_FLOW_KEY_OFFSET(field) \
+ (offsetof(struct sw_flow_key, field) + \
+ FIELD_SIZEOF(struct sw_flow_key, field))
+
+static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key,
+ int *key_lenp)
+{
+ unsigned int nh_ofs = skb_network_offset(skb);
+ unsigned int nh_len;
+ int payload_ofs;
+ struct ipv6hdr *nh;
+ uint8_t nexthdr;
+ __be16 frag_off;
+ int err;
+
+ *key_lenp = SW_FLOW_KEY_OFFSET(ipv6.label);
+
+ err = check_header(skb, nh_ofs + sizeof(*nh));
+ if (unlikely(err))
+ return err;
+
+ nh = ipv6_hdr(skb);
+ nexthdr = nh->nexthdr;
+ payload_ofs = (u8 *)(nh + 1) - skb->data;
+
+ key->ip.proto = NEXTHDR_NONE;
+ key->ip.tos = ipv6_get_dsfield(nh);
+ key->ip.ttl = nh->hop_limit;
+ key->ipv6.label = *(__be32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL);
+ key->ipv6.addr.src = nh->saddr;
+ key->ipv6.addr.dst = nh->daddr;
+
+ payload_ofs = ipv6_skip_exthdr(skb, payload_ofs, &nexthdr, &frag_off);
+ if (unlikely(payload_ofs < 0))
+ return -EINVAL;
+
+ if (frag_off) {
+ if (frag_off & htons(~0x7))
+ key->ip.frag = OVS_FRAG_TYPE_LATER;
+ else
+ key->ip.frag = OVS_FRAG_TYPE_FIRST;
+ }
+
+ nh_len = payload_ofs - nh_ofs;
+ skb_set_transport_header(skb, nh_ofs + nh_len);
+ key->ip.proto = nexthdr;
+ return nh_len;
+}
+
+static bool icmp6hdr_ok(struct sk_buff *skb)
+{
+ return pskb_may_pull(skb, skb_transport_offset(skb) +
+ sizeof(struct icmp6hdr));
+}
+
+#define TCP_FLAGS_OFFSET 13
+#define TCP_FLAG_MASK 0x3f
+
+void ovs_flow_used(struct sw_flow *flow, struct sk_buff *skb)
+{
+ u8 tcp_flags = 0;
+
+ if (flow->key.eth.type == htons(ETH_P_IP) &&
+ flow->key.ip.proto == IPPROTO_TCP) {
+ u8 *tcp = (u8 *)tcp_hdr(skb);
+ tcp_flags = *(tcp + TCP_FLAGS_OFFSET) & TCP_FLAG_MASK;
+ }
+
+ spin_lock(&flow->lock);
+ flow->used = jiffies;
+ flow->packet_count++;
+ flow->byte_count += skb->len;
+ flow->tcp_flags |= tcp_flags;
+ spin_unlock(&flow->lock);
+}
+
+struct sw_flow_actions *ovs_flow_actions_alloc(const struct nlattr *actions)
+{
+ int actions_len = nla_len(actions);
+ struct sw_flow_actions *sfa;
+
+ /* At least DP_MAX_PORTS actions are required to be able to flood a
+ * packet to every port. Factor of 2 allows for setting VLAN tags,
+ * etc. */
+ if (actions_len > 2 * DP_MAX_PORTS * nla_total_size(4))
+ return ERR_PTR(-EINVAL);
+
+ sfa = kmalloc(sizeof(*sfa) + actions_len, GFP_KERNEL);
+ if (!sfa)
+ return ERR_PTR(-ENOMEM);
+
+ sfa->actions_len = actions_len;
+ memcpy(sfa->actions, nla_data(actions), actions_len);
+ return sfa;
+}
+
+struct sw_flow *ovs_flow_alloc(void)
+{
+ struct sw_flow *flow;
+
+ flow = kmem_cache_alloc(flow_cache, GFP_KERNEL);
+ if (!flow)
+ return ERR_PTR(-ENOMEM);
+
+ spin_lock_init(&flow->lock);
+ flow->sf_acts = NULL;
+
+ return flow;
+}
+
+static struct hlist_head *find_bucket(struct flow_table *table, u32 hash)
+{
+ hash = jhash_1word(hash, table->hash_seed);
+ return flex_array_get(table->buckets,
+ (hash & (table->n_buckets - 1)));
+}
+
+static struct flex_array *alloc_buckets(unsigned int n_buckets)
+{
+ struct flex_array *buckets;
+ int i, err;
+
+ buckets = flex_array_alloc(sizeof(struct hlist_head *),
+ n_buckets, GFP_KERNEL);
+ if (!buckets)
+ return NULL;
+
+ err = flex_array_prealloc(buckets, 0, n_buckets, GFP_KERNEL);
+ if (err) {
+ flex_array_free(buckets);
+ return NULL;
+ }
+
+ for (i = 0; i < n_buckets; i++)
+ INIT_HLIST_HEAD((struct hlist_head *)
+ flex_array_get(buckets, i));
+
+ return buckets;
+}
+
+static void free_buckets(struct flex_array *buckets)
+{
+ flex_array_free(buckets);
+}
+
+struct flow_table *ovs_flow_tbl_alloc(int new_size)
+{
+ struct flow_table *table = kmalloc(sizeof(*table), GFP_KERNEL);
+
+ if (!table)
+ return NULL;
+
+ table->buckets = alloc_buckets(new_size);
+
+ if (!table->buckets) {
+ kfree(table);
+ return NULL;
+ }
+ table->n_buckets = new_size;
+ table->count = 0;
+ table->node_ver = 0;
+ table->keep_flows = false;
+ get_random_bytes(&table->hash_seed, sizeof(u32));
+
+ return table;
+}
+
+void ovs_flow_tbl_destroy(struct flow_table *table)
+{
+ int i;
+
+ if (!table)
+ return;
+
+ if (table->keep_flows)
+ goto skip_flows;
+
+ for (i = 0; i < table->n_buckets; i++) {
+ struct sw_flow *flow;
+ struct hlist_head *head = flex_array_get(table->buckets, i);
+ struct hlist_node *node, *n;
+ int ver = table->node_ver;
+
+ hlist_for_each_entry_safe(flow, node, n, head, hash_node[ver]) {
+ hlist_del_rcu(&flow->hash_node[ver]);
+ ovs_flow_free(flow);
+ }
+ }
+
+skip_flows:
+ free_buckets(table->buckets);
+ kfree(table);
+}
+
+static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu)
+{
+ struct flow_table *table = container_of(rcu, struct flow_table, rcu);
+
+ ovs_flow_tbl_destroy(table);
+}
+
+void ovs_flow_tbl_deferred_destroy(struct flow_table *table)
+{
+ if (!table)
+ return;
+
+ call_rcu(&table->rcu, flow_tbl_destroy_rcu_cb);
+}
+
+struct sw_flow *ovs_flow_tbl_next(struct flow_table *table, u32 *bucket, u32 *last)
+{
+ struct sw_flow *flow;
+ struct hlist_head *head;
+ struct hlist_node *n;
+ int ver;
+ int i;
+
+ ver = table->node_ver;
+ while (*bucket < table->n_buckets) {
+ i = 0;
+ head = flex_array_get(table->buckets, *bucket);
+ hlist_for_each_entry_rcu(flow, n, head, hash_node[ver]) {
+ if (i < *last) {
+ i++;
+ continue;
+ }
+ *last = i + 1;
+ return flow;
+ }
+ (*bucket)++;
+ *last = 0;
+ }
+
+ return NULL;
+}
+
+static void flow_table_copy_flows(struct flow_table *old, struct flow_table *new)
+{
+ int old_ver;
+ int i;
+
+ old_ver = old->node_ver;
+ new->node_ver = !old_ver;
+
+ /* Insert in new table. */
+ for (i = 0; i < old->n_buckets; i++) {
+ struct sw_flow *flow;
+ struct hlist_head *head;
+ struct hlist_node *n;
+
+ head = flex_array_get(old->buckets, i);
+
+ hlist_for_each_entry(flow, n, head, hash_node[old_ver])
+ ovs_flow_tbl_insert(new, flow);
+ }
+ old->keep_flows = true;
+}
+
+static struct flow_table *__flow_tbl_rehash(struct flow_table *table, int n_buckets)
+{
+ struct flow_table *new_table;
+
+ new_table = ovs_flow_tbl_alloc(n_buckets);
+ if (!new_table)
+ return ERR_PTR(-ENOMEM);
+
+ flow_table_copy_flows(table, new_table);
+
+ return new_table;
+}
+
+struct flow_table *ovs_flow_tbl_rehash(struct flow_table *table)
+{
+ return __flow_tbl_rehash(table, table->n_buckets);
+}
+
+struct flow_table *ovs_flow_tbl_expand(struct flow_table *table)
+{
+ return __flow_tbl_rehash(table, table->n_buckets * 2);
+}
+
+void ovs_flow_free(struct sw_flow *flow)
+{
+ if (unlikely(!flow))
+ return;
+
+ kfree((struct sf_flow_acts __force *)flow->sf_acts);
+ kmem_cache_free(flow_cache, flow);
+}
+
+/* RCU callback used by ovs_flow_deferred_free. */
+static void rcu_free_flow_callback(struct rcu_head *rcu)
+{
+ struct sw_flow *flow = container_of(rcu, struct sw_flow, rcu);
+
+ ovs_flow_free(flow);
+}
+
+/* Schedules 'flow' to be freed after the next RCU grace period.
+ * The caller must hold rcu_read_lock for this to be sensible. */
+void ovs_flow_deferred_free(struct sw_flow *flow)
+{
+ call_rcu(&flow->rcu, rcu_free_flow_callback);
+}
+
+/* RCU callback used by ovs_flow_deferred_free_acts. */
+static void rcu_free_acts_callback(struct rcu_head *rcu)
+{
+ struct sw_flow_actions *sf_acts = container_of(rcu,
+ struct sw_flow_actions, rcu);
+ kfree(sf_acts);
+}
+
+/* Schedules 'sf_acts' to be freed after the next RCU grace period.
+ * The caller must hold rcu_read_lock for this to be sensible. */
+void ovs_flow_deferred_free_acts(struct sw_flow_actions *sf_acts)
+{
+ call_rcu(&sf_acts->rcu, rcu_free_acts_callback);
+}
+
+static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key)
+{
+ struct qtag_prefix {
+ __be16 eth_type; /* ETH_P_8021Q */
+ __be16 tci;
+ };
+ struct qtag_prefix *qp;
+
+ if (unlikely(skb->len < sizeof(struct qtag_prefix) + sizeof(__be16)))
+ return 0;
+
+ if (unlikely(!pskb_may_pull(skb, sizeof(struct qtag_prefix) +
+ sizeof(__be16))))
+ return -ENOMEM;
+
+ qp = (struct qtag_prefix *) skb->data;
+ key->eth.tci = qp->tci | htons(VLAN_TAG_PRESENT);
+ __skb_pull(skb, sizeof(struct qtag_prefix));
+
+ return 0;
+}
+
+static __be16 parse_ethertype(struct sk_buff *skb)
+{
+ struct llc_snap_hdr {
+ u8 dsap; /* Always 0xAA */
+ u8 ssap; /* Always 0xAA */
+ u8 ctrl;
+ u8 oui[3];
+ __be16 ethertype;
+ };
+ struct llc_snap_hdr *llc;
+ __be16 proto;
+
+ proto = *(__be16 *) skb->data;
+ __skb_pull(skb, sizeof(__be16));
+
+ if (ntohs(proto) >= 1536)
+ return proto;
+
+ if (skb->len < sizeof(struct llc_snap_hdr))
+ return htons(ETH_P_802_2);
+
+ if (unlikely(!pskb_may_pull(skb, sizeof(struct llc_snap_hdr))))
+ return htons(0);
+
+ llc = (struct llc_snap_hdr *) skb->data;
+ if (llc->dsap != LLC_SAP_SNAP ||
+ llc->ssap != LLC_SAP_SNAP ||
+ (llc->oui[0] | llc->oui[1] | llc->oui[2]) != 0)
+ return htons(ETH_P_802_2);
+
+ __skb_pull(skb, sizeof(struct llc_snap_hdr));
+ return llc->ethertype;
+}
+
+static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
+ int *key_lenp, int nh_len)
+{
+ struct icmp6hdr *icmp = icmp6_hdr(skb);
+ int error = 0;
+ int key_len;
+
+ /* The ICMPv6 type and code fields use the 16-bit transport port
+ * fields, so we need to store them in 16-bit network byte order.
+ */
+ key->ipv6.tp.src = htons(icmp->icmp6_type);
+ key->ipv6.tp.dst = htons(icmp->icmp6_code);
+ key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
+
+ if (icmp->icmp6_code == 0 &&
+ (icmp->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION ||
+ icmp->icmp6_type == NDISC_NEIGHBOUR_ADVERTISEMENT)) {
+ int icmp_len = skb->len - skb_transport_offset(skb);
+ struct nd_msg *nd;
+ int offset;
+
+ key_len = SW_FLOW_KEY_OFFSET(ipv6.nd);
+
+ /* In order to process neighbor discovery options, we need the
+ * entire packet.
+ */
+ if (unlikely(icmp_len < sizeof(*nd)))
+ goto out;
+ if (unlikely(skb_linearize(skb))) {
+ error = -ENOMEM;
+ goto out;
+ }
+
+ nd = (struct nd_msg *)skb_transport_header(skb);
+ key->ipv6.nd.target = nd->target;
+ key_len = SW_FLOW_KEY_OFFSET(ipv6.nd);
+
+ icmp_len -= sizeof(*nd);
+ offset = 0;
+ while (icmp_len >= 8) {
+ struct nd_opt_hdr *nd_opt =
+ (struct nd_opt_hdr *)(nd->opt + offset);
+ int opt_len = nd_opt->nd_opt_len * 8;
+
+ if (unlikely(!opt_len || opt_len > icmp_len))
+ goto invalid;
+
+ /* Store the link layer address if the appropriate
+ * option is provided. It is considered an error if
+ * the same link layer option is specified twice.
+ */
+ if (nd_opt->nd_opt_type == ND_OPT_SOURCE_LL_ADDR
+ && opt_len == 8) {
+ if (unlikely(!is_zero_ether_addr(key->ipv6.nd.sll)))
+ goto invalid;
+ memcpy(key->ipv6.nd.sll,
+ &nd->opt[offset+sizeof(*nd_opt)], ETH_ALEN);
+ } else if (nd_opt->nd_opt_type == ND_OPT_TARGET_LL_ADDR
+ && opt_len == 8) {
+ if (unlikely(!is_zero_ether_addr(key->ipv6.nd.tll)))
+ goto invalid;
+ memcpy(key->ipv6.nd.tll,
+ &nd->opt[offset+sizeof(*nd_opt)], ETH_ALEN);
+ }
+
+ icmp_len -= opt_len;
+ offset += opt_len;
+ }
+ }
+
+ goto out;
+
+invalid:
+ memset(&key->ipv6.nd.target, 0, sizeof(key->ipv6.nd.target));
+ memset(key->ipv6.nd.sll, 0, sizeof(key->ipv6.nd.sll));
+ memset(key->ipv6.nd.tll, 0, sizeof(key->ipv6.nd.tll));
+
+out:
+ *key_lenp = key_len;
+ return error;
+}
+
+/**
+ * ovs_flow_extract - extracts a flow key from an Ethernet frame.
+ * @skb: sk_buff that contains the frame, with skb->data pointing to the
+ * Ethernet header
+ * @in_port: port number on which @skb was received.
+ * @key: output flow key
+ * @key_lenp: length of output flow key
+ *
+ * The caller must ensure that skb->len >= ETH_HLEN.
+ *
+ * Returns 0 if successful, otherwise a negative errno value.
+ *
+ * Initializes @skb header pointers as follows:
+ *
+ * - skb->mac_header: the Ethernet header.
+ *
+ * - skb->network_header: just past the Ethernet header, or just past the
+ * VLAN header, to the first byte of the Ethernet payload.
+ *
+ * - skb->transport_header: If key->dl_type is ETH_P_IP or ETH_P_IPV6
+ * on output, then just past the IP header, if one is present and
+ * of a correct length, otherwise the same as skb->network_header.
+ * For other key->dl_type values it is left untouched.
+ */
+int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
+ int *key_lenp)
+{
+ int error = 0;
+ int key_len = SW_FLOW_KEY_OFFSET(eth);
+ struct ethhdr *eth;
+
+ memset(key, 0, sizeof(*key));
+
+ key->phy.priority = skb->priority;
+ key->phy.in_port = in_port;
+
+ skb_reset_mac_header(skb);
+
+ /* Link layer. We are guaranteed to have at least the 14 byte Ethernet
+ * header in the linear data area.
+ */
+ eth = eth_hdr(skb);
+ memcpy(key->eth.src, eth->h_source, ETH_ALEN);
+ memcpy(key->eth.dst, eth->h_dest, ETH_ALEN);
+
+ __skb_pull(skb, 2 * ETH_ALEN);
+
+ if (vlan_tx_tag_present(skb))
+ key->eth.tci = htons(skb->vlan_tci);
+ else if (eth->h_proto == htons(ETH_P_8021Q))
+ if (unlikely(parse_vlan(skb, key)))
+ return -ENOMEM;
+
+ key->eth.type = parse_ethertype(skb);
+ if (unlikely(key->eth.type == htons(0)))
+ return -ENOMEM;
+
+ skb_reset_network_header(skb);
+ __skb_push(skb, skb->data - skb_mac_header(skb));
+
+ /* Network layer. */
+ if (key->eth.type == htons(ETH_P_IP)) {
+ struct iphdr *nh;
+ __be16 offset;
+
+ key_len = SW_FLOW_KEY_OFFSET(ipv4.addr);
+
+ error = check_iphdr(skb);
+ if (unlikely(error)) {
+ if (error == -EINVAL) {
+ skb->transport_header = skb->network_header;
+ error = 0;
+ }
+ goto out;
+ }
+
+ nh = ip_hdr(skb);
+ key->ipv4.addr.src = nh->saddr;
+ key->ipv4.addr.dst = nh->daddr;
+
+ key->ip.proto = nh->protocol;
+ key->ip.tos = nh->tos;
+ key->ip.ttl = nh->ttl;
+
+ offset = nh->frag_off & htons(IP_OFFSET);
+ if (offset) {
+ key->ip.frag = OVS_FRAG_TYPE_LATER;
+ goto out;
+ }
+ if (nh->frag_off & htons(IP_MF) ||
+ skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
+ key->ip.frag = OVS_FRAG_TYPE_FIRST;
+
+ /* Transport layer. */
+ if (key->ip.proto == IPPROTO_TCP) {
+ key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
+ if (tcphdr_ok(skb)) {
+ struct tcphdr *tcp = tcp_hdr(skb);
+ key->ipv4.tp.src = tcp->source;
+ key->ipv4.tp.dst = tcp->dest;
+ }
+ } else if (key->ip.proto == IPPROTO_UDP) {
+ key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
+ if (udphdr_ok(skb)) {
+ struct udphdr *udp = udp_hdr(skb);
+ key->ipv4.tp.src = udp->source;
+ key->ipv4.tp.dst = udp->dest;
+ }
+ } else if (key->ip.proto == IPPROTO_ICMP) {
+ key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
+ if (icmphdr_ok(skb)) {
+ struct icmphdr *icmp = icmp_hdr(skb);
+ /* The ICMP type and code fields use the 16-bit
+ * transport port fields, so we need to store
+ * them in 16-bit network byte order. */
+ key->ipv4.tp.src = htons(icmp->type);
+ key->ipv4.tp.dst = htons(icmp->code);
+ }
+ }
+
+ } else if (key->eth.type == htons(ETH_P_ARP) && arphdr_ok(skb)) {
+ struct arp_eth_header *arp;
+
+ arp = (struct arp_eth_header *)skb_network_header(skb);
+
+ if (arp->ar_hrd == htons(ARPHRD_ETHER)
+ && arp->ar_pro == htons(ETH_P_IP)
+ && arp->ar_hln == ETH_ALEN
+ && arp->ar_pln == 4) {
+
+ /* We only match on the lower 8 bits of the opcode. */
+ if (ntohs(arp->ar_op) <= 0xff)
+ key->ip.proto = ntohs(arp->ar_op);
+
+ if (key->ip.proto == ARPOP_REQUEST
+ || key->ip.proto == ARPOP_REPLY) {
+ memcpy(&key->ipv4.addr.src, arp->ar_sip, sizeof(key->ipv4.addr.src));
+ memcpy(&key->ipv4.addr.dst, arp->ar_tip, sizeof(key->ipv4.addr.dst));
+ memcpy(key->ipv4.arp.sha, arp->ar_sha, ETH_ALEN);
+ memcpy(key->ipv4.arp.tha, arp->ar_tha, ETH_ALEN);
+ key_len = SW_FLOW_KEY_OFFSET(ipv4.arp);
+ }
+ }
+ } else if (key->eth.type == htons(ETH_P_IPV6)) {
+ int nh_len; /* IPv6 Header + Extensions */
+
+ nh_len = parse_ipv6hdr(skb, key, &key_len);
+ if (unlikely(nh_len < 0)) {
+ if (nh_len == -EINVAL)
+ skb->transport_header = skb->network_header;
+ else
+ error = nh_len;
+ goto out;
+ }
+
+ if (key->ip.frag == OVS_FRAG_TYPE_LATER)
+ goto out;
+ if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
+ key->ip.frag = OVS_FRAG_TYPE_FIRST;
+
+ /* Transport layer. */
+ if (key->ip.proto == NEXTHDR_TCP) {
+ key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
+ if (tcphdr_ok(skb)) {
+ struct tcphdr *tcp = tcp_hdr(skb);
+ key->ipv6.tp.src = tcp->source;
+ key->ipv6.tp.dst = tcp->dest;
+ }
+ } else if (key->ip.proto == NEXTHDR_UDP) {
+ key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
+ if (udphdr_ok(skb)) {
+ struct udphdr *udp = udp_hdr(skb);
+ key->ipv6.tp.src = udp->source;
+ key->ipv6.tp.dst = udp->dest;
+ }
+ } else if (key->ip.proto == NEXTHDR_ICMP) {
+ key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
+ if (icmp6hdr_ok(skb)) {
+ error = parse_icmpv6(skb, key, &key_len, nh_len);
+ if (error < 0)
+ goto out;
+ }
+ }
+ }
+
+out:
+ *key_lenp = key_len;
+ return error;
+}
+
+u32 ovs_flow_hash(const struct sw_flow_key *key, int key_len)
+{
+ return jhash2((u32 *)key, DIV_ROUND_UP(key_len, sizeof(u32)), 0);
+}
+
+struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *table,
+ struct sw_flow_key *key, int key_len)
+{
+ struct sw_flow *flow;
+ struct hlist_node *n;
+ struct hlist_head *head;
+ u32 hash;
+
+ hash = ovs_flow_hash(key, key_len);
+
+ head = find_bucket(table, hash);
+ hlist_for_each_entry_rcu(flow, n, head, hash_node[table->node_ver]) {
+
+ if (flow->hash == hash &&
+ !memcmp(&flow->key, key, key_len)) {
+ return flow;
+ }
+ }
+ return NULL;
+}
+
+void ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow)
+{
+ struct hlist_head *head;
+
+ head = find_bucket(table, flow->hash);
+ hlist_add_head_rcu(&flow->hash_node[table->node_ver], head);
+ table->count++;
+}
+
+void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow)
+{
+ hlist_del_rcu(&flow->hash_node[table->node_ver]);
+ table->count--;
+ BUG_ON(table->count < 0);
+}
+
+/* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */
+const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
+ [OVS_KEY_ATTR_ENCAP] = -1,
+ [OVS_KEY_ATTR_PRIORITY] = sizeof(u32),
+ [OVS_KEY_ATTR_IN_PORT] = sizeof(u32),
+ [OVS_KEY_ATTR_ETHERNET] = sizeof(struct ovs_key_ethernet),
+ [OVS_KEY_ATTR_VLAN] = sizeof(__be16),
+ [OVS_KEY_ATTR_ETHERTYPE] = sizeof(__be16),
+ [OVS_KEY_ATTR_IPV4] = sizeof(struct ovs_key_ipv4),
+ [OVS_KEY_ATTR_IPV6] = sizeof(struct ovs_key_ipv6),
+ [OVS_KEY_ATTR_TCP] = sizeof(struct ovs_key_tcp),
+ [OVS_KEY_ATTR_UDP] = sizeof(struct ovs_key_udp),
+ [OVS_KEY_ATTR_ICMP] = sizeof(struct ovs_key_icmp),
+ [OVS_KEY_ATTR_ICMPV6] = sizeof(struct ovs_key_icmpv6),
+ [OVS_KEY_ATTR_ARP] = sizeof(struct ovs_key_arp),
+ [OVS_KEY_ATTR_ND] = sizeof(struct ovs_key_nd),
+};
+
+static int ipv4_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_len,
+ const struct nlattr *a[], u32 *attrs)
+{
+ const struct ovs_key_icmp *icmp_key;
+ const struct ovs_key_tcp *tcp_key;
+ const struct ovs_key_udp *udp_key;
+
+ switch (swkey->ip.proto) {
+ case IPPROTO_TCP:
+ if (!(*attrs & (1 << OVS_KEY_ATTR_TCP)))
+ return -EINVAL;
+ *attrs &= ~(1 << OVS_KEY_ATTR_TCP);
+
+ *key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
+ tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]);
+ swkey->ipv4.tp.src = tcp_key->tcp_src;
+ swkey->ipv4.tp.dst = tcp_key->tcp_dst;
+ break;
+
+ case IPPROTO_UDP:
+ if (!(*attrs & (1 << OVS_KEY_ATTR_UDP)))
+ return -EINVAL;
+ *attrs &= ~(1 << OVS_KEY_ATTR_UDP);
+
+ *key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
+ udp_key = nla_data(a[OVS_KEY_ATTR_UDP]);
+ swkey->ipv4.tp.src = udp_key->udp_src;
+ swkey->ipv4.tp.dst = udp_key->udp_dst;
+ break;
+
+ case IPPROTO_ICMP:
+ if (!(*attrs & (1 << OVS_KEY_ATTR_ICMP)))
+ return -EINVAL;
+ *attrs &= ~(1 << OVS_KEY_ATTR_ICMP);
+
+ *key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
+ icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]);
+ swkey->ipv4.tp.src = htons(icmp_key->icmp_type);
+ swkey->ipv4.tp.dst = htons(icmp_key->icmp_code);
+ break;
+ }
+
+ return 0;
+}
+
+static int ipv6_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_len,
+ const struct nlattr *a[], u32 *attrs)
+{
+ const struct ovs_key_icmpv6 *icmpv6_key;
+ const struct ovs_key_tcp *tcp_key;
+ const struct ovs_key_udp *udp_key;
+
+ switch (swkey->ip.proto) {
+ case IPPROTO_TCP:
+ if (!(*attrs & (1 << OVS_KEY_ATTR_TCP)))
+ return -EINVAL;
+ *attrs &= ~(1 << OVS_KEY_ATTR_TCP);
+
+ *key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
+ tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]);
+ swkey->ipv6.tp.src = tcp_key->tcp_src;
+ swkey->ipv6.tp.dst = tcp_key->tcp_dst;
+ break;
+
+ case IPPROTO_UDP:
+ if (!(*attrs & (1 << OVS_KEY_ATTR_UDP)))
+ return -EINVAL;
+ *attrs &= ~(1 << OVS_KEY_ATTR_UDP);
+
+ *key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
+ udp_key = nla_data(a[OVS_KEY_ATTR_UDP]);
+ swkey->ipv6.tp.src = udp_key->udp_src;
+ swkey->ipv6.tp.dst = udp_key->udp_dst;
+ break;
+
+ case IPPROTO_ICMPV6:
+ if (!(*attrs & (1 << OVS_KEY_ATTR_ICMPV6)))
+ return -EINVAL;
+ *attrs &= ~(1 << OVS_KEY_ATTR_ICMPV6);
+
+ *key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
+ icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]);
+ swkey->ipv6.tp.src = htons(icmpv6_key->icmpv6_type);
+ swkey->ipv6.tp.dst = htons(icmpv6_key->icmpv6_code);
+
+ if (swkey->ipv6.tp.src == htons(NDISC_NEIGHBOUR_SOLICITATION) ||
+ swkey->ipv6.tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) {
+ const struct ovs_key_nd *nd_key;
+
+ if (!(*attrs & (1 << OVS_KEY_ATTR_ND)))
+ return -EINVAL;
+ *attrs &= ~(1 << OVS_KEY_ATTR_ND);
+
+ *key_len = SW_FLOW_KEY_OFFSET(ipv6.nd);
+ nd_key = nla_data(a[OVS_KEY_ATTR_ND]);
+ memcpy(&swkey->ipv6.nd.target, nd_key->nd_target,
+ sizeof(swkey->ipv6.nd.target));
+ memcpy(swkey->ipv6.nd.sll, nd_key->nd_sll, ETH_ALEN);
+ memcpy(swkey->ipv6.nd.tll, nd_key->nd_tll, ETH_ALEN);
+ }
+ break;
+ }
+
+ return 0;
+}
+
+static int parse_flow_nlattrs(const struct nlattr *attr,
+ const struct nlattr *a[], u32 *attrsp)
+{
+ const struct nlattr *nla;
+ u32 attrs;
+ int rem;
+
+ attrs = 0;
+ nla_for_each_nested(nla, attr, rem) {
+ u16 type = nla_type(nla);
+ int expected_len;
+
+ if (type > OVS_KEY_ATTR_MAX || attrs & (1 << type))
+ return -EINVAL;
+
+ expected_len = ovs_key_lens[type];
+ if (nla_len(nla) != expected_len && expected_len != -1)
+ return -EINVAL;
+
+ attrs |= 1 << type;
+ a[type] = nla;
+ }
+ if (rem)
+ return -EINVAL;
+
+ *attrsp = attrs;
+ return 0;
+}
+
+/**
+ * ovs_flow_from_nlattrs - parses Netlink attributes into a flow key.
+ * @swkey: receives the extracted flow key.
+ * @key_lenp: number of bytes used in @swkey.
+ * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
+ * sequence.
+ */
+int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
+ const struct nlattr *attr)
+{
+ const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
+ const struct ovs_key_ethernet *eth_key;
+ int key_len;
+ u32 attrs;
+ int err;
+
+ memset(swkey, 0, sizeof(struct sw_flow_key));
+ key_len = SW_FLOW_KEY_OFFSET(eth);
+
+ err = parse_flow_nlattrs(attr, a, &attrs);
+ if (err)
+ return err;
+
+ /* Metadata attributes. */
+ if (attrs & (1 << OVS_KEY_ATTR_PRIORITY)) {
+ swkey->phy.priority = nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]);
+ attrs &= ~(1 << OVS_KEY_ATTR_PRIORITY);
+ }
+ if (attrs & (1 << OVS_KEY_ATTR_IN_PORT)) {
+ u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]);
+ if (in_port >= DP_MAX_PORTS)
+ return -EINVAL;
+ swkey->phy.in_port = in_port;
+ attrs &= ~(1 << OVS_KEY_ATTR_IN_PORT);
+ } else {
+ swkey->phy.in_port = USHRT_MAX;
+ }
+
+ /* Data attributes. */
+ if (!(attrs & (1 << OVS_KEY_ATTR_ETHERNET)))
+ return -EINVAL;
+ attrs &= ~(1 << OVS_KEY_ATTR_ETHERNET);
+
+ eth_key = nla_data(a[OVS_KEY_ATTR_ETHERNET]);
+ memcpy(swkey->eth.src, eth_key->eth_src, ETH_ALEN);
+ memcpy(swkey->eth.dst, eth_key->eth_dst, ETH_ALEN);
+
+ if (attrs & (1u << OVS_KEY_ATTR_ETHERTYPE) &&
+ nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]) == htons(ETH_P_8021Q)) {
+ const struct nlattr *encap;
+ __be16 tci;
+
+ if (attrs != ((1 << OVS_KEY_ATTR_VLAN) |
+ (1 << OVS_KEY_ATTR_ETHERTYPE) |
+ (1 << OVS_KEY_ATTR_ENCAP)))
+ return -EINVAL;
+
+ encap = a[OVS_KEY_ATTR_ENCAP];
+ tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
+ if (tci & htons(VLAN_TAG_PRESENT)) {
+ swkey->eth.tci = tci;
+
+ err = parse_flow_nlattrs(encap, a, &attrs);
+ if (err)
+ return err;
+ } else if (!tci) {
+ /* Corner case for truncated 802.1Q header. */
+ if (nla_len(encap))
+ return -EINVAL;
+
+ swkey->eth.type = htons(ETH_P_8021Q);
+ *key_lenp = key_len;
+ return 0;
+ } else {
+ return -EINVAL;
+ }
+ }
+
+ if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) {
+ swkey->eth.type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
+ if (ntohs(swkey->eth.type) < 1536)
+ return -EINVAL;
+ attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
+ } else {
+ swkey->eth.type = htons(ETH_P_802_2);
+ }
+
+ if (swkey->eth.type == htons(ETH_P_IP)) {
+ const struct ovs_key_ipv4 *ipv4_key;
+
+ if (!(attrs & (1 << OVS_KEY_ATTR_IPV4)))
+ return -EINVAL;
+ attrs &= ~(1 << OVS_KEY_ATTR_IPV4);
+
+ key_len = SW_FLOW_KEY_OFFSET(ipv4.addr);
+ ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]);
+ if (ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX)
+ return -EINVAL;
+ swkey->ip.proto = ipv4_key->ipv4_proto;
+ swkey->ip.tos = ipv4_key->ipv4_tos;
+ swkey->ip.ttl = ipv4_key->ipv4_ttl;
+ swkey->ip.frag = ipv4_key->ipv4_frag;
+ swkey->ipv4.addr.src = ipv4_key->ipv4_src;
+ swkey->ipv4.addr.dst = ipv4_key->ipv4_dst;
+
+ if (swkey->ip.frag != OVS_FRAG_TYPE_LATER) {
+ err = ipv4_flow_from_nlattrs(swkey, &key_len, a, &attrs);
+ if (err)
+ return err;
+ }
+ } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
+ const struct ovs_key_ipv6 *ipv6_key;
+
+ if (!(attrs & (1 << OVS_KEY_ATTR_IPV6)))
+ return -EINVAL;
+ attrs &= ~(1 << OVS_KEY_ATTR_IPV6);
+
+ key_len = SW_FLOW_KEY_OFFSET(ipv6.label);
+ ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]);
+ if (ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX)
+ return -EINVAL;
+ swkey->ipv6.label = ipv6_key->ipv6_label;
+ swkey->ip.proto = ipv6_key->ipv6_proto;
+ swkey->ip.tos = ipv6_key->ipv6_tclass;
+ swkey->ip.ttl = ipv6_key->ipv6_hlimit;
+ swkey->ip.frag = ipv6_key->ipv6_frag;
+ memcpy(&swkey->ipv6.addr.src, ipv6_key->ipv6_src,
+ sizeof(swkey->ipv6.addr.src));
+ memcpy(&swkey->ipv6.addr.dst, ipv6_key->ipv6_dst,
+ sizeof(swkey->ipv6.addr.dst));
+
+ if (swkey->ip.frag != OVS_FRAG_TYPE_LATER) {
+ err = ipv6_flow_from_nlattrs(swkey, &key_len, a, &attrs);
+ if (err)
+ return err;
+ }
+ } else if (swkey->eth.type == htons(ETH_P_ARP)) {
+ const struct ovs_key_arp *arp_key;
+
+ if (!(attrs & (1 << OVS_KEY_ATTR_ARP)))
+ return -EINVAL;
+ attrs &= ~(1 << OVS_KEY_ATTR_ARP);
+
+ key_len = SW_FLOW_KEY_OFFSET(ipv4.arp);
+ arp_key = nla_data(a[OVS_KEY_ATTR_ARP]);
+ swkey->ipv4.addr.src = arp_key->arp_sip;
+ swkey->ipv4.addr.dst = arp_key->arp_tip;
+ if (arp_key->arp_op & htons(0xff00))
+ return -EINVAL;
+ swkey->ip.proto = ntohs(arp_key->arp_op);
+ memcpy(swkey->ipv4.arp.sha, arp_key->arp_sha, ETH_ALEN);
+ memcpy(swkey->ipv4.arp.tha, arp_key->arp_tha, ETH_ALEN);
+ }
+
+ if (attrs)
+ return -EINVAL;
+ *key_lenp = key_len;
+
+ return 0;
+}
+
+/**
+ * ovs_flow_metadata_from_nlattrs - parses Netlink attributes into a flow key.
+ * @in_port: receives the extracted input port.
+ * @key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
+ * sequence.
+ *
+ * This parses a series of Netlink attributes that form a flow key, which must
+ * take the same form accepted by flow_from_nlattrs(), but only enough of it to
+ * get the metadata, that is, the parts of the flow key that cannot be
+ * extracted from the packet itself.
+ */
+int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port,
+ const struct nlattr *attr)
+{
+ const struct nlattr *nla;
+ int rem;
+
+ *in_port = USHRT_MAX;
+ *priority = 0;
+
+ nla_for_each_nested(nla, attr, rem) {
+ int type = nla_type(nla);
+
+ if (type <= OVS_KEY_ATTR_MAX && ovs_key_lens[type] > 0) {
+ if (nla_len(nla) != ovs_key_lens[type])
+ return -EINVAL;
+
+ switch (type) {
+ case OVS_KEY_ATTR_PRIORITY:
+ *priority = nla_get_u32(nla);
+ break;
+
+ case OVS_KEY_ATTR_IN_PORT:
+ if (nla_get_u32(nla) >= DP_MAX_PORTS)
+ return -EINVAL;
+ *in_port = nla_get_u32(nla);
+ break;
+ }
+ }
+ }
+ if (rem)
+ return -EINVAL;
+ return 0;
+}
+
+int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
+{
+ struct ovs_key_ethernet *eth_key;
+ struct nlattr *nla, *encap;
+
+ if (swkey->phy.priority)
+ NLA_PUT_U32(skb, OVS_KEY_ATTR_PRIORITY, swkey->phy.priority);
+
+ if (swkey->phy.in_port != USHRT_MAX)
+ NLA_PUT_U32(skb, OVS_KEY_ATTR_IN_PORT, swkey->phy.in_port);
+
+ nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key));
+ if (!nla)
+ goto nla_put_failure;
+ eth_key = nla_data(nla);
+ memcpy(eth_key->eth_src, swkey->eth.src, ETH_ALEN);
+ memcpy(eth_key->eth_dst, swkey->eth.dst, ETH_ALEN);
+
+ if (swkey->eth.tci || swkey->eth.type == htons(ETH_P_8021Q)) {
+ NLA_PUT_BE16(skb, OVS_KEY_ATTR_ETHERTYPE, htons(ETH_P_8021Q));
+ NLA_PUT_BE16(skb, OVS_KEY_ATTR_VLAN, swkey->eth.tci);
+ encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
+ if (!swkey->eth.tci)
+ goto unencap;
+ } else {
+ encap = NULL;
+ }
+
+ if (swkey->eth.type == htons(ETH_P_802_2))
+ goto unencap;
+
+ NLA_PUT_BE16(skb, OVS_KEY_ATTR_ETHERTYPE, swkey->eth.type);
+
+ if (swkey->eth.type == htons(ETH_P_IP)) {
+ struct ovs_key_ipv4 *ipv4_key;
+
+ nla = nla_reserve(skb, OVS_KEY_ATTR_IPV4, sizeof(*ipv4_key));
+ if (!nla)
+ goto nla_put_failure;
+ ipv4_key = nla_data(nla);
+ ipv4_key->ipv4_src = swkey->ipv4.addr.src;
+ ipv4_key->ipv4_dst = swkey->ipv4.addr.dst;
+ ipv4_key->ipv4_proto = swkey->ip.proto;
+ ipv4_key->ipv4_tos = swkey->ip.tos;
+ ipv4_key->ipv4_ttl = swkey->ip.ttl;
+ ipv4_key->ipv4_frag = swkey->ip.frag;
+ } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
+ struct ovs_key_ipv6 *ipv6_key;
+
+ nla = nla_reserve(skb, OVS_KEY_ATTR_IPV6, sizeof(*ipv6_key));
+ if (!nla)
+ goto nla_put_failure;
+ ipv6_key = nla_data(nla);
+ memcpy(ipv6_key->ipv6_src, &swkey->ipv6.addr.src,
+ sizeof(ipv6_key->ipv6_src));
+ memcpy(ipv6_key->ipv6_dst, &swkey->ipv6.addr.dst,
+ sizeof(ipv6_key->ipv6_dst));
+ ipv6_key->ipv6_label = swkey->ipv6.label;
+ ipv6_key->ipv6_proto = swkey->ip.proto;
+ ipv6_key->ipv6_tclass = swkey->ip.tos;
+ ipv6_key->ipv6_hlimit = swkey->ip.ttl;
+ ipv6_key->ipv6_frag = swkey->ip.frag;
+ } else if (swkey->eth.type == htons(ETH_P_ARP)) {
+ struct ovs_key_arp *arp_key;
+
+ nla = nla_reserve(skb, OVS_KEY_ATTR_ARP, sizeof(*arp_key));
+ if (!nla)
+ goto nla_put_failure;
+ arp_key = nla_data(nla);
+ memset(arp_key, 0, sizeof(struct ovs_key_arp));
+ arp_key->arp_sip = swkey->ipv4.addr.src;
+ arp_key->arp_tip = swkey->ipv4.addr.dst;
+ arp_key->arp_op = htons(swkey->ip.proto);
+ memcpy(arp_key->arp_sha, swkey->ipv4.arp.sha, ETH_ALEN);
+ memcpy(arp_key->arp_tha, swkey->ipv4.arp.tha, ETH_ALEN);
+ }
+
+ if ((swkey->eth.type == htons(ETH_P_IP) ||
+ swkey->eth.type == htons(ETH_P_IPV6)) &&
+ swkey->ip.frag != OVS_FRAG_TYPE_LATER) {
+
+ if (swkey->ip.proto == IPPROTO_TCP) {
+ struct ovs_key_tcp *tcp_key;
+
+ nla = nla_reserve(skb, OVS_KEY_ATTR_TCP, sizeof(*tcp_key));
+ if (!nla)
+ goto nla_put_failure;
+ tcp_key = nla_data(nla);
+ if (swkey->eth.type == htons(ETH_P_IP)) {
+ tcp_key->tcp_src = swkey->ipv4.tp.src;
+ tcp_key->tcp_dst = swkey->ipv4.tp.dst;
+ } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
+ tcp_key->tcp_src = swkey->ipv6.tp.src;
+ tcp_key->tcp_dst = swkey->ipv6.tp.dst;
+ }
+ } else if (swkey->ip.proto == IPPROTO_UDP) {
+ struct ovs_key_udp *udp_key;
+
+ nla = nla_reserve(skb, OVS_KEY_ATTR_UDP, sizeof(*udp_key));
+ if (!nla)
+ goto nla_put_failure;
+ udp_key = nla_data(nla);
+ if (swkey->eth.type == htons(ETH_P_IP)) {
+ udp_key->udp_src = swkey->ipv4.tp.src;
+ udp_key->udp_dst = swkey->ipv4.tp.dst;
+ } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
+ udp_key->udp_src = swkey->ipv6.tp.src;
+ udp_key->udp_dst = swkey->ipv6.tp.dst;
+ }
+ } else if (swkey->eth.type == htons(ETH_P_IP) &&
+ swkey->ip.proto == IPPROTO_ICMP) {
+ struct ovs_key_icmp *icmp_key;
+
+ nla = nla_reserve(skb, OVS_KEY_ATTR_ICMP, sizeof(*icmp_key));
+ if (!nla)
+ goto nla_put_failure;
+ icmp_key = nla_data(nla);
+ icmp_key->icmp_type = ntohs(swkey->ipv4.tp.src);
+ icmp_key->icmp_code = ntohs(swkey->ipv4.tp.dst);
+ } else if (swkey->eth.type == htons(ETH_P_IPV6) &&
+ swkey->ip.proto == IPPROTO_ICMPV6) {
+ struct ovs_key_icmpv6 *icmpv6_key;
+
+ nla = nla_reserve(skb, OVS_KEY_ATTR_ICMPV6,
+ sizeof(*icmpv6_key));
+ if (!nla)
+ goto nla_put_failure;
+ icmpv6_key = nla_data(nla);
+ icmpv6_key->icmpv6_type = ntohs(swkey->ipv6.tp.src);
+ icmpv6_key->icmpv6_code = ntohs(swkey->ipv6.tp.dst);
+
+ if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION ||
+ icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) {
+ struct ovs_key_nd *nd_key;
+
+ nla = nla_reserve(skb, OVS_KEY_ATTR_ND, sizeof(*nd_key));
+ if (!nla)
+ goto nla_put_failure;
+ nd_key = nla_data(nla);
+ memcpy(nd_key->nd_target, &swkey->ipv6.nd.target,
+ sizeof(nd_key->nd_target));
+ memcpy(nd_key->nd_sll, swkey->ipv6.nd.sll, ETH_ALEN);
+ memcpy(nd_key->nd_tll, swkey->ipv6.nd.tll, ETH_ALEN);
+ }
+ }
+ }
+
+unencap:
+ if (encap)
+ nla_nest_end(skb, encap);
+
+ return 0;
+
+nla_put_failure:
+ return -EMSGSIZE;
+}
+
+/* Initializes the flow module.
+ * Returns zero if successful or a negative error code. */
+int ovs_flow_init(void)
+{
+ flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow), 0,
+ 0, NULL);
+ if (flow_cache == NULL)
+ return -ENOMEM;
+
+ return 0;
+}
+
+/* Uninitializes the flow module. */
+void ovs_flow_exit(void)
+{
+ kmem_cache_destroy(flow_cache);
+}
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
new file mode 100644
index 000000000000..2747dc2c4ac1
--- /dev/null
+++ b/net/openvswitch/flow.h
@@ -0,0 +1,199 @@
+/*
+ * Copyright (c) 2007-2011 Nicira Networks.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#ifndef FLOW_H
+#define FLOW_H 1
+
+#include <linux/kernel.h>
+#include <linux/netlink.h>
+#include <linux/openvswitch.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include <linux/rcupdate.h>
+#include <linux/if_ether.h>
+#include <linux/in6.h>
+#include <linux/jiffies.h>
+#include <linux/time.h>
+#include <linux/flex_array.h>
+#include <net/inet_ecn.h>
+
+struct sk_buff;
+
+struct sw_flow_actions {
+ struct rcu_head rcu;
+ u32 actions_len;
+ struct nlattr actions[];
+};
+
+struct sw_flow_key {
+ struct {
+ u32 priority; /* Packet QoS priority. */
+ u16 in_port; /* Input switch port (or USHRT_MAX). */
+ } phy;
+ struct {
+ u8 src[ETH_ALEN]; /* Ethernet source address. */
+ u8 dst[ETH_ALEN]; /* Ethernet destination address. */
+ __be16 tci; /* 0 if no VLAN, VLAN_TAG_PRESENT set otherwise. */
+ __be16 type; /* Ethernet frame type. */
+ } eth;
+ struct {
+ u8 proto; /* IP protocol or lower 8 bits of ARP opcode. */
+ u8 tos; /* IP ToS. */
+ u8 ttl; /* IP TTL/hop limit. */
+ u8 frag; /* One of OVS_FRAG_TYPE_*. */
+ } ip;
+ union {
+ struct {
+ struct {
+ __be32 src; /* IP source address. */
+ __be32 dst; /* IP destination address. */
+ } addr;
+ union {
+ struct {
+ __be16 src; /* TCP/UDP source port. */
+ __be16 dst; /* TCP/UDP destination port. */
+ } tp;
+ struct {
+ u8 sha[ETH_ALEN]; /* ARP source hardware address. */
+ u8 tha[ETH_ALEN]; /* ARP target hardware address. */
+ } arp;
+ };
+ } ipv4;
+ struct {
+ struct {
+ struct in6_addr src; /* IPv6 source address. */
+ struct in6_addr dst; /* IPv6 destination address. */
+ } addr;
+ __be32 label; /* IPv6 flow label. */
+ struct {
+ __be16 src; /* TCP/UDP source port. */
+ __be16 dst; /* TCP/UDP destination port. */
+ } tp;
+ struct {
+ struct in6_addr target; /* ND target address. */
+ u8 sll[ETH_ALEN]; /* ND source link layer address. */
+ u8 tll[ETH_ALEN]; /* ND target link layer address. */
+ } nd;
+ } ipv6;
+ };
+};
+
+struct sw_flow {
+ struct rcu_head rcu;
+ struct hlist_node hash_node[2];
+ u32 hash;
+
+ struct sw_flow_key key;
+ struct sw_flow_actions __rcu *sf_acts;
+
+ spinlock_t lock; /* Lock for values below. */
+ unsigned long used; /* Last used time (in jiffies). */
+ u64 packet_count; /* Number of packets matched. */
+ u64 byte_count; /* Number of bytes matched. */
+ u8 tcp_flags; /* Union of seen TCP flags. */
+};
+
+struct arp_eth_header {
+ __be16 ar_hrd; /* format of hardware address */
+ __be16 ar_pro; /* format of protocol address */
+ unsigned char ar_hln; /* length of hardware address */
+ unsigned char ar_pln; /* length of protocol address */
+ __be16 ar_op; /* ARP opcode (command) */
+
+ /* Ethernet+IPv4 specific members. */
+ unsigned char ar_sha[ETH_ALEN]; /* sender hardware address */
+ unsigned char ar_sip[4]; /* sender IP address */
+ unsigned char ar_tha[ETH_ALEN]; /* target hardware address */
+ unsigned char ar_tip[4]; /* target IP address */
+} __packed;
+
+int ovs_flow_init(void);
+void ovs_flow_exit(void);
+
+struct sw_flow *ovs_flow_alloc(void);
+void ovs_flow_deferred_free(struct sw_flow *);
+void ovs_flow_free(struct sw_flow *flow);
+
+struct sw_flow_actions *ovs_flow_actions_alloc(const struct nlattr *);
+void ovs_flow_deferred_free_acts(struct sw_flow_actions *);
+
+int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *,
+ int *key_lenp);
+void ovs_flow_used(struct sw_flow *, struct sk_buff *);
+u64 ovs_flow_used_time(unsigned long flow_jiffies);
+
+/* Upper bound on the length of a nlattr-formatted flow key. The longest
+ * nlattr-formatted flow key would be:
+ *
+ * struct pad nl hdr total
+ * ------ --- ------ -----
+ * OVS_KEY_ATTR_PRIORITY 4 -- 4 8
+ * OVS_KEY_ATTR_IN_PORT 4 -- 4 8
+ * OVS_KEY_ATTR_ETHERNET 12 -- 4 16
+ * OVS_KEY_ATTR_8021Q 4 -- 4 8
+ * OVS_KEY_ATTR_ETHERTYPE 2 2 4 8
+ * OVS_KEY_ATTR_IPV6 40 -- 4 44
+ * OVS_KEY_ATTR_ICMPV6 2 2 4 8
+ * OVS_KEY_ATTR_ND 28 -- 4 32
+ * -------------------------------------------------
+ * total 132
+ */
+#define FLOW_BUFSIZE 132
+
+int ovs_flow_to_nlattrs(const struct sw_flow_key *, struct sk_buff *);
+int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
+ const struct nlattr *);
+int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port,
+ const struct nlattr *);
+
+#define TBL_MIN_BUCKETS 1024
+
+struct flow_table {
+ struct flex_array *buckets;
+ unsigned int count, n_buckets;
+ struct rcu_head rcu;
+ int node_ver;
+ u32 hash_seed;
+ bool keep_flows;
+};
+
+static inline int ovs_flow_tbl_count(struct flow_table *table)
+{
+ return table->count;
+}
+
+static inline int ovs_flow_tbl_need_to_expand(struct flow_table *table)
+{
+ return (table->count > table->n_buckets);
+}
+
+struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *table,
+ struct sw_flow_key *key, int len);
+void ovs_flow_tbl_destroy(struct flow_table *table);
+void ovs_flow_tbl_deferred_destroy(struct flow_table *table);
+struct flow_table *ovs_flow_tbl_alloc(int new_size);
+struct flow_table *ovs_flow_tbl_expand(struct flow_table *table);
+struct flow_table *ovs_flow_tbl_rehash(struct flow_table *table);
+void ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow);
+void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow);
+u32 ovs_flow_hash(const struct sw_flow_key *key, int key_len);
+
+struct sw_flow *ovs_flow_tbl_next(struct flow_table *table, u32 *bucket, u32 *idx);
+extern const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1];
+
+#endif /* flow.h */
diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c
new file mode 100644
index 000000000000..8fc28b86f2b3
--- /dev/null
+++ b/net/openvswitch/vport-internal_dev.c
@@ -0,0 +1,241 @@
+/*
+ * Copyright (c) 2007-2011 Nicira Networks.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#include <linux/hardirq.h>
+#include <linux/if_vlan.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/ethtool.h>
+#include <linux/skbuff.h>
+#include <linux/version.h>
+
+#include "datapath.h"
+#include "vport-internal_dev.h"
+#include "vport-netdev.h"
+
+struct internal_dev {
+ struct vport *vport;
+};
+
+static struct internal_dev *internal_dev_priv(struct net_device *netdev)
+{
+ return netdev_priv(netdev);
+}
+
+/* This function is only called by the kernel network layer.*/
+static struct rtnl_link_stats64 *internal_dev_get_stats(struct net_device *netdev,
+ struct rtnl_link_stats64 *stats)
+{
+ struct vport *vport = ovs_internal_dev_get_vport(netdev);
+ struct ovs_vport_stats vport_stats;
+
+ ovs_vport_get_stats(vport, &vport_stats);
+
+ /* The tx and rx stats need to be swapped because the
+ * switch and host OS have opposite perspectives. */
+ stats->rx_packets = vport_stats.tx_packets;
+ stats->tx_packets = vport_stats.rx_packets;
+ stats->rx_bytes = vport_stats.tx_bytes;
+ stats->tx_bytes = vport_stats.rx_bytes;
+ stats->rx_errors = vport_stats.tx_errors;
+ stats->tx_errors = vport_stats.rx_errors;
+ stats->rx_dropped = vport_stats.tx_dropped;
+ stats->tx_dropped = vport_stats.rx_dropped;
+
+ return stats;
+}
+
+static int internal_dev_mac_addr(struct net_device *dev, void *p)
+{
+ struct sockaddr *addr = p;
+
+ if (!is_valid_ether_addr(addr->sa_data))
+ return -EADDRNOTAVAIL;
+ memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
+ return 0;
+}
+
+/* Called with rcu_read_lock_bh. */
+static int internal_dev_xmit(struct sk_buff *skb, struct net_device *netdev)
+{
+ rcu_read_lock();
+ ovs_vport_receive(internal_dev_priv(netdev)->vport, skb);
+ rcu_read_unlock();
+ return 0;
+}
+
+static int internal_dev_open(struct net_device *netdev)
+{
+ netif_start_queue(netdev);
+ return 0;
+}
+
+static int internal_dev_stop(struct net_device *netdev)
+{
+ netif_stop_queue(netdev);
+ return 0;
+}
+
+static void internal_dev_getinfo(struct net_device *netdev,
+ struct ethtool_drvinfo *info)
+{
+ strcpy(info->driver, "openvswitch");
+}
+
+static const struct ethtool_ops internal_dev_ethtool_ops = {
+ .get_drvinfo = internal_dev_getinfo,
+ .get_link = ethtool_op_get_link,
+};
+
+static int internal_dev_change_mtu(struct net_device *netdev, int new_mtu)
+{
+ if (new_mtu < 68)
+ return -EINVAL;
+
+ netdev->mtu = new_mtu;
+ return 0;
+}
+
+static void internal_dev_destructor(struct net_device *dev)
+{
+ struct vport *vport = ovs_internal_dev_get_vport(dev);
+
+ ovs_vport_free(vport);
+ free_netdev(dev);
+}
+
+static const struct net_device_ops internal_dev_netdev_ops = {
+ .ndo_open = internal_dev_open,
+ .ndo_stop = internal_dev_stop,
+ .ndo_start_xmit = internal_dev_xmit,
+ .ndo_set_mac_address = internal_dev_mac_addr,
+ .ndo_change_mtu = internal_dev_change_mtu,
+ .ndo_get_stats64 = internal_dev_get_stats,
+};
+
+static void do_setup(struct net_device *netdev)
+{
+ ether_setup(netdev);
+
+ netdev->netdev_ops = &internal_dev_netdev_ops;
+
+ netdev->priv_flags &= ~IFF_TX_SKB_SHARING;
+ netdev->destructor = internal_dev_destructor;
+ SET_ETHTOOL_OPS(netdev, &internal_dev_ethtool_ops);
+ netdev->tx_queue_len = 0;
+
+ netdev->features = NETIF_F_LLTX | NETIF_F_SG | NETIF_F_FRAGLIST |
+ NETIF_F_HIGHDMA | NETIF_F_HW_CSUM | NETIF_F_TSO;
+
+ netdev->vlan_features = netdev->features;
+ netdev->features |= NETIF_F_HW_VLAN_TX;
+ netdev->hw_features = netdev->features & ~NETIF_F_LLTX;
+ random_ether_addr(netdev->dev_addr);
+}
+
+static struct vport *internal_dev_create(const struct vport_parms *parms)
+{
+ struct vport *vport;
+ struct netdev_vport *netdev_vport;
+ struct internal_dev *internal_dev;
+ int err;
+
+ vport = ovs_vport_alloc(sizeof(struct netdev_vport),
+ &ovs_internal_vport_ops, parms);
+ if (IS_ERR(vport)) {
+ err = PTR_ERR(vport);
+ goto error;
+ }
+
+ netdev_vport = netdev_vport_priv(vport);
+
+ netdev_vport->dev = alloc_netdev(sizeof(struct internal_dev),
+ parms->name, do_setup);
+ if (!netdev_vport->dev) {
+ err = -ENOMEM;
+ goto error_free_vport;
+ }
+
+ internal_dev = internal_dev_priv(netdev_vport->dev);
+ internal_dev->vport = vport;
+
+ err = register_netdevice(netdev_vport->dev);
+ if (err)
+ goto error_free_netdev;
+
+ dev_set_promiscuity(netdev_vport->dev, 1);
+ netif_start_queue(netdev_vport->dev);
+
+ return vport;
+
+error_free_netdev:
+ free_netdev(netdev_vport->dev);
+error_free_vport:
+ ovs_vport_free(vport);
+error:
+ return ERR_PTR(err);
+}
+
+static void internal_dev_destroy(struct vport *vport)
+{
+ struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
+
+ netif_stop_queue(netdev_vport->dev);
+ dev_set_promiscuity(netdev_vport->dev, -1);
+
+ /* unregister_netdevice() waits for an RCU grace period. */
+ unregister_netdevice(netdev_vport->dev);
+}
+
+static int internal_dev_recv(struct vport *vport, struct sk_buff *skb)
+{
+ struct net_device *netdev = netdev_vport_priv(vport)->dev;
+ int len;
+
+ len = skb->len;
+ skb->dev = netdev;
+ skb->pkt_type = PACKET_HOST;
+ skb->protocol = eth_type_trans(skb, netdev);
+
+ netif_rx(skb);
+
+ return len;
+}
+
+const struct vport_ops ovs_internal_vport_ops = {
+ .type = OVS_VPORT_TYPE_INTERNAL,
+ .create = internal_dev_create,
+ .destroy = internal_dev_destroy,
+ .get_name = ovs_netdev_get_name,
+ .get_ifindex = ovs_netdev_get_ifindex,
+ .send = internal_dev_recv,
+};
+
+int ovs_is_internal_dev(const struct net_device *netdev)
+{
+ return netdev->netdev_ops == &internal_dev_netdev_ops;
+}
+
+struct vport *ovs_internal_dev_get_vport(struct net_device *netdev)
+{
+ if (!ovs_is_internal_dev(netdev))
+ return NULL;
+
+ return internal_dev_priv(netdev)->vport;
+}
diff --git a/net/openvswitch/vport-internal_dev.h b/net/openvswitch/vport-internal_dev.h
new file mode 100644
index 000000000000..3454447c5f11
--- /dev/null
+++ b/net/openvswitch/vport-internal_dev.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2007-2011 Nicira Networks.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#ifndef VPORT_INTERNAL_DEV_H
+#define VPORT_INTERNAL_DEV_H 1
+
+#include "datapath.h"
+#include "vport.h"
+
+int ovs_is_internal_dev(const struct net_device *);
+struct vport *ovs_internal_dev_get_vport(struct net_device *);
+
+#endif /* vport-internal_dev.h */
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
new file mode 100644
index 000000000000..c1068aed03d1
--- /dev/null
+++ b/net/openvswitch/vport-netdev.c
@@ -0,0 +1,198 @@
+/*
+ * Copyright (c) 2007-2011 Nicira Networks.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/if_arp.h>
+#include <linux/if_bridge.h>
+#include <linux/if_vlan.h>
+#include <linux/kernel.h>
+#include <linux/llc.h>
+#include <linux/rtnetlink.h>
+#include <linux/skbuff.h>
+
+#include <net/llc.h>
+
+#include "datapath.h"
+#include "vport-internal_dev.h"
+#include "vport-netdev.h"
+
+/* Must be called with rcu_read_lock. */
+static void netdev_port_receive(struct vport *vport, struct sk_buff *skb)
+{
+ if (unlikely(!vport)) {
+ kfree_skb(skb);
+ return;
+ }
+
+ /* Make our own copy of the packet. Otherwise we will mangle the
+ * packet for anyone who came before us (e.g. tcpdump via AF_PACKET).
+ * (No one comes after us, since we tell handle_bridge() that we took
+ * the packet.) */
+ skb = skb_share_check(skb, GFP_ATOMIC);
+ if (unlikely(!skb))
+ return;
+
+ skb_push(skb, ETH_HLEN);
+ ovs_vport_receive(vport, skb);
+}
+
+/* Called with rcu_read_lock and bottom-halves disabled. */
+static rx_handler_result_t netdev_frame_hook(struct sk_buff **pskb)
+{
+ struct sk_buff *skb = *pskb;
+ struct vport *vport;
+
+ if (unlikely(skb->pkt_type == PACKET_LOOPBACK))
+ return RX_HANDLER_PASS;
+
+ vport = ovs_netdev_get_vport(skb->dev);
+
+ netdev_port_receive(vport, skb);
+
+ return RX_HANDLER_CONSUMED;
+}
+
+static struct vport *netdev_create(const struct vport_parms *parms)
+{
+ struct vport *vport;
+ struct netdev_vport *netdev_vport;
+ int err;
+
+ vport = ovs_vport_alloc(sizeof(struct netdev_vport),
+ &ovs_netdev_vport_ops, parms);
+ if (IS_ERR(vport)) {
+ err = PTR_ERR(vport);
+ goto error;
+ }
+
+ netdev_vport = netdev_vport_priv(vport);
+
+ netdev_vport->dev = dev_get_by_name(&init_net, parms->name);
+ if (!netdev_vport->dev) {
+ err = -ENODEV;
+ goto error_free_vport;
+ }
+
+ if (netdev_vport->dev->flags & IFF_LOOPBACK ||
+ netdev_vport->dev->type != ARPHRD_ETHER ||
+ ovs_is_internal_dev(netdev_vport->dev)) {
+ err = -EINVAL;
+ goto error_put;
+ }
+
+ err = netdev_rx_handler_register(netdev_vport->dev, netdev_frame_hook,
+ vport);
+ if (err)
+ goto error_put;
+
+ dev_set_promiscuity(netdev_vport->dev, 1);
+ netdev_vport->dev->priv_flags |= IFF_OVS_DATAPATH;
+
+ return vport;
+
+error_put:
+ dev_put(netdev_vport->dev);
+error_free_vport:
+ ovs_vport_free(vport);
+error:
+ return ERR_PTR(err);
+}
+
+static void netdev_destroy(struct vport *vport)
+{
+ struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
+
+ netdev_vport->dev->priv_flags &= ~IFF_OVS_DATAPATH;
+ netdev_rx_handler_unregister(netdev_vport->dev);
+ dev_set_promiscuity(netdev_vport->dev, -1);
+
+ synchronize_rcu();
+
+ dev_put(netdev_vport->dev);
+ ovs_vport_free(vport);
+}
+
+const char *ovs_netdev_get_name(const struct vport *vport)
+{
+ const struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
+ return netdev_vport->dev->name;
+}
+
+int ovs_netdev_get_ifindex(const struct vport *vport)
+{
+ const struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
+ return netdev_vport->dev->ifindex;
+}
+
+static unsigned packet_length(const struct sk_buff *skb)
+{
+ unsigned length = skb->len - ETH_HLEN;
+
+ if (skb->protocol == htons(ETH_P_8021Q))
+ length -= VLAN_HLEN;
+
+ return length;
+}
+
+static int netdev_send(struct vport *vport, struct sk_buff *skb)
+{
+ struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
+ int mtu = netdev_vport->dev->mtu;
+ int len;
+
+ if (unlikely(packet_length(skb) > mtu && !skb_is_gso(skb))) {
+ if (net_ratelimit())
+ pr_warn("%s: dropped over-mtu packet: %d > %d\n",
+ ovs_dp_name(vport->dp), packet_length(skb), mtu);
+ goto error;
+ }
+
+ if (unlikely(skb_warn_if_lro(skb)))
+ goto error;
+
+ skb->dev = netdev_vport->dev;
+ len = skb->len;
+ dev_queue_xmit(skb);
+
+ return len;
+
+error:
+ kfree_skb(skb);
+ ovs_vport_record_error(vport, VPORT_E_TX_DROPPED);
+ return 0;
+}
+
+/* Returns null if this device is not attached to a datapath. */
+struct vport *ovs_netdev_get_vport(struct net_device *dev)
+{
+ if (likely(dev->priv_flags & IFF_OVS_DATAPATH))
+ return (struct vport *)
+ rcu_dereference_rtnl(dev->rx_handler_data);
+ else
+ return NULL;
+}
+
+const struct vport_ops ovs_netdev_vport_ops = {
+ .type = OVS_VPORT_TYPE_NETDEV,
+ .create = netdev_create,
+ .destroy = netdev_destroy,
+ .get_name = ovs_netdev_get_name,
+ .get_ifindex = ovs_netdev_get_ifindex,
+ .send = netdev_send,
+};
diff --git a/net/openvswitch/vport-netdev.h b/net/openvswitch/vport-netdev.h
new file mode 100644
index 000000000000..fd9b008a0e6e
--- /dev/null
+++ b/net/openvswitch/vport-netdev.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2007-2011 Nicira Networks.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#ifndef VPORT_NETDEV_H
+#define VPORT_NETDEV_H 1
+
+#include <linux/netdevice.h>
+
+#include "vport.h"
+
+struct vport *ovs_netdev_get_vport(struct net_device *dev);
+
+struct netdev_vport {
+ struct net_device *dev;
+};
+
+static inline struct netdev_vport *
+netdev_vport_priv(const struct vport *vport)
+{
+ return vport_priv(vport);
+}
+
+const char *ovs_netdev_get_name(const struct vport *);
+const char *ovs_netdev_get_config(const struct vport *);
+int ovs_netdev_get_ifindex(const struct vport *);
+
+#endif /* vport_netdev.h */
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
new file mode 100644
index 000000000000..7f0ef3794c51
--- /dev/null
+++ b/net/openvswitch/vport.c
@@ -0,0 +1,398 @@
+/*
+ * Copyright (c) 2007-2011 Nicira Networks.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#include <linux/dcache.h>
+#include <linux/etherdevice.h>
+#include <linux/if.h>
+#include <linux/if_vlan.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/percpu.h>
+#include <linux/rcupdate.h>
+#include <linux/rtnetlink.h>
+#include <linux/compat.h>
+#include <linux/version.h>
+
+#include "vport.h"
+#include "vport-internal_dev.h"
+
+/* List of statically compiled vport implementations. Don't forget to also
+ * add yours to the list at the bottom of vport.h. */
+static const struct vport_ops *vport_ops_list[] = {
+ &ovs_netdev_vport_ops,
+ &ovs_internal_vport_ops,
+};
+
+/* Protected by RCU read lock for reading, RTNL lock for writing. */
+static struct hlist_head *dev_table;
+#define VPORT_HASH_BUCKETS 1024
+
+/**
+ * ovs_vport_init - initialize vport subsystem
+ *
+ * Called at module load time to initialize the vport subsystem.
+ */
+int ovs_vport_init(void)
+{
+ dev_table = kzalloc(VPORT_HASH_BUCKETS * sizeof(struct hlist_head),
+ GFP_KERNEL);
+ if (!dev_table)
+ return -ENOMEM;
+
+ return 0;
+}
+
+/**
+ * ovs_vport_exit - shutdown vport subsystem
+ *
+ * Called at module exit time to shutdown the vport subsystem.
+ */
+void ovs_vport_exit(void)
+{
+ kfree(dev_table);
+}
+
+static struct hlist_head *hash_bucket(const char *name)
+{
+ unsigned int hash = full_name_hash(name, strlen(name));
+ return &dev_table[hash & (VPORT_HASH_BUCKETS - 1)];
+}
+
+/**
+ * ovs_vport_locate - find a port that has already been created
+ *
+ * @name: name of port to find
+ *
+ * Must be called with RTNL or RCU read lock.
+ */
+struct vport *ovs_vport_locate(const char *name)
+{
+ struct hlist_head *bucket = hash_bucket(name);
+ struct vport *vport;
+ struct hlist_node *node;
+
+ hlist_for_each_entry_rcu(vport, node, bucket, hash_node)
+ if (!strcmp(name, vport->ops->get_name(vport)))
+ return vport;
+
+ return NULL;
+}
+
+/**
+ * ovs_vport_alloc - allocate and initialize new vport
+ *
+ * @priv_size: Size of private data area to allocate.
+ * @ops: vport device ops
+ *
+ * Allocate and initialize a new vport defined by @ops. The vport will contain
+ * a private data area of size @priv_size that can be accessed using
+ * vport_priv(). vports that are no longer needed should be released with
+ * vport_free().
+ */
+struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops,
+ const struct vport_parms *parms)
+{
+ struct vport *vport;
+ size_t alloc_size;
+
+ alloc_size = sizeof(struct vport);
+ if (priv_size) {
+ alloc_size = ALIGN(alloc_size, VPORT_ALIGN);
+ alloc_size += priv_size;
+ }
+
+ vport = kzalloc(alloc_size, GFP_KERNEL);
+ if (!vport)
+ return ERR_PTR(-ENOMEM);
+
+ vport->dp = parms->dp;
+ vport->port_no = parms->port_no;
+ vport->upcall_pid = parms->upcall_pid;
+ vport->ops = ops;
+
+ vport->percpu_stats = alloc_percpu(struct vport_percpu_stats);
+ if (!vport->percpu_stats) {
+ kfree(vport);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ spin_lock_init(&vport->stats_lock);
+
+ return vport;
+}
+
+/**
+ * ovs_vport_free - uninitialize and free vport
+ *
+ * @vport: vport to free
+ *
+ * Frees a vport allocated with vport_alloc() when it is no longer needed.
+ *
+ * The caller must ensure that an RCU grace period has passed since the last
+ * time @vport was in a datapath.
+ */
+void ovs_vport_free(struct vport *vport)
+{
+ free_percpu(vport->percpu_stats);
+ kfree(vport);
+}
+
+/**
+ * ovs_vport_add - add vport device (for kernel callers)
+ *
+ * @parms: Information about new vport.
+ *
+ * Creates a new vport with the specified configuration (which is dependent on
+ * device type). RTNL lock must be held.
+ */
+struct vport *ovs_vport_add(const struct vport_parms *parms)
+{
+ struct vport *vport;
+ int err = 0;
+ int i;
+
+ ASSERT_RTNL();
+
+ for (i = 0; i < ARRAY_SIZE(vport_ops_list); i++) {
+ if (vport_ops_list[i]->type == parms->type) {
+ vport = vport_ops_list[i]->create(parms);
+ if (IS_ERR(vport)) {
+ err = PTR_ERR(vport);
+ goto out;
+ }
+
+ hlist_add_head_rcu(&vport->hash_node,
+ hash_bucket(vport->ops->get_name(vport)));
+ return vport;
+ }
+ }
+
+ err = -EAFNOSUPPORT;
+
+out:
+ return ERR_PTR(err);
+}
+
+/**
+ * ovs_vport_set_options - modify existing vport device (for kernel callers)
+ *
+ * @vport: vport to modify.
+ * @port: New configuration.
+ *
+ * Modifies an existing device with the specified configuration (which is
+ * dependent on device type). RTNL lock must be held.
+ */
+int ovs_vport_set_options(struct vport *vport, struct nlattr *options)
+{
+ ASSERT_RTNL();
+
+ if (!vport->ops->set_options)
+ return -EOPNOTSUPP;
+ return vport->ops->set_options(vport, options);
+}
+
+/**
+ * ovs_vport_del - delete existing vport device
+ *
+ * @vport: vport to delete.
+ *
+ * Detaches @vport from its datapath and destroys it. It is possible to fail
+ * for reasons such as lack of memory. RTNL lock must be held.
+ */
+void ovs_vport_del(struct vport *vport)
+{
+ ASSERT_RTNL();
+
+ hlist_del_rcu(&vport->hash_node);
+
+ vport->ops->destroy(vport);
+}
+
+/**
+ * ovs_vport_get_stats - retrieve device stats
+ *
+ * @vport: vport from which to retrieve the stats
+ * @stats: location to store stats
+ *
+ * Retrieves transmit, receive, and error stats for the given device.
+ *
+ * Must be called with RTNL lock or rcu_read_lock.
+ */
+void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats)
+{
+ int i;
+
+ memset(stats, 0, sizeof(*stats));
+
+ /* We potentially have 2 sources of stats that need to be combined:
+ * those we have collected (split into err_stats and percpu_stats) from
+ * set_stats() and device error stats from netdev->get_stats() (for
+ * errors that happen downstream and therefore aren't reported through
+ * our vport_record_error() function).
+ * Stats from first source are reported by ovs (OVS_VPORT_ATTR_STATS).
+ * netdev-stats can be directly read over netlink-ioctl.
+ */
+
+ spin_lock_bh(&vport->stats_lock);
+
+ stats->rx_errors = vport->err_stats.rx_errors;
+ stats->tx_errors = vport->err_stats.tx_errors;
+ stats->tx_dropped = vport->err_stats.tx_dropped;
+ stats->rx_dropped = vport->err_stats.rx_dropped;
+
+ spin_unlock_bh(&vport->stats_lock);
+
+ for_each_possible_cpu(i) {
+ const struct vport_percpu_stats *percpu_stats;
+ struct vport_percpu_stats local_stats;
+ unsigned int start;
+
+ percpu_stats = per_cpu_ptr(vport->percpu_stats, i);
+
+ do {
+ start = u64_stats_fetch_begin_bh(&percpu_stats->sync);
+ local_stats = *percpu_stats;
+ } while (u64_stats_fetch_retry_bh(&percpu_stats->sync, start));
+
+ stats->rx_bytes += local_stats.rx_bytes;
+ stats->rx_packets += local_stats.rx_packets;
+ stats->tx_bytes += local_stats.tx_bytes;
+ stats->tx_packets += local_stats.tx_packets;
+ }
+}
+
+/**
+ * ovs_vport_get_options - retrieve device options
+ *
+ * @vport: vport from which to retrieve the options.
+ * @skb: sk_buff where options should be appended.
+ *
+ * Retrieves the configuration of the given device, appending an
+ * %OVS_VPORT_ATTR_OPTIONS attribute that in turn contains nested
+ * vport-specific attributes to @skb.
+ *
+ * Returns 0 if successful, -EMSGSIZE if @skb has insufficient room, or another
+ * negative error code if a real error occurred. If an error occurs, @skb is
+ * left unmodified.
+ *
+ * Must be called with RTNL lock or rcu_read_lock.
+ */
+int ovs_vport_get_options(const struct vport *vport, struct sk_buff *skb)
+{
+ struct nlattr *nla;
+
+ nla = nla_nest_start(skb, OVS_VPORT_ATTR_OPTIONS);
+ if (!nla)
+ return -EMSGSIZE;
+
+ if (vport->ops->get_options) {
+ int err = vport->ops->get_options(vport, skb);
+ if (err) {
+ nla_nest_cancel(skb, nla);
+ return err;
+ }
+ }
+
+ nla_nest_end(skb, nla);
+ return 0;
+}
+
+/**
+ * ovs_vport_receive - pass up received packet to the datapath for processing
+ *
+ * @vport: vport that received the packet
+ * @skb: skb that was received
+ *
+ * Must be called with rcu_read_lock. The packet cannot be shared and
+ * skb->data should point to the Ethernet header. The caller must have already
+ * called compute_ip_summed() to initialize the checksumming fields.
+ */
+void ovs_vport_receive(struct vport *vport, struct sk_buff *skb)
+{
+ struct vport_percpu_stats *stats;
+
+ stats = per_cpu_ptr(vport->percpu_stats, smp_processor_id());
+
+ u64_stats_update_begin(&stats->sync);
+ stats->rx_packets++;
+ stats->rx_bytes += skb->len;
+ u64_stats_update_end(&stats->sync);
+
+ ovs_dp_process_received_packet(vport, skb);
+}
+
+/**
+ * ovs_vport_send - send a packet on a device
+ *
+ * @vport: vport on which to send the packet
+ * @skb: skb to send
+ *
+ * Sends the given packet and returns the length of data sent. Either RTNL
+ * lock or rcu_read_lock must be held.
+ */
+int ovs_vport_send(struct vport *vport, struct sk_buff *skb)
+{
+ int sent = vport->ops->send(vport, skb);
+
+ if (likely(sent)) {
+ struct vport_percpu_stats *stats;
+
+ stats = per_cpu_ptr(vport->percpu_stats, smp_processor_id());
+
+ u64_stats_update_begin(&stats->sync);
+ stats->tx_packets++;
+ stats->tx_bytes += sent;
+ u64_stats_update_end(&stats->sync);
+ }
+ return sent;
+}
+
+/**
+ * ovs_vport_record_error - indicate device error to generic stats layer
+ *
+ * @vport: vport that encountered the error
+ * @err_type: one of enum vport_err_type types to indicate the error type
+ *
+ * If using the vport generic stats layer indicate that an error of the given
+ * type has occured.
+ */
+void ovs_vport_record_error(struct vport *vport, enum vport_err_type err_type)
+{
+ spin_lock(&vport->stats_lock);
+
+ switch (err_type) {
+ case VPORT_E_RX_DROPPED:
+ vport->err_stats.rx_dropped++;
+ break;
+
+ case VPORT_E_RX_ERROR:
+ vport->err_stats.rx_errors++;
+ break;
+
+ case VPORT_E_TX_DROPPED:
+ vport->err_stats.tx_dropped++;
+ break;
+
+ case VPORT_E_TX_ERROR:
+ vport->err_stats.tx_errors++;
+ break;
+ };
+
+ spin_unlock(&vport->stats_lock);
+}
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
new file mode 100644
index 000000000000..19609629dabd
--- /dev/null
+++ b/net/openvswitch/vport.h
@@ -0,0 +1,205 @@
+/*
+ * Copyright (c) 2007-2011 Nicira Networks.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#ifndef VPORT_H
+#define VPORT_H 1
+
+#include <linux/list.h>
+#include <linux/openvswitch.h>
+#include <linux/skbuff.h>
+#include <linux/spinlock.h>
+#include <linux/u64_stats_sync.h>
+
+#include "datapath.h"
+
+struct vport;
+struct vport_parms;
+
+/* The following definitions are for users of the vport subsytem: */
+
+int ovs_vport_init(void);
+void ovs_vport_exit(void);
+
+struct vport *ovs_vport_add(const struct vport_parms *);
+void ovs_vport_del(struct vport *);
+
+struct vport *ovs_vport_locate(const char *name);
+
+void ovs_vport_get_stats(struct vport *, struct ovs_vport_stats *);
+
+int ovs_vport_set_options(struct vport *, struct nlattr *options);
+int ovs_vport_get_options(const struct vport *, struct sk_buff *);
+
+int ovs_vport_send(struct vport *, struct sk_buff *);
+
+/* The following definitions are for implementers of vport devices: */
+
+struct vport_percpu_stats {
+ u64 rx_bytes;
+ u64 rx_packets;
+ u64 tx_bytes;
+ u64 tx_packets;
+ struct u64_stats_sync sync;
+};
+
+struct vport_err_stats {
+ u64 rx_dropped;
+ u64 rx_errors;
+ u64 tx_dropped;
+ u64 tx_errors;
+};
+
+/**
+ * struct vport - one port within a datapath
+ * @rcu: RCU callback head for deferred destruction.
+ * @port_no: Index into @dp's @ports array.
+ * @dp: Datapath to which this port belongs.
+ * @node: Element in @dp's @port_list.
+ * @upcall_pid: The Netlink port to use for packets received on this port that
+ * miss the flow table.
+ * @hash_node: Element in @dev_table hash table in vport.c.
+ * @ops: Class structure.
+ * @percpu_stats: Points to per-CPU statistics used and maintained by vport
+ * @stats_lock: Protects @err_stats;
+ * @err_stats: Points to error statistics used and maintained by vport
+ */
+struct vport {
+ struct rcu_head rcu;
+ u16 port_no;
+ struct datapath *dp;
+ struct list_head node;
+ u32 upcall_pid;
+
+ struct hlist_node hash_node;
+ const struct vport_ops *ops;
+
+ struct vport_percpu_stats __percpu *percpu_stats;
+
+ spinlock_t stats_lock;
+ struct vport_err_stats err_stats;
+};
+
+/**
+ * struct vport_parms - parameters for creating a new vport
+ *
+ * @name: New vport's name.
+ * @type: New vport's type.
+ * @options: %OVS_VPORT_ATTR_OPTIONS attribute from Netlink message, %NULL if
+ * none was supplied.
+ * @dp: New vport's datapath.
+ * @port_no: New vport's port number.
+ */
+struct vport_parms {
+ const char *name;
+ enum ovs_vport_type type;
+ struct nlattr *options;
+
+ /* For ovs_vport_alloc(). */
+ struct datapath *dp;
+ u16 port_no;
+ u32 upcall_pid;
+};
+
+/**
+ * struct vport_ops - definition of a type of virtual port
+ *
+ * @type: %OVS_VPORT_TYPE_* value for this type of virtual port.
+ * @create: Create a new vport configured as specified. On success returns
+ * a new vport allocated with ovs_vport_alloc(), otherwise an ERR_PTR() value.
+ * @destroy: Destroys a vport. Must call vport_free() on the vport but not
+ * before an RCU grace period has elapsed.
+ * @set_options: Modify the configuration of an existing vport. May be %NULL
+ * if modification is not supported.
+ * @get_options: Appends vport-specific attributes for the configuration of an
+ * existing vport to a &struct sk_buff. May be %NULL for a vport that does not
+ * have any configuration.
+ * @get_name: Get the device's name.
+ * @get_config: Get the device's configuration.
+ * @get_ifindex: Get the system interface index associated with the device.
+ * May be null if the device does not have an ifindex.
+ * @send: Send a packet on the device. Returns the length of the packet sent.
+ */
+struct vport_ops {
+ enum ovs_vport_type type;
+
+ /* Called with RTNL lock. */
+ struct vport *(*create)(const struct vport_parms *);
+ void (*destroy)(struct vport *);
+
+ int (*set_options)(struct vport *, struct nlattr *);
+ int (*get_options)(const struct vport *, struct sk_buff *);
+
+ /* Called with rcu_read_lock or RTNL lock. */
+ const char *(*get_name)(const struct vport *);
+ void (*get_config)(const struct vport *, void *);
+ int (*get_ifindex)(const struct vport *);
+
+ int (*send)(struct vport *, struct sk_buff *);
+};
+
+enum vport_err_type {
+ VPORT_E_RX_DROPPED,
+ VPORT_E_RX_ERROR,
+ VPORT_E_TX_DROPPED,
+ VPORT_E_TX_ERROR,
+};
+
+struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *,
+ const struct vport_parms *);
+void ovs_vport_free(struct vport *);
+
+#define VPORT_ALIGN 8
+
+/**
+ * vport_priv - access private data area of vport
+ *
+ * @vport: vport to access
+ *
+ * If a nonzero size was passed in priv_size of vport_alloc() a private data
+ * area was allocated on creation. This allows that area to be accessed and
+ * used for any purpose needed by the vport implementer.
+ */
+static inline void *vport_priv(const struct vport *vport)
+{
+ return (u8 *)vport + ALIGN(sizeof(struct vport), VPORT_ALIGN);
+}
+
+/**
+ * vport_from_priv - lookup vport from private data pointer
+ *
+ * @priv: Start of private data area.
+ *
+ * It is sometimes useful to translate from a pointer to the private data
+ * area to the vport, such as in the case where the private data pointer is
+ * the result of a hash table lookup. @priv must point to the start of the
+ * private data area.
+ */
+static inline struct vport *vport_from_priv(const void *priv)
+{
+ return (struct vport *)(priv - ALIGN(sizeof(struct vport), VPORT_ALIGN));
+}
+
+void ovs_vport_receive(struct vport *, struct sk_buff *);
+void ovs_vport_record_error(struct vport *, enum vport_err_type err_type);
+
+/* List of statically compiled vport implementations. Don't forget to also
+ * add yours to the list at the top of vport.c. */
+extern const struct vport_ops ovs_netdev_vport_ops;
+extern const struct vport_ops ovs_internal_vport_ops;
+
+#endif /* vport.h */
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 82a6f34d39d0..2dbb32b988c4 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1499,10 +1499,11 @@ retry:
if (!skb) {
size_t reserved = LL_RESERVED_SPACE(dev);
+ int tlen = dev->needed_tailroom;
unsigned int hhlen = dev->header_ops ? dev->hard_header_len : 0;
rcu_read_unlock();
- skb = sock_wmalloc(sk, len + reserved, 0, GFP_KERNEL);
+ skb = sock_wmalloc(sk, len + reserved + tlen, 0, GFP_KERNEL);
if (skb == NULL)
return -ENOBUFS;
/* FIXME: Save some space for broken drivers that write a hard
@@ -1630,8 +1631,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
if (snaplen > res)
snaplen = res;
- if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
- (unsigned)sk->sk_rcvbuf)
+ if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
goto drop_n_acct;
if (skb_shared(skb)) {
@@ -1762,8 +1762,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
if (po->tp_version <= TPACKET_V2) {
if (macoff + snaplen > po->rx_ring.frame_size) {
if (po->copy_thresh &&
- atomic_read(&sk->sk_rmem_alloc) + skb->truesize
- < (unsigned)sk->sk_rcvbuf) {
+ atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
if (skb_shared(skb)) {
copy_skb = skb_clone(skb, GFP_ATOMIC);
} else {
@@ -1944,7 +1943,7 @@ static void tpacket_destruct_skb(struct sk_buff *skb)
static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
void *frame, struct net_device *dev, int size_max,
- __be16 proto, unsigned char *addr)
+ __be16 proto, unsigned char *addr, int hlen)
{
union {
struct tpacket_hdr *h1;
@@ -1978,7 +1977,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
return -EMSGSIZE;
}
- skb_reserve(skb, LL_RESERVED_SPACE(dev));
+ skb_reserve(skb, hlen);
skb_reset_network_header(skb);
data = ph.raw + po->tp_hdrlen - sizeof(struct sockaddr_ll);
@@ -2053,6 +2052,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
unsigned char *addr;
int len_sum = 0;
int status = 0;
+ int hlen, tlen;
mutex_lock(&po->pg_vec_lock);
@@ -2101,16 +2101,17 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
}
status = TP_STATUS_SEND_REQUEST;
+ hlen = LL_RESERVED_SPACE(dev);
+ tlen = dev->needed_tailroom;
skb = sock_alloc_send_skb(&po->sk,
- LL_ALLOCATED_SPACE(dev)
- + sizeof(struct sockaddr_ll),
+ hlen + tlen + sizeof(struct sockaddr_ll),
0, &err);
if (unlikely(skb == NULL))
goto out_status;
tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto,
- addr);
+ addr, hlen);
if (unlikely(tp_len < 0)) {
if (po->tp_loss) {
@@ -2207,6 +2208,7 @@ static int packet_snd(struct socket *sock,
int vnet_hdr_len;
struct packet_sock *po = pkt_sk(sk);
unsigned short gso_type = 0;
+ int hlen, tlen;
/*
* Get and verify the address.
@@ -2291,8 +2293,9 @@ static int packet_snd(struct socket *sock,
goto out_unlock;
err = -ENOBUFS;
- skb = packet_alloc_skb(sk, LL_ALLOCATED_SPACE(dev),
- LL_RESERVED_SPACE(dev), len, vnet_hdr.hdr_len,
+ hlen = LL_RESERVED_SPACE(dev);
+ tlen = dev->needed_tailroom;
+ skb = packet_alloc_skb(sk, hlen + tlen, hlen, len, vnet_hdr.hdr_len,
msg->msg_flags & MSG_DONTWAIT, &err);
if (skb == NULL)
goto out_unlock;
@@ -2450,8 +2453,12 @@ static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protoc
{
struct packet_sock *po = pkt_sk(sk);
- if (po->fanout)
+ if (po->fanout) {
+ if (dev)
+ dev_put(dev);
+
return -EINVAL;
+ }
lock_sock(sk);
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index 2ba6e9fb4cbc..9f60008740e3 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -534,6 +534,29 @@ static int pep_connresp_rcv(struct sock *sk, struct sk_buff *skb)
return pipe_handler_send_created_ind(sk);
}
+static int pep_enableresp_rcv(struct sock *sk, struct sk_buff *skb)
+{
+ struct pnpipehdr *hdr = pnp_hdr(skb);
+
+ if (hdr->error_code != PN_PIPE_NO_ERROR)
+ return -ECONNREFUSED;
+
+ return pep_indicate(sk, PNS_PIPE_ENABLED_IND, 0 /* sub-blocks */,
+ NULL, 0, GFP_ATOMIC);
+
+}
+
+static void pipe_start_flow_control(struct sock *sk)
+{
+ struct pep_sock *pn = pep_sk(sk);
+
+ if (!pn_flow_safe(pn->tx_fc)) {
+ atomic_set(&pn->tx_credits, 1);
+ sk->sk_write_space(sk);
+ }
+ pipe_grant_credits(sk, GFP_ATOMIC);
+}
+
/* Queue an skb to an actively connected sock.
* Socket lock must be held. */
static int pipe_handler_do_rcv(struct sock *sk, struct sk_buff *skb)
@@ -579,13 +602,25 @@ static int pipe_handler_do_rcv(struct sock *sk, struct sk_buff *skb)
sk->sk_state = TCP_CLOSE_WAIT;
break;
}
+ if (pn->init_enable == PN_PIPE_DISABLE)
+ sk->sk_state = TCP_SYN_RECV;
+ else {
+ sk->sk_state = TCP_ESTABLISHED;
+ pipe_start_flow_control(sk);
+ }
+ break;
- sk->sk_state = TCP_ESTABLISHED;
- if (!pn_flow_safe(pn->tx_fc)) {
- atomic_set(&pn->tx_credits, 1);
- sk->sk_write_space(sk);
+ case PNS_PEP_ENABLE_RESP:
+ if (sk->sk_state != TCP_SYN_SENT)
+ break;
+
+ if (pep_enableresp_rcv(sk, skb)) {
+ sk->sk_state = TCP_CLOSE_WAIT;
+ break;
}
- pipe_grant_credits(sk, GFP_ATOMIC);
+
+ sk->sk_state = TCP_ESTABLISHED;
+ pipe_start_flow_control(sk);
break;
case PNS_PEP_DISCONNECT_RESP:
@@ -864,14 +899,32 @@ static int pep_sock_connect(struct sock *sk, struct sockaddr *addr, int len)
int err;
u8 data[4] = { 0 /* sub-blocks */, PAD, PAD, PAD };
- pn->pipe_handle = 1; /* anything but INVALID_HANDLE */
+ if (pn->pipe_handle == PN_PIPE_INVALID_HANDLE)
+ pn->pipe_handle = 1; /* anything but INVALID_HANDLE */
+
err = pipe_handler_request(sk, PNS_PEP_CONNECT_REQ,
- PN_PIPE_ENABLE, data, 4);
+ pn->init_enable, data, 4);
if (err) {
pn->pipe_handle = PN_PIPE_INVALID_HANDLE;
return err;
}
+
sk->sk_state = TCP_SYN_SENT;
+
+ return 0;
+}
+
+static int pep_sock_enable(struct sock *sk, struct sockaddr *addr, int len)
+{
+ int err;
+
+ err = pipe_handler_request(sk, PNS_PEP_ENABLE_REQ, PAD,
+ NULL, 0);
+ if (err)
+ return err;
+
+ sk->sk_state = TCP_SYN_SENT;
+
return 0;
}
@@ -879,11 +932,14 @@ static int pep_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
struct pep_sock *pn = pep_sk(sk);
int answ;
+ int ret = -ENOIOCTLCMD;
switch (cmd) {
case SIOCINQ:
- if (sk->sk_state == TCP_LISTEN)
- return -EINVAL;
+ if (sk->sk_state == TCP_LISTEN) {
+ ret = -EINVAL;
+ break;
+ }
lock_sock(sk);
if (sock_flag(sk, SOCK_URGINLINE) &&
@@ -894,10 +950,22 @@ static int pep_ioctl(struct sock *sk, int cmd, unsigned long arg)
else
answ = 0;
release_sock(sk);
- return put_user(answ, (int __user *)arg);
+ ret = put_user(answ, (int __user *)arg);
+ break;
+
+ case SIOCPNENABLEPIPE:
+ lock_sock(sk);
+ if (sk->sk_state == TCP_SYN_SENT)
+ ret = -EBUSY;
+ else if (sk->sk_state == TCP_ESTABLISHED)
+ ret = -EISCONN;
+ else
+ ret = pep_sock_enable(sk, NULL, 0);
+ release_sock(sk);
+ break;
}
- return -ENOIOCTLCMD;
+ return ret;
}
static int pep_init(struct sock *sk)
@@ -960,6 +1028,18 @@ static int pep_setsockopt(struct sock *sk, int level, int optname,
}
goto out_norel;
+ case PNPIPE_HANDLE:
+ if ((sk->sk_state == TCP_CLOSE) &&
+ (val >= 0) && (val < PN_PIPE_INVALID_HANDLE))
+ pn->pipe_handle = val;
+ else
+ err = -EINVAL;
+ break;
+
+ case PNPIPE_INITSTATE:
+ pn->init_enable = !!val;
+ break;
+
default:
err = -ENOPROTOOPT;
}
@@ -995,6 +1075,10 @@ static int pep_getsockopt(struct sock *sk, int level, int optname,
return -EINVAL;
break;
+ case PNPIPE_INITSTATE:
+ val = pn->init_enable;
+ break;
+
default:
return -ENOPROTOOPT;
}
diff --git a/net/rds/Kconfig b/net/rds/Kconfig
index 4cf6dc7910e4..ec753b3ae72a 100644
--- a/net/rds/Kconfig
+++ b/net/rds/Kconfig
@@ -9,7 +9,6 @@ config RDS
config RDS_RDMA
tristate "RDS over Infiniband and iWARP"
- select LLIST
depends on RDS && INFINIBAND && INFINIBAND_ADDR_TRANS
---help---
Allow RDS to use Infiniband and iWARP as a transport.
diff --git a/net/rfkill/rfkill-regulator.c b/net/rfkill/rfkill-regulator.c
index 2ebfe8d0e873..11da3018a853 100644
--- a/net/rfkill/rfkill-regulator.c
+++ b/net/rfkill/rfkill-regulator.c
@@ -36,12 +36,12 @@ static int rfkill_regulator_set_block(void *data, bool blocked)
if (blocked) {
if (rfkill_data->reg_enabled) {
regulator_disable(rfkill_data->vcc);
- rfkill_data->reg_enabled = 0;
+ rfkill_data->reg_enabled = false;
}
} else {
if (!rfkill_data->reg_enabled) {
regulator_enable(rfkill_data->vcc);
- rfkill_data->reg_enabled = 1;
+ rfkill_data->reg_enabled = true;
}
}
@@ -96,7 +96,7 @@ static int __devinit rfkill_regulator_probe(struct platform_device *pdev)
if (regulator_is_enabled(vcc)) {
dev_dbg(&pdev->dev, "Regulator already enabled\n");
- rfkill_data->reg_enabled = 1;
+ rfkill_data->reg_enabled = true;
}
rfkill_data->vcc = vcc;
rfkill_data->rf_kill = rf_kill;
diff --git a/net/rxrpc/ar-ack.c b/net/rxrpc/ar-ack.c
index f99cfce7ca97..c3126e864f3c 100644
--- a/net/rxrpc/ar-ack.c
+++ b/net/rxrpc/ar-ack.c
@@ -195,7 +195,7 @@ static void rxrpc_resend(struct rxrpc_call *call)
sp = rxrpc_skb(txb);
if (sp->need_resend) {
- sp->need_resend = 0;
+ sp->need_resend = false;
/* each Tx packet has a new serial number */
sp->hdr.serial =
@@ -216,7 +216,7 @@ static void rxrpc_resend(struct rxrpc_call *call)
}
if (time_after_eq(jiffies + 1, sp->resend_at)) {
- sp->need_resend = 1;
+ sp->need_resend = true;
resend |= 1;
} else if (resend & 2) {
if (time_before(sp->resend_at, resend_at))
@@ -265,7 +265,7 @@ static void rxrpc_resend_timer(struct rxrpc_call *call)
if (sp->need_resend) {
;
} else if (time_after_eq(jiffies + 1, sp->resend_at)) {
- sp->need_resend = 1;
+ sp->need_resend = true;
resend |= 1;
} else if (resend & 2) {
if (time_before(sp->resend_at, resend_at))
@@ -314,11 +314,11 @@ static int rxrpc_process_soft_ACKs(struct rxrpc_call *call,
switch (sacks[loop]) {
case RXRPC_ACK_TYPE_ACK:
- sp->need_resend = 0;
+ sp->need_resend = false;
*p_txb |= 1;
break;
case RXRPC_ACK_TYPE_NACK:
- sp->need_resend = 1;
+ sp->need_resend = true;
*p_txb &= ~1;
resend = 1;
break;
@@ -344,13 +344,13 @@ static int rxrpc_process_soft_ACKs(struct rxrpc_call *call,
if (*p_txb & 1) {
/* packet must have been discarded */
- sp->need_resend = 1;
+ sp->need_resend = true;
*p_txb &= ~1;
resend |= 1;
} else if (sp->need_resend) {
;
} else if (time_after_eq(jiffies + 1, sp->resend_at)) {
- sp->need_resend = 1;
+ sp->need_resend = true;
resend |= 1;
} else if (resend & 2) {
if (time_before(sp->resend_at, resend_at))
diff --git a/net/rxrpc/ar-key.c b/net/rxrpc/ar-key.c
index 43ea7de2fc8e..4cba13e46ffd 100644
--- a/net/rxrpc/ar-key.c
+++ b/net/rxrpc/ar-key.c
@@ -306,10 +306,9 @@ static int rxrpc_krb5_decode_tagged_data(struct krb5_tagged_data *td,
td->data_len = len;
if (len > 0) {
- td->data = kmalloc(len, GFP_KERNEL);
+ td->data = kmemdup(xdr, len, GFP_KERNEL);
if (!td->data)
return -ENOMEM;
- memcpy(td->data, xdr, len);
len = (len + 3) & ~3;
toklen -= len;
xdr += len >> 2;
@@ -401,10 +400,9 @@ static int rxrpc_krb5_decode_ticket(u8 **_ticket, u16 *_tktlen,
_debug("ticket len %u", len);
if (len > 0) {
- *_ticket = kmalloc(len, GFP_KERNEL);
+ *_ticket = kmemdup(xdr, len, GFP_KERNEL);
if (!*_ticket)
return -ENOMEM;
- memcpy(*_ticket, xdr, len);
len = (len + 3) & ~3;
toklen -= len;
xdr += len >> 2;
diff --git a/net/rxrpc/ar-output.c b/net/rxrpc/ar-output.c
index 338d793c7113..16ae88762d00 100644
--- a/net/rxrpc/ar-output.c
+++ b/net/rxrpc/ar-output.c
@@ -486,7 +486,7 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb,
_proto("Tx DATA %%%u { #%u }",
ntohl(sp->hdr.serial), ntohl(sp->hdr.seq));
- sp->need_resend = 0;
+ sp->need_resend = false;
sp->resend_at = jiffies + rxrpc_resend_timeout * HZ;
if (!test_and_set_bit(RXRPC_CALL_RUN_RTIMER, &call->flags)) {
_debug("run timer");
@@ -508,7 +508,7 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb,
if (ret < 0) {
_debug("need instant resend %d", ret);
- sp->need_resend = 1;
+ sp->need_resend = true;
rxrpc_instant_resend(call);
}
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 7b582300d051..1d8bd0dbcd1f 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -26,6 +26,8 @@
#include <net/pkt_cls.h>
#include <net/ip.h>
#include <net/route.h>
+#include <net/flow_keys.h>
+
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
#include <net/netfilter/nf_conntrack.h>
#endif
@@ -66,134 +68,37 @@ static inline u32 addr_fold(void *addr)
return (a & 0xFFFFFFFF) ^ (BITS_PER_LONG > 32 ? a >> 32 : 0);
}
-static u32 flow_get_src(const struct sk_buff *skb, int nhoff)
+static u32 flow_get_src(const struct sk_buff *skb, const struct flow_keys *flow)
{
- __be32 *data = NULL, hdata;
-
- switch (skb->protocol) {
- case htons(ETH_P_IP):
- data = skb_header_pointer(skb,
- nhoff + offsetof(struct iphdr,
- saddr),
- 4, &hdata);
- break;
- case htons(ETH_P_IPV6):
- data = skb_header_pointer(skb,
- nhoff + offsetof(struct ipv6hdr,
- saddr.s6_addr32[3]),
- 4, &hdata);
- break;
- }
-
- if (data)
- return ntohl(*data);
+ if (flow->src)
+ return ntohl(flow->src);
return addr_fold(skb->sk);
}
-static u32 flow_get_dst(const struct sk_buff *skb, int nhoff)
+static u32 flow_get_dst(const struct sk_buff *skb, const struct flow_keys *flow)
{
- __be32 *data = NULL, hdata;
-
- switch (skb->protocol) {
- case htons(ETH_P_IP):
- data = skb_header_pointer(skb,
- nhoff + offsetof(struct iphdr,
- daddr),
- 4, &hdata);
- break;
- case htons(ETH_P_IPV6):
- data = skb_header_pointer(skb,
- nhoff + offsetof(struct ipv6hdr,
- daddr.s6_addr32[3]),
- 4, &hdata);
- break;
- }
-
- if (data)
- return ntohl(*data);
+ if (flow->dst)
+ return ntohl(flow->dst);
return addr_fold(skb_dst(skb)) ^ (__force u16)skb->protocol;
}
-static u32 flow_get_proto(const struct sk_buff *skb, int nhoff)
+static u32 flow_get_proto(const struct sk_buff *skb, const struct flow_keys *flow)
{
- __u8 *data = NULL, hdata;
-
- switch (skb->protocol) {
- case htons(ETH_P_IP):
- data = skb_header_pointer(skb,
- nhoff + offsetof(struct iphdr,
- protocol),
- 1, &hdata);
- break;
- case htons(ETH_P_IPV6):
- data = skb_header_pointer(skb,
- nhoff + offsetof(struct ipv6hdr,
- nexthdr),
- 1, &hdata);
- break;
- }
- if (data)
- return *data;
- return 0;
+ return flow->ip_proto;
}
-/* helper function to get either src or dst port */
-static __be16 *flow_get_proto_common(const struct sk_buff *skb, int nhoff,
- __be16 *_port, int dst)
+static u32 flow_get_proto_src(const struct sk_buff *skb, const struct flow_keys *flow)
{
- __be16 *port = NULL;
- int poff;
-
- switch (skb->protocol) {
- case htons(ETH_P_IP): {
- struct iphdr *iph, _iph;
-
- iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph);
- if (!iph)
- break;
- if (ip_is_fragment(iph))
- break;
- poff = proto_ports_offset(iph->protocol);
- if (poff >= 0)
- port = skb_header_pointer(skb,
- nhoff + iph->ihl * 4 + poff + dst,
- sizeof(*_port), _port);
- break;
- }
- case htons(ETH_P_IPV6): {
- struct ipv6hdr *iph, _iph;
-
- iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph);
- if (!iph)
- break;
- poff = proto_ports_offset(iph->nexthdr);
- if (poff >= 0)
- port = skb_header_pointer(skb,
- nhoff + sizeof(*iph) + poff + dst,
- sizeof(*_port), _port);
- break;
- }
- }
-
- return port;
-}
-
-static u32 flow_get_proto_src(const struct sk_buff *skb, int nhoff)
-{
- __be16 _port, *port = flow_get_proto_common(skb, nhoff, &_port, 0);
-
- if (port)
- return ntohs(*port);
+ if (flow->ports)
+ return ntohs(flow->port16[0]);
return addr_fold(skb->sk);
}
-static u32 flow_get_proto_dst(const struct sk_buff *skb, int nhoff)
+static u32 flow_get_proto_dst(const struct sk_buff *skb, const struct flow_keys *flow)
{
- __be16 _port, *port = flow_get_proto_common(skb, nhoff, &_port, 2);
-
- if (port)
- return ntohs(*port);
+ if (flow->ports)
+ return ntohs(flow->port16[1]);
return addr_fold(skb_dst(skb)) ^ (__force u16)skb->protocol;
}
@@ -239,7 +144,7 @@ static u32 flow_get_nfct(const struct sk_buff *skb)
})
#endif
-static u32 flow_get_nfct_src(const struct sk_buff *skb, int nhoff)
+static u32 flow_get_nfct_src(const struct sk_buff *skb, const struct flow_keys *flow)
{
switch (skb->protocol) {
case htons(ETH_P_IP):
@@ -248,10 +153,10 @@ static u32 flow_get_nfct_src(const struct sk_buff *skb, int nhoff)
return ntohl(CTTUPLE(skb, src.u3.ip6[3]));
}
fallback:
- return flow_get_src(skb, nhoff);
+ return flow_get_src(skb, flow);
}
-static u32 flow_get_nfct_dst(const struct sk_buff *skb, int nhoff)
+static u32 flow_get_nfct_dst(const struct sk_buff *skb, const struct flow_keys *flow)
{
switch (skb->protocol) {
case htons(ETH_P_IP):
@@ -260,21 +165,21 @@ static u32 flow_get_nfct_dst(const struct sk_buff *skb, int nhoff)
return ntohl(CTTUPLE(skb, dst.u3.ip6[3]));
}
fallback:
- return flow_get_dst(skb, nhoff);
+ return flow_get_dst(skb, flow);
}
-static u32 flow_get_nfct_proto_src(const struct sk_buff *skb, int nhoff)
+static u32 flow_get_nfct_proto_src(const struct sk_buff *skb, const struct flow_keys *flow)
{
return ntohs(CTTUPLE(skb, src.u.all));
fallback:
- return flow_get_proto_src(skb, nhoff);
+ return flow_get_proto_src(skb, flow);
}
-static u32 flow_get_nfct_proto_dst(const struct sk_buff *skb, int nhoff)
+static u32 flow_get_nfct_proto_dst(const struct sk_buff *skb, const struct flow_keys *flow)
{
return ntohs(CTTUPLE(skb, dst.u.all));
fallback:
- return flow_get_proto_dst(skb, nhoff);
+ return flow_get_proto_dst(skb, flow);
}
static u32 flow_get_rtclassid(const struct sk_buff *skb)
@@ -314,21 +219,19 @@ static u32 flow_get_rxhash(struct sk_buff *skb)
return skb_get_rxhash(skb);
}
-static u32 flow_key_get(struct sk_buff *skb, int key)
+static u32 flow_key_get(struct sk_buff *skb, int key, struct flow_keys *flow)
{
- int nhoff = skb_network_offset(skb);
-
switch (key) {
case FLOW_KEY_SRC:
- return flow_get_src(skb, nhoff);
+ return flow_get_src(skb, flow);
case FLOW_KEY_DST:
- return flow_get_dst(skb, nhoff);
+ return flow_get_dst(skb, flow);
case FLOW_KEY_PROTO:
- return flow_get_proto(skb, nhoff);
+ return flow_get_proto(skb, flow);
case FLOW_KEY_PROTO_SRC:
- return flow_get_proto_src(skb, nhoff);
+ return flow_get_proto_src(skb, flow);
case FLOW_KEY_PROTO_DST:
- return flow_get_proto_dst(skb, nhoff);
+ return flow_get_proto_dst(skb, flow);
case FLOW_KEY_IIF:
return flow_get_iif(skb);
case FLOW_KEY_PRIORITY:
@@ -338,13 +241,13 @@ static u32 flow_key_get(struct sk_buff *skb, int key)
case FLOW_KEY_NFCT:
return flow_get_nfct(skb);
case FLOW_KEY_NFCT_SRC:
- return flow_get_nfct_src(skb, nhoff);
+ return flow_get_nfct_src(skb, flow);
case FLOW_KEY_NFCT_DST:
- return flow_get_nfct_dst(skb, nhoff);
+ return flow_get_nfct_dst(skb, flow);
case FLOW_KEY_NFCT_PROTO_SRC:
- return flow_get_nfct_proto_src(skb, nhoff);
+ return flow_get_nfct_proto_src(skb, flow);
case FLOW_KEY_NFCT_PROTO_DST:
- return flow_get_nfct_proto_dst(skb, nhoff);
+ return flow_get_nfct_proto_dst(skb, flow);
case FLOW_KEY_RTCLASSID:
return flow_get_rtclassid(skb);
case FLOW_KEY_SKUID:
@@ -361,6 +264,16 @@ static u32 flow_key_get(struct sk_buff *skb, int key)
}
}
+#define FLOW_KEYS_NEEDED ((1 << FLOW_KEY_SRC) | \
+ (1 << FLOW_KEY_DST) | \
+ (1 << FLOW_KEY_PROTO) | \
+ (1 << FLOW_KEY_PROTO_SRC) | \
+ (1 << FLOW_KEY_PROTO_DST) | \
+ (1 << FLOW_KEY_NFCT_SRC) | \
+ (1 << FLOW_KEY_NFCT_DST) | \
+ (1 << FLOW_KEY_NFCT_PROTO_SRC) | \
+ (1 << FLOW_KEY_NFCT_PROTO_DST))
+
static int flow_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res)
{
@@ -372,17 +285,20 @@ static int flow_classify(struct sk_buff *skb, const struct tcf_proto *tp,
int r;
list_for_each_entry(f, &head->filters, list) {
- u32 keys[f->nkeys];
+ u32 keys[FLOW_KEY_MAX + 1];
+ struct flow_keys flow_keys;
if (!tcf_em_tree_match(skb, &f->ematches, NULL))
continue;
keymask = f->keymask;
+ if (keymask & FLOW_KEYS_NEEDED)
+ skb_flow_dissect(skb, &flow_keys);
for (n = 0; n < f->nkeys; n++) {
key = ffs(keymask) - 1;
keymask &= ~(1 << key);
- keys[n] = flow_key_get(skb, key);
+ keys[n] = flow_key_get(skb, key, &flow_keys);
}
if (f->mode == FLOW_MODE_HASH)
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index dca6c1a576f7..3d8981fde301 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -618,20 +618,24 @@ void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
}
EXPORT_SYMBOL(qdisc_class_hash_remove);
-/* Allocate an unique handle from space managed by kernel */
-
+/* Allocate an unique handle from space managed by kernel
+ * Possible range is [8000-FFFF]:0000 (0x8000 values)
+ */
static u32 qdisc_alloc_handle(struct net_device *dev)
{
- int i = 0x10000;
+ int i = 0x8000;
static u32 autohandle = TC_H_MAKE(0x80000000U, 0);
do {
autohandle += TC_H_MAKE(0x10000U, 0);
if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
autohandle = TC_H_MAKE(0x80000000U, 0);
- } while (qdisc_lookup(dev, autohandle) && --i > 0);
+ if (!qdisc_lookup(dev, autohandle))
+ return autohandle;
+ cond_resched();
+ } while (--i > 0);
- return i > 0 ? autohandle : 0;
+ return 0;
}
void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index 3422b25df9e4..bef00acb8bd2 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -19,10 +19,7 @@
#include <net/pkt_sched.h>
#include <net/inet_ecn.h>
#include <net/red.h>
-#include <linux/ip.h>
-#include <net/ip.h>
-#include <linux/ipv6.h>
-#include <net/ipv6.h>
+#include <net/flow_keys.h>
/*
CHOKe stateless AQM for fair bandwidth allocation
@@ -142,85 +139,10 @@ static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx)
--sch->q.qlen;
}
-/*
- * Compare flow of two packets
- * Returns true only if source and destination address and port match.
- * false for special cases
- */
-static bool choke_match_flow(struct sk_buff *skb1,
- struct sk_buff *skb2)
-{
- int off1, off2, poff;
- const u32 *ports1, *ports2;
- u8 ip_proto;
- __u32 hash1;
-
- if (skb1->protocol != skb2->protocol)
- return false;
-
- /* Use hash value as quick check
- * Assumes that __skb_get_rxhash makes IP header and ports linear
- */
- hash1 = skb_get_rxhash(skb1);
- if (!hash1 || hash1 != skb_get_rxhash(skb2))
- return false;
-
- /* Probably match, but be sure to avoid hash collisions */
- off1 = skb_network_offset(skb1);
- off2 = skb_network_offset(skb2);
-
- switch (skb1->protocol) {
- case __constant_htons(ETH_P_IP): {
- const struct iphdr *ip1, *ip2;
-
- ip1 = (const struct iphdr *) (skb1->data + off1);
- ip2 = (const struct iphdr *) (skb2->data + off2);
-
- ip_proto = ip1->protocol;
- if (ip_proto != ip2->protocol ||
- ip1->saddr != ip2->saddr || ip1->daddr != ip2->daddr)
- return false;
-
- if (ip_is_fragment(ip1) | ip_is_fragment(ip2))
- ip_proto = 0;
- off1 += ip1->ihl * 4;
- off2 += ip2->ihl * 4;
- break;
- }
-
- case __constant_htons(ETH_P_IPV6): {
- const struct ipv6hdr *ip1, *ip2;
-
- ip1 = (const struct ipv6hdr *) (skb1->data + off1);
- ip2 = (const struct ipv6hdr *) (skb2->data + off2);
-
- ip_proto = ip1->nexthdr;
- if (ip_proto != ip2->nexthdr ||
- ipv6_addr_cmp(&ip1->saddr, &ip2->saddr) ||
- ipv6_addr_cmp(&ip1->daddr, &ip2->daddr))
- return false;
- off1 += 40;
- off2 += 40;
- }
-
- default: /* Maybe compare MAC header here? */
- return false;
- }
-
- poff = proto_ports_offset(ip_proto);
- if (poff < 0)
- return true;
-
- off1 += poff;
- off2 += poff;
-
- ports1 = (__force u32 *)(skb1->data + off1);
- ports2 = (__force u32 *)(skb2->data + off2);
- return *ports1 == *ports2;
-}
-
struct choke_skb_cb {
- u16 classid;
+ u16 classid;
+ u8 keys_valid;
+ struct flow_keys keys;
};
static inline struct choke_skb_cb *choke_skb_cb(const struct sk_buff *skb)
@@ -241,6 +163,32 @@ static u16 choke_get_classid(const struct sk_buff *skb)
}
/*
+ * Compare flow of two packets
+ * Returns true only if source and destination address and port match.
+ * false for special cases
+ */
+static bool choke_match_flow(struct sk_buff *skb1,
+ struct sk_buff *skb2)
+{
+ if (skb1->protocol != skb2->protocol)
+ return false;
+
+ if (!choke_skb_cb(skb1)->keys_valid) {
+ choke_skb_cb(skb1)->keys_valid = 1;
+ skb_flow_dissect(skb1, &choke_skb_cb(skb1)->keys);
+ }
+
+ if (!choke_skb_cb(skb2)->keys_valid) {
+ choke_skb_cb(skb2)->keys_valid = 1;
+ skb_flow_dissect(skb2, &choke_skb_cb(skb2)->keys);
+ }
+
+ return !memcmp(&choke_skb_cb(skb1)->keys,
+ &choke_skb_cb(skb2)->keys,
+ sizeof(struct flow_keys));
+}
+
+/*
* Classify flow using either:
* 1. pre-existing classification result in skb
* 2. fast internal classification
@@ -326,6 +274,7 @@ static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch)
goto other_drop; /* Packet was eaten by filter */
}
+ choke_skb_cb(skb)->keys_valid = 0;
/* Compute average queue usage (see RED) */
p->qavg = red_calc_qavg(p, sch->q.qlen);
if (red_is_idling(p))
@@ -445,6 +394,7 @@ static void choke_reset(struct Qdisc *sch)
static const struct nla_policy choke_policy[TCA_CHOKE_MAX + 1] = {
[TCA_CHOKE_PARMS] = { .len = sizeof(struct tc_red_qopt) },
[TCA_CHOKE_STAB] = { .len = RED_STAB_SIZE },
+ [TCA_CHOKE_MAX_P] = { .type = NLA_U32 },
};
@@ -466,6 +416,7 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt)
int err;
struct sk_buff **old = NULL;
unsigned int mask;
+ u32 max_P;
if (opt == NULL)
return -EINVAL;
@@ -478,6 +429,8 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt)
tb[TCA_CHOKE_STAB] == NULL)
return -EINVAL;
+ max_P = tb[TCA_CHOKE_MAX_P] ? nla_get_u32(tb[TCA_CHOKE_MAX_P]) : 0;
+
ctl = nla_data(tb[TCA_CHOKE_PARMS]);
if (ctl->limit > CHOKE_MAX_QUEUE)
@@ -527,7 +480,8 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt)
red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog,
ctl->Plog, ctl->Scell_log,
- nla_data(tb[TCA_CHOKE_STAB]));
+ nla_data(tb[TCA_CHOKE_STAB]),
+ max_P);
if (q->head == q->tail)
red_end_of_idle_period(&q->parms);
@@ -561,6 +515,7 @@ static int choke_dump(struct Qdisc *sch, struct sk_buff *skb)
goto nla_put_failure;
NLA_PUT(skb, TCA_CHOKE_PARMS, sizeof(opt), &opt);
+ NLA_PUT_U32(skb, TCA_CHOKE_MAX_P, q->parms.max_P);
return nla_nest_end(skb, opts);
nla_put_failure:
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 69fca2798804..67fc573e013a 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -60,7 +60,7 @@ static inline struct sk_buff *dequeue_skb(struct Qdisc *q)
/* check the reason of requeuing without tx lock first */
txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
- if (!netif_tx_queue_frozen_or_stopped(txq)) {
+ if (!netif_xmit_frozen_or_stopped(txq)) {
q->gso_skb = NULL;
q->q.qlen--;
} else
@@ -121,7 +121,7 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
spin_unlock(root_lock);
HARD_TX_LOCK(dev, txq, smp_processor_id());
- if (!netif_tx_queue_frozen_or_stopped(txq))
+ if (!netif_xmit_frozen_or_stopped(txq))
ret = dev_hard_start_xmit(skb, dev, txq);
HARD_TX_UNLOCK(dev, txq);
@@ -143,7 +143,7 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
ret = dev_requeue_skb(skb, q);
}
- if (ret && netif_tx_queue_frozen_or_stopped(txq))
+ if (ret && netif_xmit_frozen_or_stopped(txq))
ret = 0;
return ret;
@@ -242,10 +242,11 @@ static void dev_watchdog(unsigned long arg)
* old device drivers set dev->trans_start
*/
trans_start = txq->trans_start ? : dev->trans_start;
- if (netif_tx_queue_stopped(txq) &&
+ if (netif_xmit_stopped(txq) &&
time_after(jiffies, (trans_start +
dev->watchdog_timeo))) {
some_queue_timedout = 1;
+ txq->trans_timeout++;
break;
}
}
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index b9493a09a870..53204de71c39 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -34,7 +34,7 @@ struct gred_sched;
struct gred_sched_data {
u32 limit; /* HARD maximal queue length */
- u32 DP; /* the drop pramaters */
+ u32 DP; /* the drop parameters */
u32 bytesin; /* bytes seen on virtualQ so far*/
u32 packetsin; /* packets seen on virtualQ so far*/
u32 backlog; /* bytes on the virtualQ */
@@ -379,18 +379,20 @@ static inline int gred_change_table_def(struct Qdisc *sch, struct nlattr *dps)
}
static inline int gred_change_vq(struct Qdisc *sch, int dp,
- struct tc_gred_qopt *ctl, int prio, u8 *stab)
+ struct tc_gred_qopt *ctl, int prio,
+ u8 *stab, u32 max_P,
+ struct gred_sched_data **prealloc)
{
struct gred_sched *table = qdisc_priv(sch);
- struct gred_sched_data *q;
+ struct gred_sched_data *q = table->tab[dp];
- if (table->tab[dp] == NULL) {
- table->tab[dp] = kzalloc(sizeof(*q), GFP_KERNEL);
- if (table->tab[dp] == NULL)
+ if (!q) {
+ table->tab[dp] = q = *prealloc;
+ *prealloc = NULL;
+ if (!q)
return -ENOMEM;
}
- q = table->tab[dp];
q->DP = dp;
q->prio = prio;
q->limit = ctl->limit;
@@ -400,7 +402,7 @@ static inline int gred_change_vq(struct Qdisc *sch, int dp,
red_set_parms(&q->parms,
ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Plog,
- ctl->Scell_log, stab);
+ ctl->Scell_log, stab, max_P);
return 0;
}
@@ -409,6 +411,7 @@ static const struct nla_policy gred_policy[TCA_GRED_MAX + 1] = {
[TCA_GRED_PARMS] = { .len = sizeof(struct tc_gred_qopt) },
[TCA_GRED_STAB] = { .len = 256 },
[TCA_GRED_DPS] = { .len = sizeof(struct tc_gred_sopt) },
+ [TCA_GRED_MAX_P] = { .type = NLA_U32 },
};
static int gred_change(struct Qdisc *sch, struct nlattr *opt)
@@ -418,6 +421,8 @@ static int gred_change(struct Qdisc *sch, struct nlattr *opt)
struct nlattr *tb[TCA_GRED_MAX + 1];
int err, prio = GRED_DEF_PRIO;
u8 *stab;
+ u32 max_P;
+ struct gred_sched_data *prealloc;
if (opt == NULL)
return -EINVAL;
@@ -433,6 +438,8 @@ static int gred_change(struct Qdisc *sch, struct nlattr *opt)
tb[TCA_GRED_STAB] == NULL)
return -EINVAL;
+ max_P = tb[TCA_GRED_MAX_P] ? nla_get_u32(tb[TCA_GRED_MAX_P]) : 0;
+
err = -EINVAL;
ctl = nla_data(tb[TCA_GRED_PARMS]);
stab = nla_data(tb[TCA_GRED_STAB]);
@@ -455,9 +462,10 @@ static int gred_change(struct Qdisc *sch, struct nlattr *opt)
prio = ctl->prio;
}
+ prealloc = kzalloc(sizeof(*prealloc), GFP_KERNEL);
sch_tree_lock(sch);
- err = gred_change_vq(sch, ctl->DP, ctl, prio, stab);
+ err = gred_change_vq(sch, ctl->DP, ctl, prio, stab, max_P, &prealloc);
if (err < 0)
goto errout_locked;
@@ -471,6 +479,7 @@ static int gred_change(struct Qdisc *sch, struct nlattr *opt)
errout_locked:
sch_tree_unlock(sch);
+ kfree(prealloc);
errout:
return err;
}
@@ -498,6 +507,7 @@ static int gred_dump(struct Qdisc *sch, struct sk_buff *skb)
struct gred_sched *table = qdisc_priv(sch);
struct nlattr *parms, *opts = NULL;
int i;
+ u32 max_p[MAX_DPs];
struct tc_gred_sopt sopt = {
.DPs = table->DPs,
.def_DP = table->def,
@@ -509,6 +519,14 @@ static int gred_dump(struct Qdisc *sch, struct sk_buff *skb)
if (opts == NULL)
goto nla_put_failure;
NLA_PUT(skb, TCA_GRED_DPS, sizeof(sopt), &sopt);
+
+ for (i = 0; i < MAX_DPs; i++) {
+ struct gred_sched_data *q = table->tab[i];
+
+ max_p[i] = q ? q->parms.max_P : 0;
+ }
+ NLA_PUT(skb, TCA_GRED_MAX_P, sizeof(max_p), max_p);
+
parms = nla_nest_start(skb, TCA_GRED_PARMS);
if (parms == NULL)
goto nla_put_failure;
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 6488e6425652..9bdca2e011e9 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1368,6 +1368,7 @@ hfsc_dump_class_stats(struct Qdisc *sch, unsigned long arg,
struct tc_hfsc_stats xstats;
cl->qstats.qlen = cl->qdisc->q.qlen;
+ cl->qstats.backlog = cl->qdisc->qstats.backlog;
xstats.level = cl->level;
xstats.period = cl->cl_vtperiod;
xstats.work = cl->cl_total;
@@ -1561,6 +1562,15 @@ hfsc_dump_qdisc(struct Qdisc *sch, struct sk_buff *skb)
struct hfsc_sched *q = qdisc_priv(sch);
unsigned char *b = skb_tail_pointer(skb);
struct tc_hfsc_qopt qopt;
+ struct hfsc_class *cl;
+ struct hlist_node *n;
+ unsigned int i;
+
+ sch->qstats.backlog = 0;
+ for (i = 0; i < q->clhash.hashsize; i++) {
+ hlist_for_each_entry(cl, n, &q->clhash.hash[i], cl_common.hnode)
+ sch->qstats.backlog += cl->qdisc->qstats.backlog;
+ }
qopt.defcls = q->defcls;
NLA_PUT(skb, TCA_OPTIONS, sizeof(qopt), &qopt);
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index f88256cbacbf..28de43092330 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -107,7 +107,7 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
if (!netif_is_multiqueue(dev))
return -EOPNOTSUPP;
- if (nla_len(opt) < sizeof(*qopt))
+ if (!opt || nla_len(opt) < sizeof(*qopt))
return -EINVAL;
qopt = nla_data(opt);
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index edc1950e0e77..49131d7a7446 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -107,7 +107,8 @@ static struct sk_buff *multiq_dequeue(struct Qdisc *sch)
/* Check that target subqueue is available before
* pulling an skb to avoid head-of-line blocking.
*/
- if (!__netif_subqueue_stopped(qdisc_dev(sch), q->curband)) {
+ if (!netif_xmit_stopped(
+ netdev_get_tx_queue(qdisc_dev(sch), q->curband))) {
qdisc = q->queues[q->curband];
skb = qdisc->dequeue(qdisc);
if (skb) {
@@ -138,7 +139,8 @@ static struct sk_buff *multiq_peek(struct Qdisc *sch)
/* Check that target subqueue is available before
* pulling an skb to avoid head-of-line blocking.
*/
- if (!__netif_subqueue_stopped(qdisc_dev(sch), curband)) {
+ if (!netif_xmit_stopped(
+ netdev_get_tx_queue(qdisc_dev(sch), curband))) {
qdisc = q->queues[curband];
skb = qdisc->ops->peek(qdisc);
if (skb)
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index eb3b9a86c6ed..06a5cebad342 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -22,6 +22,7 @@
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
#include <linux/rtnetlink.h>
+#include <linux/reciprocal_div.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
@@ -66,7 +67,11 @@
*/
struct netem_sched_data {
+ /* internal t(ime)fifo qdisc uses sch->q and sch->limit */
+
+ /* optional qdisc for classful handling (NULL at netem init) */
struct Qdisc *qdisc;
+
struct qdisc_watchdog watchdog;
psched_tdiff_t latency;
@@ -79,6 +84,11 @@ struct netem_sched_data {
u32 duplicate;
u32 reorder;
u32 corrupt;
+ u32 rate;
+ s32 packet_overhead;
+ u32 cell_size;
+ u32 cell_size_reciprocal;
+ s32 cell_overhead;
struct crndstate {
u32 last;
@@ -111,7 +121,9 @@ struct netem_sched_data {
};
-/* Time stamp put into socket buffer control block */
+/* Time stamp put into socket buffer control block
+ * Only valid when skbs are in our internal t(ime)fifo queue.
+ */
struct netem_skb_cb {
psched_time_t time_to_send;
};
@@ -298,6 +310,51 @@ static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma,
return x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
}
+static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sched_data *q)
+{
+ u64 ticks;
+
+ len += q->packet_overhead;
+
+ if (q->cell_size) {
+ u32 cells = reciprocal_divide(len, q->cell_size_reciprocal);
+
+ if (len > cells * q->cell_size) /* extra cell needed for remainder */
+ cells++;
+ len = cells * (q->cell_size + q->cell_overhead);
+ }
+
+ ticks = (u64)len * NSEC_PER_SEC;
+
+ do_div(ticks, q->rate);
+ return PSCHED_NS2TICKS(ticks);
+}
+
+static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
+{
+ struct sk_buff_head *list = &sch->q;
+ psched_time_t tnext = netem_skb_cb(nskb)->time_to_send;
+ struct sk_buff *skb;
+
+ if (likely(skb_queue_len(list) < sch->limit)) {
+ skb = skb_peek_tail(list);
+ /* Optimize for add at tail */
+ if (likely(!skb || tnext >= netem_skb_cb(skb)->time_to_send))
+ return qdisc_enqueue_tail(nskb, sch);
+
+ skb_queue_reverse_walk(list, skb) {
+ if (tnext >= netem_skb_cb(skb)->time_to_send)
+ break;
+ }
+
+ __skb_queue_after(list, skb, nskb);
+ sch->qstats.backlog += qdisc_pkt_len(nskb);
+ return NET_XMIT_SUCCESS;
+ }
+
+ return qdisc_reshape_fail(nskb, sch);
+}
+
/*
* Insert one skb into qdisc.
* Note: parent depends on return value to account for queue length.
@@ -371,9 +428,27 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
&q->delay_cor, q->delay_dist);
now = psched_get_time();
+
+ if (q->rate) {
+ struct sk_buff_head *list = &sch->q;
+
+ delay += packet_len_2_sched_time(skb->len, q);
+
+ if (!skb_queue_empty(list)) {
+ /*
+ * Last packet in queue is reference point (now).
+ * First packet in queue is already in flight,
+ * calculate this time bonus and substract
+ * from delay.
+ */
+ delay -= now - netem_skb_cb(skb_peek(list))->time_to_send;
+ now = netem_skb_cb(skb_peek_tail(list))->time_to_send;
+ }
+ }
+
cb->time_to_send = now + delay;
++q->counter;
- ret = qdisc_enqueue(skb, q->qdisc);
+ ret = tfifo_enqueue(skb, sch);
} else {
/*
* Do re-ordering by putting one out of N packets at the front
@@ -382,7 +457,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
cb->time_to_send = psched_get_time();
q->counter = 0;
- __skb_queue_head(&q->qdisc->q, skb);
+ __skb_queue_head(&sch->q, skb);
q->qdisc->qstats.backlog += qdisc_pkt_len(skb);
q->qdisc->qstats.requeues++;
ret = NET_XMIT_SUCCESS;
@@ -395,19 +470,20 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
}
}
- sch->q.qlen++;
return NET_XMIT_SUCCESS;
}
static unsigned int netem_drop(struct Qdisc *sch)
{
struct netem_sched_data *q = qdisc_priv(sch);
- unsigned int len = 0;
+ unsigned int len;
- if (q->qdisc->ops->drop && (len = q->qdisc->ops->drop(q->qdisc)) != 0) {
- sch->q.qlen--;
+ len = qdisc_queue_drop(sch);
+ if (!len && q->qdisc && q->qdisc->ops->drop)
+ len = q->qdisc->ops->drop(q->qdisc);
+ if (len)
sch->qstats.drops++;
- }
+
return len;
}
@@ -419,16 +495,16 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
if (qdisc_is_throttled(sch))
return NULL;
- skb = q->qdisc->ops->peek(q->qdisc);
+tfifo_dequeue:
+ skb = qdisc_peek_head(sch);
if (skb) {
const struct netem_skb_cb *cb = netem_skb_cb(skb);
- psched_time_t now = psched_get_time();
/* if more time remaining? */
- if (cb->time_to_send <= now) {
- skb = qdisc_dequeue_peeked(q->qdisc);
+ if (cb->time_to_send <= psched_get_time()) {
+ skb = qdisc_dequeue_tail(sch);
if (unlikely(!skb))
- return NULL;
+ goto qdisc_dequeue;
#ifdef CONFIG_NET_CLS_ACT
/*
@@ -439,15 +515,37 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
skb->tstamp.tv64 = 0;
#endif
- sch->q.qlen--;
+ if (q->qdisc) {
+ int err = qdisc_enqueue(skb, q->qdisc);
+
+ if (unlikely(err != NET_XMIT_SUCCESS)) {
+ if (net_xmit_drop_count(err)) {
+ sch->qstats.drops++;
+ qdisc_tree_decrease_qlen(sch, 1);
+ }
+ }
+ goto tfifo_dequeue;
+ }
+deliver:
qdisc_unthrottled(sch);
qdisc_bstats_update(sch, skb);
return skb;
}
+ if (q->qdisc) {
+ skb = q->qdisc->ops->dequeue(q->qdisc);
+ if (skb)
+ goto deliver;
+ }
qdisc_watchdog_schedule(&q->watchdog, cb->time_to_send);
}
+qdisc_dequeue:
+ if (q->qdisc) {
+ skb = q->qdisc->ops->dequeue(q->qdisc);
+ if (skb)
+ goto deliver;
+ }
return NULL;
}
@@ -455,8 +553,9 @@ static void netem_reset(struct Qdisc *sch)
{
struct netem_sched_data *q = qdisc_priv(sch);
- qdisc_reset(q->qdisc);
- sch->q.qlen = 0;
+ qdisc_reset_queue(sch);
+ if (q->qdisc)
+ qdisc_reset(q->qdisc);
qdisc_watchdog_cancel(&q->watchdog);
}
@@ -488,7 +587,7 @@ static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
return -EINVAL;
s = sizeof(struct disttable) + n * sizeof(s16);
- d = kmalloc(s, GFP_KERNEL);
+ d = kmalloc(s, GFP_KERNEL | __GFP_NOWARN);
if (!d)
d = vmalloc(s);
if (!d)
@@ -501,9 +600,10 @@ static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
root_lock = qdisc_root_sleeping_lock(sch);
spin_lock_bh(root_lock);
- dist_free(q->delay_dist);
- q->delay_dist = d;
+ swap(q->delay_dist, d);
spin_unlock_bh(root_lock);
+
+ dist_free(d);
return 0;
}
@@ -535,6 +635,19 @@ static void get_corrupt(struct Qdisc *sch, const struct nlattr *attr)
init_crandom(&q->corrupt_cor, r->correlation);
}
+static void get_rate(struct Qdisc *sch, const struct nlattr *attr)
+{
+ struct netem_sched_data *q = qdisc_priv(sch);
+ const struct tc_netem_rate *r = nla_data(attr);
+
+ q->rate = r->rate;
+ q->packet_overhead = r->packet_overhead;
+ q->cell_size = r->cell_size;
+ if (q->cell_size)
+ q->cell_size_reciprocal = reciprocal_value(q->cell_size);
+ q->cell_overhead = r->cell_overhead;
+}
+
static int get_loss_clg(struct Qdisc *sch, const struct nlattr *attr)
{
struct netem_sched_data *q = qdisc_priv(sch);
@@ -548,7 +661,7 @@ static int get_loss_clg(struct Qdisc *sch, const struct nlattr *attr)
case NETEM_LOSS_GI: {
const struct tc_netem_gimodel *gi = nla_data(la);
- if (nla_len(la) != sizeof(struct tc_netem_gimodel)) {
+ if (nla_len(la) < sizeof(struct tc_netem_gimodel)) {
pr_info("netem: incorrect gi model size\n");
return -EINVAL;
}
@@ -567,8 +680,8 @@ static int get_loss_clg(struct Qdisc *sch, const struct nlattr *attr)
case NETEM_LOSS_GE: {
const struct tc_netem_gemodel *ge = nla_data(la);
- if (nla_len(la) != sizeof(struct tc_netem_gemodel)) {
- pr_info("netem: incorrect gi model size\n");
+ if (nla_len(la) < sizeof(struct tc_netem_gemodel)) {
+ pr_info("netem: incorrect ge model size\n");
return -EINVAL;
}
@@ -594,6 +707,7 @@ static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
[TCA_NETEM_CORR] = { .len = sizeof(struct tc_netem_corr) },
[TCA_NETEM_REORDER] = { .len = sizeof(struct tc_netem_reorder) },
[TCA_NETEM_CORRUPT] = { .len = sizeof(struct tc_netem_corrupt) },
+ [TCA_NETEM_RATE] = { .len = sizeof(struct tc_netem_rate) },
[TCA_NETEM_LOSS] = { .type = NLA_NESTED },
};
@@ -631,11 +745,7 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
if (ret < 0)
return ret;
- ret = fifo_set_limit(q->qdisc, qopt->limit);
- if (ret) {
- pr_info("netem: can't set fifo limit\n");
- return ret;
- }
+ sch->limit = qopt->limit;
q->latency = qopt->latency;
q->jitter = qopt->jitter;
@@ -666,6 +776,9 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
if (tb[TCA_NETEM_CORRUPT])
get_corrupt(sch, tb[TCA_NETEM_CORRUPT]);
+ if (tb[TCA_NETEM_RATE])
+ get_rate(sch, tb[TCA_NETEM_RATE]);
+
q->loss_model = CLG_RANDOM;
if (tb[TCA_NETEM_LOSS])
ret = get_loss_clg(sch, tb[TCA_NETEM_LOSS]);
@@ -673,88 +786,6 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
return ret;
}
-/*
- * Special case version of FIFO queue for use by netem.
- * It queues in order based on timestamps in skb's
- */
-struct fifo_sched_data {
- u32 limit;
- psched_time_t oldest;
-};
-
-static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
-{
- struct fifo_sched_data *q = qdisc_priv(sch);
- struct sk_buff_head *list = &sch->q;
- psched_time_t tnext = netem_skb_cb(nskb)->time_to_send;
- struct sk_buff *skb;
-
- if (likely(skb_queue_len(list) < q->limit)) {
- /* Optimize for add at tail */
- if (likely(skb_queue_empty(list) || tnext >= q->oldest)) {
- q->oldest = tnext;
- return qdisc_enqueue_tail(nskb, sch);
- }
-
- skb_queue_reverse_walk(list, skb) {
- const struct netem_skb_cb *cb = netem_skb_cb(skb);
-
- if (tnext >= cb->time_to_send)
- break;
- }
-
- __skb_queue_after(list, skb, nskb);
-
- sch->qstats.backlog += qdisc_pkt_len(nskb);
-
- return NET_XMIT_SUCCESS;
- }
-
- return qdisc_reshape_fail(nskb, sch);
-}
-
-static int tfifo_init(struct Qdisc *sch, struct nlattr *opt)
-{
- struct fifo_sched_data *q = qdisc_priv(sch);
-
- if (opt) {
- struct tc_fifo_qopt *ctl = nla_data(opt);
- if (nla_len(opt) < sizeof(*ctl))
- return -EINVAL;
-
- q->limit = ctl->limit;
- } else
- q->limit = max_t(u32, qdisc_dev(sch)->tx_queue_len, 1);
-
- q->oldest = PSCHED_PASTPERFECT;
- return 0;
-}
-
-static int tfifo_dump(struct Qdisc *sch, struct sk_buff *skb)
-{
- struct fifo_sched_data *q = qdisc_priv(sch);
- struct tc_fifo_qopt opt = { .limit = q->limit };
-
- NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
- return skb->len;
-
-nla_put_failure:
- return -1;
-}
-
-static struct Qdisc_ops tfifo_qdisc_ops __read_mostly = {
- .id = "tfifo",
- .priv_size = sizeof(struct fifo_sched_data),
- .enqueue = tfifo_enqueue,
- .dequeue = qdisc_dequeue_head,
- .peek = qdisc_peek_head,
- .drop = qdisc_queue_drop,
- .init = tfifo_init,
- .reset = qdisc_reset_queue,
- .change = tfifo_init,
- .dump = tfifo_dump,
-};
-
static int netem_init(struct Qdisc *sch, struct nlattr *opt)
{
struct netem_sched_data *q = qdisc_priv(sch);
@@ -766,18 +797,9 @@ static int netem_init(struct Qdisc *sch, struct nlattr *opt)
qdisc_watchdog_init(&q->watchdog, sch);
q->loss_model = CLG_RANDOM;
- q->qdisc = qdisc_create_dflt(sch->dev_queue, &tfifo_qdisc_ops,
- TC_H_MAKE(sch->handle, 1));
- if (!q->qdisc) {
- pr_notice("netem: qdisc create tfifo qdisc failed\n");
- return -ENOMEM;
- }
-
ret = netem_change(sch, opt);
- if (ret) {
+ if (ret)
pr_info("netem: change failed\n");
- qdisc_destroy(q->qdisc);
- }
return ret;
}
@@ -786,7 +808,8 @@ static void netem_destroy(struct Qdisc *sch)
struct netem_sched_data *q = qdisc_priv(sch);
qdisc_watchdog_cancel(&q->watchdog);
- qdisc_destroy(q->qdisc);
+ if (q->qdisc)
+ qdisc_destroy(q->qdisc);
dist_free(q->delay_dist);
}
@@ -846,6 +869,7 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
struct tc_netem_corr cor;
struct tc_netem_reorder reorder;
struct tc_netem_corrupt corrupt;
+ struct tc_netem_rate rate;
qopt.latency = q->latency;
qopt.jitter = q->jitter;
@@ -868,6 +892,12 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
corrupt.correlation = q->corrupt_cor.rho;
NLA_PUT(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt);
+ rate.rate = q->rate;
+ rate.packet_overhead = q->packet_overhead;
+ rate.cell_size = q->cell_size;
+ rate.cell_overhead = q->cell_overhead;
+ NLA_PUT(skb, TCA_NETEM_RATE, sizeof(rate), &rate);
+
if (dump_loss_model(q, skb) != 0)
goto nla_put_failure;
@@ -883,7 +913,7 @@ static int netem_dump_class(struct Qdisc *sch, unsigned long cl,
{
struct netem_sched_data *q = qdisc_priv(sch);
- if (cl != 1) /* only one class */
+ if (cl != 1 || !q->qdisc) /* only one class */
return -ENOENT;
tcm->tcm_handle |= TC_H_MIN(1);
@@ -897,14 +927,13 @@ static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
{
struct netem_sched_data *q = qdisc_priv(sch);
- if (new == NULL)
- new = &noop_qdisc;
-
sch_tree_lock(sch);
*old = q->qdisc;
q->qdisc = new;
- qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
- qdisc_reset(*old);
+ if (*old) {
+ qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
+ qdisc_reset(*old);
+ }
sch_tree_unlock(sch);
return 0;
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index 103343408593..2c5ff6148589 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -211,6 +211,7 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
struct nlattr *tb[TCA_QFQ_MAX + 1];
u32 weight, lmax, inv_w;
int i, err;
+ int delta_w;
if (tca[TCA_OPTIONS] == NULL) {
pr_notice("qfq: no options\n");
@@ -232,9 +233,10 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
inv_w = ONE_FP / weight;
weight = ONE_FP / inv_w;
- if (q->wsum + weight > QFQ_MAX_WSUM) {
+ delta_w = weight - (cl ? ONE_FP / cl->inv_w : 0);
+ if (q->wsum + delta_w > QFQ_MAX_WSUM) {
pr_notice("qfq: total weight out of range (%u + %u)\n",
- weight, q->wsum);
+ delta_w, q->wsum);
return -EINVAL;
}
@@ -256,13 +258,12 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
return err;
}
- sch_tree_lock(sch);
- if (tb[TCA_QFQ_WEIGHT]) {
- q->wsum = weight - ONE_FP / cl->inv_w;
+ if (inv_w != cl->inv_w) {
+ sch_tree_lock(sch);
+ q->wsum += delta_w;
cl->inv_w = inv_w;
+ sch_tree_unlock(sch);
}
- sch_tree_unlock(sch);
-
return 0;
}
@@ -277,7 +278,6 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
i = qfq_calc_index(cl->inv_w, cl->lmax);
cl->grp = &q->groups[i];
- q->wsum += weight;
cl->qdisc = qdisc_create_dflt(sch->dev_queue,
&pfifo_qdisc_ops, classid);
@@ -294,6 +294,7 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
return err;
}
}
+ q->wsum += weight;
sch_tree_lock(sch);
qdisc_class_hash_insert(&q->clhash, &cl->common);
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 6649463da1b6..ce2256a17d7e 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -39,6 +39,7 @@
struct red_sched_data {
u32 limit; /* HARD maximal queue length */
unsigned char flags;
+ struct timer_list adapt_timer;
struct red_parms parms;
struct red_stats stats;
struct Qdisc *qdisc;
@@ -161,12 +162,15 @@ static void red_reset(struct Qdisc *sch)
static void red_destroy(struct Qdisc *sch)
{
struct red_sched_data *q = qdisc_priv(sch);
+
+ del_timer_sync(&q->adapt_timer);
qdisc_destroy(q->qdisc);
}
static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
[TCA_RED_PARMS] = { .len = sizeof(struct tc_red_qopt) },
[TCA_RED_STAB] = { .len = RED_STAB_SIZE },
+ [TCA_RED_MAX_P] = { .type = NLA_U32 },
};
static int red_change(struct Qdisc *sch, struct nlattr *opt)
@@ -176,6 +180,7 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt)
struct tc_red_qopt *ctl;
struct Qdisc *child = NULL;
int err;
+ u32 max_P;
if (opt == NULL)
return -EINVAL;
@@ -188,6 +193,8 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt)
tb[TCA_RED_STAB] == NULL)
return -EINVAL;
+ max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0;
+
ctl = nla_data(tb[TCA_RED_PARMS]);
if (ctl->limit > 0) {
@@ -206,21 +213,39 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt)
}
red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog,
- ctl->Plog, ctl->Scell_log,
- nla_data(tb[TCA_RED_STAB]));
+ ctl->Plog, ctl->Scell_log,
+ nla_data(tb[TCA_RED_STAB]),
+ max_P);
- if (skb_queue_empty(&sch->q))
- red_end_of_idle_period(&q->parms);
+ del_timer(&q->adapt_timer);
+ if (ctl->flags & TC_RED_ADAPTATIVE)
+ mod_timer(&q->adapt_timer, jiffies + HZ/2);
+
+ if (!q->qdisc->q.qlen)
+ red_start_of_idle_period(&q->parms);
sch_tree_unlock(sch);
return 0;
}
+static inline void red_adaptative_timer(unsigned long arg)
+{
+ struct Qdisc *sch = (struct Qdisc *)arg;
+ struct red_sched_data *q = qdisc_priv(sch);
+ spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
+
+ spin_lock(root_lock);
+ red_adaptative_algo(&q->parms);
+ mod_timer(&q->adapt_timer, jiffies + HZ/2);
+ spin_unlock(root_lock);
+}
+
static int red_init(struct Qdisc *sch, struct nlattr *opt)
{
struct red_sched_data *q = qdisc_priv(sch);
q->qdisc = &noop_qdisc;
+ setup_timer(&q->adapt_timer, red_adaptative_timer, (unsigned long)sch);
return red_change(sch, opt);
}
@@ -243,6 +268,7 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
if (opts == NULL)
goto nla_put_failure;
NLA_PUT(skb, TCA_RED_PARMS, sizeof(opt), &opt);
+ NLA_PUT_U32(skb, TCA_RED_MAX_P, q->parms.max_P);
return nla_nest_end(skb, opts);
nla_put_failure:
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index e83c272c0325..96e42cae4c7a 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -26,6 +26,7 @@
#include <net/ip.h>
#include <net/pkt_sched.h>
#include <net/inet_ecn.h>
+#include <net/flow_keys.h>
/*
* SFB uses two B[l][n] : L x N arrays of bins (L levels, N bins per level)
@@ -286,6 +287,7 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
u32 minqlen = ~0;
u32 r, slot, salt, sfbhash;
int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
+ struct flow_keys keys;
if (unlikely(sch->q.qlen >= q->limit)) {
sch->qstats.overlimits++;
@@ -309,13 +311,19 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
/* If using external classifiers, get result and record it. */
if (!sfb_classify(skb, q, &ret, &salt))
goto other_drop;
+ keys.src = salt;
+ keys.dst = 0;
+ keys.ports = 0;
} else {
- salt = skb_get_rxhash(skb);
+ skb_flow_dissect(skb, &keys);
}
slot = q->slot;
- sfbhash = jhash_1word(salt, q->bins[slot].perturbation);
+ sfbhash = jhash_3words((__force u32)keys.dst,
+ (__force u32)keys.src,
+ (__force u32)keys.ports,
+ q->bins[slot].perturbation);
if (!sfbhash)
sfbhash = 1;
sfb_skb_cb(skb)->hashes[slot] = sfbhash;
@@ -347,7 +355,10 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
if (unlikely(p_min >= SFB_MAX_PROB)) {
/* Inelastic flow */
if (q->double_buffering) {
- sfbhash = jhash_1word(salt, q->bins[slot].perturbation);
+ sfbhash = jhash_3words((__force u32)keys.dst,
+ (__force u32)keys.src,
+ (__force u32)keys.ports,
+ q->bins[slot].perturbation);
if (!sfbhash)
sfbhash = 1;
sfb_skb_cb(skb)->hashes[slot] = sfbhash;
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 4f5510e2bd6f..e9d5c911576d 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -17,14 +17,13 @@
#include <linux/in.h>
#include <linux/errno.h>
#include <linux/init.h>
-#include <linux/ipv6.h>
#include <linux/skbuff.h>
#include <linux/jhash.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
-#include <net/ip.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
+#include <net/flow_keys.h>
/* Stochastic Fairness Queuing algorithm.
@@ -137,61 +136,31 @@ static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index
return &q->dep[val - SFQ_SLOTS];
}
-static unsigned int sfq_fold_hash(struct sfq_sched_data *q, u32 h, u32 h1)
+/*
+ * In order to be able to quickly rehash our queue when timer changes
+ * q->perturbation, we store flow_keys in skb->cb[]
+ */
+struct sfq_skb_cb {
+ struct flow_keys keys;
+};
+
+static inline struct sfq_skb_cb *sfq_skb_cb(const struct sk_buff *skb)
{
- return jhash_2words(h, h1, q->perturbation) & (q->divisor - 1);
+ BUILD_BUG_ON(sizeof(skb->cb) <
+ sizeof(struct qdisc_skb_cb) + sizeof(struct sfq_skb_cb));
+ return (struct sfq_skb_cb *)qdisc_skb_cb(skb)->data;
}
-static unsigned int sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
+static unsigned int sfq_hash(const struct sfq_sched_data *q,
+ const struct sk_buff *skb)
{
- u32 h, h2;
-
- switch (skb->protocol) {
- case htons(ETH_P_IP):
- {
- const struct iphdr *iph;
- int poff;
-
- if (!pskb_network_may_pull(skb, sizeof(*iph)))
- goto err;
- iph = ip_hdr(skb);
- h = (__force u32)iph->daddr;
- h2 = (__force u32)iph->saddr ^ iph->protocol;
- if (ip_is_fragment(iph))
- break;
- poff = proto_ports_offset(iph->protocol);
- if (poff >= 0 &&
- pskb_network_may_pull(skb, iph->ihl * 4 + 4 + poff)) {
- iph = ip_hdr(skb);
- h2 ^= *(u32 *)((void *)iph + iph->ihl * 4 + poff);
- }
- break;
- }
- case htons(ETH_P_IPV6):
- {
- const struct ipv6hdr *iph;
- int poff;
-
- if (!pskb_network_may_pull(skb, sizeof(*iph)))
- goto err;
- iph = ipv6_hdr(skb);
- h = (__force u32)iph->daddr.s6_addr32[3];
- h2 = (__force u32)iph->saddr.s6_addr32[3] ^ iph->nexthdr;
- poff = proto_ports_offset(iph->nexthdr);
- if (poff >= 0 &&
- pskb_network_may_pull(skb, sizeof(*iph) + 4 + poff)) {
- iph = ipv6_hdr(skb);
- h2 ^= *(u32 *)((void *)iph + sizeof(*iph) + poff);
- }
- break;
- }
- default:
-err:
- h = (unsigned long)skb_dst(skb) ^ (__force u32)skb->protocol;
- h2 = (unsigned long)skb->sk;
- }
+ const struct flow_keys *keys = &sfq_skb_cb(skb)->keys;
+ unsigned int hash;
- return sfq_fold_hash(q, h, h2);
+ hash = jhash_3words((__force u32)keys->dst,
+ (__force u32)keys->src ^ keys->ip_proto,
+ (__force u32)keys->ports, q->perturbation);
+ return hash & (q->divisor - 1);
}
static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
@@ -206,8 +175,10 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
TC_H_MIN(skb->priority) <= q->divisor)
return TC_H_MIN(skb->priority);
- if (!q->filter_list)
+ if (!q->filter_list) {
+ skb_flow_dissect(skb, &sfq_skb_cb(skb)->keys);
return sfq_hash(q, skb) + 1;
+ }
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
result = tc_classify(skb, q->filter_list, &res);
@@ -395,11 +366,11 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
if (slot->qlen == 1) { /* The flow is new */
if (q->tail == NULL) { /* It is the first flow */
slot->next = x;
+ q->tail = slot;
} else {
slot->next = q->tail->next;
q->tail->next = x;
}
- q->tail = slot;
slot->allot = q->scaled_quantum;
}
if (++sch->q.qlen <= q->limit)
@@ -468,12 +439,71 @@ sfq_reset(struct Qdisc *sch)
kfree_skb(skb);
}
+/*
+ * When q->perturbation is changed, we rehash all queued skbs
+ * to avoid OOO (Out Of Order) effects.
+ * We dont use sfq_dequeue()/sfq_enqueue() because we dont want to change
+ * counters.
+ */
+static void sfq_rehash(struct sfq_sched_data *q)
+{
+ struct sk_buff *skb;
+ int i;
+ struct sfq_slot *slot;
+ struct sk_buff_head list;
+
+ __skb_queue_head_init(&list);
+
+ for (i = 0; i < SFQ_SLOTS; i++) {
+ slot = &q->slots[i];
+ if (!slot->qlen)
+ continue;
+ while (slot->qlen) {
+ skb = slot_dequeue_head(slot);
+ sfq_dec(q, i);
+ __skb_queue_tail(&list, skb);
+ }
+ q->ht[slot->hash] = SFQ_EMPTY_SLOT;
+ }
+ q->tail = NULL;
+
+ while ((skb = __skb_dequeue(&list)) != NULL) {
+ unsigned int hash = sfq_hash(q, skb);
+ sfq_index x = q->ht[hash];
+
+ slot = &q->slots[x];
+ if (x == SFQ_EMPTY_SLOT) {
+ x = q->dep[0].next; /* get a free slot */
+ q->ht[hash] = x;
+ slot = &q->slots[x];
+ slot->hash = hash;
+ }
+ slot_queue_add(slot, skb);
+ sfq_inc(q, x);
+ if (slot->qlen == 1) { /* The flow is new */
+ if (q->tail == NULL) { /* It is the first flow */
+ slot->next = x;
+ } else {
+ slot->next = q->tail->next;
+ q->tail->next = x;
+ }
+ q->tail = slot;
+ slot->allot = q->scaled_quantum;
+ }
+ }
+}
+
static void sfq_perturbation(unsigned long arg)
{
struct Qdisc *sch = (struct Qdisc *)arg;
struct sfq_sched_data *q = qdisc_priv(sch);
+ spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
+ spin_lock(root_lock);
q->perturbation = net_random();
+ if (!q->filter_list && q->tail)
+ sfq_rehash(q);
+ spin_unlock(root_lock);
if (q->perturb_period)
mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 1dcfb5223a86..b8e156319d7b 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -346,6 +346,7 @@ static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
struct nlattr *nest;
struct tc_tbf_qopt opt;
+ sch->qstats.backlog = q->qdisc->qstats.backlog;
nest = nla_nest_start(skb, TCA_OPTIONS);
if (nest == NULL)
goto nla_put_failure;
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index a3b7120fcc74..45326599fda3 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -225,11 +225,11 @@ static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt)
static int
-__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev)
+__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res,
+ struct net_device *dev, struct netdev_queue *txq,
+ struct neighbour *mn)
{
- struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, 0);
- struct teql_sched_data *q = qdisc_priv(dev_queue->qdisc);
- struct neighbour *mn = dst_get_neighbour(skb_dst(skb));
+ struct teql_sched_data *q = qdisc_priv(txq->qdisc);
struct neighbour *n = q->ncache;
if (mn->tbl == NULL)
@@ -262,17 +262,26 @@ __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *
}
static inline int teql_resolve(struct sk_buff *skb,
- struct sk_buff *skb_res, struct net_device *dev)
+ struct sk_buff *skb_res,
+ struct net_device *dev,
+ struct netdev_queue *txq)
{
- struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
+ struct dst_entry *dst = skb_dst(skb);
+ struct neighbour *mn;
+ int res;
+
if (txq->qdisc == &noop_qdisc)
return -ENODEV;
- if (dev->header_ops == NULL ||
- skb_dst(skb) == NULL ||
- dst_get_neighbour(skb_dst(skb)) == NULL)
+ if (!dev->header_ops || !dst)
return 0;
- return __teql_resolve(skb, skb_res, dev);
+
+ rcu_read_lock();
+ mn = dst_get_neighbour_noref(dst);
+ res = mn ? __teql_resolve(skb, skb_res, dev, txq, mn) : 0;
+ rcu_read_unlock();
+
+ return res;
}
static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
@@ -301,18 +310,18 @@ restart:
if (slave_txq->qdisc_sleeping != q)
continue;
- if (__netif_subqueue_stopped(slave, subq) ||
+ if (netif_xmit_stopped(netdev_get_tx_queue(slave, subq)) ||
!netif_running(slave)) {
busy = 1;
continue;
}
- switch (teql_resolve(skb, skb_res, slave)) {
+ switch (teql_resolve(skb, skb_res, slave, slave_txq)) {
case 0:
if (__netif_tx_trylock(slave_txq)) {
unsigned int length = qdisc_pkt_len(skb);
- if (!netif_tx_queue_frozen_or_stopped(slave_txq) &&
+ if (!netif_xmit_frozen_or_stopped(slave_txq) &&
slave_ops->ndo_start_xmit(skb, slave) == NETDEV_TX_OK) {
txq_trans_update(slave_txq);
__netif_tx_unlock(slave_txq);
@@ -324,7 +333,7 @@ restart:
}
__netif_tx_unlock(slave_txq);
}
- if (netif_queue_stopped(dev))
+ if (netif_xmit_stopped(netdev_get_tx_queue(dev, 0)))
busy = 1;
break;
case 1:
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 152b5b3c3fff..acd2edbc073e 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -173,7 +173,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
asoc->timeouts[SCTP_EVENT_TIMEOUT_HEARTBEAT] = 0;
asoc->timeouts[SCTP_EVENT_TIMEOUT_SACK] = asoc->sackdelay;
asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE] =
- (unsigned long)sp->autoclose * HZ;
+ min_t(unsigned long, sp->autoclose, sctp_max_autoclose) * HZ;
/* Initializes the timers */
for (i = SCTP_EVENT_TIMEOUT_NONE; i < SCTP_NUM_TIMEOUT_TYPES; ++i)
diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index 865e68fef21c..bf812048cf6f 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -82,7 +82,7 @@ static struct sctp_auth_bytes *sctp_auth_create_key(__u32 key_len, gfp_t gfp)
struct sctp_auth_bytes *key;
/* Verify that we are not going to overflow INT_MAX */
- if ((INT_MAX - key_len) < sizeof(struct sctp_auth_bytes))
+ if (key_len > (INT_MAX - sizeof(struct sctp_auth_bytes)))
return NULL;
/* Allocate the shared key */
diff --git a/net/sctp/input.c b/net/sctp/input.c
index b7692aab6e9c..80f71af71384 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -105,7 +105,7 @@ static inline int sctp_rcv_checksum(struct sk_buff *skb)
struct sctp_input_cb {
union {
struct inet_skb_parm h4;
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
struct inet6_skb_parm h6;
#endif
} header;
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 810427833bcd..91f479121c55 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -107,7 +107,7 @@ static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev,
if (addr) {
addr->a.v6.sin6_family = AF_INET6;
addr->a.v6.sin6_port = 0;
- ipv6_addr_copy(&addr->a.v6.sin6_addr, &ifa->addr);
+ addr->a.v6.sin6_addr = ifa->addr;
addr->a.v6.sin6_scope_id = ifa->idev->dev->ifindex;
addr->valid = 1;
spin_lock_bh(&sctp_local_addr_lock);
@@ -219,8 +219,8 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport)
/* Fill in the dest address from the route entry passed with the skb
* and the source address from the transport.
*/
- ipv6_addr_copy(&fl6.daddr, &transport->ipaddr.v6.sin6_addr);
- ipv6_addr_copy(&fl6.saddr, &transport->saddr.v6.sin6_addr);
+ fl6.daddr = transport->ipaddr.v6.sin6_addr;
+ fl6.saddr = transport->saddr.v6.sin6_addr;
fl6.flowlabel = np->flow_label;
IP6_ECN_flow_xmit(sk, fl6.flowlabel);
@@ -231,7 +231,7 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport)
if (np->opt && np->opt->srcrt) {
struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
- ipv6_addr_copy(&fl6.daddr, rt0->addr);
+ fl6.daddr = *rt0->addr;
}
SCTP_DEBUG_PRINTK("%s: skb:%p, len:%d, src:%pI6 dst:%pI6\n",
@@ -265,7 +265,7 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
sctp_scope_t scope;
memset(fl6, 0, sizeof(struct flowi6));
- ipv6_addr_copy(&fl6->daddr, &daddr->v6.sin6_addr);
+ fl6->daddr = daddr->v6.sin6_addr;
fl6->fl6_dport = daddr->v6.sin6_port;
fl6->flowi6_proto = IPPROTO_SCTP;
if (ipv6_addr_type(&daddr->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL)
@@ -277,7 +277,7 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
fl6->fl6_sport = htons(asoc->base.bind_addr.port);
if (saddr) {
- ipv6_addr_copy(&fl6->saddr, &saddr->v6.sin6_addr);
+ fl6->saddr = saddr->v6.sin6_addr;
fl6->fl6_sport = saddr->v6.sin6_port;
SCTP_DEBUG_PRINTK("SRC=%pI6 - ", &fl6->saddr);
}
@@ -334,7 +334,7 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
}
rcu_read_unlock();
if (baddr) {
- ipv6_addr_copy(&fl6->saddr, &baddr->v6.sin6_addr);
+ fl6->saddr = baddr->v6.sin6_addr;
fl6->fl6_sport = baddr->v6.sin6_port;
dst = ip6_dst_lookup_flow(sk, fl6, NULL, false);
}
@@ -375,7 +375,7 @@ static void sctp_v6_get_saddr(struct sctp_sock *sk,
if (t->dst) {
saddr->v6.sin6_family = AF_INET6;
- ipv6_addr_copy(&saddr->v6.sin6_addr, &fl6->saddr);
+ saddr->v6.sin6_addr = fl6->saddr;
}
}
@@ -400,7 +400,7 @@ static void sctp_v6_copy_addrlist(struct list_head *addrlist,
if (addr) {
addr->a.v6.sin6_family = AF_INET6;
addr->a.v6.sin6_port = 0;
- ipv6_addr_copy(&addr->a.v6.sin6_addr, &ifp->addr);
+ addr->a.v6.sin6_addr = ifp->addr;
addr->a.v6.sin6_scope_id = dev->ifindex;
addr->valid = 1;
INIT_LIST_HEAD(&addr->list);
@@ -416,7 +416,6 @@ static void sctp_v6_copy_addrlist(struct list_head *addrlist,
static void sctp_v6_from_skb(union sctp_addr *addr,struct sk_buff *skb,
int is_saddr)
{
- void *from;
__be16 *port;
struct sctphdr *sh;
@@ -428,12 +427,11 @@ static void sctp_v6_from_skb(union sctp_addr *addr,struct sk_buff *skb,
sh = sctp_hdr(skb);
if (is_saddr) {
*port = sh->source;
- from = &ipv6_hdr(skb)->saddr;
+ addr->v6.sin6_addr = ipv6_hdr(skb)->saddr;
} else {
*port = sh->dest;
- from = &ipv6_hdr(skb)->daddr;
+ addr->v6.sin6_addr = ipv6_hdr(skb)->daddr;
}
- ipv6_addr_copy(&addr->v6.sin6_addr, from);
}
/* Initialize an sctp_addr from a socket. */
@@ -441,7 +439,7 @@ static void sctp_v6_from_sk(union sctp_addr *addr, struct sock *sk)
{
addr->v6.sin6_family = AF_INET6;
addr->v6.sin6_port = 0;
- ipv6_addr_copy(&addr->v6.sin6_addr, &inet6_sk(sk)->rcv_saddr);
+ addr->v6.sin6_addr = inet6_sk(sk)->rcv_saddr;
}
/* Initialize sk->sk_rcv_saddr from sctp_addr. */
@@ -454,7 +452,7 @@ static void sctp_v6_to_sk_saddr(union sctp_addr *addr, struct sock *sk)
inet6_sk(sk)->rcv_saddr.s6_addr32[3] =
addr->v4.sin_addr.s_addr;
} else {
- ipv6_addr_copy(&inet6_sk(sk)->rcv_saddr, &addr->v6.sin6_addr);
+ inet6_sk(sk)->rcv_saddr = addr->v6.sin6_addr;
}
}
@@ -467,7 +465,7 @@ static void sctp_v6_to_sk_daddr(union sctp_addr *addr, struct sock *sk)
inet6_sk(sk)->daddr.s6_addr32[2] = htonl(0x0000ffff);
inet6_sk(sk)->daddr.s6_addr32[3] = addr->v4.sin_addr.s_addr;
} else {
- ipv6_addr_copy(&inet6_sk(sk)->daddr, &addr->v6.sin6_addr);
+ inet6_sk(sk)->daddr = addr->v6.sin6_addr;
}
}
@@ -479,7 +477,7 @@ static void sctp_v6_from_addr_param(union sctp_addr *addr,
addr->v6.sin6_family = AF_INET6;
addr->v6.sin6_port = port;
addr->v6.sin6_flowinfo = 0; /* BUG */
- ipv6_addr_copy(&addr->v6.sin6_addr, &param->v6.addr);
+ addr->v6.sin6_addr = param->v6.addr;
addr->v6.sin6_scope_id = iif;
}
@@ -493,7 +491,7 @@ static int sctp_v6_to_addr_param(const union sctp_addr *addr,
param->v6.param_hdr.type = SCTP_PARAM_IPV6_ADDRESS;
param->v6.param_hdr.length = htons(length);
- ipv6_addr_copy(&param->v6.addr, &addr->v6.sin6_addr);
+ param->v6.addr = addr->v6.sin6_addr;
return length;
}
@@ -504,7 +502,7 @@ static void sctp_v6_to_addr(union sctp_addr *addr, struct in6_addr *saddr,
{
addr->sa.sa_family = AF_INET6;
addr->v6.sin6_port = port;
- ipv6_addr_copy(&addr->v6.sin6_addr, saddr);
+ addr->v6.sin6_addr = *saddr;
}
/* Compare addresses exactly.
@@ -759,7 +757,7 @@ static void sctp_inet6_event_msgname(struct sctp_ulpevent *event,
}
sin6from = &asoc->peer.primary_addr.v6;
- ipv6_addr_copy(&sin6->sin6_addr, &sin6from->sin6_addr);
+ sin6->sin6_addr = sin6from->sin6_addr;
if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
sin6->sin6_scope_id = sin6from->sin6_scope_id;
}
@@ -787,7 +785,7 @@ static void sctp_inet6_skb_msgname(struct sk_buff *skb, char *msgname,
}
/* Otherwise, just copy the v6 address. */
- ipv6_addr_copy(&sin6->sin6_addr, &ipv6_hdr(skb)->saddr);
+ sin6->sin6_addr = ipv6_hdr(skb)->saddr;
if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) {
struct sctp_ulpevent *ev = sctp_skb2event(skb);
sin6->sin6_scope_id = ev->iif;
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 08b3cead6503..817174eb5f41 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -697,13 +697,7 @@ static void sctp_packet_append_data(struct sctp_packet *packet,
/* Keep track of how many bytes are in flight to the receiver. */
asoc->outqueue.outstanding_bytes += datasize;
- /* Update our view of the receiver's rwnd. Include sk_buff overhead
- * while updating peer.rwnd so that it reduces the chances of a
- * receiver running out of receive buffer space even when receive
- * window is still open. This can happen when a sender is sending
- * sending small messages.
- */
- datasize += sizeof(struct sk_buff);
+ /* Update our view of the receiver's rwnd. */
if (datasize < rwnd)
rwnd -= datasize;
else
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 14c2b06028ff..cfeb1d4a1ee6 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -411,8 +411,7 @@ void sctp_retransmit_mark(struct sctp_outq *q,
chunk->transport->flight_size -=
sctp_data_size(chunk);
q->outstanding_bytes -= sctp_data_size(chunk);
- q->asoc->peer.rwnd += (sctp_data_size(chunk) +
- sizeof(struct sk_buff));
+ q->asoc->peer.rwnd += sctp_data_size(chunk);
}
continue;
}
@@ -432,8 +431,7 @@ void sctp_retransmit_mark(struct sctp_outq *q,
* (Section 7.2.4)), add the data size of those
* chunks to the rwnd.
*/
- q->asoc->peer.rwnd += (sctp_data_size(chunk) +
- sizeof(struct sk_buff));
+ q->asoc->peer.rwnd += sctp_data_size(chunk);
q->outstanding_bytes -= sctp_data_size(chunk);
if (chunk->transport)
transport->flight_size -= sctp_data_size(chunk);
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 61b9fca5a173..5942d27b1444 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -637,7 +637,7 @@ void sctp_addr_wq_timeout_handler(unsigned long arg)
" for cmd %d at entry %p\n", &sctp_addr_waitq, &addrw->a, addrw->state,
addrw);
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
/* Now we send an ASCONF for each association */
/* Note. we currently don't handle link local IPv6 addressees */
if (addrw->a.sa.sa_family == AF_INET6) {
@@ -1285,6 +1285,9 @@ SCTP_STATIC __init int sctp_init(void)
sctp_max_instreams = SCTP_DEFAULT_INSTREAMS;
sctp_max_outstreams = SCTP_DEFAULT_OUTSTREAMS;
+ /* Initialize maximum autoclose timeout. */
+ sctp_max_autoclose = INT_MAX / HZ;
+
/* Initialize handle used for association ids. */
idr_init(&sctp_assocs_id);
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 0121e0ab0351..a85eeeb55dd0 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -3400,8 +3400,10 @@ int sctp_process_asconf_ack(struct sctp_association *asoc,
asconf_len -= length;
}
- if (no_err && asoc->src_out_of_asoc_ok)
+ if (no_err && asoc->src_out_of_asoc_ok) {
asoc->src_out_of_asoc_ok = 0;
+ sctp_transport_immediate_rtx(asoc->peer.primary_path);
+ }
/* Free the cached last sent asconf chunk. */
list_del_init(&asconf->transmitted_list);
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 76388b083f28..1ff51c9d18d5 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -666,6 +666,7 @@ static void sctp_cmd_transport_on(sctp_cmd_seq_t *cmds,
struct sctp_chunk *chunk)
{
sctp_sender_hb_info_t *hbinfo;
+ int was_unconfirmed = 0;
/* 8.3 Upon the receipt of the HEARTBEAT ACK, the sender of the
* HEARTBEAT should clear the error counter of the destination
@@ -692,9 +693,11 @@ static void sctp_cmd_transport_on(sctp_cmd_seq_t *cmds,
/* Mark the destination transport address as active if it is not so
* marked.
*/
- if ((t->state == SCTP_INACTIVE) || (t->state == SCTP_UNCONFIRMED))
+ if ((t->state == SCTP_INACTIVE) || (t->state == SCTP_UNCONFIRMED)) {
+ was_unconfirmed = 1;
sctp_assoc_control_transport(asoc, t, SCTP_TRANSPORT_UP,
SCTP_HEARTBEAT_SUCCESS);
+ }
/* The receiver of the HEARTBEAT ACK should also perform an
* RTT measurement for that destination transport address
@@ -712,6 +715,9 @@ static void sctp_cmd_transport_on(sctp_cmd_seq_t *cmds,
/* Update the heartbeat timer. */
if (!mod_timer(&t->hb_timer, sctp_transport_timeout(t)))
sctp_transport_hold(t);
+
+ if (was_unconfirmed && asoc->peer.transport_count == 1)
+ sctp_transport_immediate_rtx(t);
}
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 13bf5fcdbff1..408ebd0e7330 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -804,7 +804,7 @@ static int sctp_send_asconf_del_ip(struct sock *sk,
struct sockaddr_in6 *sin6;
sin6 = (struct sockaddr_in6 *)addrs;
- ipv6_addr_copy(&asoc->asconf_addr_del_pending->v6.sin6_addr, &sin6->sin6_addr);
+ asoc->asconf_addr_del_pending->v6.sin6_addr = sin6->sin6_addr;
}
SCTP_DEBUG_PRINTK_IPADDR("send_asconf_del_ip: keep the last address asoc: %p ",
" at %p\n", asoc, asoc->asconf_addr_del_pending,
@@ -2200,8 +2200,6 @@ static int sctp_setsockopt_autoclose(struct sock *sk, char __user *optval,
return -EINVAL;
if (copy_from_user(&sp->autoclose, optval, optlen))
return -EFAULT;
- /* make sure it won't exceed MAX_SCHEDULE_TIMEOUT */
- sp->autoclose = min_t(long, sp->autoclose, MAX_SCHEDULE_TIMEOUT / HZ);
return 0;
}
@@ -6841,7 +6839,7 @@ struct proto sctp_prot = {
.sockets_allocated = &sctp_sockets_allocated,
};
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
struct proto sctpv6_prot = {
.name = "SCTPv6",
@@ -6872,4 +6870,4 @@ struct proto sctpv6_prot = {
.memory_allocated = &sctp_memory_allocated,
.sockets_allocated = &sctp_sockets_allocated,
};
-#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
+#endif /* IS_ENABLED(CONFIG_IPV6) */
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index 6b3952961b85..60ffbd067ff7 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -53,6 +53,10 @@ static int sack_timer_min = 1;
static int sack_timer_max = 500;
static int addr_scope_max = 3; /* check sctp_scope_policy_t in include/net/sctp/constants.h for max entries */
static int rwnd_scale_max = 16;
+static unsigned long max_autoclose_min = 0;
+static unsigned long max_autoclose_max =
+ (MAX_SCHEDULE_TIMEOUT / HZ > UINT_MAX)
+ ? UINT_MAX : MAX_SCHEDULE_TIMEOUT / HZ;
extern long sysctl_sctp_mem[3];
extern int sysctl_sctp_rmem[3];
@@ -258,6 +262,15 @@ static ctl_table sctp_table[] = {
.extra1 = &one,
.extra2 = &rwnd_scale_max,
},
+ {
+ .procname = "max_autoclose",
+ .data = &sctp_max_autoclose,
+ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+ .proc_handler = &proc_doulongvec_minmax,
+ .extra1 = &max_autoclose_min,
+ .extra2 = &max_autoclose_max,
+ },
{ /* sentinel */ }
};
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 394c57ca2f54..3889330b7b04 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -641,3 +641,19 @@ void sctp_transport_reset(struct sctp_transport *t)
t->cacc.next_tsn_at_change = 0;
t->cacc.cacc_saw_newack = 0;
}
+
+/* Schedule retransmission on the given transport */
+void sctp_transport_immediate_rtx(struct sctp_transport *t)
+{
+ /* Stop pending T3_rtx_timer */
+ if (timer_pending(&t->T3_rtx_timer)) {
+ (void)del_timer(&t->T3_rtx_timer);
+ sctp_transport_put(t);
+ }
+ sctp_retransmit(&t->asoc->outqueue, t, SCTP_RTXR_T3_RTX);
+ if (!timer_pending(&t->T3_rtx_timer)) {
+ if (!mod_timer(&t->T3_rtx_timer, jiffies + t->rto))
+ sctp_transport_hold(t);
+ }
+ return;
+}
diff --git a/net/socket.c b/net/socket.c
index 425ef4270460..e62b4f055071 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -551,6 +551,8 @@ static inline int __sock_sendmsg_nosec(struct kiocb *iocb, struct socket *sock,
sock_update_classid(sock->sk);
+ sock_update_netprioidx(sock->sk);
+
si->sock = sock;
si->scm = NULL;
si->msg = msg;
diff --git a/net/sunrpc/addr.c b/net/sunrpc/addr.c
index 67a655ee82a9..ee77742e0ed6 100644
--- a/net/sunrpc/addr.c
+++ b/net/sunrpc/addr.c
@@ -21,7 +21,7 @@
#include <linux/slab.h>
#include <linux/export.h>
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
static size_t rpc_ntop6_noscopeid(const struct sockaddr *sap,
char *buf, const int buflen)
@@ -91,7 +91,7 @@ static size_t rpc_ntop6(const struct sockaddr *sap,
return len;
}
-#else /* !(defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)) */
+#else /* !IS_ENABLED(CONFIG_IPV6) */
static size_t rpc_ntop6_noscopeid(const struct sockaddr *sap,
char *buf, const int buflen)
@@ -105,7 +105,7 @@ static size_t rpc_ntop6(const struct sockaddr *sap,
return 0;
}
-#endif /* !(defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)) */
+#endif /* !IS_ENABLED(CONFIG_IPV6) */
static int rpc_ntop4(const struct sockaddr *sap,
char *buf, const size_t buflen)
@@ -155,7 +155,7 @@ static size_t rpc_pton4(const char *buf, const size_t buflen,
return sizeof(struct sockaddr_in);
}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
static int rpc_parse_scope_id(const char *buf, const size_t buflen,
const char *delim, struct sockaddr_in6 *sin6)
{
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index d12ffa545811..00a1a2acd587 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -590,6 +590,27 @@ void rpc_prepare_task(struct rpc_task *task)
task->tk_ops->rpc_call_prepare(task, task->tk_calldata);
}
+static void
+rpc_init_task_statistics(struct rpc_task *task)
+{
+ /* Initialize retry counters */
+ task->tk_garb_retry = 2;
+ task->tk_cred_retry = 2;
+ task->tk_rebind_retry = 2;
+
+ /* starting timestamp */
+ task->tk_start = ktime_get();
+}
+
+static void
+rpc_reset_task_statistics(struct rpc_task *task)
+{
+ task->tk_timeouts = 0;
+ task->tk_flags &= ~(RPC_CALL_MAJORSEEN|RPC_TASK_KILLED|RPC_TASK_SENT);
+
+ rpc_init_task_statistics(task);
+}
+
/*
* Helper that calls task->tk_ops->rpc_call_done if it exists
*/
@@ -602,6 +623,7 @@ void rpc_exit_task(struct rpc_task *task)
WARN_ON(RPC_ASSASSINATED(task));
/* Always release the RPC slot and buffer memory */
xprt_release(task);
+ rpc_reset_task_statistics(task);
}
}
}
@@ -804,11 +826,6 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta
task->tk_calldata = task_setup_data->callback_data;
INIT_LIST_HEAD(&task->tk_task);
- /* Initialize retry counters */
- task->tk_garb_retry = 2;
- task->tk_cred_retry = 2;
- task->tk_rebind_retry = 2;
-
task->tk_priority = task_setup_data->priority - RPC_PRIORITY_LOW;
task->tk_owner = current->tgid;
@@ -818,8 +835,7 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta
if (task->tk_ops->rpc_call_prepare != NULL)
task->tk_action = rpc_prepare_task;
- /* starting timestamp */
- task->tk_start = ktime_get();
+ rpc_init_task_statistics(task);
dprintk("RPC: new task initialized, procpid %u\n",
task_pid_nr(current));
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 6e038884ae0c..9d01d46b05f3 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -826,7 +826,7 @@ static int __svc_rpcb_register4(const u32 program, const u32 version,
return error;
}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
/*
* Register an "inet6" protocol family netid with the local
* rpcbind daemon via an rpcbind v4 SET request.
@@ -872,7 +872,7 @@ static int __svc_rpcb_register6(const u32 program, const u32 version,
return error;
}
-#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
+#endif /* IS_ENABLED(CONFIG_IPV6) */
/*
* Register a kernel RPC service via rpcbind version 4.
@@ -893,11 +893,11 @@ static int __svc_register(const char *progname,
error = __svc_rpcb_register4(program, version,
protocol, port);
break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
case PF_INET6:
error = __svc_rpcb_register6(program, version,
protocol, port);
-#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
+#endif
}
if (error < 0)
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 447cd0eb415c..38649cfa4e81 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -179,13 +179,13 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
.sin_addr.s_addr = htonl(INADDR_ANY),
.sin_port = htons(port),
};
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
struct sockaddr_in6 sin6 = {
.sin6_family = AF_INET6,
.sin6_addr = IN6ADDR_ANY_INIT,
.sin6_port = htons(port),
};
-#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
+#endif
struct sockaddr *sap;
size_t len;
@@ -194,12 +194,12 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
sap = (struct sockaddr *)&sin;
len = sizeof(sin);
break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
case PF_INET6:
sap = (struct sockaddr *)&sin6;
len = sizeof(sin6);
break;
-#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
+#endif
default:
return ERR_PTR(-EAFNOSUPPORT);
}
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index ce136323da8b..01153ead1dba 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -134,7 +134,7 @@ static void ip_map_init(struct cache_head *cnew, struct cache_head *citem)
struct ip_map *item = container_of(citem, struct ip_map, h);
strcpy(new->m_class, item->m_class);
- ipv6_addr_copy(&new->m_addr, &item->m_addr);
+ new->m_addr = item->m_addr;
}
static void update(struct cache_head *cnew, struct cache_head *citem)
{
@@ -220,7 +220,7 @@ static int ip_map_parse(struct cache_detail *cd,
ipv6_addr_set_v4mapped(address.s4.sin_addr.s_addr,
&sin6.sin6_addr);
break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
memcpy(&sin6, &address.s6, sizeof(sin6));
break;
@@ -274,7 +274,7 @@ static int ip_map_show(struct seq_file *m,
}
im = container_of(h, struct ip_map, h);
/* class addr domain */
- ipv6_addr_copy(&addr, &im->m_addr);
+ addr = im->m_addr;
if (test_bit(CACHE_VALID, &h->flags) &&
!test_bit(CACHE_NEGATIVE, &h->flags))
@@ -297,7 +297,7 @@ static struct ip_map *__ip_map_lookup(struct cache_detail *cd, char *class,
struct cache_head *ch;
strcpy(ip.m_class, class);
- ipv6_addr_copy(&ip.m_addr, addr);
+ ip.m_addr = *addr;
ch = sunrpc_cache_lookup(cd, &ip.h,
hash_str(class, IP_HASHBITS) ^
hash_ip6(*addr));
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 71bed1c1c77a..4653286fcc9e 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -157,7 +157,7 @@ static void svc_set_cmsg_data(struct svc_rqst *rqstp, struct cmsghdr *cmh)
cmh->cmsg_level = SOL_IPV6;
cmh->cmsg_type = IPV6_PKTINFO;
pki->ipi6_ifindex = daddr->sin6_scope_id;
- ipv6_addr_copy(&pki->ipi6_addr, &daddr->sin6_addr);
+ pki->ipi6_addr = daddr->sin6_addr;
cmh->cmsg_len = CMSG_LEN(sizeof(*pki));
}
break;
@@ -523,7 +523,7 @@ static int svc_udp_get_dest_address6(struct svc_rqst *rqstp,
return 0;
daddr->sin6_family = AF_INET6;
- ipv6_addr_copy(&daddr->sin6_addr, &pki->ipi6_addr);
+ daddr->sin6_addr = pki->ipi6_addr;
daddr->sin6_scope_id = pki->ipi6_ifindex;
return 1;
}
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index f4385e45a5fc..c64c0ef519b5 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -995,13 +995,11 @@ out_init_req:
static void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req)
{
- if (xprt_dynamic_free_slot(xprt, req))
- return;
-
- memset(req, 0, sizeof(*req)); /* mark unused */
-
spin_lock(&xprt->reserve_lock);
- list_add(&req->rq_list, &xprt->free);
+ if (!xprt_dynamic_free_slot(xprt, req)) {
+ memset(req, 0, sizeof(*req)); /* mark unused */
+ list_add(&req->rq_list, &xprt->free);
+ }
rpc_wake_up_next(&xprt->backlog);
spin_unlock(&xprt->reserve_lock);
}
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index d7f97ef26590..55472c48825e 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -496,7 +496,7 @@ static int xs_nospace(struct rpc_task *task)
struct rpc_rqst *req = task->tk_rqstp;
struct rpc_xprt *xprt = req->rq_xprt;
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
- int ret = 0;
+ int ret = -EAGAIN;
dprintk("RPC: %5u xmit incomplete (%u left of %u)\n",
task->tk_pid, req->rq_slen - req->rq_bytes_sent,
@@ -508,7 +508,6 @@ static int xs_nospace(struct rpc_task *task)
/* Don't race with disconnect */
if (xprt_connected(xprt)) {
if (test_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags)) {
- ret = -EAGAIN;
/*
* Notify TCP that we're limited by the application
* window size
@@ -2530,8 +2529,10 @@ static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args,
int err;
err = xs_init_anyaddr(args->dstaddr->sa_family,
(struct sockaddr *)&new->srcaddr);
- if (err != 0)
+ if (err != 0) {
+ xprt_free(xprt);
return ERR_PTR(err);
+ }
}
return xprt;
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 28908f54459e..8eb87b11d100 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -46,7 +46,7 @@
#define BCLINK_WIN_DEFAULT 20 /* bcast link window size (default) */
/**
- * struct bcbearer_pair - a pair of bearers used by broadcast link
+ * struct tipc_bcbearer_pair - a pair of bearers used by broadcast link
* @primary: pointer to primary bearer
* @secondary: pointer to secondary bearer
*
@@ -54,13 +54,13 @@
* to be paired.
*/
-struct bcbearer_pair {
+struct tipc_bcbearer_pair {
struct tipc_bearer *primary;
struct tipc_bearer *secondary;
};
/**
- * struct bcbearer - bearer used by broadcast link
+ * struct tipc_bcbearer - bearer used by broadcast link
* @bearer: (non-standard) broadcast bearer structure
* @media: (non-standard) broadcast media structure
* @bpairs: array of bearer pairs
@@ -74,38 +74,40 @@ struct bcbearer_pair {
* prevented through use of the spinlock "bc_lock".
*/
-struct bcbearer {
+struct tipc_bcbearer {
struct tipc_bearer bearer;
- struct media media;
- struct bcbearer_pair bpairs[MAX_BEARERS];
- struct bcbearer_pair bpairs_temp[TIPC_MAX_LINK_PRI + 1];
+ struct tipc_media media;
+ struct tipc_bcbearer_pair bpairs[MAX_BEARERS];
+ struct tipc_bcbearer_pair bpairs_temp[TIPC_MAX_LINK_PRI + 1];
struct tipc_node_map remains;
struct tipc_node_map remains_new;
};
/**
- * struct bclink - link used for broadcast messages
+ * struct tipc_bclink - link used for broadcast messages
* @link: (non-standard) broadcast link structure
* @node: (non-standard) node structure representing b'cast link's peer node
+ * @bcast_nodes: map of broadcast-capable nodes
* @retransmit_to: node that most recently requested a retransmit
*
* Handles sequence numbering, fragmentation, bundling, etc.
*/
-struct bclink {
- struct link link;
+struct tipc_bclink {
+ struct tipc_link link;
struct tipc_node node;
+ struct tipc_node_map bcast_nodes;
struct tipc_node *retransmit_to;
};
+static struct tipc_bcbearer bcast_bearer;
+static struct tipc_bclink bcast_link;
-static struct bcbearer *bcbearer;
-static struct bclink *bclink;
-static struct link *bcl;
-static DEFINE_SPINLOCK(bc_lock);
+static struct tipc_bcbearer *bcbearer = &bcast_bearer;
+static struct tipc_bclink *bclink = &bcast_link;
+static struct tipc_link *bcl = &bcast_link.link;
-/* broadcast-capable node map */
-struct tipc_node_map tipc_bcast_nmap;
+static DEFINE_SPINLOCK(bc_lock);
const char tipc_bclink_name[] = "broadcast-link";
@@ -113,11 +115,6 @@ static void tipc_nmap_diff(struct tipc_node_map *nm_a,
struct tipc_node_map *nm_b,
struct tipc_node_map *nm_diff);
-static u32 buf_seqno(struct sk_buff *buf)
-{
- return msg_seqno(buf_msg(buf));
-}
-
static u32 bcbuf_acks(struct sk_buff *buf)
{
return (u32)(unsigned long)TIPC_SKB_CB(buf)->handle;
@@ -133,6 +130,19 @@ static void bcbuf_decr_acks(struct sk_buff *buf)
bcbuf_set_acks(buf, bcbuf_acks(buf) - 1);
}
+void tipc_bclink_add_node(u32 addr)
+{
+ spin_lock_bh(&bc_lock);
+ tipc_nmap_add(&bclink->bcast_nodes, addr);
+ spin_unlock_bh(&bc_lock);
+}
+
+void tipc_bclink_remove_node(u32 addr)
+{
+ spin_lock_bh(&bc_lock);
+ tipc_nmap_remove(&bclink->bcast_nodes, addr);
+ spin_unlock_bh(&bc_lock);
+}
static void bclink_set_last_sent(void)
{
@@ -222,14 +232,36 @@ void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked)
struct sk_buff *next;
unsigned int released = 0;
- if (less_eq(acked, n_ptr->bclink.acked))
- return;
-
spin_lock_bh(&bc_lock);
- /* Skip over packets that node has previously acknowledged */
-
+ /* Bail out if tx queue is empty (no clean up is required) */
crs = bcl->first_out;
+ if (!crs)
+ goto exit;
+
+ /* Determine which messages need to be acknowledged */
+ if (acked == INVALID_LINK_SEQ) {
+ /*
+ * Contact with specified node has been lost, so need to
+ * acknowledge sent messages only (if other nodes still exist)
+ * or both sent and unsent messages (otherwise)
+ */
+ if (bclink->bcast_nodes.count)
+ acked = bcl->fsm_msg_cnt;
+ else
+ acked = bcl->next_out_no;
+ } else {
+ /*
+ * Bail out if specified sequence number does not correspond
+ * to a message that has been sent and not yet acknowledged
+ */
+ if (less(acked, buf_seqno(crs)) ||
+ less(bcl->fsm_msg_cnt, acked) ||
+ less_eq(acked, n_ptr->bclink.acked))
+ goto exit;
+ }
+
+ /* Skip over packets that node has previously acknowledged */
while (crs && less_eq(buf_seqno(crs), n_ptr->bclink.acked))
crs = crs->next;
@@ -237,7 +269,15 @@ void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked)
while (crs && less_eq(buf_seqno(crs), acked)) {
next = crs->next;
- bcbuf_decr_acks(crs);
+
+ if (crs != bcl->next_out)
+ bcbuf_decr_acks(crs);
+ else {
+ bcbuf_set_acks(crs, 0);
+ bcl->next_out = next;
+ bclink_set_last_sent();
+ }
+
if (bcbuf_acks(crs) == 0) {
bcl->first_out = next;
bcl->out_queue_size--;
@@ -256,6 +296,7 @@ void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked)
}
if (unlikely(released && !list_empty(&bcl->waiting_ports)))
tipc_link_wakeup_ports(bcl, 0);
+exit:
spin_unlock_bh(&bc_lock);
}
@@ -267,7 +308,7 @@ void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked)
static void bclink_send_ack(struct tipc_node *n_ptr)
{
- struct link *l_ptr = n_ptr->active_links[n_ptr->addr & 1];
+ struct tipc_link *l_ptr = n_ptr->active_links[n_ptr->addr & 1];
if (l_ptr != NULL)
tipc_link_send_proto_msg(l_ptr, STATE_MSG, 0, 0, 0, 0, 0);
@@ -402,13 +443,19 @@ int tipc_bclink_send_msg(struct sk_buff *buf)
spin_lock_bh(&bc_lock);
+ if (!bclink->bcast_nodes.count) {
+ res = msg_data_sz(buf_msg(buf));
+ buf_discard(buf);
+ goto exit;
+ }
+
res = tipc_link_send_buf(bcl, buf);
- if (likely(res > 0))
+ if (likely(res >= 0)) {
bclink_set_last_sent();
-
- bcl->stats.queue_sz_counts++;
- bcl->stats.accu_queue_sz += bcl->out_queue_size;
-
+ bcl->stats.queue_sz_counts++;
+ bcl->stats.accu_queue_sz += bcl->out_queue_size;
+ }
+exit:
spin_unlock_bh(&bc_lock);
return res;
}
@@ -572,13 +619,13 @@ static int tipc_bcbearer_send(struct sk_buff *buf,
if (likely(!msg_non_seq(buf_msg(buf)))) {
struct tipc_msg *msg;
- bcbuf_set_acks(buf, tipc_bcast_nmap.count);
+ bcbuf_set_acks(buf, bclink->bcast_nodes.count);
msg = buf_msg(buf);
msg_set_non_seq(msg, 1);
msg_set_mc_netid(msg, tipc_net_id);
bcl->stats.sent_info++;
- if (WARN_ON(!tipc_bcast_nmap.count)) {
+ if (WARN_ON(!bclink->bcast_nodes.count)) {
dump_stack();
return 0;
}
@@ -586,7 +633,7 @@ static int tipc_bcbearer_send(struct sk_buff *buf,
/* Send buffer over bearers until all targets reached */
- bcbearer->remains = tipc_bcast_nmap;
+ bcbearer->remains = bclink->bcast_nodes;
for (bp_index = 0; bp_index < MAX_BEARERS; bp_index++) {
struct tipc_bearer *p = bcbearer->bpairs[bp_index].primary;
@@ -630,8 +677,8 @@ static int tipc_bcbearer_send(struct sk_buff *buf,
void tipc_bcbearer_sort(void)
{
- struct bcbearer_pair *bp_temp = bcbearer->bpairs_temp;
- struct bcbearer_pair *bp_curr;
+ struct tipc_bcbearer_pair *bp_temp = bcbearer->bpairs_temp;
+ struct tipc_bcbearer_pair *bp_curr;
int b_index;
int pri;
@@ -752,25 +799,13 @@ int tipc_bclink_set_queue_limits(u32 limit)
return 0;
}
-int tipc_bclink_init(void)
+void tipc_bclink_init(void)
{
- bcbearer = kzalloc(sizeof(*bcbearer), GFP_ATOMIC);
- bclink = kzalloc(sizeof(*bclink), GFP_ATOMIC);
- if (!bcbearer || !bclink) {
- warn("Broadcast link creation failed, no memory\n");
- kfree(bcbearer);
- bcbearer = NULL;
- kfree(bclink);
- bclink = NULL;
- return -ENOMEM;
- }
-
INIT_LIST_HEAD(&bcbearer->bearer.cong_links);
bcbearer->bearer.media = &bcbearer->media;
bcbearer->media.send_msg = tipc_bcbearer_send;
sprintf(bcbearer->media.name, "tipc-broadcast");
- bcl = &bclink->link;
INIT_LIST_HEAD(&bcl->waiting_ports);
bcl->next_out_no = 1;
spin_lock_init(&bclink->node.lock);
@@ -780,22 +815,16 @@ int tipc_bclink_init(void)
bcl->b_ptr = &bcbearer->bearer;
bcl->state = WORKING_WORKING;
strlcpy(bcl->name, tipc_bclink_name, TIPC_MAX_LINK_NAME);
-
- return 0;
}
void tipc_bclink_stop(void)
{
spin_lock_bh(&bc_lock);
- if (bcbearer) {
- tipc_link_stop(bcl);
- bcl = NULL;
- kfree(bclink);
- bclink = NULL;
- kfree(bcbearer);
- bcbearer = NULL;
- }
+ tipc_link_stop(bcl);
spin_unlock_bh(&bc_lock);
+
+ memset(bclink, 0, sizeof(*bclink));
+ memset(bcbearer, 0, sizeof(*bcbearer));
}
@@ -864,9 +893,9 @@ static void tipc_nmap_diff(struct tipc_node_map *nm_a,
* tipc_port_list_add - add a port to a port list, ensuring no duplicates
*/
-void tipc_port_list_add(struct port_list *pl_ptr, u32 port)
+void tipc_port_list_add(struct tipc_port_list *pl_ptr, u32 port)
{
- struct port_list *item = pl_ptr;
+ struct tipc_port_list *item = pl_ptr;
int i;
int item_sz = PLSIZE;
int cnt = pl_ptr->count;
@@ -898,10 +927,10 @@ void tipc_port_list_add(struct port_list *pl_ptr, u32 port)
*
*/
-void tipc_port_list_free(struct port_list *pl_ptr)
+void tipc_port_list_free(struct tipc_port_list *pl_ptr)
{
- struct port_list *item;
- struct port_list *next;
+ struct tipc_port_list *item;
+ struct tipc_port_list *next;
for (item = pl_ptr->next; item; item = next) {
next = item->next;
diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h
index 06740da5ae61..b009666c60b0 100644
--- a/net/tipc/bcast.h
+++ b/net/tipc/bcast.h
@@ -51,20 +51,18 @@ struct tipc_node_map {
u32 map[MAX_NODES / WSIZE];
};
-extern struct tipc_node_map tipc_bcast_nmap;
-
#define PLSIZE 32
/**
- * struct port_list - set of node local destination ports
+ * struct tipc_port_list - set of node local destination ports
* @count: # of ports in set (only valid for first entry in list)
* @next: pointer to next entry in list
* @ports: array of port references
*/
-struct port_list {
+struct tipc_port_list {
int count;
- struct port_list *next;
+ struct tipc_port_list *next;
u32 ports[PLSIZE];
};
@@ -85,11 +83,13 @@ static inline int tipc_nmap_equal(struct tipc_node_map *nm_a, struct tipc_node_m
return !memcmp(nm_a, nm_b, sizeof(*nm_a));
}
-void tipc_port_list_add(struct port_list *pl_ptr, u32 port);
-void tipc_port_list_free(struct port_list *pl_ptr);
+void tipc_port_list_add(struct tipc_port_list *pl_ptr, u32 port);
+void tipc_port_list_free(struct tipc_port_list *pl_ptr);
-int tipc_bclink_init(void);
+void tipc_bclink_init(void);
void tipc_bclink_stop(void);
+void tipc_bclink_add_node(u32 addr);
+void tipc_bclink_remove_node(u32 addr);
struct tipc_node *tipc_bclink_retransmit_to(void);
void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked);
int tipc_bclink_send_msg(struct sk_buff *buf);
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index e2202de3d93e..329fb659fae4 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -41,7 +41,7 @@
#define MAX_ADDR_STR 32
-static struct media media_list[MAX_MEDIA];
+static struct tipc_media *media_list[MAX_MEDIA];
static u32 media_count;
struct tipc_bearer tipc_bearers[MAX_BEARERS];
@@ -65,17 +65,31 @@ static int media_name_valid(const char *name)
}
/**
- * media_find - locates specified media object by name
+ * tipc_media_find - locates specified media object by name
*/
-static struct media *media_find(const char *name)
+struct tipc_media *tipc_media_find(const char *name)
{
- struct media *m_ptr;
u32 i;
- for (i = 0, m_ptr = media_list; i < media_count; i++, m_ptr++) {
- if (!strcmp(m_ptr->name, name))
- return m_ptr;
+ for (i = 0; i < media_count; i++) {
+ if (!strcmp(media_list[i]->name, name))
+ return media_list[i];
+ }
+ return NULL;
+}
+
+/**
+ * media_find_id - locates specified media object by type identifier
+ */
+
+static struct tipc_media *media_find_id(u8 type)
+{
+ u32 i;
+
+ for (i = 0; i < media_count; i++) {
+ if (media_list[i]->type_id == type)
+ return media_list[i];
}
return NULL;
}
@@ -86,87 +100,34 @@ static struct media *media_find(const char *name)
* Bearers for this media type must be activated separately at a later stage.
*/
-int tipc_register_media(u32 media_type,
- char *name,
- int (*enable)(struct tipc_bearer *),
- void (*disable)(struct tipc_bearer *),
- int (*send_msg)(struct sk_buff *,
- struct tipc_bearer *,
- struct tipc_media_addr *),
- char *(*addr2str)(struct tipc_media_addr *a,
- char *str_buf, int str_size),
- struct tipc_media_addr *bcast_addr,
- const u32 bearer_priority,
- const u32 link_tolerance, /* [ms] */
- const u32 send_window_limit)
+int tipc_register_media(struct tipc_media *m_ptr)
{
- struct media *m_ptr;
- u32 media_id;
- u32 i;
int res = -EINVAL;
write_lock_bh(&tipc_net_lock);
- if (tipc_mode != TIPC_NET_MODE) {
- warn("Media <%s> rejected, not in networked mode yet\n", name);
+ if (!media_name_valid(m_ptr->name))
goto exit;
- }
- if (!media_name_valid(name)) {
- warn("Media <%s> rejected, illegal name\n", name);
+ if ((m_ptr->bcast_addr.media_id != m_ptr->type_id) ||
+ !m_ptr->bcast_addr.broadcast)
goto exit;
- }
- if (!bcast_addr) {
- warn("Media <%s> rejected, no broadcast address\n", name);
+ if (m_ptr->priority > TIPC_MAX_LINK_PRI)
goto exit;
- }
- if ((bearer_priority < TIPC_MIN_LINK_PRI) ||
- (bearer_priority > TIPC_MAX_LINK_PRI)) {
- warn("Media <%s> rejected, illegal priority (%u)\n", name,
- bearer_priority);
+ if ((m_ptr->tolerance < TIPC_MIN_LINK_TOL) ||
+ (m_ptr->tolerance > TIPC_MAX_LINK_TOL))
goto exit;
- }
- if ((link_tolerance < TIPC_MIN_LINK_TOL) ||
- (link_tolerance > TIPC_MAX_LINK_TOL)) {
- warn("Media <%s> rejected, illegal tolerance (%u)\n", name,
- link_tolerance);
+ if (media_count >= MAX_MEDIA)
goto exit;
- }
-
- media_id = media_count++;
- if (media_id >= MAX_MEDIA) {
- warn("Media <%s> rejected, media limit reached (%u)\n", name,
- MAX_MEDIA);
- media_count--;
+ if (tipc_media_find(m_ptr->name) || media_find_id(m_ptr->type_id))
goto exit;
- }
- for (i = 0; i < media_id; i++) {
- if (media_list[i].type_id == media_type) {
- warn("Media <%s> rejected, duplicate type (%u)\n", name,
- media_type);
- media_count--;
- goto exit;
- }
- if (!strcmp(name, media_list[i].name)) {
- warn("Media <%s> rejected, duplicate name\n", name);
- media_count--;
- goto exit;
- }
- }
- m_ptr = &media_list[media_id];
- m_ptr->type_id = media_type;
- m_ptr->send_msg = send_msg;
- m_ptr->enable_bearer = enable;
- m_ptr->disable_bearer = disable;
- m_ptr->addr2str = addr2str;
- memcpy(&m_ptr->bcast_addr, bcast_addr, sizeof(*bcast_addr));
- strcpy(m_ptr->name, name);
- m_ptr->priority = bearer_priority;
- m_ptr->tolerance = link_tolerance;
- m_ptr->window = send_window_limit;
+ media_list[media_count] = m_ptr;
+ media_count++;
res = 0;
exit:
write_unlock_bh(&tipc_net_lock);
+ if (res)
+ warn("Media <%s> registration error\n", m_ptr->name);
return res;
}
@@ -176,27 +137,19 @@ exit:
void tipc_media_addr_printf(struct print_buf *pb, struct tipc_media_addr *a)
{
- struct media *m_ptr;
- u32 media_type;
- u32 i;
+ char addr_str[MAX_ADDR_STR];
+ struct tipc_media *m_ptr;
- media_type = ntohl(a->type);
- for (i = 0, m_ptr = media_list; i < media_count; i++, m_ptr++) {
- if (m_ptr->type_id == media_type)
- break;
- }
-
- if ((i < media_count) && (m_ptr->addr2str != NULL)) {
- char addr_str[MAX_ADDR_STR];
+ m_ptr = media_find_id(a->media_id);
- tipc_printf(pb, "%s(%s)", m_ptr->name,
- m_ptr->addr2str(a, addr_str, sizeof(addr_str)));
- } else {
- unchar *addr = (unchar *)&a->dev_addr;
+ if (m_ptr && !m_ptr->addr2str(a, addr_str, sizeof(addr_str)))
+ tipc_printf(pb, "%s(%s)", m_ptr->name, addr_str);
+ else {
+ u32 i;
- tipc_printf(pb, "UNKNOWN(%u)", media_type);
- for (i = 0; i < (sizeof(*a) - sizeof(a->type)); i++)
- tipc_printf(pb, "-%02x", addr[i]);
+ tipc_printf(pb, "UNKNOWN(%u)", a->media_id);
+ for (i = 0; i < sizeof(a->value); i++)
+ tipc_printf(pb, "-%02x", a->value[i]);
}
}
@@ -207,7 +160,6 @@ void tipc_media_addr_printf(struct print_buf *pb, struct tipc_media_addr *a)
struct sk_buff *tipc_media_get_names(void)
{
struct sk_buff *buf;
- struct media *m_ptr;
int i;
buf = tipc_cfg_reply_alloc(MAX_MEDIA * TLV_SPACE(TIPC_MAX_MEDIA_NAME));
@@ -215,9 +167,10 @@ struct sk_buff *tipc_media_get_names(void)
return NULL;
read_lock_bh(&tipc_net_lock);
- for (i = 0, m_ptr = media_list; i < media_count; i++, m_ptr++) {
- tipc_cfg_append_tlv(buf, TIPC_TLV_MEDIA_NAME, m_ptr->name,
- strlen(m_ptr->name) + 1);
+ for (i = 0; i < media_count; i++) {
+ tipc_cfg_append_tlv(buf, TIPC_TLV_MEDIA_NAME,
+ media_list[i]->name,
+ strlen(media_list[i]->name) + 1);
}
read_unlock_bh(&tipc_net_lock);
return buf;
@@ -232,7 +185,7 @@ struct sk_buff *tipc_media_get_names(void)
*/
static int bearer_name_validate(const char *name,
- struct bearer_name *name_parts)
+ struct tipc_bearer_names *name_parts)
{
char name_copy[TIPC_MAX_BEARER_NAME];
char *media_name;
@@ -276,10 +229,10 @@ static int bearer_name_validate(const char *name,
}
/**
- * bearer_find - locates bearer object with matching bearer name
+ * tipc_bearer_find - locates bearer object with matching bearer name
*/
-static struct tipc_bearer *bearer_find(const char *name)
+struct tipc_bearer *tipc_bearer_find(const char *name)
{
struct tipc_bearer *b_ptr;
u32 i;
@@ -318,7 +271,6 @@ struct tipc_bearer *tipc_bearer_find_interface(const char *if_name)
struct sk_buff *tipc_bearer_get_names(void)
{
struct sk_buff *buf;
- struct media *m_ptr;
struct tipc_bearer *b_ptr;
int i, j;
@@ -327,10 +279,10 @@ struct sk_buff *tipc_bearer_get_names(void)
return NULL;
read_lock_bh(&tipc_net_lock);
- for (i = 0, m_ptr = media_list; i < media_count; i++, m_ptr++) {
+ for (i = 0; i < media_count; i++) {
for (j = 0; j < MAX_BEARERS; j++) {
b_ptr = &tipc_bearers[j];
- if (b_ptr->active && (b_ptr->media == m_ptr)) {
+ if (b_ptr->active && (b_ptr->media == media_list[i])) {
tipc_cfg_append_tlv(buf, TIPC_TLV_BEARER_NAME,
b_ptr->name,
strlen(b_ptr->name) + 1);
@@ -366,7 +318,7 @@ void tipc_bearer_remove_dest(struct tipc_bearer *b_ptr, u32 dest)
static int bearer_push(struct tipc_bearer *b_ptr)
{
u32 res = 0;
- struct link *ln, *tln;
+ struct tipc_link *ln, *tln;
if (b_ptr->blocked)
return 0;
@@ -412,7 +364,8 @@ void tipc_continue(struct tipc_bearer *b_ptr)
* bearer.lock is busy
*/
-static void tipc_bearer_schedule_unlocked(struct tipc_bearer *b_ptr, struct link *l_ptr)
+static void tipc_bearer_schedule_unlocked(struct tipc_bearer *b_ptr,
+ struct tipc_link *l_ptr)
{
list_move_tail(&l_ptr->link_list, &b_ptr->cong_links);
}
@@ -425,7 +378,7 @@ static void tipc_bearer_schedule_unlocked(struct tipc_bearer *b_ptr, struct link
* bearer.lock is free
*/
-void tipc_bearer_schedule(struct tipc_bearer *b_ptr, struct link *l_ptr)
+void tipc_bearer_schedule(struct tipc_bearer *b_ptr, struct tipc_link *l_ptr)
{
spin_lock_bh(&b_ptr->lock);
tipc_bearer_schedule_unlocked(b_ptr, l_ptr);
@@ -438,7 +391,8 @@ void tipc_bearer_schedule(struct tipc_bearer *b_ptr, struct link *l_ptr)
* and if there is, try to resolve it before returning.
* 'tipc_net_lock' is read_locked when this function is called
*/
-int tipc_bearer_resolve_congestion(struct tipc_bearer *b_ptr, struct link *l_ptr)
+int tipc_bearer_resolve_congestion(struct tipc_bearer *b_ptr,
+ struct tipc_link *l_ptr)
{
int res = 1;
@@ -457,7 +411,7 @@ int tipc_bearer_resolve_congestion(struct tipc_bearer *b_ptr, struct link *l_ptr
* tipc_bearer_congested - determines if bearer is currently congested
*/
-int tipc_bearer_congested(struct tipc_bearer *b_ptr, struct link *l_ptr)
+int tipc_bearer_congested(struct tipc_bearer *b_ptr, struct tipc_link *l_ptr)
{
if (unlikely(b_ptr->blocked))
return 1;
@@ -473,8 +427,8 @@ int tipc_bearer_congested(struct tipc_bearer *b_ptr, struct link *l_ptr)
int tipc_enable_bearer(const char *name, u32 disc_domain, u32 priority)
{
struct tipc_bearer *b_ptr;
- struct media *m_ptr;
- struct bearer_name b_name;
+ struct tipc_media *m_ptr;
+ struct tipc_bearer_names b_names;
char addr_string[16];
u32 bearer_id;
u32 with_this_prio;
@@ -486,7 +440,7 @@ int tipc_enable_bearer(const char *name, u32 disc_domain, u32 priority)
name);
return -ENOPROTOOPT;
}
- if (!bearer_name_validate(name, &b_name)) {
+ if (!bearer_name_validate(name, &b_names)) {
warn("Bearer <%s> rejected, illegal name\n", name);
return -EINVAL;
}
@@ -511,10 +465,10 @@ int tipc_enable_bearer(const char *name, u32 disc_domain, u32 priority)
write_lock_bh(&tipc_net_lock);
- m_ptr = media_find(b_name.media_name);
+ m_ptr = tipc_media_find(b_names.media_name);
if (!m_ptr) {
warn("Bearer <%s> rejected, media <%s> not registered\n", name,
- b_name.media_name);
+ b_names.media_name);
goto exit;
}
@@ -561,6 +515,8 @@ restart:
b_ptr->identity = bearer_id;
b_ptr->media = m_ptr;
+ b_ptr->tolerance = m_ptr->tolerance;
+ b_ptr->window = m_ptr->window;
b_ptr->net_plane = bearer_id + 'A';
b_ptr->active = 1;
b_ptr->priority = priority;
@@ -590,11 +546,11 @@ exit:
int tipc_block_bearer(const char *name)
{
struct tipc_bearer *b_ptr = NULL;
- struct link *l_ptr;
- struct link *temp_l_ptr;
+ struct tipc_link *l_ptr;
+ struct tipc_link *temp_l_ptr;
read_lock_bh(&tipc_net_lock);
- b_ptr = bearer_find(name);
+ b_ptr = tipc_bearer_find(name);
if (!b_ptr) {
warn("Attempt to block unknown bearer <%s>\n", name);
read_unlock_bh(&tipc_net_lock);
@@ -625,8 +581,8 @@ int tipc_block_bearer(const char *name)
static void bearer_disable(struct tipc_bearer *b_ptr)
{
- struct link *l_ptr;
- struct link *temp_l_ptr;
+ struct tipc_link *l_ptr;
+ struct tipc_link *temp_l_ptr;
info("Disabling bearer <%s>\n", b_ptr->name);
spin_lock_bh(&b_ptr->lock);
@@ -648,7 +604,7 @@ int tipc_disable_bearer(const char *name)
int res;
write_lock_bh(&tipc_net_lock);
- b_ptr = bearer_find(name);
+ b_ptr = tipc_bearer_find(name);
if (b_ptr == NULL) {
warn("Attempt to disable unknown bearer <%s>\n", name);
res = -EINVAL;
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index d696f9e414e3..d3eac56b8c21 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -43,32 +43,45 @@
#define MAX_MEDIA 2
/*
+ * Identifiers associated with TIPC message header media address info
+ *
+ * - address info field is 20 bytes long
+ * - media type identifier located at offset 3
+ * - remaining bytes vary according to media type
+ */
+
+#define TIPC_MEDIA_ADDR_SIZE 20
+#define TIPC_MEDIA_TYPE_OFFSET 3
+
+/*
* Identifiers of supported TIPC media types
*/
#define TIPC_MEDIA_TYPE_ETH 1
/*
- * Destination address structure used by TIPC bearers when sending messages
- *
- * IMPORTANT: The fields of this structure MUST be stored using the specified
- * byte order indicated below, as the structure is exchanged between nodes
- * as part of a link setup process.
+ * struct tipc_media_addr - destination address used by TIPC bearers
+ * @value: address info (format defined by media)
+ * @media_id: TIPC media type identifier
+ * @broadcast: non-zero if address is a broadcast address
*/
+
struct tipc_media_addr {
- __be32 type; /* bearer type (network byte order) */
- union {
- __u8 eth_addr[6]; /* 48 bit Ethernet addr (byte array) */
- } dev_addr;
+ u8 value[TIPC_MEDIA_ADDR_SIZE];
+ u8 media_id;
+ u8 broadcast;
};
struct tipc_bearer;
/**
- * struct media - TIPC media information available to internal users
+ * struct tipc_media - TIPC media information available to internal users
* @send_msg: routine which handles buffer transmission
* @enable_bearer: routine which enables a bearer
* @disable_bearer: routine which disables a bearer
- * @addr2str: routine which converts bearer's address to string form
+ * @addr2str: routine which converts media address to string
+ * @str2addr: routine which converts media address from string
+ * @addr2msg: routine which converts media address to protocol message area
+ * @msg2addr: routine which converts media address from protocol message area
* @bcast_addr: media address used in broadcasting
* @priority: default link (and bearer) priority
* @tolerance: default time (in ms) before declaring link failure
@@ -77,14 +90,16 @@ struct tipc_bearer;
* @name: media name
*/
-struct media {
+struct tipc_media {
int (*send_msg)(struct sk_buff *buf,
struct tipc_bearer *b_ptr,
struct tipc_media_addr *dest);
int (*enable_bearer)(struct tipc_bearer *b_ptr);
void (*disable_bearer)(struct tipc_bearer *b_ptr);
- char *(*addr2str)(struct tipc_media_addr *a,
- char *str_buf, int str_size);
+ int (*addr2str)(struct tipc_media_addr *a, char *str_buf, int str_size);
+ int (*str2addr)(struct tipc_media_addr *a, char *str_buf);
+ int (*addr2msg)(struct tipc_media_addr *a, char *msg_area);
+ int (*msg2addr)(struct tipc_media_addr *a, char *msg_area);
struct tipc_media_addr bcast_addr;
u32 priority;
u32 tolerance;
@@ -103,6 +118,8 @@ struct media {
* @name: bearer name (format = media:interface)
* @media: ptr to media structure associated with bearer
* @priority: default link priority for bearer
+ * @window: default window size for bearer
+ * @tolerance: default link tolerance for bearer
* @identity: array index of this bearer within TIPC bearer array
* @link_req: ptr to (optional) structure making periodic link setup requests
* @links: list of non-congested links associated with bearer
@@ -122,10 +139,12 @@ struct tipc_bearer {
struct tipc_media_addr addr; /* initalized by media */
char name[TIPC_MAX_BEARER_NAME];
spinlock_t lock;
- struct media *media;
+ struct tipc_media *media;
u32 priority;
+ u32 window;
+ u32 tolerance;
u32 identity;
- struct link_req *link_req;
+ struct tipc_link_req *link_req;
struct list_head links;
struct list_head cong_links;
int active;
@@ -133,28 +152,19 @@ struct tipc_bearer {
struct tipc_node_map nodes;
};
-struct bearer_name {
+struct tipc_bearer_names {
char media_name[TIPC_MAX_MEDIA_NAME];
char if_name[TIPC_MAX_IF_NAME];
};
-struct link;
+struct tipc_link;
extern struct tipc_bearer tipc_bearers[];
/*
* TIPC routines available to supported media types
*/
-int tipc_register_media(u32 media_type,
- char *media_name, int (*enable)(struct tipc_bearer *),
- void (*disable)(struct tipc_bearer *),
- int (*send_msg)(struct sk_buff *,
- struct tipc_bearer *, struct tipc_media_addr *),
- char *(*addr2str)(struct tipc_media_addr *a,
- char *str_buf, int str_size),
- struct tipc_media_addr *bcast_addr, const u32 bearer_priority,
- const u32 link_tolerance, /* [ms] */
- const u32 send_window_limit);
+int tipc_register_media(struct tipc_media *m_ptr);
void tipc_recv_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr);
@@ -170,16 +180,21 @@ int tipc_disable_bearer(const char *name);
int tipc_eth_media_start(void);
void tipc_eth_media_stop(void);
+int tipc_media_set_priority(const char *name, u32 new_value);
+int tipc_media_set_window(const char *name, u32 new_value);
void tipc_media_addr_printf(struct print_buf *pb, struct tipc_media_addr *a);
struct sk_buff *tipc_media_get_names(void);
struct sk_buff *tipc_bearer_get_names(void);
void tipc_bearer_add_dest(struct tipc_bearer *b_ptr, u32 dest);
void tipc_bearer_remove_dest(struct tipc_bearer *b_ptr, u32 dest);
-void tipc_bearer_schedule(struct tipc_bearer *b_ptr, struct link *l_ptr);
+void tipc_bearer_schedule(struct tipc_bearer *b_ptr, struct tipc_link *l_ptr);
+struct tipc_bearer *tipc_bearer_find(const char *name);
struct tipc_bearer *tipc_bearer_find_interface(const char *if_name);
-int tipc_bearer_resolve_congestion(struct tipc_bearer *b_ptr, struct link *l_ptr);
-int tipc_bearer_congested(struct tipc_bearer *b_ptr, struct link *l_ptr);
+struct tipc_media *tipc_media_find(const char *name);
+int tipc_bearer_resolve_congestion(struct tipc_bearer *b_ptr,
+ struct tipc_link *l_ptr);
+int tipc_bearer_congested(struct tipc_bearer *b_ptr, struct tipc_link *l_ptr);
void tipc_bearer_stop(void);
void tipc_bearer_lock_push(struct tipc_bearer *b_ptr);
diff --git a/net/tipc/config.c b/net/tipc/config.c
index b25a396b7e1e..4785bf26cdf4 100644
--- a/net/tipc/config.c
+++ b/net/tipc/config.c
@@ -184,13 +184,12 @@ static struct sk_buff *cfg_set_own_addr(void)
" (cannot change node address once assigned)");
/*
- * Must release all spinlocks before calling start_net() because
- * Linux version of TIPC calls eth_media_start() which calls
- * register_netdevice_notifier() which may block!
- *
- * Temporarily releasing the lock should be harmless for non-Linux TIPC,
- * but Linux version of eth_media_start() should really be reworked
- * so that it can be called with spinlocks held.
+ * Must temporarily release configuration spinlock while switching into
+ * networking mode as it calls tipc_eth_media_start(), which may sleep.
+ * Releasing the lock is harmless as other locally-issued configuration
+ * commands won't occur until this one completes, and remotely-issued
+ * configuration commands can't be received until a local configuration
+ * command to enable the first bearer is received and processed.
*/
spin_unlock_bh(&config_lock);
diff --git a/net/tipc/core.c b/net/tipc/core.c
index c21331d58fdb..2691cd57b8a8 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -99,8 +99,8 @@ struct sk_buff *tipc_buf_acquire(u32 size)
static void tipc_core_stop_net(void)
{
- tipc_eth_media_stop();
tipc_net_stop();
+ tipc_eth_media_stop();
}
/**
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index f2fb96e86ee8..a00e5f811569 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -45,7 +45,7 @@
/**
- * struct link_req - information about an ongoing link setup request
+ * struct tipc_link_req - information about an ongoing link setup request
* @bearer: bearer issuing requests
* @dest: destination address for request messages
* @domain: network domain to which links can be established
@@ -54,7 +54,7 @@
* @timer: timer governing period between requests
* @timer_intv: current interval between requests (in ms)
*/
-struct link_req {
+struct tipc_link_req {
struct tipc_bearer *bearer;
struct tipc_media_addr dest;
u32 domain;
@@ -84,7 +84,7 @@ static struct sk_buff *tipc_disc_init_msg(u32 type,
msg_set_non_seq(msg, 1);
msg_set_dest_domain(msg, dest_domain);
msg_set_bc_netid(msg, tipc_net_id);
- msg_set_media_addr(msg, &b_ptr->addr);
+ b_ptr->media->addr2msg(&b_ptr->addr, msg_media_addr(msg));
}
return buf;
}
@@ -120,7 +120,7 @@ static void disc_dupl_alert(struct tipc_bearer *b_ptr, u32 node_addr,
void tipc_disc_recv_msg(struct sk_buff *buf, struct tipc_bearer *b_ptr)
{
struct tipc_node *n_ptr;
- struct link *link;
+ struct tipc_link *link;
struct tipc_media_addr media_addr, *addr;
struct sk_buff *rbuf;
struct tipc_msg *msg = buf_msg(buf);
@@ -130,12 +130,15 @@ void tipc_disc_recv_msg(struct sk_buff *buf, struct tipc_bearer *b_ptr)
u32 type = msg_type(msg);
int link_fully_up;
- msg_get_media_addr(msg, &media_addr);
+ media_addr.broadcast = 1;
+ b_ptr->media->msg2addr(&media_addr, msg_media_addr(msg));
buf_discard(buf);
/* Validate discovery message from requesting node */
if (net_id != tipc_net_id)
return;
+ if (media_addr.broadcast)
+ return;
if (!tipc_addr_domain_valid(dest))
return;
if (!tipc_addr_node_valid(orig))
@@ -215,7 +218,7 @@ void tipc_disc_recv_msg(struct sk_buff *buf, struct tipc_bearer *b_ptr)
* and is either not currently searching or is searching at a slow rate
*/
-static void disc_update(struct link_req *req)
+static void disc_update(struct tipc_link_req *req)
{
if (!req->num_nodes) {
if ((req->timer_intv == TIPC_LINK_REQ_INACTIVE) ||
@@ -231,7 +234,7 @@ static void disc_update(struct link_req *req)
* @req: ptr to link request structure
*/
-void tipc_disc_add_dest(struct link_req *req)
+void tipc_disc_add_dest(struct tipc_link_req *req)
{
req->num_nodes++;
}
@@ -241,7 +244,7 @@ void tipc_disc_add_dest(struct link_req *req)
* @req: ptr to link request structure
*/
-void tipc_disc_remove_dest(struct link_req *req)
+void tipc_disc_remove_dest(struct tipc_link_req *req)
{
req->num_nodes--;
disc_update(req);
@@ -252,7 +255,7 @@ void tipc_disc_remove_dest(struct link_req *req)
* @req: ptr to link request structure
*/
-static void disc_send_msg(struct link_req *req)
+static void disc_send_msg(struct tipc_link_req *req)
{
if (!req->bearer->blocked)
tipc_bearer_send(req->bearer, req->buf, &req->dest);
@@ -265,7 +268,7 @@ static void disc_send_msg(struct link_req *req)
* Called whenever a link setup request timer associated with a bearer expires.
*/
-static void disc_timeout(struct link_req *req)
+static void disc_timeout(struct tipc_link_req *req)
{
int max_delay;
@@ -313,7 +316,7 @@ exit:
int tipc_disc_create(struct tipc_bearer *b_ptr,
struct tipc_media_addr *dest, u32 dest_domain)
{
- struct link_req *req;
+ struct tipc_link_req *req;
req = kmalloc(sizeof(*req), GFP_ATOMIC);
if (!req)
@@ -342,7 +345,7 @@ int tipc_disc_create(struct tipc_bearer *b_ptr,
* @req: ptr to link request structure
*/
-void tipc_disc_delete(struct link_req *req)
+void tipc_disc_delete(struct tipc_link_req *req)
{
k_cancel_timer(&req->timer);
k_term_timer(&req->timer);
diff --git a/net/tipc/discover.h b/net/tipc/discover.h
index a3af595b86cb..75b67c403aa3 100644
--- a/net/tipc/discover.h
+++ b/net/tipc/discover.h
@@ -37,13 +37,13 @@
#ifndef _TIPC_DISCOVER_H
#define _TIPC_DISCOVER_H
-struct link_req;
+struct tipc_link_req;
int tipc_disc_create(struct tipc_bearer *b_ptr, struct tipc_media_addr *dest,
u32 dest_domain);
-void tipc_disc_delete(struct link_req *req);
-void tipc_disc_add_dest(struct link_req *req);
-void tipc_disc_remove_dest(struct link_req *req);
+void tipc_disc_delete(struct tipc_link_req *req);
+void tipc_disc_add_dest(struct tipc_link_req *req);
+void tipc_disc_remove_dest(struct tipc_link_req *req);
void tipc_disc_recv_msg(struct sk_buff *buf, struct tipc_bearer *b_ptr);
#endif
diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c
index e728d4ce2a1b..527e3f0e165d 100644
--- a/net/tipc/eth_media.c
+++ b/net/tipc/eth_media.c
@@ -38,28 +38,45 @@
#include "bearer.h"
#define MAX_ETH_BEARERS MAX_BEARERS
-#define ETH_LINK_PRIORITY TIPC_DEF_LINK_PRI
-#define ETH_LINK_TOLERANCE TIPC_DEF_LINK_TOL
-#define ETH_LINK_WINDOW TIPC_DEF_LINK_WIN
+
+#define ETH_ADDR_OFFSET 4 /* message header offset of MAC address */
/**
* struct eth_bearer - Ethernet bearer data structure
* @bearer: ptr to associated "generic" bearer structure
* @dev: ptr to associated Ethernet network device
* @tipc_packet_type: used in binding TIPC to Ethernet driver
+ * @cleanup: work item used when disabling bearer
*/
struct eth_bearer {
struct tipc_bearer *bearer;
struct net_device *dev;
struct packet_type tipc_packet_type;
+ struct work_struct cleanup;
};
+static struct tipc_media eth_media_info;
static struct eth_bearer eth_bearers[MAX_ETH_BEARERS];
static int eth_started;
static struct notifier_block notifier;
/**
+ * eth_media_addr_set - initialize Ethernet media address structure
+ *
+ * Media-dependent "value" field stores MAC address in first 6 bytes
+ * and zeroes out the remaining bytes.
+ */
+
+static void eth_media_addr_set(struct tipc_media_addr *a, char *mac)
+{
+ memcpy(a->value, mac, ETH_ALEN);
+ memset(a->value + ETH_ALEN, 0, sizeof(a->value) - ETH_ALEN);
+ a->media_id = TIPC_MEDIA_TYPE_ETH;
+ a->broadcast = !memcmp(mac, eth_media_info.bcast_addr.value, ETH_ALEN);
+}
+
+/**
* send_msg - send a TIPC message out over an Ethernet interface
*/
@@ -85,7 +102,7 @@ static int send_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr,
skb_reset_network_header(clone);
clone->dev = dev;
- dev_hard_header(clone, dev, ETH_P_TIPC, &dest->dev_addr.eth_addr,
+ dev_hard_header(clone, dev, ETH_P_TIPC, dest->value,
dev->dev_addr, clone->len);
dev_queue_xmit(clone);
return 0;
@@ -172,22 +189,41 @@ static int enable_bearer(struct tipc_bearer *tb_ptr)
tb_ptr->usr_handle = (void *)eb_ptr;
tb_ptr->mtu = dev->mtu;
tb_ptr->blocked = 0;
- tb_ptr->addr.type = htonl(TIPC_MEDIA_TYPE_ETH);
- memcpy(&tb_ptr->addr.dev_addr, dev->dev_addr, ETH_ALEN);
+ eth_media_addr_set(&tb_ptr->addr, (char *)dev->dev_addr);
return 0;
}
/**
+ * cleanup_bearer - break association between Ethernet bearer and interface
+ *
+ * This routine must be invoked from a work queue because it can sleep.
+ */
+
+static void cleanup_bearer(struct work_struct *work)
+{
+ struct eth_bearer *eb_ptr =
+ container_of(work, struct eth_bearer, cleanup);
+
+ dev_remove_pack(&eb_ptr->tipc_packet_type);
+ dev_put(eb_ptr->dev);
+ eb_ptr->dev = NULL;
+}
+
+/**
* disable_bearer - detach TIPC bearer from an Ethernet interface
*
- * We really should do dev_remove_pack() here, but this function can not be
- * called at tasklet level. => Use eth_bearer->bearer as a flag to throw away
- * incoming buffers, & postpone dev_remove_pack() to eth_media_stop() on exit.
+ * Mark Ethernet bearer as inactive so that incoming buffers are thrown away,
+ * then get worker thread to complete bearer cleanup. (Can't do cleanup
+ * here because cleanup code needs to sleep and caller holds spinlocks.)
*/
static void disable_bearer(struct tipc_bearer *tb_ptr)
{
- ((struct eth_bearer *)tb_ptr->usr_handle)->bearer = NULL;
+ struct eth_bearer *eb_ptr = (struct eth_bearer *)tb_ptr->usr_handle;
+
+ eb_ptr->bearer = NULL;
+ INIT_WORK(&eb_ptr->cleanup, cleanup_bearer);
+ schedule_work(&eb_ptr->cleanup);
}
/**
@@ -246,17 +282,81 @@ static int recv_notification(struct notifier_block *nb, unsigned long evt,
* eth_addr2str - convert Ethernet address to string
*/
-static char *eth_addr2str(struct tipc_media_addr *a, char *str_buf, int str_size)
+static int eth_addr2str(struct tipc_media_addr *a, char *str_buf, int str_size)
+{
+ if (str_size < 18) /* 18 = strlen("aa:bb:cc:dd:ee:ff\0") */
+ return 1;
+
+ sprintf(str_buf, "%pM", a->value);
+ return 0;
+}
+
+/**
+ * eth_str2addr - convert string to Ethernet address
+ */
+
+static int eth_str2addr(struct tipc_media_addr *a, char *str_buf)
+{
+ char mac[ETH_ALEN];
+ int r;
+
+ r = sscanf(str_buf, "%02x:%02x:%02x:%02x:%02x:%02x",
+ (u32 *)&mac[0], (u32 *)&mac[1], (u32 *)&mac[2],
+ (u32 *)&mac[3], (u32 *)&mac[4], (u32 *)&mac[5]);
+
+ if (r != ETH_ALEN)
+ return 1;
+
+ eth_media_addr_set(a, mac);
+ return 0;
+}
+
+/**
+ * eth_str2addr - convert Ethernet address format to message header format
+ */
+
+static int eth_addr2msg(struct tipc_media_addr *a, char *msg_area)
{
- unchar *addr = (unchar *)&a->dev_addr;
+ memset(msg_area, 0, TIPC_MEDIA_ADDR_SIZE);
+ msg_area[TIPC_MEDIA_TYPE_OFFSET] = TIPC_MEDIA_TYPE_ETH;
+ memcpy(msg_area + ETH_ADDR_OFFSET, a->value, ETH_ALEN);
+ return 0;
+}
- if (str_size < 18)
- *str_buf = '\0';
- else
- sprintf(str_buf, "%pM", addr);
- return str_buf;
+/**
+ * eth_str2addr - convert message header address format to Ethernet format
+ */
+
+static int eth_msg2addr(struct tipc_media_addr *a, char *msg_area)
+{
+ if (msg_area[TIPC_MEDIA_TYPE_OFFSET] != TIPC_MEDIA_TYPE_ETH)
+ return 1;
+
+ eth_media_addr_set(a, msg_area + ETH_ADDR_OFFSET);
+ return 0;
}
+/*
+ * Ethernet media registration info
+ */
+
+static struct tipc_media eth_media_info = {
+ .send_msg = send_msg,
+ .enable_bearer = enable_bearer,
+ .disable_bearer = disable_bearer,
+ .addr2str = eth_addr2str,
+ .str2addr = eth_str2addr,
+ .addr2msg = eth_addr2msg,
+ .msg2addr = eth_msg2addr,
+ .bcast_addr = { { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
+ TIPC_MEDIA_TYPE_ETH, 1 },
+ .priority = TIPC_DEF_LINK_PRI,
+ .tolerance = TIPC_DEF_LINK_TOL,
+ .window = TIPC_DEF_LINK_WIN,
+ .type_id = TIPC_MEDIA_TYPE_ETH,
+ .name = "eth"
+};
+
/**
* tipc_eth_media_start - activate Ethernet bearer support
*
@@ -266,21 +366,12 @@ static char *eth_addr2str(struct tipc_media_addr *a, char *str_buf, int str_size
int tipc_eth_media_start(void)
{
- struct tipc_media_addr bcast_addr;
int res;
if (eth_started)
return -EINVAL;
- bcast_addr.type = htonl(TIPC_MEDIA_TYPE_ETH);
- memset(&bcast_addr.dev_addr, 0xff, ETH_ALEN);
-
- memset(eth_bearers, 0, sizeof(eth_bearers));
-
- res = tipc_register_media(TIPC_MEDIA_TYPE_ETH, "eth",
- enable_bearer, disable_bearer, send_msg,
- eth_addr2str, &bcast_addr, ETH_LINK_PRIORITY,
- ETH_LINK_TOLERANCE, ETH_LINK_WINDOW);
+ res = tipc_register_media(&eth_media_info);
if (res)
return res;
@@ -298,22 +389,10 @@ int tipc_eth_media_start(void)
void tipc_eth_media_stop(void)
{
- int i;
-
if (!eth_started)
return;
+ flush_scheduled_work();
unregister_netdevice_notifier(&notifier);
- for (i = 0; i < MAX_ETH_BEARERS ; i++) {
- if (eth_bearers[i].bearer) {
- eth_bearers[i].bearer->blocked = 1;
- eth_bearers[i].bearer = NULL;
- }
- if (eth_bearers[i].dev) {
- dev_remove_pack(&eth_bearers[i].tipc_packet_type);
- dev_put(eth_bearers[i].dev);
- }
- }
- memset(&eth_bearers, 0, sizeof(eth_bearers));
eth_started = 0;
}
diff --git a/net/tipc/link.c b/net/tipc/link.c
index ae98a72da11a..ac1832a66f8a 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -71,35 +71,36 @@
#define START_CHANGEOVER 100000u
/**
- * struct link_name - deconstructed link name
+ * struct tipc_link_name - deconstructed link name
* @addr_local: network address of node at this end
* @if_local: name of interface at this end
* @addr_peer: network address of node at far end
* @if_peer: name of interface at far end
*/
-struct link_name {
+struct tipc_link_name {
u32 addr_local;
char if_local[TIPC_MAX_IF_NAME];
u32 addr_peer;
char if_peer[TIPC_MAX_IF_NAME];
};
-static void link_handle_out_of_seq_msg(struct link *l_ptr,
+static void link_handle_out_of_seq_msg(struct tipc_link *l_ptr,
struct sk_buff *buf);
-static void link_recv_proto_msg(struct link *l_ptr, struct sk_buff *buf);
-static int link_recv_changeover_msg(struct link **l_ptr, struct sk_buff **buf);
-static void link_set_supervision_props(struct link *l_ptr, u32 tolerance);
+static void link_recv_proto_msg(struct tipc_link *l_ptr, struct sk_buff *buf);
+static int link_recv_changeover_msg(struct tipc_link **l_ptr,
+ struct sk_buff **buf);
+static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tolerance);
static int link_send_sections_long(struct tipc_port *sender,
struct iovec const *msg_sect,
u32 num_sect, unsigned int total_len,
u32 destnode);
-static void link_check_defragm_bufs(struct link *l_ptr);
-static void link_state_event(struct link *l_ptr, u32 event);
-static void link_reset_statistics(struct link *l_ptr);
-static void link_print(struct link *l_ptr, const char *str);
-static void link_start(struct link *l_ptr);
-static int link_send_long_buf(struct link *l_ptr, struct sk_buff *buf);
+static void link_check_defragm_bufs(struct tipc_link *l_ptr);
+static void link_state_event(struct tipc_link *l_ptr, u32 event);
+static void link_reset_statistics(struct tipc_link *l_ptr);
+static void link_print(struct tipc_link *l_ptr, const char *str);
+static void link_start(struct tipc_link *l_ptr);
+static int link_send_long_buf(struct tipc_link *l_ptr, struct sk_buff *buf);
/*
* Simple link routines
@@ -110,7 +111,7 @@ static unsigned int align(unsigned int i)
return (i + 3) & ~3u;
}
-static void link_init_max_pkt(struct link *l_ptr)
+static void link_init_max_pkt(struct tipc_link *l_ptr)
{
u32 max_pkt;
@@ -127,14 +128,14 @@ static void link_init_max_pkt(struct link *l_ptr)
l_ptr->max_pkt_probes = 0;
}
-static u32 link_next_sent(struct link *l_ptr)
+static u32 link_next_sent(struct tipc_link *l_ptr)
{
if (l_ptr->next_out)
- return msg_seqno(buf_msg(l_ptr->next_out));
+ return buf_seqno(l_ptr->next_out);
return mod(l_ptr->next_out_no);
}
-static u32 link_last_sent(struct link *l_ptr)
+static u32 link_last_sent(struct tipc_link *l_ptr)
{
return mod(link_next_sent(l_ptr) - 1);
}
@@ -143,28 +144,29 @@ static u32 link_last_sent(struct link *l_ptr)
* Simple non-static link routines (i.e. referenced outside this file)
*/
-int tipc_link_is_up(struct link *l_ptr)
+int tipc_link_is_up(struct tipc_link *l_ptr)
{
if (!l_ptr)
return 0;
return link_working_working(l_ptr) || link_working_unknown(l_ptr);
}
-int tipc_link_is_active(struct link *l_ptr)
+int tipc_link_is_active(struct tipc_link *l_ptr)
{
return (l_ptr->owner->active_links[0] == l_ptr) ||
(l_ptr->owner->active_links[1] == l_ptr);
}
/**
- * link_name_validate - validate & (optionally) deconstruct link name
+ * link_name_validate - validate & (optionally) deconstruct tipc_link name
* @name - ptr to link name string
* @name_parts - ptr to area for link name components (or NULL if not needed)
*
* Returns 1 if link name is valid, otherwise 0.
*/
-static int link_name_validate(const char *name, struct link_name *name_parts)
+static int link_name_validate(const char *name,
+ struct tipc_link_name *name_parts)
{
char name_copy[TIPC_MAX_LINK_NAME];
char *addr_local;
@@ -238,7 +240,7 @@ static int link_name_validate(const char *name, struct link_name *name_parts)
* tipc_node_delete() is called.)
*/
-static void link_timeout(struct link *l_ptr)
+static void link_timeout(struct tipc_link *l_ptr)
{
tipc_node_lock(l_ptr->owner);
@@ -287,7 +289,7 @@ static void link_timeout(struct link *l_ptr)
tipc_node_unlock(l_ptr->owner);
}
-static void link_set_timer(struct link *l_ptr, u32 time)
+static void link_set_timer(struct tipc_link *l_ptr, u32 time)
{
k_start_timer(&l_ptr->timer, time);
}
@@ -301,11 +303,11 @@ static void link_set_timer(struct link *l_ptr, u32 time)
* Returns pointer to link.
*/
-struct link *tipc_link_create(struct tipc_node *n_ptr,
+struct tipc_link *tipc_link_create(struct tipc_node *n_ptr,
struct tipc_bearer *b_ptr,
const struct tipc_media_addr *media_addr)
{
- struct link *l_ptr;
+ struct tipc_link *l_ptr;
struct tipc_msg *msg;
char *if_name;
char addr_string[16];
@@ -343,7 +345,7 @@ struct link *tipc_link_create(struct tipc_node *n_ptr,
l_ptr->checkpoint = 1;
l_ptr->peer_session = INVALID_SESSION;
l_ptr->b_ptr = b_ptr;
- link_set_supervision_props(l_ptr, b_ptr->media->tolerance);
+ link_set_supervision_props(l_ptr, b_ptr->tolerance);
l_ptr->state = RESET_UNKNOWN;
l_ptr->pmsg = (struct tipc_msg *)&l_ptr->proto_msg;
@@ -355,7 +357,7 @@ struct link *tipc_link_create(struct tipc_node *n_ptr,
strcpy((char *)msg_data(msg), if_name);
l_ptr->priority = b_ptr->priority;
- tipc_link_set_queue_limits(l_ptr, b_ptr->media->window);
+ tipc_link_set_queue_limits(l_ptr, b_ptr->window);
link_init_max_pkt(l_ptr);
@@ -382,7 +384,7 @@ struct link *tipc_link_create(struct tipc_node *n_ptr,
* to avoid a potential deadlock situation.
*/
-void tipc_link_delete(struct link *l_ptr)
+void tipc_link_delete(struct tipc_link *l_ptr)
{
if (!l_ptr) {
err("Attempt to delete non-existent link\n");
@@ -401,7 +403,7 @@ void tipc_link_delete(struct link *l_ptr)
kfree(l_ptr);
}
-static void link_start(struct link *l_ptr)
+static void link_start(struct tipc_link *l_ptr)
{
tipc_node_lock(l_ptr->owner);
link_state_event(l_ptr, STARTING_EVT);
@@ -418,7 +420,7 @@ static void link_start(struct link *l_ptr)
* has abated.
*/
-static int link_schedule_port(struct link *l_ptr, u32 origport, u32 sz)
+static int link_schedule_port(struct tipc_link *l_ptr, u32 origport, u32 sz)
{
struct tipc_port *p_ptr;
@@ -440,7 +442,7 @@ exit:
return -ELINKCONG;
}
-void tipc_link_wakeup_ports(struct link *l_ptr, int all)
+void tipc_link_wakeup_ports(struct tipc_link *l_ptr, int all)
{
struct tipc_port *p_ptr;
struct tipc_port *temp_p_ptr;
@@ -475,7 +477,7 @@ exit:
* @l_ptr: pointer to link
*/
-static void link_release_outqueue(struct link *l_ptr)
+static void link_release_outqueue(struct tipc_link *l_ptr)
{
struct sk_buff *buf = l_ptr->first_out;
struct sk_buff *next;
@@ -494,7 +496,7 @@ static void link_release_outqueue(struct link *l_ptr)
* @l_ptr: pointer to link
*/
-void tipc_link_reset_fragments(struct link *l_ptr)
+void tipc_link_reset_fragments(struct tipc_link *l_ptr)
{
struct sk_buff *buf = l_ptr->defragm_buf;
struct sk_buff *next;
@@ -512,7 +514,7 @@ void tipc_link_reset_fragments(struct link *l_ptr)
* @l_ptr: pointer to link
*/
-void tipc_link_stop(struct link *l_ptr)
+void tipc_link_stop(struct tipc_link *l_ptr)
{
struct sk_buff *buf;
struct sk_buff *next;
@@ -537,7 +539,7 @@ void tipc_link_stop(struct link *l_ptr)
l_ptr->proto_msg_queue = NULL;
}
-void tipc_link_reset(struct link *l_ptr)
+void tipc_link_reset(struct tipc_link *l_ptr)
{
struct sk_buff *buf;
u32 prev_state = l_ptr->state;
@@ -597,7 +599,7 @@ void tipc_link_reset(struct link *l_ptr)
}
-static void link_activate(struct link *l_ptr)
+static void link_activate(struct tipc_link *l_ptr)
{
l_ptr->next_in_no = l_ptr->stats.recv_info = 1;
tipc_node_link_up(l_ptr->owner, l_ptr);
@@ -610,9 +612,9 @@ static void link_activate(struct link *l_ptr)
* @event: state machine event to process
*/
-static void link_state_event(struct link *l_ptr, unsigned event)
+static void link_state_event(struct tipc_link *l_ptr, unsigned event)
{
- struct link *other;
+ struct tipc_link *other;
u32 cont_intv = l_ptr->continuity_interval;
if (!l_ptr->started && (event != STARTING_EVT))
@@ -784,7 +786,7 @@ static void link_state_event(struct link *l_ptr, unsigned event)
* the tail of an existing one.
*/
-static int link_bundle_buf(struct link *l_ptr,
+static int link_bundle_buf(struct tipc_link *l_ptr,
struct sk_buff *bundler,
struct sk_buff *buf)
{
@@ -813,7 +815,7 @@ static int link_bundle_buf(struct link *l_ptr,
return 1;
}
-static void link_add_to_outqueue(struct link *l_ptr,
+static void link_add_to_outqueue(struct tipc_link *l_ptr,
struct sk_buff *buf,
struct tipc_msg *msg)
{
@@ -834,7 +836,7 @@ static void link_add_to_outqueue(struct link *l_ptr,
l_ptr->stats.max_queue_sz = l_ptr->out_queue_size;
}
-static void link_add_chain_to_outqueue(struct link *l_ptr,
+static void link_add_chain_to_outqueue(struct tipc_link *l_ptr,
struct sk_buff *buf_chain,
u32 long_msgno)
{
@@ -859,7 +861,7 @@ static void link_add_chain_to_outqueue(struct link *l_ptr,
* has failed, and from link_send()
*/
-int tipc_link_send_buf(struct link *l_ptr, struct sk_buff *buf)
+int tipc_link_send_buf(struct tipc_link *l_ptr, struct sk_buff *buf)
{
struct tipc_msg *msg = buf_msg(buf);
u32 size = msg_size(msg);
@@ -954,7 +956,7 @@ int tipc_link_send_buf(struct link *l_ptr, struct sk_buff *buf)
int tipc_link_send(struct sk_buff *buf, u32 dest, u32 selector)
{
- struct link *l_ptr;
+ struct tipc_link *l_ptr;
struct tipc_node *n_ptr;
int res = -ELINKCONG;
@@ -988,7 +990,7 @@ int tipc_link_send(struct sk_buff *buf, u32 dest, u32 selector)
void tipc_link_send_names(struct list_head *message_list, u32 dest)
{
struct tipc_node *n_ptr;
- struct link *l_ptr;
+ struct tipc_link *l_ptr;
struct sk_buff *buf;
struct sk_buff *temp_buf;
@@ -1027,7 +1029,7 @@ void tipc_link_send_names(struct list_head *message_list, u32 dest)
* Link is locked. Returns user data length.
*/
-static int link_send_buf_fast(struct link *l_ptr, struct sk_buff *buf,
+static int link_send_buf_fast(struct tipc_link *l_ptr, struct sk_buff *buf,
u32 *used_max_pkt)
{
struct tipc_msg *msg = buf_msg(buf);
@@ -1061,7 +1063,7 @@ static int link_send_buf_fast(struct link *l_ptr, struct sk_buff *buf,
*/
int tipc_send_buf_fast(struct sk_buff *buf, u32 destnode)
{
- struct link *l_ptr;
+ struct tipc_link *l_ptr;
struct tipc_node *n_ptr;
int res;
u32 selector = msg_origport(buf_msg(buf)) & 1;
@@ -1100,7 +1102,7 @@ int tipc_link_send_sections_fast(struct tipc_port *sender,
u32 destaddr)
{
struct tipc_msg *hdr = &sender->phdr;
- struct link *l_ptr;
+ struct tipc_link *l_ptr;
struct sk_buff *buf;
struct tipc_node *node;
int res;
@@ -1195,7 +1197,7 @@ static int link_send_sections_long(struct tipc_port *sender,
unsigned int total_len,
u32 destaddr)
{
- struct link *l_ptr;
+ struct tipc_link *l_ptr;
struct tipc_node *node;
struct tipc_msg *hdr = &sender->phdr;
u32 dsz = total_len;
@@ -1342,7 +1344,7 @@ reject:
/*
* tipc_link_push_packet: Push one unsent packet to the media
*/
-u32 tipc_link_push_packet(struct link *l_ptr)
+u32 tipc_link_push_packet(struct tipc_link *l_ptr)
{
struct sk_buff *buf = l_ptr->first_out;
u32 r_q_size = l_ptr->retransm_queue_size;
@@ -1354,7 +1356,7 @@ u32 tipc_link_push_packet(struct link *l_ptr)
if (r_q_size && buf) {
u32 last = lesser(mod(r_q_head + r_q_size),
link_last_sent(l_ptr));
- u32 first = msg_seqno(buf_msg(buf));
+ u32 first = buf_seqno(buf);
while (buf && less(first, r_q_head)) {
first = mod(first + 1);
@@ -1403,7 +1405,7 @@ u32 tipc_link_push_packet(struct link *l_ptr)
if (buf) {
struct tipc_msg *msg = buf_msg(buf);
u32 next = msg_seqno(msg);
- u32 first = msg_seqno(buf_msg(l_ptr->first_out));
+ u32 first = buf_seqno(l_ptr->first_out);
if (mod(next - first) < l_ptr->queue_limit[0]) {
msg_set_ack(msg, mod(l_ptr->next_in_no - 1));
@@ -1426,7 +1428,7 @@ u32 tipc_link_push_packet(struct link *l_ptr)
* push_queue(): push out the unsent messages of a link where
* congestion has abated. Node is locked
*/
-void tipc_link_push_queue(struct link *l_ptr)
+void tipc_link_push_queue(struct tipc_link *l_ptr)
{
u32 res;
@@ -1470,7 +1472,8 @@ static void link_reset_all(unsigned long addr)
read_unlock_bh(&tipc_net_lock);
}
-static void link_retransmit_failure(struct link *l_ptr, struct sk_buff *buf)
+static void link_retransmit_failure(struct tipc_link *l_ptr,
+ struct sk_buff *buf)
{
struct tipc_msg *msg = buf_msg(buf);
@@ -1514,7 +1517,7 @@ static void link_retransmit_failure(struct link *l_ptr, struct sk_buff *buf)
}
}
-void tipc_link_retransmit(struct link *l_ptr, struct sk_buff *buf,
+void tipc_link_retransmit(struct tipc_link *l_ptr, struct sk_buff *buf,
u32 retransmits)
{
struct tipc_msg *msg;
@@ -1558,7 +1561,7 @@ void tipc_link_retransmit(struct link *l_ptr, struct sk_buff *buf,
} else {
tipc_bearer_schedule(l_ptr->b_ptr, l_ptr);
l_ptr->stats.bearer_congs++;
- l_ptr->retransm_queue_head = msg_seqno(buf_msg(buf));
+ l_ptr->retransm_queue_head = buf_seqno(buf);
l_ptr->retransm_queue_size = retransmits;
return;
}
@@ -1571,7 +1574,7 @@ void tipc_link_retransmit(struct link *l_ptr, struct sk_buff *buf,
* link_insert_deferred_queue - insert deferred messages back into receive chain
*/
-static struct sk_buff *link_insert_deferred_queue(struct link *l_ptr,
+static struct sk_buff *link_insert_deferred_queue(struct tipc_link *l_ptr,
struct sk_buff *buf)
{
u32 seq_no;
@@ -1579,7 +1582,7 @@ static struct sk_buff *link_insert_deferred_queue(struct link *l_ptr,
if (l_ptr->oldest_deferred_in == NULL)
return buf;
- seq_no = msg_seqno(buf_msg(l_ptr->oldest_deferred_in));
+ seq_no = buf_seqno(l_ptr->oldest_deferred_in);
if (seq_no == mod(l_ptr->next_in_no)) {
l_ptr->newest_deferred_in->next = buf;
buf = l_ptr->oldest_deferred_in;
@@ -1653,7 +1656,7 @@ void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *b_ptr)
read_lock_bh(&tipc_net_lock);
while (head) {
struct tipc_node *n_ptr;
- struct link *l_ptr;
+ struct tipc_link *l_ptr;
struct sk_buff *crs;
struct sk_buff *buf = head;
struct tipc_msg *msg;
@@ -1733,14 +1736,12 @@ void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *b_ptr)
/* Release acked messages */
- if (less(n_ptr->bclink.acked, msg_bcast_ack(msg))) {
- if (tipc_node_is_up(n_ptr) && n_ptr->bclink.supported)
- tipc_bclink_acknowledge(n_ptr, msg_bcast_ack(msg));
- }
+ if (tipc_node_is_up(n_ptr) && n_ptr->bclink.supported)
+ tipc_bclink_acknowledge(n_ptr, msg_bcast_ack(msg));
crs = l_ptr->first_out;
while ((crs != l_ptr->next_out) &&
- less_eq(msg_seqno(buf_msg(crs)), ackd)) {
+ less_eq(buf_seqno(crs), ackd)) {
struct sk_buff *next = crs->next;
buf_discard(crs);
@@ -1863,7 +1864,7 @@ u32 tipc_link_defer_pkt(struct sk_buff **head,
{
struct sk_buff *prev = NULL;
struct sk_buff *crs = *head;
- u32 seq_no = msg_seqno(buf_msg(buf));
+ u32 seq_no = buf_seqno(buf);
buf->next = NULL;
@@ -1874,7 +1875,7 @@ u32 tipc_link_defer_pkt(struct sk_buff **head,
}
/* Last ? */
- if (less(msg_seqno(buf_msg(*tail)), seq_no)) {
+ if (less(buf_seqno(*tail), seq_no)) {
(*tail)->next = buf;
*tail = buf;
return 1;
@@ -1908,10 +1909,10 @@ u32 tipc_link_defer_pkt(struct sk_buff **head,
* link_handle_out_of_seq_msg - handle arrival of out-of-sequence packet
*/
-static void link_handle_out_of_seq_msg(struct link *l_ptr,
+static void link_handle_out_of_seq_msg(struct tipc_link *l_ptr,
struct sk_buff *buf)
{
- u32 seq_no = msg_seqno(buf_msg(buf));
+ u32 seq_no = buf_seqno(buf);
if (likely(msg_user(buf_msg(buf)) == LINK_PROTOCOL)) {
link_recv_proto_msg(l_ptr, buf);
@@ -1946,8 +1947,9 @@ static void link_handle_out_of_seq_msg(struct link *l_ptr,
/*
* Send protocol message to the other endpoint.
*/
-void tipc_link_send_proto_msg(struct link *l_ptr, u32 msg_typ, int probe_msg,
- u32 gap, u32 tolerance, u32 priority, u32 ack_mtu)
+void tipc_link_send_proto_msg(struct tipc_link *l_ptr, u32 msg_typ,
+ int probe_msg, u32 gap, u32 tolerance,
+ u32 priority, u32 ack_mtu)
{
struct sk_buff *buf = NULL;
struct tipc_msg *msg = l_ptr->pmsg;
@@ -1973,10 +1975,10 @@ void tipc_link_send_proto_msg(struct link *l_ptr, u32 msg_typ, int probe_msg,
if (!tipc_link_is_up(l_ptr))
return;
if (l_ptr->next_out)
- next_sent = msg_seqno(buf_msg(l_ptr->next_out));
+ next_sent = buf_seqno(l_ptr->next_out);
msg_set_next_sent(msg, next_sent);
if (l_ptr->oldest_deferred_in) {
- u32 rec = msg_seqno(buf_msg(l_ptr->oldest_deferred_in));
+ u32 rec = buf_seqno(l_ptr->oldest_deferred_in);
gap = mod(rec - mod(l_ptr->next_in_no));
}
msg_set_seq_gap(msg, gap);
@@ -2064,7 +2066,7 @@ void tipc_link_send_proto_msg(struct link *l_ptr, u32 msg_typ, int probe_msg,
* change at any time. The node with lowest address rules
*/
-static void link_recv_proto_msg(struct link *l_ptr, struct sk_buff *buf)
+static void link_recv_proto_msg(struct tipc_link *l_ptr, struct sk_buff *buf)
{
u32 rec_gap = 0;
u32 max_pkt_info;
@@ -2197,12 +2199,12 @@ exit:
* tipc_link_tunnel(): Send one message via a link belonging to
* another bearer. Owner node is locked.
*/
-static void tipc_link_tunnel(struct link *l_ptr,
+static void tipc_link_tunnel(struct tipc_link *l_ptr,
struct tipc_msg *tunnel_hdr,
struct tipc_msg *msg,
u32 selector)
{
- struct link *tunnel;
+ struct tipc_link *tunnel;
struct sk_buff *buf;
u32 length = msg_size(msg);
@@ -2231,11 +2233,11 @@ static void tipc_link_tunnel(struct link *l_ptr,
* Owner node is locked.
*/
-void tipc_link_changeover(struct link *l_ptr)
+void tipc_link_changeover(struct tipc_link *l_ptr)
{
u32 msgcount = l_ptr->out_queue_size;
struct sk_buff *crs = l_ptr->first_out;
- struct link *tunnel = l_ptr->owner->active_links[0];
+ struct tipc_link *tunnel = l_ptr->owner->active_links[0];
struct tipc_msg tunnel_hdr;
int split_bundles;
@@ -2294,7 +2296,7 @@ void tipc_link_changeover(struct link *l_ptr)
}
}
-void tipc_link_send_duplicate(struct link *l_ptr, struct link *tunnel)
+void tipc_link_send_duplicate(struct tipc_link *l_ptr, struct tipc_link *tunnel)
{
struct sk_buff *iter;
struct tipc_msg tunnel_hdr;
@@ -2358,11 +2360,11 @@ static struct sk_buff *buf_extract(struct sk_buff *skb, u32 from_pos)
* via other link. Node is locked. Return extracted buffer.
*/
-static int link_recv_changeover_msg(struct link **l_ptr,
+static int link_recv_changeover_msg(struct tipc_link **l_ptr,
struct sk_buff **buf)
{
struct sk_buff *tunnel_buf = *buf;
- struct link *dest_link;
+ struct tipc_link *dest_link;
struct tipc_msg *msg;
struct tipc_msg *tunnel_msg = buf_msg(tunnel_buf);
u32 msg_typ = msg_type(tunnel_msg);
@@ -2462,7 +2464,7 @@ void tipc_link_recv_bundle(struct sk_buff *buf)
* The buffer is complete, inclusive total message length.
* Returns user data length.
*/
-static int link_send_long_buf(struct link *l_ptr, struct sk_buff *buf)
+static int link_send_long_buf(struct tipc_link *l_ptr, struct sk_buff *buf)
{
struct sk_buff *buf_chain = NULL;
struct sk_buff *buf_chain_tail = (struct sk_buff *)&buf_chain;
@@ -2591,7 +2593,7 @@ int tipc_link_recv_fragment(struct sk_buff **pending, struct sk_buff **fb,
/* Is there an incomplete message waiting for this fragment? */
- while (pbuf && ((msg_seqno(buf_msg(pbuf)) != long_msg_seq_no) ||
+ while (pbuf && ((buf_seqno(pbuf) != long_msg_seq_no) ||
(msg_orignode(fragm) != msg_orignode(buf_msg(pbuf))))) {
prev = pbuf;
pbuf = pbuf->next;
@@ -2658,7 +2660,7 @@ int tipc_link_recv_fragment(struct sk_buff **pending, struct sk_buff **fb,
* @l_ptr: pointer to link
*/
-static void link_check_defragm_bufs(struct link *l_ptr)
+static void link_check_defragm_bufs(struct tipc_link *l_ptr)
{
struct sk_buff *prev = NULL;
struct sk_buff *next = NULL;
@@ -2688,7 +2690,7 @@ static void link_check_defragm_bufs(struct link *l_ptr)
-static void link_set_supervision_props(struct link *l_ptr, u32 tolerance)
+static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tolerance)
{
if ((tolerance < TIPC_MIN_LINK_TOL) || (tolerance > TIPC_MAX_LINK_TOL))
return;
@@ -2700,7 +2702,7 @@ static void link_set_supervision_props(struct link *l_ptr, u32 tolerance)
}
-void tipc_link_set_queue_limits(struct link *l_ptr, u32 window)
+void tipc_link_set_queue_limits(struct tipc_link *l_ptr, u32 window)
{
/* Data messages from this node, inclusive FIRST_FRAGM */
l_ptr->queue_limit[TIPC_LOW_IMPORTANCE] = window;
@@ -2730,11 +2732,12 @@ void tipc_link_set_queue_limits(struct link *l_ptr, u32 window)
* Returns pointer to link (or 0 if invalid link name).
*/
-static struct link *link_find_link(const char *name, struct tipc_node **node)
+static struct tipc_link *link_find_link(const char *name,
+ struct tipc_node **node)
{
- struct link_name link_name_parts;
+ struct tipc_link_name link_name_parts;
struct tipc_bearer *b_ptr;
- struct link *l_ptr;
+ struct tipc_link *l_ptr;
if (!link_name_validate(name, &link_name_parts))
return NULL;
@@ -2754,13 +2757,113 @@ static struct link *link_find_link(const char *name, struct tipc_node **node)
return l_ptr;
}
+/**
+ * link_value_is_valid -- validate proposed link tolerance/priority/window
+ *
+ * @cmd - value type (TIPC_CMD_SET_LINK_*)
+ * @new_value - the new value
+ *
+ * Returns 1 if value is within range, 0 if not.
+ */
+
+static int link_value_is_valid(u16 cmd, u32 new_value)
+{
+ switch (cmd) {
+ case TIPC_CMD_SET_LINK_TOL:
+ return (new_value >= TIPC_MIN_LINK_TOL) &&
+ (new_value <= TIPC_MAX_LINK_TOL);
+ case TIPC_CMD_SET_LINK_PRI:
+ return (new_value <= TIPC_MAX_LINK_PRI);
+ case TIPC_CMD_SET_LINK_WINDOW:
+ return (new_value >= TIPC_MIN_LINK_WIN) &&
+ (new_value <= TIPC_MAX_LINK_WIN);
+ }
+ return 0;
+}
+
+
+/**
+ * link_cmd_set_value - change priority/tolerance/window for link/bearer/media
+ * @name - ptr to link, bearer, or media name
+ * @new_value - new value of link, bearer, or media setting
+ * @cmd - which link, bearer, or media attribute to set (TIPC_CMD_SET_LINK_*)
+ *
+ * Caller must hold 'tipc_net_lock' to ensure link/bearer/media is not deleted.
+ *
+ * Returns 0 if value updated and negative value on error.
+ */
+
+static int link_cmd_set_value(const char *name, u32 new_value, u16 cmd)
+{
+ struct tipc_node *node;
+ struct tipc_link *l_ptr;
+ struct tipc_bearer *b_ptr;
+ struct tipc_media *m_ptr;
+
+ l_ptr = link_find_link(name, &node);
+ if (l_ptr) {
+ /*
+ * acquire node lock for tipc_link_send_proto_msg().
+ * see "TIPC locking policy" in net.c.
+ */
+ tipc_node_lock(node);
+ switch (cmd) {
+ case TIPC_CMD_SET_LINK_TOL:
+ link_set_supervision_props(l_ptr, new_value);
+ tipc_link_send_proto_msg(l_ptr,
+ STATE_MSG, 0, 0, new_value, 0, 0);
+ break;
+ case TIPC_CMD_SET_LINK_PRI:
+ l_ptr->priority = new_value;
+ tipc_link_send_proto_msg(l_ptr,
+ STATE_MSG, 0, 0, 0, new_value, 0);
+ break;
+ case TIPC_CMD_SET_LINK_WINDOW:
+ tipc_link_set_queue_limits(l_ptr, new_value);
+ break;
+ }
+ tipc_node_unlock(node);
+ return 0;
+ }
+
+ b_ptr = tipc_bearer_find(name);
+ if (b_ptr) {
+ switch (cmd) {
+ case TIPC_CMD_SET_LINK_TOL:
+ b_ptr->tolerance = new_value;
+ return 0;
+ case TIPC_CMD_SET_LINK_PRI:
+ b_ptr->priority = new_value;
+ return 0;
+ case TIPC_CMD_SET_LINK_WINDOW:
+ b_ptr->window = new_value;
+ return 0;
+ }
+ return -EINVAL;
+ }
+
+ m_ptr = tipc_media_find(name);
+ if (!m_ptr)
+ return -ENODEV;
+ switch (cmd) {
+ case TIPC_CMD_SET_LINK_TOL:
+ m_ptr->tolerance = new_value;
+ return 0;
+ case TIPC_CMD_SET_LINK_PRI:
+ m_ptr->priority = new_value;
+ return 0;
+ case TIPC_CMD_SET_LINK_WINDOW:
+ m_ptr->window = new_value;
+ return 0;
+ }
+ return -EINVAL;
+}
+
struct sk_buff *tipc_link_cmd_config(const void *req_tlv_area, int req_tlv_space,
u16 cmd)
{
struct tipc_link_config *args;
u32 new_value;
- struct link *l_ptr;
- struct tipc_node *node;
int res;
if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_LINK_CONFIG))
@@ -2769,6 +2872,10 @@ struct sk_buff *tipc_link_cmd_config(const void *req_tlv_area, int req_tlv_space
args = (struct tipc_link_config *)TLV_DATA(req_tlv_area);
new_value = ntohl(args->value);
+ if (!link_value_is_valid(cmd, new_value))
+ return tipc_cfg_reply_error_string(
+ "cannot change, value invalid");
+
if (!strcmp(args->name, tipc_bclink_name)) {
if ((cmd == TIPC_CMD_SET_LINK_WINDOW) &&
(tipc_bclink_set_queue_limits(new_value) == 0))
@@ -2778,43 +2885,7 @@ struct sk_buff *tipc_link_cmd_config(const void *req_tlv_area, int req_tlv_space
}
read_lock_bh(&tipc_net_lock);
- l_ptr = link_find_link(args->name, &node);
- if (!l_ptr) {
- read_unlock_bh(&tipc_net_lock);
- return tipc_cfg_reply_error_string("link not found");
- }
-
- tipc_node_lock(node);
- res = -EINVAL;
- switch (cmd) {
- case TIPC_CMD_SET_LINK_TOL:
- if ((new_value >= TIPC_MIN_LINK_TOL) &&
- (new_value <= TIPC_MAX_LINK_TOL)) {
- link_set_supervision_props(l_ptr, new_value);
- tipc_link_send_proto_msg(l_ptr, STATE_MSG,
- 0, 0, new_value, 0, 0);
- res = 0;
- }
- break;
- case TIPC_CMD_SET_LINK_PRI:
- if ((new_value >= TIPC_MIN_LINK_PRI) &&
- (new_value <= TIPC_MAX_LINK_PRI)) {
- l_ptr->priority = new_value;
- tipc_link_send_proto_msg(l_ptr, STATE_MSG,
- 0, 0, 0, new_value, 0);
- res = 0;
- }
- break;
- case TIPC_CMD_SET_LINK_WINDOW:
- if ((new_value >= TIPC_MIN_LINK_WIN) &&
- (new_value <= TIPC_MAX_LINK_WIN)) {
- tipc_link_set_queue_limits(l_ptr, new_value);
- res = 0;
- }
- break;
- }
- tipc_node_unlock(node);
-
+ res = link_cmd_set_value(args->name, new_value, cmd);
read_unlock_bh(&tipc_net_lock);
if (res)
return tipc_cfg_reply_error_string("cannot change link setting");
@@ -2827,7 +2898,7 @@ struct sk_buff *tipc_link_cmd_config(const void *req_tlv_area, int req_tlv_space
* @l_ptr: pointer to link
*/
-static void link_reset_statistics(struct link *l_ptr)
+static void link_reset_statistics(struct tipc_link *l_ptr)
{
memset(&l_ptr->stats, 0, sizeof(l_ptr->stats));
l_ptr->stats.sent_info = l_ptr->next_out_no;
@@ -2837,7 +2908,7 @@ static void link_reset_statistics(struct link *l_ptr)
struct sk_buff *tipc_link_cmd_reset_stats(const void *req_tlv_area, int req_tlv_space)
{
char *link_name;
- struct link *l_ptr;
+ struct tipc_link *l_ptr;
struct tipc_node *node;
if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_LINK_NAME))
@@ -2885,7 +2956,7 @@ static u32 percent(u32 count, u32 total)
static int tipc_link_stats(const char *name, char *buf, const u32 buf_size)
{
struct print_buf pb;
- struct link *l_ptr;
+ struct tipc_link *l_ptr;
struct tipc_node *node;
char *status;
u32 profile_total = 0;
@@ -3007,7 +3078,7 @@ struct sk_buff *tipc_link_cmd_show_stats(const void *req_tlv_area, int req_tlv_s
u32 tipc_link_get_max_pkt(u32 dest, u32 selector)
{
struct tipc_node *n_ptr;
- struct link *l_ptr;
+ struct tipc_link *l_ptr;
u32 res = MAX_PKT_DEFAULT;
if (dest == tipc_own_addr)
@@ -3026,7 +3097,7 @@ u32 tipc_link_get_max_pkt(u32 dest, u32 selector)
return res;
}
-static void link_print(struct link *l_ptr, const char *str)
+static void link_print(struct tipc_link *l_ptr, const char *str)
{
char print_area[256];
struct print_buf pb;
@@ -3046,13 +3117,12 @@ static void link_print(struct link *l_ptr, const char *str)
tipc_printf(buf, "NXI(%u):", mod(l_ptr->next_in_no));
tipc_printf(buf, "SQUE");
if (l_ptr->first_out) {
- tipc_printf(buf, "[%u..", msg_seqno(buf_msg(l_ptr->first_out)));
+ tipc_printf(buf, "[%u..", buf_seqno(l_ptr->first_out));
if (l_ptr->next_out)
- tipc_printf(buf, "%u..",
- msg_seqno(buf_msg(l_ptr->next_out)));
- tipc_printf(buf, "%u]", msg_seqno(buf_msg(l_ptr->last_out)));
- if ((mod(msg_seqno(buf_msg(l_ptr->last_out)) -
- msg_seqno(buf_msg(l_ptr->first_out)))
+ tipc_printf(buf, "%u..", buf_seqno(l_ptr->next_out));
+ tipc_printf(buf, "%u]", buf_seqno(l_ptr->last_out));
+ if ((mod(buf_seqno(l_ptr->last_out) -
+ buf_seqno(l_ptr->first_out))
!= (l_ptr->out_queue_size - 1)) ||
(l_ptr->last_out->next != NULL)) {
tipc_printf(buf, "\nSend queue inconsistency\n");
@@ -3064,8 +3134,8 @@ static void link_print(struct link *l_ptr, const char *str)
tipc_printf(buf, "[]");
tipc_printf(buf, "SQSIZ(%u)", l_ptr->out_queue_size);
if (l_ptr->oldest_deferred_in) {
- u32 o = msg_seqno(buf_msg(l_ptr->oldest_deferred_in));
- u32 n = msg_seqno(buf_msg(l_ptr->newest_deferred_in));
+ u32 o = buf_seqno(l_ptr->oldest_deferred_in);
+ u32 n = buf_seqno(l_ptr->newest_deferred_in);
tipc_printf(buf, ":RQUE[%u..%u]", o, n);
if (l_ptr->deferred_inqueue_sz != mod((n + 1) - o)) {
tipc_printf(buf, ":RQSIZ(%u)",
diff --git a/net/tipc/link.h b/net/tipc/link.h
index e56cb532913e..73c18c140e1d 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h
@@ -45,6 +45,12 @@
#define PUSH_FINISHED 2
/*
+ * Out-of-range value for link sequence numbers
+ */
+
+#define INVALID_LINK_SEQ 0x10000
+
+/*
* Link states
*/
@@ -61,7 +67,7 @@
#define MAX_PKT_DEFAULT 1500
/**
- * struct link - TIPC link data structure
+ * struct tipc_link - TIPC link data structure
* @addr: network address of link's peer node
* @name: link name character string
* @media_addr: media address to use when sending messages over link
@@ -109,7 +115,7 @@
* @stats: collects statistics regarding link activity
*/
-struct link {
+struct tipc_link {
u32 addr;
char name[TIPC_MAX_LINK_NAME];
struct tipc_media_addr media_addr;
@@ -207,24 +213,24 @@ struct link {
struct tipc_port;
-struct link *tipc_link_create(struct tipc_node *n_ptr,
+struct tipc_link *tipc_link_create(struct tipc_node *n_ptr,
struct tipc_bearer *b_ptr,
const struct tipc_media_addr *media_addr);
-void tipc_link_delete(struct link *l_ptr);
-void tipc_link_changeover(struct link *l_ptr);
-void tipc_link_send_duplicate(struct link *l_ptr, struct link *dest);
-void tipc_link_reset_fragments(struct link *l_ptr);
-int tipc_link_is_up(struct link *l_ptr);
-int tipc_link_is_active(struct link *l_ptr);
-u32 tipc_link_push_packet(struct link *l_ptr);
-void tipc_link_stop(struct link *l_ptr);
+void tipc_link_delete(struct tipc_link *l_ptr);
+void tipc_link_changeover(struct tipc_link *l_ptr);
+void tipc_link_send_duplicate(struct tipc_link *l_ptr, struct tipc_link *dest);
+void tipc_link_reset_fragments(struct tipc_link *l_ptr);
+int tipc_link_is_up(struct tipc_link *l_ptr);
+int tipc_link_is_active(struct tipc_link *l_ptr);
+u32 tipc_link_push_packet(struct tipc_link *l_ptr);
+void tipc_link_stop(struct tipc_link *l_ptr);
struct sk_buff *tipc_link_cmd_config(const void *req_tlv_area, int req_tlv_space, u16 cmd);
struct sk_buff *tipc_link_cmd_show_stats(const void *req_tlv_area, int req_tlv_space);
struct sk_buff *tipc_link_cmd_reset_stats(const void *req_tlv_area, int req_tlv_space);
-void tipc_link_reset(struct link *l_ptr);
+void tipc_link_reset(struct tipc_link *l_ptr);
int tipc_link_send(struct sk_buff *buf, u32 dest, u32 selector);
void tipc_link_send_names(struct list_head *message_list, u32 dest);
-int tipc_link_send_buf(struct link *l_ptr, struct sk_buff *buf);
+int tipc_link_send_buf(struct tipc_link *l_ptr, struct sk_buff *buf);
u32 tipc_link_get_max_pkt(u32 dest, u32 selector);
int tipc_link_send_sections_fast(struct tipc_port *sender,
struct iovec const *msg_sect,
@@ -235,19 +241,26 @@ void tipc_link_recv_bundle(struct sk_buff *buf);
int tipc_link_recv_fragment(struct sk_buff **pending,
struct sk_buff **fb,
struct tipc_msg **msg);
-void tipc_link_send_proto_msg(struct link *l_ptr, u32 msg_typ, int prob, u32 gap,
- u32 tolerance, u32 priority, u32 acked_mtu);
-void tipc_link_push_queue(struct link *l_ptr);
+void tipc_link_send_proto_msg(struct tipc_link *l_ptr, u32 msg_typ, int prob,
+ u32 gap, u32 tolerance, u32 priority,
+ u32 acked_mtu);
+void tipc_link_push_queue(struct tipc_link *l_ptr);
u32 tipc_link_defer_pkt(struct sk_buff **head, struct sk_buff **tail,
struct sk_buff *buf);
-void tipc_link_wakeup_ports(struct link *l_ptr, int all);
-void tipc_link_set_queue_limits(struct link *l_ptr, u32 window);
-void tipc_link_retransmit(struct link *l_ptr, struct sk_buff *start, u32 retransmits);
+void tipc_link_wakeup_ports(struct tipc_link *l_ptr, int all);
+void tipc_link_set_queue_limits(struct tipc_link *l_ptr, u32 window);
+void tipc_link_retransmit(struct tipc_link *l_ptr,
+ struct sk_buff *start, u32 retransmits);
/*
* Link sequence number manipulation routines (uses modulo 2**16 arithmetic)
*/
+static inline u32 buf_seqno(struct sk_buff *buf)
+{
+ return msg_seqno(buf_msg(buf));
+}
+
static inline u32 mod(u32 x)
{
return x & 0xffffu;
@@ -282,32 +295,32 @@ static inline u32 lesser(u32 left, u32 right)
* Link status checking routines
*/
-static inline int link_working_working(struct link *l_ptr)
+static inline int link_working_working(struct tipc_link *l_ptr)
{
return l_ptr->state == WORKING_WORKING;
}
-static inline int link_working_unknown(struct link *l_ptr)
+static inline int link_working_unknown(struct tipc_link *l_ptr)
{
return l_ptr->state == WORKING_UNKNOWN;
}
-static inline int link_reset_unknown(struct link *l_ptr)
+static inline int link_reset_unknown(struct tipc_link *l_ptr)
{
return l_ptr->state == RESET_UNKNOWN;
}
-static inline int link_reset_reset(struct link *l_ptr)
+static inline int link_reset_reset(struct tipc_link *l_ptr)
{
return l_ptr->state == RESET_RESET;
}
-static inline int link_blocked(struct link *l_ptr)
+static inline int link_blocked(struct tipc_link *l_ptr)
{
return l_ptr->exp_msg_count || l_ptr->blocked;
}
-static inline int link_congested(struct link *l_ptr)
+static inline int link_congested(struct tipc_link *l_ptr)
{
return l_ptr->out_queue_size >= l_ptr->queue_limit[0];
}
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index 83d50967910c..3e4d3e29be61 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -333,11 +333,14 @@ void tipc_msg_dbg(struct print_buf *buf, struct tipc_msg *msg, const char *str)
}
if (msg_user(msg) == LINK_CONFIG) {
- u32 *raw = (u32 *)msg;
- struct tipc_media_addr *orig = (struct tipc_media_addr *)&raw[5];
+ struct tipc_media_addr orig;
+
tipc_printf(buf, ":DDOM(%x):", msg_dest_domain(msg));
tipc_printf(buf, ":NETID(%u):", msg_bc_netid(msg));
- tipc_media_addr_printf(buf, orig);
+ memcpy(orig.value, msg_media_addr(msg), sizeof(orig.value));
+ orig.media_id = 0;
+ orig.broadcast = 0;
+ tipc_media_addr_printf(buf, &orig);
}
if (msg_user(msg) == BCAST_PROTOCOL) {
tipc_printf(buf, "BCNACK:AFTER(%u):", msg_bcgap_after(msg));
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index d93178f2e852..7b0cda167107 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -78,6 +78,8 @@
#define MAX_MSG_SIZE (MAX_H_SIZE + TIPC_MAX_USER_MSG_SIZE)
+#define TIPC_MEDIA_ADDR_OFFSET 5
+
struct tipc_msg {
__be32 hdr[15];
@@ -682,6 +684,10 @@ static inline void msg_set_redundant_link(struct tipc_msg *m, u32 r)
msg_set_bits(m, 5, 12, 0x1, r);
}
+static inline char *msg_media_addr(struct tipc_msg *m)
+{
+ return (char *)&m->hdr[TIPC_MEDIA_ADDR_OFFSET];
+}
/*
* Word 9
@@ -734,14 +740,4 @@ int tipc_msg_build(struct tipc_msg *hdr, struct iovec const *msg_sect,
u32 num_sect, unsigned int total_len,
int max_size, int usrmem, struct sk_buff **buf);
-static inline void msg_set_media_addr(struct tipc_msg *m, struct tipc_media_addr *a)
-{
- memcpy(&((int *)m)[5], a, sizeof(*a));
-}
-
-static inline void msg_get_media_addr(struct tipc_msg *m, struct tipc_media_addr *a)
-{
- memcpy(a, &((int *)m)[5], sizeof(*a));
-}
-
#endif
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index b7ca1bd7b151..98ebb37f1808 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -176,7 +176,7 @@ void tipc_named_withdraw(struct publication *publ)
void tipc_named_node_up(unsigned long nodearg)
{
struct tipc_node *n_ptr;
- struct link *l_ptr;
+ struct tipc_link *l_ptr;
struct publication *publ;
struct distr_item *item = NULL;
struct sk_buff *buf = NULL;
@@ -322,10 +322,9 @@ void tipc_named_recv(struct sk_buff *buf)
/**
* tipc_named_reinit - re-initialize local publication list
*
- * This routine is called whenever TIPC networking is (re)enabled.
+ * This routine is called whenever TIPC networking is enabled.
* All existing publications by this node that have "cluster" or "zone" scope
- * are updated to reflect the node's current network address.
- * (If the node's address is unchanged, the update loop terminates immediately.)
+ * are updated to reflect the node's new network address.
*/
void tipc_named_reinit(void)
@@ -333,10 +332,9 @@ void tipc_named_reinit(void)
struct publication *publ;
write_lock_bh(&tipc_nametbl_lock);
- list_for_each_entry(publ, &publ_root, local_list) {
- if (publ->node == tipc_own_addr)
- break;
+
+ list_for_each_entry(publ, &publ_root, local_list)
publ->node = tipc_own_addr;
- }
+
write_unlock_bh(&tipc_nametbl_lock);
}
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 46e6b6c2ecc9..89eb5621ebba 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -251,8 +251,8 @@ static struct publication *tipc_nameseq_insert_publ(struct name_seq *nseq,
u32 type, u32 lower, u32 upper,
u32 scope, u32 node, u32 port, u32 key)
{
- struct subscription *s;
- struct subscription *st;
+ struct tipc_subscription *s;
+ struct tipc_subscription *st;
struct publication *publ;
struct sub_seq *sseq;
struct name_info *info;
@@ -381,7 +381,7 @@ static struct publication *tipc_nameseq_remove_publ(struct name_seq *nseq, u32 i
struct sub_seq *sseq = nameseq_find_subseq(nseq, inst);
struct name_info *info;
struct sub_seq *free;
- struct subscription *s, *st;
+ struct tipc_subscription *s, *st;
int removed_subseq = 0;
if (!sseq)
@@ -448,7 +448,8 @@ found:
* sequence overlapping with the requested sequence
*/
-static void tipc_nameseq_subscribe(struct name_seq *nseq, struct subscription *s)
+static void tipc_nameseq_subscribe(struct name_seq *nseq,
+ struct tipc_subscription *s)
{
struct sub_seq *sseq = nseq->sseqs;
@@ -625,7 +626,7 @@ not_found:
*/
int tipc_nametbl_mc_translate(u32 type, u32 lower, u32 upper, u32 limit,
- struct port_list *dports)
+ struct tipc_port_list *dports)
{
struct name_seq *seq;
struct sub_seq *sseq;
@@ -739,7 +740,7 @@ int tipc_nametbl_withdraw(u32 type, u32 lower, u32 ref, u32 key)
* tipc_nametbl_subscribe - add a subscription object to the name table
*/
-void tipc_nametbl_subscribe(struct subscription *s)
+void tipc_nametbl_subscribe(struct tipc_subscription *s)
{
u32 type = s->seq.type;
struct name_seq *seq;
@@ -763,7 +764,7 @@ void tipc_nametbl_subscribe(struct subscription *s)
* tipc_nametbl_unsubscribe - remove a subscription object from name table
*/
-void tipc_nametbl_unsubscribe(struct subscription *s)
+void tipc_nametbl_unsubscribe(struct tipc_subscription *s)
{
struct name_seq *seq;
diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h
index 62d77e5e902e..8086b42f92ad 100644
--- a/net/tipc/name_table.h
+++ b/net/tipc/name_table.h
@@ -39,8 +39,8 @@
#include "node_subscr.h"
-struct subscription;
-struct port_list;
+struct tipc_subscription;
+struct tipc_port_list;
/*
* TIPC name types reserved for internal TIPC use (both current and planned)
@@ -90,7 +90,7 @@ extern rwlock_t tipc_nametbl_lock;
struct sk_buff *tipc_nametbl_get(const void *req_tlv_area, int req_tlv_space);
u32 tipc_nametbl_translate(u32 type, u32 instance, u32 *node);
int tipc_nametbl_mc_translate(u32 type, u32 lower, u32 upper, u32 limit,
- struct port_list *dports);
+ struct tipc_port_list *dports);
int tipc_nametbl_publish_rsv(u32 ref, unsigned int scope,
struct tipc_name_seq const *seq);
struct publication *tipc_nametbl_publish(u32 type, u32 lower, u32 upper,
@@ -100,8 +100,8 @@ struct publication *tipc_nametbl_insert_publ(u32 type, u32 lower, u32 upper,
u32 scope, u32 node, u32 ref, u32 key);
struct publication *tipc_nametbl_remove_publ(u32 type, u32 lower,
u32 node, u32 ref, u32 key);
-void tipc_nametbl_subscribe(struct subscription *s);
-void tipc_nametbl_unsubscribe(struct subscription *s);
+void tipc_nametbl_subscribe(struct tipc_subscription *s);
+void tipc_nametbl_unsubscribe(struct tipc_subscription *s);
int tipc_nametbl_init(void);
void tipc_nametbl_stop(void);
diff --git a/net/tipc/net.c b/net/tipc/net.c
index fafef6c3c0f6..61afee7e8291 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -174,7 +174,6 @@ void tipc_net_route_msg(struct sk_buff *buf)
int tipc_net_start(u32 addr)
{
char addr_string[16];
- int res;
if (tipc_mode != TIPC_NODE_MODE)
return -ENOPROTOOPT;
@@ -187,9 +186,7 @@ int tipc_net_start(u32 addr)
tipc_named_reinit();
tipc_port_reinit();
- res = tipc_bclink_init();
- if (res)
- return res;
+ tipc_bclink_init();
tipc_k_signal((Handler)tipc_subscr_start, 0);
tipc_k_signal((Handler)tipc_cfg_init, 0);
@@ -207,8 +204,8 @@ void tipc_net_stop(void)
if (tipc_mode != TIPC_NET_MODE)
return;
write_lock_bh(&tipc_net_lock);
- tipc_bearer_stop();
tipc_mode = TIPC_NODE_MODE;
+ tipc_bearer_stop();
tipc_bclink_stop();
list_for_each_entry_safe(node, t_node, &tipc_node_list, list)
tipc_node_delete(node);
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 27b4bb0cca6c..6b226faad89f 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -136,9 +136,9 @@ void tipc_node_delete(struct tipc_node *n_ptr)
* Link becomes active (alone or shared) or standby, depending on its priority.
*/
-void tipc_node_link_up(struct tipc_node *n_ptr, struct link *l_ptr)
+void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link *l_ptr)
{
- struct link **active = &n_ptr->active_links[0];
+ struct tipc_link **active = &n_ptr->active_links[0];
n_ptr->working_links++;
@@ -171,14 +171,14 @@ void tipc_node_link_up(struct tipc_node *n_ptr, struct link *l_ptr)
static void node_select_active_links(struct tipc_node *n_ptr)
{
- struct link **active = &n_ptr->active_links[0];
+ struct tipc_link **active = &n_ptr->active_links[0];
u32 i;
u32 highest_prio = 0;
active[0] = active[1] = NULL;
for (i = 0; i < MAX_BEARERS; i++) {
- struct link *l_ptr = n_ptr->links[i];
+ struct tipc_link *l_ptr = n_ptr->links[i];
if (!l_ptr || !tipc_link_is_up(l_ptr) ||
(l_ptr->priority < highest_prio))
@@ -197,9 +197,9 @@ static void node_select_active_links(struct tipc_node *n_ptr)
* tipc_node_link_down - handle loss of link
*/
-void tipc_node_link_down(struct tipc_node *n_ptr, struct link *l_ptr)
+void tipc_node_link_down(struct tipc_node *n_ptr, struct tipc_link *l_ptr)
{
- struct link **active;
+ struct tipc_link **active;
n_ptr->working_links--;
@@ -239,14 +239,14 @@ int tipc_node_is_up(struct tipc_node *n_ptr)
return tipc_node_active_links(n_ptr);
}
-void tipc_node_attach_link(struct tipc_node *n_ptr, struct link *l_ptr)
+void tipc_node_attach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr)
{
n_ptr->links[l_ptr->b_ptr->identity] = l_ptr;
atomic_inc(&tipc_num_links);
n_ptr->link_cnt++;
}
-void tipc_node_detach_link(struct tipc_node *n_ptr, struct link *l_ptr)
+void tipc_node_detach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr)
{
n_ptr->links[l_ptr->b_ptr->identity] = NULL;
atomic_dec(&tipc_num_links);
@@ -307,7 +307,7 @@ static void node_established_contact(struct tipc_node *n_ptr)
n_ptr->bclink.acked = tipc_bclink_get_last_sent();
if (n_ptr->bclink.supported) {
- tipc_nmap_add(&tipc_bcast_nmap, n_ptr->addr);
+ tipc_bclink_add_node(n_ptr->addr);
if (n_ptr->addr < tipc_own_addr)
tipc_own_tag++;
}
@@ -350,9 +350,8 @@ static void node_lost_contact(struct tipc_node *n_ptr)
n_ptr->bclink.defragm = NULL;
}
- tipc_nmap_remove(&tipc_bcast_nmap, n_ptr->addr);
- tipc_bclink_acknowledge(n_ptr,
- mod(n_ptr->bclink.acked + 10000));
+ tipc_bclink_remove_node(n_ptr->addr);
+ tipc_bclink_acknowledge(n_ptr, INVALID_LINK_SEQ);
if (n_ptr->addr < tipc_own_addr)
tipc_own_tag--;
@@ -361,7 +360,7 @@ static void node_lost_contact(struct tipc_node *n_ptr)
/* Abort link changeover */
for (i = 0; i < MAX_BEARERS; i++) {
- struct link *l_ptr = n_ptr->links[i];
+ struct tipc_link *l_ptr = n_ptr->links[i];
if (!l_ptr)
continue;
l_ptr->reset_checkpoint = l_ptr->next_in_no;
diff --git a/net/tipc/node.h b/net/tipc/node.h
index 4f15cb40aaa4..0b1c5f8b6996 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -79,8 +79,8 @@ struct tipc_node {
struct hlist_node hash;
struct list_head list;
struct list_head nsub;
- struct link *active_links[2];
- struct link *links[MAX_BEARERS];
+ struct tipc_link *active_links[2];
+ struct tipc_link *links[MAX_BEARERS];
int link_cnt;
int working_links;
int block_setup;
@@ -117,10 +117,10 @@ extern u32 tipc_own_tag;
struct tipc_node *tipc_node_find(u32 addr);
struct tipc_node *tipc_node_create(u32 addr);
void tipc_node_delete(struct tipc_node *n_ptr);
-void tipc_node_attach_link(struct tipc_node *n_ptr, struct link *l_ptr);
-void tipc_node_detach_link(struct tipc_node *n_ptr, struct link *l_ptr);
-void tipc_node_link_down(struct tipc_node *n_ptr, struct link *l_ptr);
-void tipc_node_link_up(struct tipc_node *n_ptr, struct link *l_ptr);
+void tipc_node_attach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr);
+void tipc_node_detach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr);
+void tipc_node_link_down(struct tipc_node *n_ptr, struct tipc_link *l_ptr);
+void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link *l_ptr);
int tipc_node_active_links(struct tipc_node *n_ptr);
int tipc_node_redundant_links(struct tipc_node *n_ptr);
int tipc_node_is_up(struct tipc_node *n_ptr);
diff --git a/net/tipc/port.c b/net/tipc/port.c
index 54d812a5a4d9..d91efc69e6f9 100644
--- a/net/tipc/port.c
+++ b/net/tipc/port.c
@@ -80,7 +80,7 @@ int tipc_multicast(u32 ref, struct tipc_name_seq const *seq,
struct tipc_msg *hdr;
struct sk_buff *buf;
struct sk_buff *ibuf = NULL;
- struct port_list dports = {0, NULL, };
+ struct tipc_port_list dports = {0, NULL, };
struct tipc_port *oport = tipc_port_deref(ref);
int ext_targets;
int res;
@@ -142,11 +142,11 @@ int tipc_multicast(u32 ref, struct tipc_name_seq const *seq,
* If there is no port list, perform a lookup to create one
*/
-void tipc_port_recv_mcast(struct sk_buff *buf, struct port_list *dp)
+void tipc_port_recv_mcast(struct sk_buff *buf, struct tipc_port_list *dp)
{
struct tipc_msg *msg;
- struct port_list dports = {0, NULL, };
- struct port_list *item = dp;
+ struct tipc_port_list dports = {0, NULL, };
+ struct tipc_port_list *item = dp;
int cnt = 0;
msg = buf_msg(buf);
diff --git a/net/tipc/port.h b/net/tipc/port.h
index b9aa34195aec..f751807e2a91 100644
--- a/net/tipc/port.h
+++ b/net/tipc/port.h
@@ -151,7 +151,7 @@ struct tipc_port {
};
extern spinlock_t tipc_port_list_lock;
-struct port_list;
+struct tipc_port_list;
/*
* TIPC port manipulation routines
@@ -228,7 +228,7 @@ int tipc_port_reject_sections(struct tipc_port *p_ptr, struct tipc_msg *hdr,
unsigned int total_len, int err);
struct sk_buff *tipc_port_get_ports(void);
void tipc_port_recv_proto_msg(struct sk_buff *buf);
-void tipc_port_recv_mcast(struct sk_buff *buf, struct port_list *dp);
+void tipc_port_recv_mcast(struct sk_buff *buf, struct tipc_port_list *dp);
void tipc_port_reinit(void);
/**
diff --git a/net/tipc/ref.c b/net/tipc/ref.c
index 83116892528b..9e37b7812c3c 100644
--- a/net/tipc/ref.c
+++ b/net/tipc/ref.c
@@ -110,8 +110,7 @@ int tipc_ref_table_init(u32 requested_size, u32 start)
/* allocate table & mark all entries as uninitialized */
- table = __vmalloc(actual_size * sizeof(struct reference),
- GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, PAGE_KERNEL);
+ table = vzalloc(actual_size * sizeof(struct reference));
if (table == NULL)
return -ENOMEM;
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 42b8324ff2ee..e2f7c5d370ba 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -185,9 +185,6 @@ static int tipc_create(struct net *net, struct socket *sock, int protocol,
/* Validate arguments */
- if (!net_eq(net, &init_net))
- return -EAFNOSUPPORT;
-
if (unlikely(protocol != 0))
return -EPROTONOSUPPORT;
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index 198371723b41..8c49566da8f3 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -40,14 +40,14 @@
#include "subscr.h"
/**
- * struct subscriber - TIPC network topology subscriber
+ * struct tipc_subscriber - TIPC network topology subscriber
* @port_ref: object reference to server port connecting to subscriber
* @lock: pointer to spinlock controlling access to subscriber's server port
* @subscriber_list: adjacent subscribers in top. server's list of subscribers
* @subscription_list: list of subscription objects for this subscriber
*/
-struct subscriber {
+struct tipc_subscriber {
u32 port_ref;
spinlock_t *lock;
struct list_head subscriber_list;
@@ -92,7 +92,7 @@ static u32 htohl(u32 in, int swap)
* try to take the lock if the message is rejected and returned!
*/
-static void subscr_send_event(struct subscription *sub,
+static void subscr_send_event(struct tipc_subscription *sub,
u32 found_lower,
u32 found_upper,
u32 event,
@@ -118,7 +118,7 @@ static void subscr_send_event(struct subscription *sub,
* Returns 1 if there is overlap, otherwise 0.
*/
-int tipc_subscr_overlap(struct subscription *sub,
+int tipc_subscr_overlap(struct tipc_subscription *sub,
u32 found_lower,
u32 found_upper)
@@ -138,7 +138,7 @@ int tipc_subscr_overlap(struct subscription *sub,
* Protected by nameseq.lock in name_table.c
*/
-void tipc_subscr_report_overlap(struct subscription *sub,
+void tipc_subscr_report_overlap(struct tipc_subscription *sub,
u32 found_lower,
u32 found_upper,
u32 event,
@@ -158,7 +158,7 @@ void tipc_subscr_report_overlap(struct subscription *sub,
* subscr_timeout - subscription timeout has occurred
*/
-static void subscr_timeout(struct subscription *sub)
+static void subscr_timeout(struct tipc_subscription *sub)
{
struct tipc_port *server_port;
@@ -205,7 +205,7 @@ static void subscr_timeout(struct subscription *sub)
* Called with subscriber port locked.
*/
-static void subscr_del(struct subscription *sub)
+static void subscr_del(struct tipc_subscription *sub)
{
tipc_nametbl_unsubscribe(sub);
list_del(&sub->subscription_list);
@@ -224,11 +224,11 @@ static void subscr_del(struct subscription *sub)
* simply wait for it to be released, then claim it.)
*/
-static void subscr_terminate(struct subscriber *subscriber)
+static void subscr_terminate(struct tipc_subscriber *subscriber)
{
u32 port_ref;
- struct subscription *sub;
- struct subscription *sub_temp;
+ struct tipc_subscription *sub;
+ struct tipc_subscription *sub_temp;
/* Invalidate subscriber reference */
@@ -278,10 +278,10 @@ static void subscr_terminate(struct subscriber *subscriber)
*/
static void subscr_cancel(struct tipc_subscr *s,
- struct subscriber *subscriber)
+ struct tipc_subscriber *subscriber)
{
- struct subscription *sub;
- struct subscription *sub_temp;
+ struct tipc_subscription *sub;
+ struct tipc_subscription *sub_temp;
int found = 0;
/* Find first matching subscription, exit if not found */
@@ -314,10 +314,10 @@ static void subscr_cancel(struct tipc_subscr *s,
* Called with subscriber port locked.
*/
-static struct subscription *subscr_subscribe(struct tipc_subscr *s,
- struct subscriber *subscriber)
+static struct tipc_subscription *subscr_subscribe(struct tipc_subscr *s,
+ struct tipc_subscriber *subscriber)
{
- struct subscription *sub;
+ struct tipc_subscription *sub;
int swap;
/* Determine subscriber's endianness */
@@ -393,7 +393,7 @@ static void subscr_conn_shutdown_event(void *usr_handle,
unsigned int size,
int reason)
{
- struct subscriber *subscriber = usr_handle;
+ struct tipc_subscriber *subscriber = usr_handle;
spinlock_t *subscriber_lock;
if (tipc_port_lock(port_ref) == NULL)
@@ -416,9 +416,9 @@ static void subscr_conn_msg_event(void *usr_handle,
const unchar *data,
u32 size)
{
- struct subscriber *subscriber = usr_handle;
+ struct tipc_subscriber *subscriber = usr_handle;
spinlock_t *subscriber_lock;
- struct subscription *sub;
+ struct tipc_subscription *sub;
/*
* Lock subscriber's server port (& make a local copy of lock pointer,
@@ -471,12 +471,12 @@ static void subscr_named_msg_event(void *usr_handle,
struct tipc_portid const *orig,
struct tipc_name_seq const *dest)
{
- struct subscriber *subscriber;
+ struct tipc_subscriber *subscriber;
u32 server_port_ref;
/* Create subscriber object */
- subscriber = kzalloc(sizeof(struct subscriber), GFP_ATOMIC);
+ subscriber = kzalloc(sizeof(struct tipc_subscriber), GFP_ATOMIC);
if (subscriber == NULL) {
warn("Subscriber rejected, no memory\n");
return;
@@ -568,8 +568,8 @@ failed:
void tipc_subscr_stop(void)
{
- struct subscriber *subscriber;
- struct subscriber *subscriber_temp;
+ struct tipc_subscriber *subscriber;
+ struct tipc_subscriber *subscriber_temp;
spinlock_t *subscriber_lock;
if (topsrv.setup_port) {
diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h
index 4b06ef6f8401..ef6529c8456f 100644
--- a/net/tipc/subscr.h
+++ b/net/tipc/subscr.h
@@ -37,10 +37,10 @@
#ifndef _TIPC_SUBSCR_H
#define _TIPC_SUBSCR_H
-struct subscription;
+struct tipc_subscription;
/**
- * struct subscription - TIPC network topology subscription object
+ * struct tipc_subscription - TIPC network topology subscription object
* @seq: name sequence associated with subscription
* @timeout: duration of subscription (in ms)
* @filter: event filtering to be done for subscription
@@ -52,7 +52,7 @@ struct subscription;
* @evt: template for events generated by subscription
*/
-struct subscription {
+struct tipc_subscription {
struct tipc_name_seq seq;
u32 timeout;
u32 filter;
@@ -64,11 +64,11 @@ struct subscription {
struct tipc_event evt;
};
-int tipc_subscr_overlap(struct subscription *sub,
+int tipc_subscr_overlap(struct tipc_subscription *sub,
u32 found_lower,
u32 found_upper);
-void tipc_subscr_report_overlap(struct subscription *sub,
+void tipc_subscr_report_overlap(struct tipc_subscription *sub,
u32 found_lower,
u32 found_upper,
u32 event,
diff --git a/net/unix/Kconfig b/net/unix/Kconfig
index 5a69733bcdad..c2128b10e5f9 100644
--- a/net/unix/Kconfig
+++ b/net/unix/Kconfig
@@ -19,3 +19,10 @@ config UNIX
Say Y unless you know what you are doing.
+config UNIX_DIAG
+ tristate "UNIX: socket monitoring interface"
+ depends on UNIX
+ default UNIX
+ ---help---
+ Support for UNIX socket monitoring interface used by the ss tool.
+ If unsure, say Y.
diff --git a/net/unix/Makefile b/net/unix/Makefile
index b852a2bde9a8..b663c607b1c6 100644
--- a/net/unix/Makefile
+++ b/net/unix/Makefile
@@ -6,3 +6,6 @@ obj-$(CONFIG_UNIX) += unix.o
unix-y := af_unix.o garbage.o
unix-$(CONFIG_SYSCTL) += sysctl_net_unix.o
+
+obj-$(CONFIG_UNIX_DIAG) += unix_diag.o
+unix_diag-y := diag.o
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 466fbcc5cf77..7cc3d7b23d1c 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -115,8 +115,10 @@
#include <net/checksum.h>
#include <linux/security.h>
-static struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
-static DEFINE_SPINLOCK(unix_table_lock);
+struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
+EXPORT_SYMBOL_GPL(unix_socket_table);
+DEFINE_SPINLOCK(unix_table_lock);
+EXPORT_SYMBOL_GPL(unix_table_lock);
static atomic_long_t unix_nr_socks;
#define unix_sockets_unbound (&unix_socket_table[UNIX_HASH_SIZE])
@@ -172,7 +174,7 @@ static inline int unix_recvq_full(struct sock const *sk)
return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
}
-static struct sock *unix_peer_get(struct sock *s)
+struct sock *unix_peer_get(struct sock *s)
{
struct sock *peer;
@@ -183,6 +185,7 @@ static struct sock *unix_peer_get(struct sock *s)
unix_state_unlock(s);
return peer;
}
+EXPORT_SYMBOL_GPL(unix_peer_get);
static inline void unix_release_addr(struct unix_address *addr)
{
@@ -1957,6 +1960,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
if ((UNIXCB(skb).pid != siocb->scm->pid) ||
(UNIXCB(skb).cred != siocb->scm->cred)) {
skb_queue_head(&sk->sk_receive_queue, skb);
+ sk->sk_data_ready(sk, skb->len);
break;
}
} else {
@@ -1974,6 +1978,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
chunk = min_t(unsigned int, skb->len, size);
if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
skb_queue_head(&sk->sk_receive_queue, skb);
+ sk->sk_data_ready(sk, skb->len);
if (copied == 0)
copied = -EFAULT;
break;
@@ -1991,6 +1996,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
/* put the skb back if we didn't use it up.. */
if (skb->len) {
skb_queue_head(&sk->sk_receive_queue, skb);
+ sk->sk_data_ready(sk, skb->len);
break;
}
@@ -2006,6 +2012,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
/* put message back and return */
skb_queue_head(&sk->sk_receive_queue, skb);
+ sk->sk_data_ready(sk, skb->len);
break;
}
} while (size);
@@ -2058,6 +2065,36 @@ static int unix_shutdown(struct socket *sock, int mode)
return 0;
}
+long unix_inq_len(struct sock *sk)
+{
+ struct sk_buff *skb;
+ long amount = 0;
+
+ if (sk->sk_state == TCP_LISTEN)
+ return -EINVAL;
+
+ spin_lock(&sk->sk_receive_queue.lock);
+ if (sk->sk_type == SOCK_STREAM ||
+ sk->sk_type == SOCK_SEQPACKET) {
+ skb_queue_walk(&sk->sk_receive_queue, skb)
+ amount += skb->len;
+ } else {
+ skb = skb_peek(&sk->sk_receive_queue);
+ if (skb)
+ amount = skb->len;
+ }
+ spin_unlock(&sk->sk_receive_queue.lock);
+
+ return amount;
+}
+EXPORT_SYMBOL_GPL(unix_inq_len);
+
+long unix_outq_len(struct sock *sk)
+{
+ return sk_wmem_alloc_get(sk);
+}
+EXPORT_SYMBOL_GPL(unix_outq_len);
+
static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
struct sock *sk = sock->sk;
@@ -2066,33 +2103,16 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
switch (cmd) {
case SIOCOUTQ:
- amount = sk_wmem_alloc_get(sk);
+ amount = unix_outq_len(sk);
err = put_user(amount, (int __user *)arg);
break;
case SIOCINQ:
- {
- struct sk_buff *skb;
-
- if (sk->sk_state == TCP_LISTEN) {
- err = -EINVAL;
- break;
- }
-
- spin_lock(&sk->sk_receive_queue.lock);
- if (sk->sk_type == SOCK_STREAM ||
- sk->sk_type == SOCK_SEQPACKET) {
- skb_queue_walk(&sk->sk_receive_queue, skb)
- amount += skb->len;
- } else {
- skb = skb_peek(&sk->sk_receive_queue);
- if (skb)
- amount = skb->len;
- }
- spin_unlock(&sk->sk_receive_queue.lock);
+ amount = unix_inq_len(sk);
+ if (amount < 0)
+ err = amount;
+ else
err = put_user(amount, (int __user *)arg);
- break;
- }
-
+ break;
default:
err = -ENOIOCTLCMD;
break;
diff --git a/net/unix/diag.c b/net/unix/diag.c
new file mode 100644
index 000000000000..6b7697fd911b
--- /dev/null
+++ b/net/unix/diag.c
@@ -0,0 +1,329 @@
+#include <linux/types.h>
+#include <linux/spinlock.h>
+#include <linux/sock_diag.h>
+#include <linux/unix_diag.h>
+#include <linux/skbuff.h>
+#include <linux/module.h>
+#include <net/netlink.h>
+#include <net/af_unix.h>
+#include <net/tcp_states.h>
+
+#define UNIX_DIAG_PUT(skb, attrtype, attrlen) \
+ RTA_DATA(__RTA_PUT(skb, attrtype, attrlen))
+
+static int sk_diag_dump_name(struct sock *sk, struct sk_buff *nlskb)
+{
+ struct unix_address *addr = unix_sk(sk)->addr;
+ char *s;
+
+ if (addr) {
+ s = UNIX_DIAG_PUT(nlskb, UNIX_DIAG_NAME, addr->len - sizeof(short));
+ memcpy(s, addr->name->sun_path, addr->len - sizeof(short));
+ }
+
+ return 0;
+
+rtattr_failure:
+ return -EMSGSIZE;
+}
+
+static int sk_diag_dump_vfs(struct sock *sk, struct sk_buff *nlskb)
+{
+ struct dentry *dentry = unix_sk(sk)->dentry;
+ struct unix_diag_vfs *uv;
+
+ if (dentry) {
+ uv = UNIX_DIAG_PUT(nlskb, UNIX_DIAG_VFS, sizeof(*uv));
+ uv->udiag_vfs_ino = dentry->d_inode->i_ino;
+ uv->udiag_vfs_dev = dentry->d_sb->s_dev;
+ }
+
+ return 0;
+
+rtattr_failure:
+ return -EMSGSIZE;
+}
+
+static int sk_diag_dump_peer(struct sock *sk, struct sk_buff *nlskb)
+{
+ struct sock *peer;
+ int ino;
+
+ peer = unix_peer_get(sk);
+ if (peer) {
+ unix_state_lock(peer);
+ ino = sock_i_ino(peer);
+ unix_state_unlock(peer);
+ sock_put(peer);
+
+ RTA_PUT_U32(nlskb, UNIX_DIAG_PEER, ino);
+ }
+
+ return 0;
+rtattr_failure:
+ return -EMSGSIZE;
+}
+
+static int sk_diag_dump_icons(struct sock *sk, struct sk_buff *nlskb)
+{
+ struct sk_buff *skb;
+ u32 *buf;
+ int i;
+
+ if (sk->sk_state == TCP_LISTEN) {
+ spin_lock(&sk->sk_receive_queue.lock);
+ buf = UNIX_DIAG_PUT(nlskb, UNIX_DIAG_ICONS,
+ sk->sk_receive_queue.qlen * sizeof(u32));
+ i = 0;
+ skb_queue_walk(&sk->sk_receive_queue, skb) {
+ struct sock *req, *peer;
+
+ req = skb->sk;
+ /*
+ * The state lock is outer for the same sk's
+ * queue lock. With the other's queue locked it's
+ * OK to lock the state.
+ */
+ unix_state_lock_nested(req);
+ peer = unix_sk(req)->peer;
+ buf[i++] = (peer ? sock_i_ino(peer) : 0);
+ unix_state_unlock(req);
+ }
+ spin_unlock(&sk->sk_receive_queue.lock);
+ }
+
+ return 0;
+
+rtattr_failure:
+ spin_unlock(&sk->sk_receive_queue.lock);
+ return -EMSGSIZE;
+}
+
+static int sk_diag_show_rqlen(struct sock *sk, struct sk_buff *nlskb)
+{
+ struct unix_diag_rqlen *rql;
+
+ rql = UNIX_DIAG_PUT(nlskb, UNIX_DIAG_RQLEN, sizeof(*rql));
+
+ if (sk->sk_state == TCP_LISTEN) {
+ rql->udiag_rqueue = sk->sk_receive_queue.qlen;
+ rql->udiag_wqueue = sk->sk_max_ack_backlog;
+ } else {
+ rql->udiag_rqueue = (__u32)unix_inq_len(sk);
+ rql->udiag_wqueue = (__u32)unix_outq_len(sk);
+ }
+
+ return 0;
+
+rtattr_failure:
+ return -EMSGSIZE;
+}
+
+static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_req *req,
+ u32 pid, u32 seq, u32 flags, int sk_ino)
+{
+ unsigned char *b = skb_tail_pointer(skb);
+ struct nlmsghdr *nlh;
+ struct unix_diag_msg *rep;
+
+ nlh = NLMSG_PUT(skb, pid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*rep));
+ nlh->nlmsg_flags = flags;
+
+ rep = NLMSG_DATA(nlh);
+
+ rep->udiag_family = AF_UNIX;
+ rep->udiag_type = sk->sk_type;
+ rep->udiag_state = sk->sk_state;
+ rep->udiag_ino = sk_ino;
+ sock_diag_save_cookie(sk, rep->udiag_cookie);
+
+ if ((req->udiag_show & UDIAG_SHOW_NAME) &&
+ sk_diag_dump_name(sk, skb))
+ goto nlmsg_failure;
+
+ if ((req->udiag_show & UDIAG_SHOW_VFS) &&
+ sk_diag_dump_vfs(sk, skb))
+ goto nlmsg_failure;
+
+ if ((req->udiag_show & UDIAG_SHOW_PEER) &&
+ sk_diag_dump_peer(sk, skb))
+ goto nlmsg_failure;
+
+ if ((req->udiag_show & UDIAG_SHOW_ICONS) &&
+ sk_diag_dump_icons(sk, skb))
+ goto nlmsg_failure;
+
+ if ((req->udiag_show & UDIAG_SHOW_RQLEN) &&
+ sk_diag_show_rqlen(sk, skb))
+ goto nlmsg_failure;
+
+ if ((req->udiag_show & UDIAG_SHOW_MEMINFO) &&
+ sock_diag_put_meminfo(sk, skb, UNIX_DIAG_MEMINFO))
+ goto nlmsg_failure;
+
+ nlh->nlmsg_len = skb_tail_pointer(skb) - b;
+ return skb->len;
+
+nlmsg_failure:
+ nlmsg_trim(skb, b);
+ return -EMSGSIZE;
+}
+
+static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, struct unix_diag_req *req,
+ u32 pid, u32 seq, u32 flags)
+{
+ int sk_ino;
+
+ unix_state_lock(sk);
+ sk_ino = sock_i_ino(sk);
+ unix_state_unlock(sk);
+
+ if (!sk_ino)
+ return 0;
+
+ return sk_diag_fill(sk, skb, req, pid, seq, flags, sk_ino);
+}
+
+static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct unix_diag_req *req;
+ int num, s_num, slot, s_slot;
+
+ req = NLMSG_DATA(cb->nlh);
+
+ s_slot = cb->args[0];
+ num = s_num = cb->args[1];
+
+ spin_lock(&unix_table_lock);
+ for (slot = s_slot; slot <= UNIX_HASH_SIZE; s_num = 0, slot++) {
+ struct sock *sk;
+ struct hlist_node *node;
+
+ num = 0;
+ sk_for_each(sk, node, &unix_socket_table[slot]) {
+ if (num < s_num)
+ goto next;
+ if (!(req->udiag_states & (1 << sk->sk_state)))
+ goto next;
+ if (sk_diag_dump(sk, skb, req,
+ NETLINK_CB(cb->skb).pid,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI) < 0)
+ goto done;
+next:
+ num++;
+ }
+ }
+done:
+ spin_unlock(&unix_table_lock);
+ cb->args[0] = slot;
+ cb->args[1] = num;
+
+ return skb->len;
+}
+
+static struct sock *unix_lookup_by_ino(int ino)
+{
+ int i;
+ struct sock *sk;
+
+ spin_lock(&unix_table_lock);
+ for (i = 0; i <= UNIX_HASH_SIZE; i++) {
+ struct hlist_node *node;
+
+ sk_for_each(sk, node, &unix_socket_table[i])
+ if (ino == sock_i_ino(sk)) {
+ sock_hold(sk);
+ spin_unlock(&unix_table_lock);
+
+ return sk;
+ }
+ }
+
+ spin_unlock(&unix_table_lock);
+ return NULL;
+}
+
+static int unix_diag_get_exact(struct sk_buff *in_skb,
+ const struct nlmsghdr *nlh,
+ struct unix_diag_req *req)
+{
+ int err = -EINVAL;
+ struct sock *sk;
+ struct sk_buff *rep;
+ unsigned int extra_len;
+
+ if (req->udiag_ino == 0)
+ goto out_nosk;
+
+ sk = unix_lookup_by_ino(req->udiag_ino);
+ err = -ENOENT;
+ if (sk == NULL)
+ goto out_nosk;
+
+ err = sock_diag_check_cookie(sk, req->udiag_cookie);
+ if (err)
+ goto out;
+
+ extra_len = 256;
+again:
+ err = -ENOMEM;
+ rep = alloc_skb(NLMSG_SPACE((sizeof(struct unix_diag_msg) + extra_len)),
+ GFP_KERNEL);
+ if (!rep)
+ goto out;
+
+ err = sk_diag_fill(sk, rep, req, NETLINK_CB(in_skb).pid,
+ nlh->nlmsg_seq, 0, req->udiag_ino);
+ if (err < 0) {
+ kfree_skb(rep);
+ extra_len += 256;
+ if (extra_len >= PAGE_SIZE)
+ goto out;
+
+ goto again;
+ }
+ err = netlink_unicast(sock_diag_nlsk, rep, NETLINK_CB(in_skb).pid,
+ MSG_DONTWAIT);
+ if (err > 0)
+ err = 0;
+out:
+ if (sk)
+ sock_put(sk);
+out_nosk:
+ return err;
+}
+
+static int unix_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
+{
+ int hdrlen = sizeof(struct unix_diag_req);
+
+ if (nlmsg_len(h) < hdrlen)
+ return -EINVAL;
+
+ if (h->nlmsg_flags & NLM_F_DUMP)
+ return netlink_dump_start(sock_diag_nlsk, skb, h,
+ unix_diag_dump, NULL, 0);
+ else
+ return unix_diag_get_exact(skb, h, (struct unix_diag_req *)NLMSG_DATA(h));
+}
+
+static struct sock_diag_handler unix_diag_handler = {
+ .family = AF_UNIX,
+ .dump = unix_diag_handler_dump,
+};
+
+static int __init unix_diag_init(void)
+{
+ return sock_diag_register(&unix_diag_handler);
+}
+
+static void __exit unix_diag_exit(void)
+{
+ sock_diag_unregister(&unix_diag_handler);
+}
+
+module_init(unix_diag_init);
+module_exit(unix_diag_exit);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 1 /* AF_LOCAL */);
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 3e16c6abde4f..a306bc66000e 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -232,7 +232,7 @@ static int x25_device_event(struct notifier_block *this, unsigned long event,
return NOTIFY_DONE;
if (dev->type == ARPHRD_X25
-#if defined(CONFIG_LLC) || defined(CONFIG_LLC_MODULE)
+#if IS_ENABLED(CONFIG_LLC)
|| dev->type == ARPHRD_ETHER
#endif
) {
diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c
index fa2b41888bd9..f0ce862d1f46 100644
--- a/net/x25/x25_dev.c
+++ b/net/x25/x25_dev.c
@@ -161,7 +161,7 @@ void x25_establish_link(struct x25_neigh *nb)
*ptr = X25_IFACE_CONNECT;
break;
-#if defined(CONFIG_LLC) || defined(CONFIG_LLC_MODULE)
+#if IS_ENABLED(CONFIG_LLC)
case ARPHRD_ETHER:
return;
#endif
@@ -180,7 +180,7 @@ void x25_terminate_link(struct x25_neigh *nb)
struct sk_buff *skb;
unsigned char *ptr;
-#if defined(CONFIG_LLC) || defined(CONFIG_LLC_MODULE)
+#if IS_ENABLED(CONFIG_LLC)
if (nb->dev->type == ARPHRD_ETHER)
return;
#endif
@@ -213,7 +213,7 @@ void x25_send_frame(struct sk_buff *skb, struct x25_neigh *nb)
*dptr = X25_IFACE_DATA;
break;
-#if defined(CONFIG_LLC) || defined(CONFIG_LLC_MODULE)
+#if IS_ENABLED(CONFIG_LLC)
case ARPHRD_ETHER:
kfree_skb(skb);
return;
diff --git a/net/x25/x25_route.c b/net/x25/x25_route.c
index 97d77c532d8c..cf6366270054 100644
--- a/net/x25/x25_route.c
+++ b/net/x25/x25_route.c
@@ -134,7 +134,7 @@ struct net_device *x25_dev_get(char *devname)
if (dev &&
(!(dev->flags & IFF_UP) || (dev->type != ARPHRD_X25
-#if defined(CONFIG_LLC) || defined(CONFIG_LLC_MODULE)
+#if IS_ENABLED(CONFIG_LLC)
&& dev->type != ARPHRD_ETHER
#endif
))){
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 552df27dcf53..7661576b6f45 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -61,8 +61,8 @@ __xfrm4_selector_match(const struct xfrm_selector *sel, const struct flowi *fl)
{
const struct flowi4 *fl4 = &fl->u.ip4;
- return addr_match(&fl4->daddr, &sel->daddr, sel->prefixlen_d) &&
- addr_match(&fl4->saddr, &sel->saddr, sel->prefixlen_s) &&
+ return addr4_match(fl4->daddr, sel->daddr.a4, sel->prefixlen_d) &&
+ addr4_match(fl4->saddr, sel->saddr.a4, sel->prefixlen_s) &&
!((xfrm_flowi_dport(fl, &fl4->uli) ^ sel->dport) & sel->dport_mask) &&
!((xfrm_flowi_sport(fl, &fl4->uli) ^ sel->sport) & sel->sport_mask) &&
(fl4->flowi4_proto == sel->proto || !sel->proto) &&
@@ -1340,7 +1340,7 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
case AF_INET:
dst_ops = &net->xfrm.xfrm4_dst_ops;
break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
dst_ops = &net->xfrm.xfrm6_dst_ops;
break;
@@ -1499,7 +1499,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
goto free_dst;
/* Copy neighbour for reachability confirmation */
- dst_set_neighbour(dst0, neigh_clone(dst_get_neighbour(dst)));
+ dst_set_neighbour(dst0, neigh_clone(dst_get_neighbour_noref(dst)));
xfrm_init_path((struct xfrm_dst *)dst0, dst, nfheader_len);
xfrm_init_pmtu(dst_prev);
@@ -2276,8 +2276,6 @@ static void __xfrm_garbage_collect(struct net *net)
{
struct dst_entry *head, *next;
- flow_cache_flush();
-
spin_lock_bh(&xfrm_policy_sk_bundle_lock);
head = xfrm_policy_sk_bundles;
xfrm_policy_sk_bundles = NULL;
@@ -2290,6 +2288,18 @@ static void __xfrm_garbage_collect(struct net *net)
}
}
+static void xfrm_garbage_collect(struct net *net)
+{
+ flow_cache_flush();
+ __xfrm_garbage_collect(net);
+}
+
+static void xfrm_garbage_collect_deferred(struct net *net)
+{
+ flow_cache_flush_deferred();
+ __xfrm_garbage_collect(net);
+}
+
static void xfrm_init_pmtu(struct dst_entry *dst)
{
do {
@@ -2382,9 +2392,11 @@ static unsigned int xfrm_default_advmss(const struct dst_entry *dst)
return dst_metric_advmss(dst->path);
}
-static unsigned int xfrm_default_mtu(const struct dst_entry *dst)
+static unsigned int xfrm_mtu(const struct dst_entry *dst)
{
- return dst_mtu(dst->path);
+ unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
+
+ return mtu ? : dst_mtu(dst->path);
}
static struct neighbour *xfrm_neigh_lookup(const struct dst_entry *dst, const void *daddr)
@@ -2411,8 +2423,8 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
dst_ops->check = xfrm_dst_check;
if (likely(dst_ops->default_advmss == NULL))
dst_ops->default_advmss = xfrm_default_advmss;
- if (likely(dst_ops->default_mtu == NULL))
- dst_ops->default_mtu = xfrm_default_mtu;
+ if (likely(dst_ops->mtu == NULL))
+ dst_ops->mtu = xfrm_mtu;
if (likely(dst_ops->negative_advice == NULL))
dst_ops->negative_advice = xfrm_negative_advice;
if (likely(dst_ops->link_failure == NULL))
@@ -2420,7 +2432,7 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
if (likely(dst_ops->neigh_lookup == NULL))
dst_ops->neigh_lookup = xfrm_neigh_lookup;
if (likely(afinfo->garbage_collect == NULL))
- afinfo->garbage_collect = __xfrm_garbage_collect;
+ afinfo->garbage_collect = xfrm_garbage_collect_deferred;
xfrm_policy_afinfo[afinfo->family] = afinfo;
}
write_unlock_bh(&xfrm_policy_afinfo_lock);
@@ -2433,7 +2445,7 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
case AF_INET:
xfrm_dst_ops = &net->xfrm.xfrm4_dst_ops;
break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
xfrm_dst_ops = &net->xfrm.xfrm6_dst_ops;
break;
@@ -2483,7 +2495,7 @@ static void __net_init xfrm_dst_ops_init(struct net *net)
afinfo = xfrm_policy_afinfo[AF_INET];
if (afinfo)
net->xfrm.xfrm4_dst_ops = *afinfo->dst_ops;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
afinfo = xfrm_policy_afinfo[AF_INET6];
if (afinfo)
net->xfrm.xfrm6_dst_ops = *afinfo->dst_ops;
@@ -2514,7 +2526,7 @@ static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void
switch (event) {
case NETDEV_DOWN:
- __xfrm_garbage_collect(dev_net(dev));
+ xfrm_garbage_collect(dev_net(dev));
}
return NOTIFY_DONE;
}
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 9414b9c5b1e4..5b228f97d4b3 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1035,16 +1035,12 @@ static struct xfrm_state *__find_acq_core(struct net *net, struct xfrm_mark *m,
break;
case AF_INET6:
- ipv6_addr_copy((struct in6_addr *)x->sel.daddr.a6,
- (const struct in6_addr *)daddr);
- ipv6_addr_copy((struct in6_addr *)x->sel.saddr.a6,
- (const struct in6_addr *)saddr);
+ *(struct in6_addr *)x->sel.daddr.a6 = *(struct in6_addr *)daddr;
+ *(struct in6_addr *)x->sel.saddr.a6 = *(struct in6_addr *)saddr;
x->sel.prefixlen_d = 128;
x->sel.prefixlen_s = 128;
- ipv6_addr_copy((struct in6_addr *)x->props.saddr.a6,
- (const struct in6_addr *)saddr);
- ipv6_addr_copy((struct in6_addr *)x->id.daddr.a6,
- (const struct in6_addr *)daddr);
+ *(struct in6_addr *)x->props.saddr.a6 = *(struct in6_addr *)saddr;
+ *(struct in6_addr *)x->id.daddr.a6 = *(struct in6_addr *)daddr;
break;
}
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index d0a42df5160e..e0d747a2e803 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -28,7 +28,7 @@
#include <net/netlink.h>
#include <net/ah.h>
#include <asm/uaccess.h>
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
#include <linux/in6.h>
#endif
@@ -150,7 +150,7 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
break;
case AF_INET6:
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
break;
#else
err = -EAFNOSUPPORT;
@@ -201,7 +201,7 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
goto out;
break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
case IPPROTO_DSTOPTS:
case IPPROTO_ROUTING:
if (attrs[XFRMA_ALG_COMP] ||
@@ -1160,7 +1160,7 @@ static int verify_newpolicy_info(struct xfrm_userpolicy_info *p)
break;
case AF_INET6:
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
break;
#else
return -EAFNOSUPPORT;
@@ -1231,7 +1231,7 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family)
switch (ut[i].family) {
case AF_INET:
break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
break;
#endif
@@ -2604,7 +2604,7 @@ static struct xfrm_policy *xfrm_compile_policy(struct sock *sk, int opt,
return NULL;
}
break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
if (opt != IPV6_XFRM_POLICY) {
*dir = -EOPNOTSUPP;