diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-07-05 12:31:59 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-07-05 12:31:59 -0700 |
commit | 5518b69b76680a4f2df96b1deca260059db0c2de (patch) | |
tree | f33cd1519c8efb4590500f2f9617400be233238c /net/core/sock.c | |
parent | 8ad06e56dcbc1984ef0ff8f6e3c19982c5809f73 (diff) | |
parent | 0e72582270c07850b92cac351c8b97d4f9c123b9 (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:
"Reasonably busy this cycle, but perhaps not as busy as in the 4.12
merge window:
1) Several optimizations for UDP processing under high load from
Paolo Abeni.
2) Support pacing internally in TCP when using the sch_fq packet
scheduler for this is not practical. From Eric Dumazet.
3) Support mutliple filter chains per qdisc, from Jiri Pirko.
4) Move to 1ms TCP timestamp clock, from Eric Dumazet.
5) Add batch dequeueing to vhost_net, from Jason Wang.
6) Flesh out more completely SCTP checksum offload support, from
Davide Caratti.
7) More plumbing of extended netlink ACKs, from David Ahern, Pablo
Neira Ayuso, and Matthias Schiffer.
8) Add devlink support to nfp driver, from Simon Horman.
9) Add RTM_F_FIB_MATCH flag to RTM_GETROUTE queries, from Roopa
Prabhu.
10) Add stack depth tracking to BPF verifier and use this information
in the various eBPF JITs. From Alexei Starovoitov.
11) Support XDP on qed device VFs, from Yuval Mintz.
12) Introduce BPF PROG ID for better introspection of installed BPF
programs. From Martin KaFai Lau.
13) Add bpf_set_hash helper for TC bpf programs, from Daniel Borkmann.
14) For loads, allow narrower accesses in bpf verifier checking, from
Yonghong Song.
15) Support MIPS in the BPF selftests and samples infrastructure, the
MIPS eBPF JIT will be merged in via the MIPS GIT tree. From David
Daney.
16) Support kernel based TLS, from Dave Watson and others.
17) Remove completely DST garbage collection, from Wei Wang.
18) Allow installing TCP MD5 rules using prefixes, from Ivan
Delalande.
19) Add XDP support to Intel i40e driver, from Björn Töpel
20) Add support for TC flower offload in nfp driver, from Simon
Horman, Pieter Jansen van Vuuren, Benjamin LaHaise, Jakub
Kicinski, and Bert van Leeuwen.
21) IPSEC offloading support in mlx5, from Ilan Tayari.
22) Add HW PTP support to macb driver, from Rafal Ozieblo.
23) Networking refcount_t conversions, From Elena Reshetova.
24) Add sock_ops support to BPF, from Lawrence Brako. This is useful
for tuning the TCP sockopt settings of a group of applications,
currently via CGROUPs"
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1899 commits)
net: phy: dp83867: add workaround for incorrect RX_CTRL pin strap
dt-bindings: phy: dp83867: provide a workaround for incorrect RX_CTRL pin strap
cxgb4: Support for get_ts_info ethtool method
cxgb4: Add PTP Hardware Clock (PHC) support
cxgb4: time stamping interface for PTP
nfp: default to chained metadata prepend format
nfp: remove legacy MAC address lookup
nfp: improve order of interfaces in breakout mode
net: macb: remove extraneous return when MACB_EXT_DESC is defined
bpf: add missing break in for the TCP_BPF_SNDCWND_CLAMP case
bpf: fix return in load_bpf_file
mpls: fix rtm policy in mpls_getroute
net, ax25: convert ax25_cb.refcount from atomic_t to refcount_t
net, ax25: convert ax25_route.refcount from atomic_t to refcount_t
net, ax25: convert ax25_uid_assoc.refcount from atomic_t to refcount_t
net, sctp: convert sctp_ep_common.refcnt from atomic_t to refcount_t
net, sctp: convert sctp_transport.refcnt from atomic_t to refcount_t
net, sctp: convert sctp_chunk.refcnt from atomic_t to refcount_t
net, sctp: convert sctp_datamsg.refcnt from atomic_t to refcount_t
net, sctp: convert sctp_auth_bytes.refcnt from atomic_t to refcount_t
...
Diffstat (limited to 'net/core/sock.c')
-rw-r--r-- | net/core/sock.c | 89 |
1 files changed, 73 insertions, 16 deletions
diff --git a/net/core/sock.c b/net/core/sock.c index 0c3fc16223f9..ac2a404c73eb 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1038,6 +1038,10 @@ set_rcvbuf: #endif case SO_MAX_PACING_RATE: + if (val != ~0U) + cmpxchg(&sk->sk_pacing_status, + SK_PACING_NONE, + SK_PACING_NEEDED); sk->sk_max_pacing_rate = val; sk->sk_pacing_rate = min(sk->sk_pacing_rate, sk->sk_max_pacing_rate); @@ -1074,6 +1078,18 @@ static void cred_to_ucred(struct pid *pid, const struct cred *cred, } } +static int groups_to_user(gid_t __user *dst, const struct group_info *src) +{ + struct user_namespace *user_ns = current_user_ns(); + int i; + + for (i = 0; i < src->ngroups; i++) + if (put_user(from_kgid_munged(user_ns, src->gid[i]), dst + i)) + return -EFAULT; + + return 0; +} + int sock_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) { @@ -1227,6 +1243,27 @@ int sock_getsockopt(struct socket *sock, int level, int optname, goto lenout; } + case SO_PEERGROUPS: + { + int ret, n; + + if (!sk->sk_peer_cred) + return -ENODATA; + + n = sk->sk_peer_cred->group_info->ngroups; + if (len < n * sizeof(gid_t)) { + len = n * sizeof(gid_t); + return put_user(len, optlen) ? -EFAULT : -ERANGE; + } + len = n * sizeof(gid_t); + + ret = groups_to_user((gid_t __user *)optval, + sk->sk_peer_cred->group_info); + if (ret) + return ret; + goto lenout; + } + case SO_PEERNAME: { char address[128]; @@ -1491,7 +1528,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, if (likely(sk->sk_net_refcnt)) get_net(net); sock_net_set(sk, net); - atomic_set(&sk->sk_wmem_alloc, 1); + refcount_set(&sk->sk_wmem_alloc, 1); mem_cgroup_sk_alloc(sk); cgroup_sk_alloc(&sk->sk_cgrp_data); @@ -1515,7 +1552,7 @@ static void __sk_destruct(struct rcu_head *head) sk->sk_destruct(sk); filter = rcu_dereference_check(sk->sk_filter, - atomic_read(&sk->sk_wmem_alloc) == 0); + refcount_read(&sk->sk_wmem_alloc) == 0); if (filter) { sk_filter_uncharge(sk, filter); RCU_INIT_POINTER(sk->sk_filter, NULL); @@ -1565,7 +1602,7 @@ void sk_free(struct sock *sk) * some packets are still in some tx queue. * If not null, sock_wfree() will call __sk_free(sk) later */ - if (atomic_dec_and_test(&sk->sk_wmem_alloc)) + if (refcount_dec_and_test(&sk->sk_wmem_alloc)) __sk_free(sk); } EXPORT_SYMBOL(sk_free); @@ -1622,7 +1659,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) /* * sk_wmem_alloc set to one (see sk_free() and sock_wfree()) */ - atomic_set(&newsk->sk_wmem_alloc, 1); + refcount_set(&newsk->sk_wmem_alloc, 1); atomic_set(&newsk->sk_omem_alloc, 0); sk_init_common(newsk); @@ -1671,7 +1708,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) * (Documentation/RCU/rculist_nulls.txt for details) */ smp_wmb(); - atomic_set(&newsk->sk_refcnt, 2); + refcount_set(&newsk->sk_refcnt, 2); /* * Increment the counter in the same struct proto as the master @@ -1750,7 +1787,7 @@ void sock_wfree(struct sk_buff *skb) * Keep a reference on sk_wmem_alloc, this will be released * after sk_write_space() call */ - atomic_sub(len - 1, &sk->sk_wmem_alloc); + WARN_ON(refcount_sub_and_test(len - 1, &sk->sk_wmem_alloc)); sk->sk_write_space(sk); len = 1; } @@ -1758,7 +1795,7 @@ void sock_wfree(struct sk_buff *skb) * if sk_wmem_alloc reaches 0, we must finish what sk_free() * could not do because of in-flight packets */ - if (atomic_sub_and_test(len, &sk->sk_wmem_alloc)) + if (refcount_sub_and_test(len, &sk->sk_wmem_alloc)) __sk_free(sk); } EXPORT_SYMBOL(sock_wfree); @@ -1770,7 +1807,7 @@ void __sock_wfree(struct sk_buff *skb) { struct sock *sk = skb->sk; - if (atomic_sub_and_test(skb->truesize, &sk->sk_wmem_alloc)) + if (refcount_sub_and_test(skb->truesize, &sk->sk_wmem_alloc)) __sk_free(sk); } @@ -1792,7 +1829,7 @@ void skb_set_owner_w(struct sk_buff *skb, struct sock *sk) * is enough to guarantee sk_free() wont free this sock until * all in-flight packets are completed */ - atomic_add(skb->truesize, &sk->sk_wmem_alloc); + refcount_add(skb->truesize, &sk->sk_wmem_alloc); } EXPORT_SYMBOL(skb_set_owner_w); @@ -1814,8 +1851,8 @@ void skb_orphan_partial(struct sk_buff *skb) ) { struct sock *sk = skb->sk; - if (atomic_inc_not_zero(&sk->sk_refcnt)) { - atomic_sub(skb->truesize, &sk->sk_wmem_alloc); + if (refcount_inc_not_zero(&sk->sk_refcnt)) { + WARN_ON(refcount_sub_and_test(skb->truesize, &sk->sk_wmem_alloc)); skb->destructor = sock_efree; } } else { @@ -1875,7 +1912,7 @@ EXPORT_SYMBOL(sock_i_ino); struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, gfp_t priority) { - if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) { + if (force || refcount_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) { struct sk_buff *skb = alloc_skb(size, priority); if (skb) { skb_set_owner_w(skb, sk); @@ -1950,7 +1987,7 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo) break; set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); - if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) + if (refcount_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) break; if (sk->sk_shutdown & SEND_SHUTDOWN) break; @@ -2072,6 +2109,26 @@ int sock_cmsg_send(struct sock *sk, struct msghdr *msg, } EXPORT_SYMBOL(sock_cmsg_send); +static void sk_enter_memory_pressure(struct sock *sk) +{ + if (!sk->sk_prot->enter_memory_pressure) + return; + + sk->sk_prot->enter_memory_pressure(sk); +} + +static void sk_leave_memory_pressure(struct sock *sk) +{ + if (sk->sk_prot->leave_memory_pressure) { + sk->sk_prot->leave_memory_pressure(sk); + } else { + unsigned long *memory_pressure = sk->sk_prot->memory_pressure; + + if (memory_pressure && *memory_pressure) + *memory_pressure = 0; + } +} + /* On 32bit arches, an skb frag is limited to 2^15 */ #define SKB_FRAG_PAGE_ORDER get_order(32768) @@ -2253,7 +2310,7 @@ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind) if (sk->sk_type == SOCK_STREAM) { if (sk->sk_wmem_queued < prot->sysctl_wmem[0]) return 1; - } else if (atomic_read(&sk->sk_wmem_alloc) < + } else if (refcount_read(&sk->sk_wmem_alloc) < prot->sysctl_wmem[0]) return 1; } @@ -2520,7 +2577,7 @@ static void sock_def_write_space(struct sock *sk) /* Do not wake up a writer until he can make "significant" * progress. --DaveM */ - if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) { + if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) { wq = rcu_dereference(sk->sk_wq); if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, POLLOUT | @@ -2630,7 +2687,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) * (Documentation/RCU/rculist_nulls.txt for details) */ smp_wmb(); - atomic_set(&sk->sk_refcnt, 1); + refcount_set(&sk->sk_refcnt, 1); atomic_set(&sk->sk_drops, 0); } EXPORT_SYMBOL(sock_init_data); |