summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorEric Dumazet <eric.dumazet@gmail.com>2010-05-11 23:19:48 +0000
committerDavid S. Miller <davem@davemloft.net>2010-05-17 17:18:50 -0700
commit7fee226ad2397b635e2fd565a59ca3ae08a164cd (patch)
tree0bcd26150ad74ec1a237109de87a3d214a07fc22 /include
parentebda37c27d0c768947e9b058332d7ea798210cf8 (diff)
net: add a noref bit on skb dst
Use low order bit of skb->_skb_dst to tell dst is not refcounted. Change _skb_dst to _skb_refdst to make sure all uses are catched. skb_dst() returns the dst, regardless of noref bit set or not, but with a lockdep check to make sure a noref dst is not given if current user is not rcu protected. New skb_dst_set_noref() helper to set an notrefcounted dst on a skb. (with lockdep check) skb_dst_drop() drops a reference only if skb dst was refcounted. skb_dst_force() helper is used to force a refcount on dst, when skb is queued and not anymore RCU protected. Use skb_dst_force() in __sk_add_backlog(), __dev_xmit_skb() if !IFF_XMIT_DST_RELEASE or skb enqueued on qdisc queue, in sock_queue_rcv_skb(), in __nf_queue(). Use skb_dst_force() in dev_requeue_skb(). Note: dst_use_noref() still dirties dst, we might transform it later to do one dirtying per jiffies. Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include')
-rw-r--r--include/linux/skbuff.h58
-rw-r--r--include/net/dst.h48
-rw-r--r--include/net/sock.h13
3 files changed, 107 insertions, 12 deletions
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index c9525bce80f6..7cdfb4d52847 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -264,7 +264,7 @@ typedef unsigned char *sk_buff_data_t;
* @transport_header: Transport layer header
* @network_header: Network layer header
* @mac_header: Link layer header
- * @_skb_dst: destination entry
+ * @_skb_refdst: destination entry (with norefcount bit)
* @sp: the security path, used for xfrm
* @cb: Control buffer. Free for use by every layer. Put private vars here
* @len: Length of actual data
@@ -328,7 +328,7 @@ struct sk_buff {
*/
char cb[48] __aligned(8);
- unsigned long _skb_dst;
+ unsigned long _skb_refdst;
#ifdef CONFIG_XFRM
struct sec_path *sp;
#endif
@@ -419,14 +419,64 @@ struct sk_buff {
#include <asm/system.h>
+/*
+ * skb might have a dst pointer attached, refcounted or not.
+ * _skb_refdst low order bit is set if refcount was _not_ taken
+ */
+#define SKB_DST_NOREF 1UL
+#define SKB_DST_PTRMASK ~(SKB_DST_NOREF)
+
+/**
+ * skb_dst - returns skb dst_entry
+ * @skb: buffer
+ *
+ * Returns skb dst_entry, regardless of reference taken or not.
+ */
static inline struct dst_entry *skb_dst(const struct sk_buff *skb)
{
- return (struct dst_entry *)skb->_skb_dst;
+ /* If refdst was not refcounted, check we still are in a
+ * rcu_read_lock section
+ */
+ WARN_ON((skb->_skb_refdst & SKB_DST_NOREF) &&
+ !rcu_read_lock_held() &&
+ !rcu_read_lock_bh_held());
+ return (struct dst_entry *)(skb->_skb_refdst & SKB_DST_PTRMASK);
}
+/**
+ * skb_dst_set - sets skb dst
+ * @skb: buffer
+ * @dst: dst entry
+ *
+ * Sets skb dst, assuming a reference was taken on dst and should
+ * be released by skb_dst_drop()
+ */
static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst)
{
- skb->_skb_dst = (unsigned long)dst;
+ skb->_skb_refdst = (unsigned long)dst;
+}
+
+/**
+ * skb_dst_set_noref - sets skb dst, without a reference
+ * @skb: buffer
+ * @dst: dst entry
+ *
+ * Sets skb dst, assuming a reference was not taken on dst
+ * skb_dst_drop() should not dst_release() this dst
+ */
+static inline void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst)
+{
+ WARN_ON(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
+ skb->_skb_refdst = (unsigned long)dst | SKB_DST_NOREF;
+}
+
+/**
+ * skb_dst_is_noref - Test if skb dst isnt refcounted
+ * @skb: buffer
+ */
+static inline bool skb_dst_is_noref(const struct sk_buff *skb)
+{
+ return (skb->_skb_refdst & SKB_DST_NOREF) && skb_dst(skb);
}
static inline struct rtable *skb_rtable(const struct sk_buff *skb)
diff --git a/include/net/dst.h b/include/net/dst.h
index aac5a5fcfda9..27207a13f2a6 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -168,6 +168,12 @@ static inline void dst_use(struct dst_entry *dst, unsigned long time)
dst->lastuse = time;
}
+static inline void dst_use_noref(struct dst_entry *dst, unsigned long time)
+{
+ dst->__use++;
+ dst->lastuse = time;
+}
+
static inline
struct dst_entry * dst_clone(struct dst_entry * dst)
{
@@ -177,11 +183,47 @@ struct dst_entry * dst_clone(struct dst_entry * dst)
}
extern void dst_release(struct dst_entry *dst);
+
+static inline void refdst_drop(unsigned long refdst)
+{
+ if (!(refdst & SKB_DST_NOREF))
+ dst_release((struct dst_entry *)(refdst & SKB_DST_PTRMASK));
+}
+
+/**
+ * skb_dst_drop - drops skb dst
+ * @skb: buffer
+ *
+ * Drops dst reference count if a reference was taken.
+ */
static inline void skb_dst_drop(struct sk_buff *skb)
{
- if (skb->_skb_dst)
- dst_release(skb_dst(skb));
- skb->_skb_dst = 0UL;
+ if (skb->_skb_refdst) {
+ refdst_drop(skb->_skb_refdst);
+ skb->_skb_refdst = 0UL;
+ }
+}
+
+static inline void skb_dst_copy(struct sk_buff *nskb, const struct sk_buff *oskb)
+{
+ nskb->_skb_refdst = oskb->_skb_refdst;
+ if (!(nskb->_skb_refdst & SKB_DST_NOREF))
+ dst_clone(skb_dst(nskb));
+}
+
+/**
+ * skb_dst_force - makes sure skb dst is refcounted
+ * @skb: buffer
+ *
+ * If dst is not yet refcounted, let's do it
+ */
+static inline void skb_dst_force(struct sk_buff *skb)
+{
+ if (skb_dst_is_noref(skb)) {
+ WARN_ON(!rcu_read_lock_held());
+ skb->_skb_refdst &= ~SKB_DST_NOREF;
+ dst_clone(skb_dst(skb));
+ }
}
/* Children define the path of the packet through the
diff --git a/include/net/sock.h b/include/net/sock.h
index aed16eb9db4b..5697caf8cc76 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -600,12 +600,15 @@ static inline int sk_stream_memory_free(struct sock *sk)
/* OOB backlog add */
static inline void __sk_add_backlog(struct sock *sk, struct sk_buff *skb)
{
- if (!sk->sk_backlog.tail) {
- sk->sk_backlog.head = sk->sk_backlog.tail = skb;
- } else {
+ /* dont let skb dst not refcounted, we are going to leave rcu lock */
+ skb_dst_force(skb);
+
+ if (!sk->sk_backlog.tail)
+ sk->sk_backlog.head = skb;
+ else
sk->sk_backlog.tail->next = skb;
- sk->sk_backlog.tail = skb;
- }
+
+ sk->sk_backlog.tail = skb;
skb->next = NULL;
}