diff options
Diffstat (limited to 'net/core/skbuff.c')
-rw-r--r-- | net/core/skbuff.c | 76 |
1 files changed, 49 insertions, 27 deletions
diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 466999a7515e..83f8cd8aa2d1 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -277,6 +277,7 @@ static void *page_frag_alloc_1k(struct page_frag_1k *nc, gfp_t gfp_mask) #endif struct napi_alloc_cache { + local_lock_t bh_lock; struct page_frag_cache page; struct page_frag_1k page_small; unsigned int skb_count; @@ -284,7 +285,9 @@ struct napi_alloc_cache { }; static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache); -static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache); +static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache) = { + .bh_lock = INIT_LOCAL_LOCK(bh_lock), +}; /* Double check that napi_get_frags() allocates skbs with * skb->head being backed by slab, not a page fragment. @@ -306,11 +309,16 @@ void napi_get_frags_check(struct napi_struct *napi) void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask) { struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); + void *data; fragsz = SKB_DATA_ALIGN(fragsz); - return __page_frag_alloc_align(&nc->page, fragsz, GFP_ATOMIC, + local_lock_nested_bh(&napi_alloc_cache.bh_lock); + data = __page_frag_alloc_align(&nc->page, fragsz, GFP_ATOMIC, align_mask); + local_unlock_nested_bh(&napi_alloc_cache.bh_lock); + return data; + } EXPORT_SYMBOL(__napi_alloc_frag_align); @@ -318,19 +326,15 @@ void *__netdev_alloc_frag_align(unsigned int fragsz, unsigned int align_mask) { void *data; - fragsz = SKB_DATA_ALIGN(fragsz); if (in_hardirq() || irqs_disabled()) { struct page_frag_cache *nc = this_cpu_ptr(&netdev_alloc_cache); + fragsz = SKB_DATA_ALIGN(fragsz); data = __page_frag_alloc_align(nc, fragsz, GFP_ATOMIC, align_mask); } else { - struct napi_alloc_cache *nc; - local_bh_disable(); - nc = this_cpu_ptr(&napi_alloc_cache); - data = __page_frag_alloc_align(&nc->page, fragsz, GFP_ATOMIC, - align_mask); + data = __napi_alloc_frag_align(fragsz, align_mask); local_bh_enable(); } return data; @@ -342,16 +346,20 @@ static struct sk_buff *napi_skb_cache_get(void) struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); struct sk_buff *skb; + local_lock_nested_bh(&napi_alloc_cache.bh_lock); if (unlikely(!nc->skb_count)) { nc->skb_count = kmem_cache_alloc_bulk(net_hotdata.skbuff_cache, GFP_ATOMIC, NAPI_SKB_CACHE_BULK, nc->skb_cache); - if (unlikely(!nc->skb_count)) + if (unlikely(!nc->skb_count)) { + local_unlock_nested_bh(&napi_alloc_cache.bh_lock); return NULL; + } } skb = nc->skb_cache[--nc->skb_count]; + local_unlock_nested_bh(&napi_alloc_cache.bh_lock); kasan_mempool_unpoison_object(skb, kmem_cache_size(net_hotdata.skbuff_cache)); return skb; @@ -744,9 +752,13 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len, pfmemalloc = nc->pfmemalloc; } else { local_bh_disable(); + local_lock_nested_bh(&napi_alloc_cache.bh_lock); + nc = this_cpu_ptr(&napi_alloc_cache.page); data = page_frag_alloc(nc, len, gfp_mask); pfmemalloc = nc->pfmemalloc; + + local_unlock_nested_bh(&napi_alloc_cache.bh_lock); local_bh_enable(); } @@ -810,11 +822,11 @@ struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int len) goto skb_success; } - nc = this_cpu_ptr(&napi_alloc_cache); - if (sk_memalloc_socks()) gfp_mask |= __GFP_MEMALLOC; + local_lock_nested_bh(&napi_alloc_cache.bh_lock); + nc = this_cpu_ptr(&napi_alloc_cache); if (NAPI_HAS_SMALL_PAGE_FRAG && len <= SKB_WITH_OVERHEAD(1024)) { /* we are artificially inflating the allocation size, but * that is not as bad as it may look like, as: @@ -836,6 +848,7 @@ struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int len) data = page_frag_alloc(&nc->page, len, gfp_mask); pfmemalloc = nc->page.pfmemalloc; } + local_unlock_nested_bh(&napi_alloc_cache.bh_lock); if (unlikely(!data)) return NULL; @@ -1002,8 +1015,10 @@ int skb_cow_data_for_xdp(struct page_pool *pool, struct sk_buff **pskb, EXPORT_SYMBOL(skb_cow_data_for_xdp); #if IS_ENABLED(CONFIG_PAGE_POOL) -bool napi_pp_put_page(struct page *page) +bool napi_pp_put_page(netmem_ref netmem) { + struct page *page = netmem_to_page(netmem); + page = compound_head(page); /* page->pp_magic is OR'ed with PP_SIGNATURE after the allocation @@ -1016,7 +1031,7 @@ bool napi_pp_put_page(struct page *page) if (unlikely(!is_pp_page(page))) return false; - page_pool_put_full_page(page->pp, page, false); + page_pool_put_full_netmem(page->pp, page_to_netmem(page), false); return true; } @@ -1027,7 +1042,7 @@ static bool skb_pp_recycle(struct sk_buff *skb, void *data) { if (!IS_ENABLED(CONFIG_PAGE_POOL) || !skb->pp_recycle) return false; - return napi_pp_put_page(virt_to_page(data)); + return napi_pp_put_page(page_to_netmem(virt_to_page(data))); } /** @@ -1190,7 +1205,8 @@ void __kfree_skb(struct sk_buff *skb) EXPORT_SYMBOL(__kfree_skb); static __always_inline -bool __kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason) +bool __sk_skb_reason_drop(struct sock *sk, struct sk_buff *skb, + enum skb_drop_reason reason) { if (unlikely(!skb_unref(skb))) return false; @@ -1203,26 +1219,27 @@ bool __kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason) if (reason == SKB_CONSUMED) trace_consume_skb(skb, __builtin_return_address(0)); else - trace_kfree_skb(skb, __builtin_return_address(0), reason); + trace_kfree_skb(skb, __builtin_return_address(0), reason, sk); return true; } /** - * kfree_skb_reason - free an sk_buff with special reason + * sk_skb_reason_drop - free an sk_buff with special reason + * @sk: the socket to receive @skb, or NULL if not applicable * @skb: buffer to free * @reason: reason why this skb is dropped * - * Drop a reference to the buffer and free it if the usage count has - * hit zero. Meanwhile, pass the drop reason to 'kfree_skb' - * tracepoint. + * Drop a reference to the buffer and free it if the usage count has hit + * zero. Meanwhile, pass the receiving socket and drop reason to + * 'kfree_skb' tracepoint. */ void __fix_address -kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason) +sk_skb_reason_drop(struct sock *sk, struct sk_buff *skb, enum skb_drop_reason reason) { - if (__kfree_skb_reason(skb, reason)) + if (__sk_skb_reason_drop(sk, skb, reason)) __kfree_skb(skb); } -EXPORT_SYMBOL(kfree_skb_reason); +EXPORT_SYMBOL(sk_skb_reason_drop); #define KFREE_SKB_BULK_SIZE 16 @@ -1261,7 +1278,7 @@ kfree_skb_list_reason(struct sk_buff *segs, enum skb_drop_reason reason) while (segs) { struct sk_buff *next = segs->next; - if (__kfree_skb_reason(segs, reason)) { + if (__sk_skb_reason_drop(NULL, segs, reason)) { skb_poison_list(segs); kfree_skb_add_bulk(segs, &sa, reason); } @@ -1433,6 +1450,7 @@ static void napi_skb_cache_put(struct sk_buff *skb) if (!kasan_mempool_poison_object(skb)) return; + local_lock_nested_bh(&napi_alloc_cache.bh_lock); nc->skb_cache[nc->skb_count++] = skb; if (unlikely(nc->skb_count == NAPI_SKB_CACHE_SIZE)) { @@ -1444,6 +1462,7 @@ static void napi_skb_cache_put(struct sk_buff *skb) nc->skb_cache + NAPI_SKB_CACHE_HALF); nc->skb_count = NAPI_SKB_CACHE_HALF; } + local_unlock_nested_bh(&napi_alloc_cache.bh_lock); } void __napi_kfree_skb(struct sk_buff *skb, enum skb_drop_reason reason) @@ -1854,7 +1873,6 @@ int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb, struct msghdr *msg, int len, struct ubuf_info *uarg) { - struct ubuf_info *orig_uarg = skb_zcopy(skb); int err, orig_len = skb->len; if (uarg->ops->link_skb) { @@ -1862,6 +1880,8 @@ int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb, if (err) return err; } else { + struct ubuf_info *orig_uarg = skb_zcopy(skb); + /* An skb can only point to one uarg. This edge case happens * when TCP appends to an skb, but zerocopy_realloc triggered * a new alloc. @@ -1882,8 +1902,7 @@ int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb, return err; } - if (!uarg->ops->link_skb) - skb_zcopy_set(skb, uarg, NULL); + skb_zcopy_set(skb, uarg, NULL); return skb->len - orig_len; } EXPORT_SYMBOL_GPL(skb_zerocopy_iter_stream); @@ -4139,6 +4158,9 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen) if (skb_zcopy(tgt) || skb_zcopy(skb)) return 0; + DEBUG_NET_WARN_ON_ONCE(tgt->pp_recycle != skb->pp_recycle); + DEBUG_NET_WARN_ON_ONCE(skb_cmp_decrypted(tgt, skb)); + todo = shiftlen; from = 0; to = skb_shinfo(tgt)->nr_frags; |