/* SPDX-License-Identifier: GPL-2.0-only */
/* Copyright (C) 2024 Intel Corporation */

#ifndef __LIBETH_RX_H
#define __LIBETH_RX_H

#include <linux/if_vlan.h>

#include <net/page_pool/helpers.h>
#include <net/xdp.h>

/* Rx buffer management */

/* Space reserved in front of each frame */
#define LIBETH_SKB_HEADROOM	(NET_SKB_PAD + NET_IP_ALIGN)
/* Maximum headroom for worst-case calculations */
#define LIBETH_MAX_HEADROOM	LIBETH_SKB_HEADROOM
/* Link layer / L2 overhead: Ethernet, 2 VLAN tags (C + S), FCS */
#define LIBETH_RX_LL_LEN	(ETH_HLEN + 2 * VLAN_HLEN + ETH_FCS_LEN)

/* Always use order-0 pages */
#define LIBETH_RX_PAGE_ORDER	0
/* Pick a sane buffer stride and align to a cacheline boundary */
#define LIBETH_RX_BUF_STRIDE	SKB_DATA_ALIGN(128)
/* HW-writeable space in one buffer: truesize - headroom/tailroom, aligned */
#define LIBETH_RX_PAGE_LEN(hr)					\
	ALIGN_DOWN(SKB_MAX_ORDER(hr, LIBETH_RX_PAGE_ORDER),	\
		   LIBETH_RX_BUF_STRIDE)
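
/* A worked example of the above, illustrative only (the exact values depend
 * on the kernel configuration): on a 4-KiB-page x86_64 build,
 * LIBETH_SKB_HEADROOM is typically 64 (NET_SKB_PAD) + 0 (NET_IP_ALIGN), and
 * SKB_MAX_ORDER() subtracts the headroom plus the cacheline-aligned
 * &skb_shared_info (~320 bytes) from the page size, so
 * LIBETH_RX_PAGE_LEN(LIBETH_SKB_HEADROOM) evaluates to roughly
 * ALIGN_DOWN(4096 - 64 - 320, 128) = 3712 bytes of HW-writeable space.
 */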

/**
 * struct libeth_fqe - structure representing an Rx buffer (fill queue element)
 * @page: page holding the buffer
 * @offset: offset from the page start (to the headroom)
 * @truesize: total space occupied by the buffer (w/ headroom and tailroom)
 *
 * Depending on the MTU, the API switches between the one-page-per-frame and
 * the shared page model (to conserve memory on bigger-page platforms). In
 * case of the former, @offset is always 0 and @truesize is always
 * ``PAGE_SIZE``.
 */
struct libeth_fqe {
	struct page	*page;
	u32		offset;
	u32		truesize;
} __aligned_largest;

/**
 * struct libeth_fq - structure representing a buffer (fill) queue
 * @fp: hotpath part of the structure
 * @pp: &page_pool for buffer management
 * @fqes: array of Rx buffers
 * @truesize: size to allocate per buffer, w/ overhead
 * @count: number of descriptors/buffers the queue has
 * @buf_len: HW-writeable length per each buffer
 * @nid: ID of the closest NUMA node with memory
 */
struct libeth_fq {
	struct_group_tagged(libeth_fq_fp, fp,
		struct page_pool	*pp;
		struct libeth_fqe	*fqes;

		u32			truesize;
		u32			count;
	);

	/* Cold fields */
	u32			buf_len;
	int			nid;
};

int libeth_rx_fq_create(struct libeth_fq *fq, struct napi_struct *napi);
void libeth_rx_fq_destroy(struct libeth_fq *fq);
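
/* Example queue setup/teardown (a sketch: @rxq, its fields and the way
 * @buf_len is derived from the MTU are hypothetical driver-side details):
 *
 *	struct libeth_fq fq = {
 *		.count		= rxq->desc_count,
 *		.buf_len	= LIBETH_RX_LL_LEN + rxq->netdev->mtu,
 *		.nid		= NUMA_NO_NODE,
 *	};
 *	int err;
 *
 *	err = libeth_rx_fq_create(&fq, &rxq->napi);
 *	if (err)
 *		return err;
 *
 * On success, the hotpath fields (@pp, @fqes, @truesize) are initialized and
 * the queue can be filled via libeth_rx_alloc(); libeth_rx_fq_destroy() then
 * releases everything on teardown.
 */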

/**
 * libeth_rx_alloc - allocate a new Rx buffer
 * @fq: fill queue to allocate for
 * @i: index of the buffer within the queue
 *
 * Return: DMA address to be passed to HW for Rx on successful allocation,
 * ``DMA_MAPPING_ERROR`` otherwise.
 */
static inline dma_addr_t libeth_rx_alloc(const struct libeth_fq_fp *fq, u32 i)
{
	struct libeth_fqe *buf = &fq->fqes[i];

	buf->truesize = fq->truesize;
	buf->page = page_pool_dev_alloc(fq->pp, &buf->offset, &buf->truesize);
	if (unlikely(!buf->page))
		return DMA_MAPPING_ERROR;

	return page_pool_get_dma_addr(buf->page) + buf->offset +
	       fq->pp->p.offset;
}
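
/* Example HW ring refill using the above (a sketch: @fq points to the
 * driver's &libeth_fq, the descriptor layout and the ring bookkeeping are
 * hypothetical driver-side details):
 *
 *	for (u32 i = 0; i < count; i++) {
 *		dma_addr_t addr = libeth_rx_alloc(&fq->fp, i);
 *
 *		if (addr == DMA_MAPPING_ERROR)
 *			break;
 *
 *		rx_desc[i].pkt_addr = cpu_to_le64(addr);
 *	}
 */
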
void libeth_rx_recycle_slow(struct page *page);

/**
 * libeth_rx_sync_for_cpu - synchronize or recycle buffer post DMA
 * @fqe: buffer to process
 * @len: frame length from the descriptor
 *
 * Process the buffer after it's written by HW. The regular path is to
 * synchronize DMA for CPU, but in case of no data it will be immediately
 * recycled back to its PP.
 *
 * Return: true when there's data to process, false otherwise.
 */
static inline bool libeth_rx_sync_for_cpu(const struct libeth_fqe *fqe,
					  u32 len)
{
	struct page *page = fqe->page;

	/* Very rare, but possible case. The most common reason:
	 * the last fragment contained FCS only, which was then
	 * stripped by the HW.
	 */
	if (unlikely(!len)) {
		libeth_rx_recycle_slow(page);
		return false;
	}

	page_pool_dma_sync_for_cpu(page->pp, page, fqe->offset, len);

	return true;
}
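
/* Example Rx completion handling (a sketch: fetching @fqe and @len from the
 * completed descriptor is a hypothetical driver-side detail):
 *
 *	if (!libeth_rx_sync_for_cpu(fqe, len))
 *		continue;
 *
 *	data = page_address(fqe->page) + fqe->offset +
 *	       fqe->page->pp->p.offset;
 *
 * The frame itself starts past the headroom configured in the page_pool,
 * matching the address handed to HW by libeth_rx_alloc() above.
 */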

/* Converting abstract packet type numbers into a software structure with
 * the packet parameters to do O(1) lookup on Rx.
 */

enum {
	LIBETH_RX_PT_OUTER_L2			= 0U,
	LIBETH_RX_PT_OUTER_IPV4,
	LIBETH_RX_PT_OUTER_IPV6,
};

enum {
	LIBETH_RX_PT_NOT_FRAG			= 0U,
	LIBETH_RX_PT_FRAG,
};

enum {
	LIBETH_RX_PT_TUNNEL_IP_NONE		= 0U,
	LIBETH_RX_PT_TUNNEL_IP_IP,
	LIBETH_RX_PT_TUNNEL_IP_GRENAT,
	LIBETH_RX_PT_TUNNEL_IP_GRENAT_MAC,
	LIBETH_RX_PT_TUNNEL_IP_GRENAT_MAC_VLAN,
};

enum {
	LIBETH_RX_PT_TUNNEL_END_NONE		= 0U,
	LIBETH_RX_PT_TUNNEL_END_IPV4,
	LIBETH_RX_PT_TUNNEL_END_IPV6,
};

enum {
	LIBETH_RX_PT_INNER_NONE			= 0U,
	LIBETH_RX_PT_INNER_UDP,
	LIBETH_RX_PT_INNER_TCP,
	LIBETH_RX_PT_INNER_SCTP,
	LIBETH_RX_PT_INNER_ICMP,
	LIBETH_RX_PT_INNER_TIMESYNC,
};

#define LIBETH_RX_PT_PAYLOAD_NONE		PKT_HASH_TYPE_NONE
#define LIBETH_RX_PT_PAYLOAD_L2			PKT_HASH_TYPE_L2
#define LIBETH_RX_PT_PAYLOAD_L3			PKT_HASH_TYPE_L3
#define LIBETH_RX_PT_PAYLOAD_L4			PKT_HASH_TYPE_L4

/* Software-side packet type decoded from the HW PT index; the field values
 * come from the enums and definitions above.
 */
struct libeth_rx_pt {
	u32			outer_ip:2;
	u32			outer_frag:1;
	u32			tunnel_type:3;
	u32			tunnel_end_prot:2;
	u32			tunnel_end_frag:1;
	u32			inner_prot:3;
	enum pkt_hash_types	payload_layer:2;
	u32			pad:2;
	enum xdp_rss_hash_type	hash_type:16;
};

void libeth_rx_pt_gen_hash_type(struct libeth_rx_pt *pt);
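
/* Example PT lookup table initialization (a sketch: the table itself and the
 * mapping from the HW packet type index are hypothetical driver-side
 * details):
 *
 *	struct libeth_rx_pt pt = {
 *		.outer_ip	= LIBETH_RX_PT_OUTER_IPV4,
 *		.tunnel_type	= LIBETH_RX_PT_TUNNEL_IP_NONE,
 *		.inner_prot	= LIBETH_RX_PT_INNER_TCP,
 *		.payload_layer	= LIBETH_RX_PT_PAYLOAD_L4,
 *	};
 *
 *	libeth_rx_pt_gen_hash_type(&pt);
 *	rx_ptype_lkup[hw_idx] = pt;
 */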

/**
 * libeth_rx_pt_get_ip_ver - get IP version from a packet type structure
 * @pt: packet type params
 *
 * Wrapper to compile out the IPv6 code from the drivers when not supported
 * by the kernel.
 *
 * Return: @pt.outer_ip or stub for IPv6 when not compiled-in.
 */
static inline u32 libeth_rx_pt_get_ip_ver(struct libeth_rx_pt pt)
{
#if !IS_ENABLED(CONFIG_IPV6)
	switch (pt.outer_ip) {
	case LIBETH_RX_PT_OUTER_IPV4:
		return LIBETH_RX_PT_OUTER_IPV4;
	default:
		return LIBETH_RX_PT_OUTER_L2;
	}
#else
	return pt.outer_ip;
#endif
}
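
/* Example (a sketch: the parsing helpers are hypothetical):
 *
 *	switch (libeth_rx_pt_get_ip_ver(pt)) {
 *	case LIBETH_RX_PT_OUTER_IPV4:
 *		return parse_ipv4(pkt);
 *	case LIBETH_RX_PT_OUTER_IPV6:
 *		return parse_ipv6(pkt);
 *	default:
 *		return 0;
 *	}
 *
 * With CONFIG_IPV6 disabled, the IPv6 case becomes unreachable and the
 * compiler can drop parse_ipv6() entirely.
 */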

/* libeth_rx_pt_has_*() can be used to quickly check whether the HW metadata
 * is available, to avoid further expensive processing such as descriptor
 * reads. They already check for the corresponding netdev feature being
 * enabled, thus can be used as drop-in replacements for the feature checks.
 */
static inline bool libeth_rx_pt_has_checksum(const struct net_device *dev,
					     struct libeth_rx_pt pt)
{
	/* Non-zero _INNER* is only possible when _OUTER_IPV* is set,
	 * it is enough to check only for the L4 type.
	 */
	return likely(pt.inner_prot > LIBETH_RX_PT_INNER_NONE &&
		      (dev->features & NETIF_F_RXCSUM));
}

static inline bool libeth_rx_pt_has_hash(const struct net_device *dev,
					 struct libeth_rx_pt pt)
{
	return likely(pt.payload_layer > LIBETH_RX_PT_PAYLOAD_NONE &&
		      (dev->features & NETIF_F_RXHASH));
}

/**
 * libeth_rx_pt_set_hash - fill in the skb hash value based on the PT
 * @skb: skb to fill the hash in
 * @hash: 32-bit hash value from the descriptor
 * @pt: packet type
 */
static inline void libeth_rx_pt_set_hash(struct sk_buff *skb, u32 hash,
					 struct libeth_rx_pt pt)
{
	skb_set_hash(skb, hash, pt.payload_layer);
}
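
/* Example hotpath usage of the helpers above (a sketch: @csum_ok and the
 * descriptor hash field are hypothetical):
 *
 *	if (libeth_rx_pt_has_checksum(dev, pt) && csum_ok)
 *		skb->ip_summed = CHECKSUM_UNNECESSARY;
 *
 *	if (libeth_rx_pt_has_hash(dev, pt))
 *		libeth_rx_pt_set_hash(skb, le32_to_cpu(desc->rss_hash), pt);
 */
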
#endif /* __LIBETH_RX_H */