diff options
author | Stanislav Fomichev <sdf@google.com> | 2023-11-27 11:03:07 -0800 |
---|---|---|
committer | Alexei Starovoitov <ast@kernel.org> | 2023-11-29 14:59:40 -0800 |
commit | 341ac980eab90ac1f6c22ee9f9da83ed9604d899 (patch) | |
tree | 5b12e210a709a24f3b5b68ec466c5b6e044a81c6 | |
parent | 40d0eb0259ae77ace3e81d7454d1068c38bc95c2 (diff) |
xsk: Support tx_metadata_len
For zerocopy mode, tx_desc->addr can point to an arbitrary offset
and carry some TX metadata in the headroom. For copy mode, there
is currently no way to populate skb metadata.
Introduce a new tx_metadata_len umem config option that indicates how many
bytes to treat as metadata. Metadata bytes come immediately before the
tx_desc address (same as in the RX case).
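The size of the metadata has mostly the same constraints as XDP:
- less than 256 bytes
- 8-byte aligned (compared to 4-byte alignment on XDP, due to the 8-byte
timestamp in the completion)
- non-zero (note: the xdp_umem_reg() check added below rejects only values that are >= 256 or not a multiple of 8, so a length of 0 is accepted and leaves descriptor validation unchanged)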
This data is not interpreted in any way right now.
Reviewed-by: Song Yoong Siang <yoong.siang.song@intel.com>
Signed-off-by: Stanislav Fomichev <sdf@google.com>
Reviewed-by: Jakub Kicinski <kuba@kernel.org>
Link: https://lore.kernel.org/r/20231127190319.1190813-2-sdf@google.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
-rw-r--r-- | include/net/xdp_sock.h | 1 | ||||
-rw-r--r-- | include/net/xsk_buff_pool.h | 1 | ||||
-rw-r--r-- | include/uapi/linux/if_xdp.h | 1 | ||||
-rw-r--r-- | net/xdp/xdp_umem.c | 4 | ||||
-rw-r--r-- | net/xdp/xsk.c | 12 | ||||
-rw-r--r-- | net/xdp/xsk_buff_pool.c | 1 | ||||
-rw-r--r-- | net/xdp/xsk_queue.h | 17 | ||||
-rw-r--r-- | tools/include/uapi/linux/if_xdp.h | 1 |
8 files changed, 30 insertions, 8 deletions
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index f83128007fb0..bcf765124f72 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -30,6 +30,7 @@ struct xdp_umem { struct user_struct *user; refcount_t users; u8 flags; + u8 tx_metadata_len; bool zc; struct page **pgs; int id; diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h index b0bdff26fc88..1985ffaf9b0c 100644 --- a/include/net/xsk_buff_pool.h +++ b/include/net/xsk_buff_pool.h @@ -77,6 +77,7 @@ struct xsk_buff_pool { u32 chunk_size; u32 chunk_shift; u32 frame_len; + u8 tx_metadata_len; /* inherited from umem */ u8 cached_need_wakeup; bool uses_need_wakeup; bool dma_need_sync; diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h index 8d48863472b9..2ecf79282c26 100644 --- a/include/uapi/linux/if_xdp.h +++ b/include/uapi/linux/if_xdp.h @@ -76,6 +76,7 @@ struct xdp_umem_reg { __u32 chunk_size; __u32 headroom; __u32 flags; + __u32 tx_metadata_len; }; struct xdp_statistics { diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c index 06cead2b8e34..946a687fb8e8 100644 --- a/net/xdp/xdp_umem.c +++ b/net/xdp/xdp_umem.c @@ -199,6 +199,9 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) if (headroom >= chunk_size - XDP_PACKET_HEADROOM) return -EINVAL; + if (mr->tx_metadata_len >= 256 || mr->tx_metadata_len % 8) + return -EINVAL; + umem->size = size; umem->headroom = headroom; umem->chunk_size = chunk_size; @@ -207,6 +210,7 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) umem->pgs = NULL; umem->user = NULL; umem->flags = mr->flags; + umem->tx_metadata_len = mr->tx_metadata_len; INIT_LIST_HEAD(&umem->xsk_dma_list); refcount_set(&umem->users, 1); diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index ae9f8cb611f6..c904356e2800 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -1283,6 +1283,14 @@ struct xdp_umem_reg_v1 { __u32 headroom; }; +struct xdp_umem_reg_v2 { + __u64 addr; /* Start of packet data area */ + 
__u64 len; /* Length of packet data area */ + __u32 chunk_size; + __u32 headroom; + __u32 flags; +}; + static int xsk_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, unsigned int optlen) { @@ -1326,8 +1334,10 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname, if (optlen < sizeof(struct xdp_umem_reg_v1)) return -EINVAL; - else if (optlen < sizeof(mr)) + else if (optlen < sizeof(struct xdp_umem_reg_v2)) mr_size = sizeof(struct xdp_umem_reg_v1); + else if (optlen < sizeof(mr)) + mr_size = sizeof(struct xdp_umem_reg_v2); if (copy_from_sockptr(&mr, optval, mr_size)) return -EFAULT; diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index 49cb9f9a09be..386eddcdf837 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -85,6 +85,7 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs, XDP_PACKET_HEADROOM; pool->umem = umem; pool->addrs = umem->addrs; + pool->tx_metadata_len = umem->tx_metadata_len; INIT_LIST_HEAD(&pool->free_list); INIT_LIST_HEAD(&pool->xskb_list); INIT_LIST_HEAD(&pool->xsk_tx_list); diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h index 13354a1e4280..c74a1372bcb9 100644 --- a/net/xdp/xsk_queue.h +++ b/net/xdp/xsk_queue.h @@ -143,15 +143,17 @@ static inline bool xp_unused_options_set(u32 options) static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc) { - u64 offset = desc->addr & (pool->chunk_size - 1); + u64 addr = desc->addr - pool->tx_metadata_len; + u64 len = desc->len + pool->tx_metadata_len; + u64 offset = addr & (pool->chunk_size - 1); if (!desc->len) return false; - if (offset + desc->len > pool->chunk_size) + if (offset + len > pool->chunk_size) return false; - if (desc->addr >= pool->addrs_cnt) + if (addr >= pool->addrs_cnt) return false; if (xp_unused_options_set(desc->options)) @@ -162,16 +164,17 @@ static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool, static inline bool 
xp_unaligned_validate_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc) { - u64 addr = xp_unaligned_add_offset_to_addr(desc->addr); + u64 addr = xp_unaligned_add_offset_to_addr(desc->addr) - pool->tx_metadata_len; + u64 len = desc->len + pool->tx_metadata_len; if (!desc->len) return false; - if (desc->len > pool->chunk_size) + if (len > pool->chunk_size) return false; - if (addr >= pool->addrs_cnt || addr + desc->len > pool->addrs_cnt || - xp_desc_crosses_non_contig_pg(pool, addr, desc->len)) + if (addr >= pool->addrs_cnt || addr + len > pool->addrs_cnt || + xp_desc_crosses_non_contig_pg(pool, addr, len)) return false; if (xp_unused_options_set(desc->options)) diff --git a/tools/include/uapi/linux/if_xdp.h b/tools/include/uapi/linux/if_xdp.h index 73a47da885dc..34411a2e5b6c 100644 --- a/tools/include/uapi/linux/if_xdp.h +++ b/tools/include/uapi/linux/if_xdp.h @@ -76,6 +76,7 @@ struct xdp_umem_reg { __u32 chunk_size; __u32 headroom; __u32 flags; + __u32 tx_metadata_len; }; struct xdp_statistics { |