summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBjörn Töpel <bjorn.topel@intel.com>2019-01-24 19:59:39 +0100
committerDaniel Borkmann <daniel@iogearbox.net>2019-01-25 01:50:03 +0100
commita36b38aa2af61146ea80980a01cf6e952ab021c1 (patch)
tree910d7378db7613ea6d36759dce35211dfb9766b8
parent50e74c0131a5b3a3e387798a5705158c04fb3bd0 (diff)
xsk: add sock_diag interface for AF_XDP
This patch adds the sock_diag interface for querying sockets from user space. Tools like iproute2 ss(8) can use this interface to list open AF_XDP sockets. The user-space ABI is defined in linux/xdp_diag.h and includes netlink request and response structs. The request can query sockets and the response contains socket information about the rings, umems, inode and more. Signed-off-by: Björn Töpel <bjorn.topel@intel.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
-rw-r--r--include/uapi/linux/xdp_diag.h72
-rw-r--r--net/xdp/Kconfig8
-rw-r--r--net/xdp/Makefile1
-rw-r--r--net/xdp/xsk.c6
-rw-r--r--net/xdp/xsk.h12
-rw-r--r--net/xdp/xsk_diag.c191
6 files changed, 285 insertions, 5 deletions
diff --git a/include/uapi/linux/xdp_diag.h b/include/uapi/linux/xdp_diag.h
new file mode 100644
index 000000000000..78b2591a7782
--- /dev/null
+++ b/include/uapi/linux/xdp_diag.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * xdp_diag: interface for query/monitor XDP sockets
+ * Copyright(c) 2019 Intel Corporation.
+ */
+
+#ifndef _LINUX_XDP_DIAG_H
+#define _LINUX_XDP_DIAG_H
+
+#include <linux/types.h>
+
+struct xdp_diag_req {
+ __u8 sdiag_family;
+ __u8 sdiag_protocol;
+ __u16 pad;
+ __u32 xdiag_ino;
+ __u32 xdiag_show;
+ __u32 xdiag_cookie[2];
+};
+
+struct xdp_diag_msg {
+ __u8 xdiag_family;
+ __u8 xdiag_type;
+ __u16 pad;
+ __u32 xdiag_ino;
+ __u32 xdiag_cookie[2];
+};
+
+#define XDP_SHOW_INFO (1 << 0) /* Basic information */
+#define XDP_SHOW_RING_CFG (1 << 1)
+#define XDP_SHOW_UMEM (1 << 2)
+#define XDP_SHOW_MEMINFO (1 << 3)
+
+enum {
+ XDP_DIAG_NONE,
+ XDP_DIAG_INFO,
+ XDP_DIAG_UID,
+ XDP_DIAG_RX_RING,
+ XDP_DIAG_TX_RING,
+ XDP_DIAG_UMEM,
+ XDP_DIAG_UMEM_FILL_RING,
+ XDP_DIAG_UMEM_COMPLETION_RING,
+ XDP_DIAG_MEMINFO,
+ __XDP_DIAG_MAX,
+};
+
+#define XDP_DIAG_MAX (__XDP_DIAG_MAX - 1)
+
+struct xdp_diag_info {
+ __u32 ifindex;
+ __u32 queue_id;
+};
+
+struct xdp_diag_ring {
+ __u32 entries; /*num descs */
+};
+
+#define XDP_DU_F_ZEROCOPY (1 << 0)
+
+struct xdp_diag_umem {
+ __u64 size;
+ __u32 id;
+ __u32 num_pages;
+ __u32 chunk_size;
+ __u32 headroom;
+ __u32 ifindex;
+ __u32 queue_id;
+ __u32 flags;
+ __u32 refs;
+};
+
+#endif /* _LINUX_XDP_DIAG_H */
diff --git a/net/xdp/Kconfig b/net/xdp/Kconfig
index 90e4a7152854..0255b33cff4b 100644
--- a/net/xdp/Kconfig
+++ b/net/xdp/Kconfig
@@ -5,3 +5,11 @@ config XDP_SOCKETS
help
XDP sockets allows a channel between XDP programs and
userspace applications.
+
+config XDP_SOCKETS_DIAG
+ tristate "XDP sockets: monitoring interface"
+ depends on XDP_SOCKETS
+ default n
+ help
+ Support for PF_XDP sockets monitoring interface used by the ss tool.
+ If unsure, say Y.
diff --git a/net/xdp/Makefile b/net/xdp/Makefile
index 04f073146256..59dbfdf93dca 100644
--- a/net/xdp/Makefile
+++ b/net/xdp/Makefile
@@ -1 +1,2 @@
obj-$(CONFIG_XDP_SOCKETS) += xsk.o xdp_umem.o xsk_queue.o
+obj-$(CONFIG_XDP_SOCKETS_DIAG) += xsk_diag.o
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 80ca48cefc42..949d3bbccb2f 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -27,14 +27,10 @@
#include "xsk_queue.h"
#include "xdp_umem.h"
+#include "xsk.h"
#define TX_BATCH_SIZE 16
-static struct xdp_sock *xdp_sk(struct sock *sk)
-{
- return (struct xdp_sock *)sk;
-}
-
bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs)
{
return READ_ONCE(xs->rx) && READ_ONCE(xs->umem) &&
diff --git a/net/xdp/xsk.h b/net/xdp/xsk.h
new file mode 100644
index 000000000000..ba8120610426
--- /dev/null
+++ b/net/xdp/xsk.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2019 Intel Corporation. */
+
+#ifndef XSK_H_
+#define XSK_H_
+
+static inline struct xdp_sock *xdp_sk(struct sock *sk)
+{
+ return (struct xdp_sock *)sk;
+}
+
+#endif /* XSK_H_ */
diff --git a/net/xdp/xsk_diag.c b/net/xdp/xsk_diag.c
new file mode 100644
index 000000000000..661d007c3b28
--- /dev/null
+++ b/net/xdp/xsk_diag.c
@@ -0,0 +1,191 @@
+// SPDX-License-Identifier: GPL-2.0
+/* XDP sockets monitoring support
+ *
+ * Copyright(c) 2019 Intel Corporation.
+ *
+ * Author: Björn Töpel <bjorn.topel@intel.com>
+ */
+
+#include <linux/module.h>
+#include <net/xdp_sock.h>
+#include <linux/xdp_diag.h>
+#include <linux/sock_diag.h>
+
+#include "xsk_queue.h"
+#include "xsk.h"
+
+static int xsk_diag_put_info(const struct xdp_sock *xs, struct sk_buff *nlskb)
+{
+ struct xdp_diag_info di = {};
+
+ di.ifindex = xs->dev ? xs->dev->ifindex : 0;
+ di.queue_id = xs->queue_id;
+ return nla_put(nlskb, XDP_DIAG_INFO, sizeof(di), &di);
+}
+
+static int xsk_diag_put_ring(const struct xsk_queue *queue, int nl_type,
+ struct sk_buff *nlskb)
+{
+ struct xdp_diag_ring dr = {};
+
+ dr.entries = queue->nentries;
+ return nla_put(nlskb, nl_type, sizeof(dr), &dr);
+}
+
+static int xsk_diag_put_rings_cfg(const struct xdp_sock *xs,
+ struct sk_buff *nlskb)
+{
+ int err = 0;
+
+ if (xs->rx)
+ err = xsk_diag_put_ring(xs->rx, XDP_DIAG_RX_RING, nlskb);
+ if (!err && xs->tx)
+ err = xsk_diag_put_ring(xs->tx, XDP_DIAG_TX_RING, nlskb);
+ return err;
+}
+
+static int xsk_diag_put_umem(const struct xdp_sock *xs, struct sk_buff *nlskb)
+{
+ struct xdp_umem *umem = xs->umem;
+ struct xdp_diag_umem du = {};
+ int err;
+
+ if (!umem)
+ return 0;
+
+ du.id = umem->id;
+ du.size = umem->size;
+ du.num_pages = umem->npgs;
+ du.chunk_size = (__u32)(~umem->chunk_mask + 1);
+ du.headroom = umem->headroom;
+ du.ifindex = umem->dev ? umem->dev->ifindex : 0;
+ du.queue_id = umem->queue_id;
+ du.flags = 0;
+ if (umem->zc)
+ du.flags |= XDP_DU_F_ZEROCOPY;
+ du.refs = refcount_read(&umem->users);
+
+ err = nla_put(nlskb, XDP_DIAG_UMEM, sizeof(du), &du);
+
+ if (!err && umem->fq)
+ err = xsk_diag_put_ring(xs->tx, XDP_DIAG_UMEM_FILL_RING, nlskb);
+ if (!err && umem->cq) {
+ err = xsk_diag_put_ring(xs->tx, XDP_DIAG_UMEM_COMPLETION_RING,
+ nlskb);
+ }
+ return err;
+}
+
+static int xsk_diag_fill(struct sock *sk, struct sk_buff *nlskb,
+ struct xdp_diag_req *req,
+ struct user_namespace *user_ns,
+ u32 portid, u32 seq, u32 flags, int sk_ino)
+{
+ struct xdp_sock *xs = xdp_sk(sk);
+ struct xdp_diag_msg *msg;
+ struct nlmsghdr *nlh;
+
+ nlh = nlmsg_put(nlskb, portid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*msg),
+ flags);
+ if (!nlh)
+ return -EMSGSIZE;
+
+ msg = nlmsg_data(nlh);
+ memset(msg, 0, sizeof(*msg));
+ msg->xdiag_family = AF_XDP;
+ msg->xdiag_type = sk->sk_type;
+ msg->xdiag_ino = sk_ino;
+ sock_diag_save_cookie(sk, msg->xdiag_cookie);
+
+ if ((req->xdiag_show & XDP_SHOW_INFO) && xsk_diag_put_info(xs, nlskb))
+ goto out_nlmsg_trim;
+
+ if ((req->xdiag_show & XDP_SHOW_INFO) &&
+ nla_put_u32(nlskb, XDP_DIAG_UID,
+ from_kuid_munged(user_ns, sock_i_uid(sk))))
+ goto out_nlmsg_trim;
+
+ if ((req->xdiag_show & XDP_SHOW_RING_CFG) &&
+ xsk_diag_put_rings_cfg(xs, nlskb))
+ goto out_nlmsg_trim;
+
+ if ((req->xdiag_show & XDP_SHOW_UMEM) &&
+ xsk_diag_put_umem(xs, nlskb))
+ goto out_nlmsg_trim;
+
+ if ((req->xdiag_show & XDP_SHOW_MEMINFO) &&
+ sock_diag_put_meminfo(sk, nlskb, XDP_DIAG_MEMINFO))
+ goto out_nlmsg_trim;
+
+ nlmsg_end(nlskb, nlh);
+ return 0;
+
+out_nlmsg_trim:
+ nlmsg_cancel(nlskb, nlh);
+ return -EMSGSIZE;
+}
+
+static int xsk_diag_dump(struct sk_buff *nlskb, struct netlink_callback *cb)
+{
+ struct xdp_diag_req *req = nlmsg_data(cb->nlh);
+ struct net *net = sock_net(nlskb->sk);
+ int num = 0, s_num = cb->args[0];
+ struct sock *sk;
+
+ mutex_lock(&net->xdp.lock);
+
+ sk_for_each(sk, &net->xdp.list) {
+ if (!net_eq(sock_net(sk), net))
+ continue;
+ if (num++ < s_num)
+ continue;
+
+ if (xsk_diag_fill(sk, nlskb, req,
+ sk_user_ns(NETLINK_CB(cb->skb).sk),
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, NLM_F_MULTI,
+ sock_i_ino(sk)) < 0) {
+ num--;
+ break;
+ }
+ }
+
+ mutex_unlock(&net->xdp.lock);
+ cb->args[0] = num;
+ return nlskb->len;
+}
+
+static int xsk_diag_handler_dump(struct sk_buff *nlskb, struct nlmsghdr *hdr)
+{
+ struct netlink_dump_control c = { .dump = xsk_diag_dump };
+ int hdrlen = sizeof(struct xdp_diag_req);
+ struct net *net = sock_net(nlskb->sk);
+
+ if (nlmsg_len(hdr) < hdrlen)
+ return -EINVAL;
+
+ if (!(hdr->nlmsg_flags & NLM_F_DUMP))
+ return -EOPNOTSUPP;
+
+ return netlink_dump_start(net->diag_nlsk, nlskb, hdr, &c);
+}
+
+static const struct sock_diag_handler xsk_diag_handler = {
+ .family = AF_XDP,
+ .dump = xsk_diag_handler_dump,
+};
+
+static int __init xsk_diag_init(void)
+{
+ return sock_diag_register(&xsk_diag_handler);
+}
+
+static void __exit xsk_diag_exit(void)
+{
+ sock_diag_unregister(&xsk_diag_handler);
+}
+
+module_init(xsk_diag_init);
+module_exit(xsk_diag_exit);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, AF_XDP);