bpf: make programs see skb->data == L2 for ingress and egress

eBPF programs attached to ingress and egress qdiscs see inconsistent skb->data. For ingress L2 header is already pulled, whereas for egress it's present. This is known to program writers which are currently forced to use BPF_LL_OFF workaround. Since programs don't change skb internal pointers it is safe to do pull/push right around invocation of the program and earlier taps and later pt->func() will not be affected. Multiple taps via packet_rcv(), tpacket_rcv() are doing the same trick around run_filter/BPF_PROG_RUN even if skb_shared. This fix finally allows programs to use optimized LD_ABS/IND instructions without BPF_LL_OFF for higher performance. tc ingress + cls_bpf + samples/bpf/tcbpf1_kern.o w/o JIT w/JIT before 20.5 23.6 Mpps after 21.8 26.6 Mpps Old programs with BPF_LL_OFF will still work as-is. We can now undo most of the earlier workaround commit: a166151cbe33 ("bpf: fix bpf helpers to use skb->mac_header relative offsets") Signed-off-by: Alexei Starovoitov <ast@plumgrid.com> Acked-by: Jamal Hadi Salim <jhs@mojatatu.com> Signed-off-by: David S. Miller <davem@davemloft.net>
author: Alexei Starovoitov <ast@plumgrid.com> 2015-06-04 10:11:53 -0700
committer: David S. Miller <davem@davemloft.net> 2015-06-07 02:01:33 -0700
commit: 3431205e03977aaf32bce6d4b16fb8244b510056 (patch)
tree: 0d9405acb8ac34a6bc5ad93a9b9ae674c4d3879e /net/sched
parent: 98da81a426a76c351f3c2854d5bc31f17bba3194 (diff)
2 files changed, 23 insertions, 2 deletions
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index dc6a2d324bd8..1d56903fd4c7 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -37,6 +37,7 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
 {
 	struct tcf_bpf *prog = act->priv;
 	int action, filter_res;
+	bool at_ingress = G_TC_AT(skb->tc_verd) & AT_INGRESS;
 
 	if (unlikely(!skb_mac_header_was_set(skb)))
 		return TC_ACT_UNSPEC;
@@ -48,7 +49,13 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
 
 	/* Needed here for accessing maps. */
 	rcu_read_lock();
-	filter_res = BPF_PROG_RUN(prog->filter, skb);
+	if (at_ingress) {
+		__skb_push(skb, skb->mac_len);
+		filter_res = BPF_PROG_RUN(prog->filter, skb);
+		__skb_pull(skb, skb->mac_len);
+	} else {
+		filter_res = BPF_PROG_RUN(prog->filter, skb);
+	}
 	rcu_read_unlock();
 
 	/* A BPF program may overwrite the default action opcode.
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 91bd9c19471d..c79ecfd36e0f 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -64,6 +64,11 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 {
 	struct cls_bpf_head *head = rcu_dereference_bh(tp->root);
 	struct cls_bpf_prog *prog;
+#ifdef CONFIG_NET_CLS_ACT
+	bool at_ingress = G_TC_AT(skb->tc_verd) & AT_INGRESS;
+#else
+	bool at_ingress = false;
+#endif
 	int ret = -1;
 
 	if (unlikely(!skb_mac_header_was_set(skb)))
@@ -72,7 +77,16 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 	/* Needed here for accessing maps. */
 	rcu_read_lock();
 	list_for_each_entry_rcu(prog, &head->plist, link) {
-		int filter_res = BPF_PROG_RUN(prog->filter, skb);
+		int filter_res;
+
+		if (at_ingress) {
+			/* It is safe to push/pull even if skb_shared() */
+			__skb_push(skb, skb->mac_len);
+			filter_res = BPF_PROG_RUN(prog->filter, skb);
+			__skb_pull(skb, skb->mac_len);
+		} else {
+			filter_res = BPF_PROG_RUN(prog->filter, skb);
+		}
 
 		if (filter_res == 0)
 			continue;
author	Alexei Starovoitov <ast@plumgrid.com>	2015-06-04 10:11:53 -0700
committer	David S. Miller <davem@davemloft.net>	2015-06-07 02:01:33 -0700
commit	3431205e03977aaf32bce6d4b16fb8244b510056 (patch)
tree	0d9405acb8ac34a6bc5ad93a9b9ae674c4d3879e /net/sched
parent	98da81a426a76c351f3c2854d5bc31f17bba3194 (diff)