summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Daniel Borkmann <daniel@iogearbox.net> 2019-04-24 01:32:27 +0200
committer Daniel Borkmann <daniel@iogearbox.net> 2019-04-24 01:32:28 +0200
commit a21b48a2f2afa53bbc989cce6fc81edbed39eab0 (patch)
tree 017cfbea5a7d5be7c4503bd248b05c0858925d41
parent 2aad32613c353b1e05da5994324bc5f20d0dd55a (diff)
parent f6ad6accaa99dfa7462d18687961b8421d707c1e (diff)
Merge branch 'bpf-proto-fixes'
Willem de Bruijn says:

====================
Expand the tc tunnel encap support with protocols that convert the
network layer protocol, such as 6in4. This is analogous to existing
support in bpf_skb_proto_6_to_4.

Patch 1 implements the straightforward logic
Patch 2 tests it with a 6in4 tunnel

Changes v1->v2
  - improve documentation in test
====================

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
-rw-r--r-- net/core/filter.c 8
-rw-r--r-- tools/testing/selftests/bpf/config 1
-rw-r--r-- tools/testing/selftests/bpf/progs/test_tc_tunnel.c 64
-rwxr-xr-x tools/testing/selftests/bpf/test_tc_tunnel.sh 20
4 files changed, 88 insertions, 5 deletions
diff --git a/net/core/filter.c b/net/core/filter.c
index edb3a7c22f6c..2f88baf39cc2 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3081,6 +3081,14 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
skb_set_transport_header(skb, mac_len + nh_len);
}
+
+ /* Match skb->protocol to new outer l3 protocol */
+ if (skb->protocol == htons(ETH_P_IP) &&
+ flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
+ skb->protocol = htons(ETH_P_IPV6);
+ else if (skb->protocol == htons(ETH_P_IPV6) &&
+ flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV4)
+ skb->protocol = htons(ETH_P_IP);
}
if (skb_is_gso(skb)) {
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 8c976476f6fd..f7a0744db31e 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -33,3 +33,4 @@ CONFIG_MPLS=y
CONFIG_NET_MPLS_GSO=m
CONFIG_MPLS_ROUTING=m
CONFIG_MPLS_IPTUNNEL=m
+CONFIG_IPV6_SIT=m
diff --git a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
index ab56a6a72b7a..74370e7e286d 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
@@ -77,17 +77,52 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
struct v4hdr h_outer;
struct tcphdr tcph;
int olen, l2_len;
+ int tcp_off;
__u64 flags;
- if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
- sizeof(iph_inner)) < 0)
- return TC_ACT_OK;
+ /* Most tests encapsulate a packet into a tunnel with the same
+ * network protocol, and derive the outer header fields from
+ * the inner header.
+ *
+ * The 6in4 case tests different inner and outer protocols. As
+ * the inner is ipv6, but the outer expects an ipv4 header as
+ * input, manually build a struct iphdr based on the ipv6hdr.
+ */
+ if (encap_proto == IPPROTO_IPV6) {
+ const __u32 saddr = (192 << 24) | (168 << 16) | (1 << 8) | 1;
+ const __u32 daddr = (192 << 24) | (168 << 16) | (1 << 8) | 2;
+ struct ipv6hdr iph6_inner;
+
+ /* Read the IPv6 header */
+ if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph6_inner,
+ sizeof(iph6_inner)) < 0)
+ return TC_ACT_OK;
+
+ /* Derive the IPv4 header fields from the IPv6 header */
+ memset(&iph_inner, 0, sizeof(iph_inner));
+ iph_inner.version = 4;
+ iph_inner.ihl = 5;
+ iph_inner.tot_len = bpf_htons(sizeof(iph6_inner) +
+ bpf_ntohs(iph6_inner.payload_len));
+ iph_inner.ttl = iph6_inner.hop_limit - 1;
+ iph_inner.protocol = iph6_inner.nexthdr;
+ iph_inner.saddr = __bpf_constant_htonl(saddr);
+ iph_inner.daddr = __bpf_constant_htonl(daddr);
+
+ tcp_off = sizeof(iph6_inner);
+ } else {
+ if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
+ sizeof(iph_inner)) < 0)
+ return TC_ACT_OK;
+
+ tcp_off = sizeof(iph_inner);
+ }
/* filter only packets we want */
if (iph_inner.ihl != 5 || iph_inner.protocol != IPPROTO_TCP)
return TC_ACT_OK;
- if (bpf_skb_load_bytes(skb, ETH_HLEN + sizeof(iph_inner),
+ if (bpf_skb_load_bytes(skb, ETH_HLEN + tcp_off,
&tcph, sizeof(tcph)) < 0)
return TC_ACT_OK;
@@ -129,6 +164,7 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
l2_len);
break;
case IPPROTO_IPIP:
+ case IPPROTO_IPV6:
break;
default:
return TC_ACT_OK;
@@ -164,6 +200,17 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
BPF_F_INVALIDATE_HASH) < 0)
return TC_ACT_SHOT;
+ /* if changing outer proto type, update eth->h_proto */
+ if (encap_proto == IPPROTO_IPV6) {
+ struct ethhdr eth;
+
+ if (bpf_skb_load_bytes(skb, 0, &eth, sizeof(eth)) < 0)
+ return TC_ACT_SHOT;
+ eth.h_proto = bpf_htons(ETH_P_IP);
+ if (bpf_skb_store_bytes(skb, 0, &eth, sizeof(eth), 0) < 0)
+ return TC_ACT_SHOT;
+ }
+
return TC_ACT_OK;
}
@@ -325,6 +372,15 @@ int __encap_udp_eth(struct __sk_buff *skb)
return TC_ACT_OK;
}
+SEC("encap_sit_none") /* NOTE(review): section name presumably matches the test script's "sit none" run - confirm against the loader */
+int __encap_sit_none(struct __sk_buff *skb)
+{
+ if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6)) /* only IPv6 packets take the 6in4 encap path */
+ return encap_ipv4(skb, IPPROTO_IPV6, ETH_P_IP); /* encap_proto IPPROTO_IPV6 makes encap_ipv4 build its IPv4 header from the IPv6 one and set eth h_proto to ETH_P_IP */
+ else
+ return TC_ACT_OK; /* any other protocol is returned as TC_ACT_OK without touching the skb */
+}
+
SEC("encap_ip6tnl_none")
int __encap_ip6tnl_none(struct __sk_buff *skb)
{
diff --git a/tools/testing/selftests/bpf/test_tc_tunnel.sh b/tools/testing/selftests/bpf/test_tc_tunnel.sh
index d4d8d5d3b06e..ff0d31d38061 100755
--- a/tools/testing/selftests/bpf/test_tc_tunnel.sh
+++ b/tools/testing/selftests/bpf/test_tc_tunnel.sh
@@ -97,6 +97,9 @@ if [[ "$#" -eq "0" ]]; then
echo "ip6ip6"
$0 ipv6 ip6tnl none 100
+ echo "sit"
+ $0 ipv6 sit none 100
+
for mac in none mpls eth ; do
echo "ip gre $mac"
$0 ipv4 gre $mac 100
@@ -211,11 +214,20 @@ else
targs=""
fi
+# tunnel address family differs from inner for SIT
+if [[ "${tuntype}" == "sit" ]]; then
+ link_addr1="${ns1_v4}"
+ link_addr2="${ns2_v4}"
+else
+ link_addr1="${addr1}"
+ link_addr2="${addr2}"
+fi
+
# serverside, insert decap module
# server is still running
# client can connect again
ip netns exec "${ns2}" ip link add name testtun0 type "${ttype}" \
- ${tmode} remote "${addr1}" local "${addr2}" $targs
+ ${tmode} remote "${link_addr1}" local "${link_addr2}" $targs
expect_tun_fail=0
@@ -260,6 +272,12 @@ else
server_listen
fi
+# bpf_skb_net_shrink does not take tunnel flags yet, cannot update L3.
+if [[ "${tuntype}" == "sit" ]]; then
+ echo OK
+ exit 0
+fi
+
# serverside, use BPF for decap
ip netns exec "${ns2}" ip link del dev testtun0
ip netns exec "${ns2}" tc qdisc add dev veth2 clsact