diff options
author | Daniel Borkmann <daniel@iogearbox.net> | 2019-04-24 01:32:27 +0200 |
---|---|---|
committer | Daniel Borkmann <daniel@iogearbox.net> | 2019-04-24 01:32:28 +0200 |
commit | a21b48a2f2afa53bbc989cce6fc81edbed39eab0 (patch) | |
tree | 017cfbea5a7d5be7c4503bd248b05c0858925d41 | |
parent | 2aad32613c353b1e05da5994324bc5f20d0dd55a (diff) | |
parent | f6ad6accaa99dfa7462d18687961b8421d707c1e (diff) |
Merge branch 'bpf-proto-fixes'
Willem de Bruijn says:
====================
Expand the tc tunnel encap support with protocols that convert the
network layer protocol, such as 6in4. This is analogous to existing
support in bpf_skb_proto_6_to_4.
Patch 1 implements the straightforward logic
Patch 2 tests it with a 6in4 tunnel
Changes v1->v2
- improve documentation in test
====================
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
-rw-r--r-- | net/core/filter.c | 8 | ||||
-rw-r--r-- | tools/testing/selftests/bpf/config | 1 | ||||
-rw-r--r-- | tools/testing/selftests/bpf/progs/test_tc_tunnel.c | 64 | ||||
-rwxr-xr-x | tools/testing/selftests/bpf/test_tc_tunnel.sh | 20 |
4 files changed, 88 insertions, 5 deletions
diff --git a/net/core/filter.c b/net/core/filter.c index edb3a7c22f6c..2f88baf39cc2 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3081,6 +3081,14 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff, skb_set_transport_header(skb, mac_len + nh_len); } + + /* Match skb->protocol to new outer l3 protocol */ + if (skb->protocol == htons(ETH_P_IP) && + flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6) + skb->protocol = htons(ETH_P_IPV6); + else if (skb->protocol == htons(ETH_P_IPV6) && + flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV4) + skb->protocol = htons(ETH_P_IP); } if (skb_is_gso(skb)) { diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 8c976476f6fd..f7a0744db31e 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -33,3 +33,4 @@ CONFIG_MPLS=y CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m CONFIG_MPLS_IPTUNNEL=m +CONFIG_IPV6_SIT=m diff --git a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c index ab56a6a72b7a..74370e7e286d 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c +++ b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c @@ -77,17 +77,52 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto, struct v4hdr h_outer; struct tcphdr tcph; int olen, l2_len; + int tcp_off; __u64 flags; - if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner, - sizeof(iph_inner)) < 0) - return TC_ACT_OK; + /* Most tests encapsulate a packet into a tunnel with the same + * network protocol, and derive the outer header fields from + * the inner header. + * + * The 6in4 case tests different inner and outer protocols. As + * the inner is ipv6, but the outer expects an ipv4 header as + * input, manually build a struct iphdr based on the ipv6hdr. 
+ */ + if (encap_proto == IPPROTO_IPV6) { + const __u32 saddr = (192 << 24) | (168 << 16) | (1 << 8) | 1; + const __u32 daddr = (192 << 24) | (168 << 16) | (1 << 8) | 2; + struct ipv6hdr iph6_inner; + + /* Read the IPv6 header */ + if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph6_inner, + sizeof(iph6_inner)) < 0) + return TC_ACT_OK; + + /* Derive the IPv4 header fields from the IPv6 header */ + memset(&iph_inner, 0, sizeof(iph_inner)); + iph_inner.version = 4; + iph_inner.ihl = 5; + iph_inner.tot_len = bpf_htons(sizeof(iph6_inner) + + bpf_ntohs(iph6_inner.payload_len)); + iph_inner.ttl = iph6_inner.hop_limit - 1; + iph_inner.protocol = iph6_inner.nexthdr; + iph_inner.saddr = __bpf_constant_htonl(saddr); + iph_inner.daddr = __bpf_constant_htonl(daddr); + + tcp_off = sizeof(iph6_inner); + } else { + if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner, + sizeof(iph_inner)) < 0) + return TC_ACT_OK; + + tcp_off = sizeof(iph_inner); + } /* filter only packets we want */ if (iph_inner.ihl != 5 || iph_inner.protocol != IPPROTO_TCP) return TC_ACT_OK; - if (bpf_skb_load_bytes(skb, ETH_HLEN + sizeof(iph_inner), + if (bpf_skb_load_bytes(skb, ETH_HLEN + tcp_off, &tcph, sizeof(tcph)) < 0) return TC_ACT_OK; @@ -129,6 +164,7 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto, l2_len); break; case IPPROTO_IPIP: + case IPPROTO_IPV6: break; default: return TC_ACT_OK; @@ -164,6 +200,17 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto, BPF_F_INVALIDATE_HASH) < 0) return TC_ACT_SHOT; + /* if changing outer proto type, update eth->h_proto */ + if (encap_proto == IPPROTO_IPV6) { + struct ethhdr eth; + + if (bpf_skb_load_bytes(skb, 0, &eth, sizeof(eth)) < 0) + return TC_ACT_SHOT; + eth.h_proto = bpf_htons(ETH_P_IP); + if (bpf_skb_store_bytes(skb, 0, &eth, sizeof(eth), 0) < 0) + return TC_ACT_SHOT; + } + return TC_ACT_OK; } @@ -325,6 +372,15 @@ int __encap_udp_eth(struct __sk_buff *skb) return TC_ACT_OK; } +SEC("encap_sit_none") +int
__encap_sit_none(struct __sk_buff *skb) +{ + if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6)) + return encap_ipv4(skb, IPPROTO_IPV6, ETH_P_IP); + else + return TC_ACT_OK; +} + SEC("encap_ip6tnl_none") int __encap_ip6tnl_none(struct __sk_buff *skb) { diff --git a/tools/testing/selftests/bpf/test_tc_tunnel.sh b/tools/testing/selftests/bpf/test_tc_tunnel.sh index d4d8d5d3b06e..ff0d31d38061 100755 --- a/tools/testing/selftests/bpf/test_tc_tunnel.sh +++ b/tools/testing/selftests/bpf/test_tc_tunnel.sh @@ -97,6 +97,9 @@ if [[ "$#" -eq "0" ]]; then echo "ip6ip6" $0 ipv6 ip6tnl none 100 + echo "sit" + $0 ipv6 sit none 100 + for mac in none mpls eth ; do echo "ip gre $mac" $0 ipv4 gre $mac 100 @@ -211,11 +214,20 @@ else targs="" fi +# tunnel address family differs from inner for SIT +if [[ "${tuntype}" == "sit" ]]; then + link_addr1="${ns1_v4}" + link_addr2="${ns2_v4}" +else + link_addr1="${addr1}" + link_addr2="${addr2}" +fi + # serverside, insert decap module # server is still running # client can connect again ip netns exec "${ns2}" ip link add name testtun0 type "${ttype}" \ - ${tmode} remote "${addr1}" local "${addr2}" $targs + ${tmode} remote "${link_addr1}" local "${link_addr2}" $targs expect_tun_fail=0 @@ -260,6 +272,12 @@ else server_listen fi +# bpf_skb_net_shrink does not take tunnel flags yet, cannot update L3. +if [[ "${tuntype}" == "sit" ]]; then + echo OK + exit 0 +fi + # serverside, use BPF for decap ip netns exec "${ns2}" ip link del dev testtun0 ip netns exec "${ns2}" tc qdisc add dev veth2 clsact |