author    Alexei Starovoitov <ast@kernel.org>  2020-09-30 11:50:35 -0700
committer Alexei Starovoitov <ast@kernel.org>  2020-09-30 11:50:35 -0700
commit    ea7da1d563f11ee5f853fd19fc178ff17e990d09 (patch)
tree      3115443403643360337a3687c68186f44dda004a /tools
parent    963ec27a10fa6dc700c0465e95965c0bf696a606 (diff)
parent    eef4a011f35d3aa657d4995c53ccb240d9eaea73 (diff)
Merge branch 'Various BPF helper improvements'
Daniel Borkmann says:

====================
This series adds two BPF helpers: one for retrieving the classid of an skb
and another for redirecting via the neigh subsystem. It also improves the
cookie helpers by removing the atomic counter. I've additionally added the
bpf_tail_call_static() helper to the libbpf API, which we've been using in
Cilium for a while now, and last but not least the series adds a few
selftests. For details, please check the individual patches, thanks!

v3 -> v4:
  - Removed out_rec error path (Martin)
  - Integrate BPF_F_NEIGH flag into rejecting invalid flags (Martin)
    - I think this way it's better to avoid bit overlaps given it's right
      in the place that would need to be extended on new flags

v2 -> v3:
  - Removed double skb->dev = dev assignment (David)
  - Added headroom check for v6 path (David)
  - Set flowi4_proto for ip_route_output_flow (David)
  - Rebased onto latest bpf-next

v1 -> v2:
  - Rework cookie generator to support nested contexts (Eric)
  - Use ip_neigh_gw6() and container_of() (David)
  - Rename __throw_build_bug() and improve comments (Andrii)
  - Use bpf_tail_call_static() also in BPF samples (Maciej)
====================

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Diffstat (limited to 'tools')
-rw-r--r--  tools/include/uapi/linux/bpf.h                          |  24
-rw-r--r--  tools/lib/bpf/bpf_helpers.h                             |  46
-rw-r--r--  tools/testing/selftests/bpf/progs/bpf_flow.c            |  12
-rw-r--r--  tools/testing/selftests/bpf/progs/tailcall1.c           |  28
-rw-r--r--  tools/testing/selftests/bpf/progs/tailcall2.c           |  14
-rw-r--r--  tools/testing/selftests/bpf/progs/tailcall3.c           |   4
-rw-r--r--  tools/testing/selftests/bpf/progs/tailcall_bpf2bpf1.c   |   4
-rw-r--r--  tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c   |   6
-rw-r--r--  tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c   |   6
-rw-r--r--  tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c   |   6
-rw-r--r--  tools/testing/selftests/bpf/progs/test_tc_neigh.c       | 144
-rwxr-xr-x  tools/testing/selftests/bpf/test_tc_neigh.sh            | 168
12 files changed, 422 insertions(+), 40 deletions(-)
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 2b1d3f16cbd1..1f17c6752deb 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3643,6 +3643,28 @@ union bpf_attr {
* *flags* are identical to those used for bpf_snprintf_btf.
* Return
* 0 on success or a negative error in case of failure.
+ *
+ * u64 bpf_skb_cgroup_classid(struct sk_buff *skb)
+ * Description
+ * See **bpf_get_cgroup_classid**\ () for the main description.
+ * This helper differs from **bpf_get_cgroup_classid**\ () in that
+ * the cgroup v1 net_cls class is retrieved only from the *skb*'s
+ * associated socket instead of the current process.
+ * Return
+ * The id is returned or 0 in case the id could not be retrieved.
+ *
+ * long bpf_redirect_neigh(u32 ifindex, u64 flags)
+ * Description
+ * Redirect the packet to another net device of index *ifindex*
+ * and fill in L2 addresses from neighboring subsystem. This helper
+ * is somewhat similar to **bpf_redirect**\ (), except that it
+ * fills in e.g. MAC addresses based on the L3 information from
+ * the packet. This helper is supported for IPv4 and IPv6 protocols.
+ * The *flags* argument is reserved and must be 0. The helper is
+ * currently only supported for tc BPF program types.
+ * Return
+ * The helper returns **TC_ACT_REDIRECT** on success or
+ * **TC_ACT_SHOT** on error.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -3796,6 +3818,8 @@ union bpf_attr {
FN(copy_from_user), \
FN(snprintf_btf), \
FN(seq_printf_btf), \
+ FN(skb_cgroup_classid), \
+ FN(redirect_neigh), \
/* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
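
The two helpers documented above are both usable from tc BPF programs. As a rough usage sketch (not taken from this patch; the device index and the classid-based policy are purely illustrative), a classifier might combine them like this:

// Hedged sketch only: redirect ingress packets out of a hypothetical device,
// letting the kernel fill in L2 addresses via the neigh subsystem, but only
// for traffic from sockets carrying a non-zero cgroup v1 net_cls classid.
#include <linux/bpf.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>

#define TARGET_IFINDEX 4 /* assumed egress device index, illustration only */

SEC("classifier")
int redirect_neigh_example(struct __sk_buff *skb)
{
        /* cgroup v1 net_cls classid of the skb's socket; 0 if unavailable */
        __u64 classid = bpf_skb_cgroup_classid(skb);

        if (!classid)
                return TC_ACT_OK;

        /* flags must be 0; returns TC_ACT_REDIRECT on success or
         * TC_ACT_SHOT on error, per the helper description above.
         */
        return bpf_redirect_neigh(TARGET_IFINDEX, 0);
}

char __license[] SEC("license") = "GPL";

The selftest added at the end of this series (test_tc_neigh.c) avoids hardcoding the ifindex by patching placeholder constants into the compiled object at test time.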
diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h
index 1106777df00b..2bdb7d6dbad2 100644
--- a/tools/lib/bpf/bpf_helpers.h
+++ b/tools/lib/bpf/bpf_helpers.h
@@ -54,6 +54,52 @@
#endif
+/*
+ * Helper macro to throw a compilation error if __bpf_unreachable() gets
+ * built into the resulting code. This works given BPF back end does not
+ * implement __builtin_trap(). This is useful to assert that certain paths
+ * of the program code are never used and hence eliminated by the compiler.
+ *
+ * For example, consider a switch statement that covers known cases used by
+ * the program. __bpf_unreachable() can then reside in the default case. If
+ * the program gets extended such that a case is not covered in the switch
+ * statement, then it will throw a build error due to the default case not
+ * being compiled out.
+ */
+#ifndef __bpf_unreachable
+# define __bpf_unreachable() __builtin_trap()
+#endif
+
+/*
+ * Helper function to perform a tail call with a constant/immediate map slot.
+ */
+static __always_inline void
+bpf_tail_call_static(void *ctx, const void *map, const __u32 slot)
+{
+ if (!__builtin_constant_p(slot))
+ __bpf_unreachable();
+
+ /*
+ * Provide a hard guarantee that LLVM won't optimize setting r2 (map
+ * pointer) and r3 (constant map index) from _different paths_ ending
+ * up at the _same_ call insn as otherwise we won't be able to use the
+ * jmpq/nopl retpoline-free patching by the x86-64 JIT in the kernel
+ * given they mismatch. See also d2e4c1e6c294 ("bpf: Constant map key
+ * tracking for prog array pokes") for details on verifier tracking.
+ *
+ * Note on clobber list: we need to stay in-line with BPF calling
+ * convention, so even if we don't end up using r0, r4, r5, we need
+ * to mark them as clobber so that LLVM doesn't end up using them
+ * before / after the call.
+ */
+ asm volatile("r1 = %[ctx]\n\t"
+ "r2 = %[map]\n\t"
+ "r3 = %[slot]\n\t"
+ "call 12"
+ :: [ctx]"r"(ctx), [map]"r"(map), [slot]"i"(slot)
+ : "r0", "r1", "r2", "r3", "r4", "r5");
+}
+
/*
* Helper structure used by eBPF C program
* to describe BPF map attributes to libbpf loader
*/
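
To illustrate how the two additions above are meant to be combined (a sketch, not part of the patch): __bpf_unreachable() in a switch default forces a build failure unless the compiler can prove that branch dead, which it can when the switch operand is a compile-time constant at every inlined call site, and bpf_tail_call_static() then always sees a constant map slot. The map name, slot numbers and section name below are illustrative.

#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <bpf/bpf_helpers.h>

struct {
        __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
        __uint(max_entries, 2);
        __uint(key_size, sizeof(__u32));
        __uint(value_size, sizeof(__u32));
} jmp_table SEC(".maps");

static __always_inline void tail_dispatch(struct __sk_buff *skb,
                                          const __u16 proto)
{
        switch (proto) {
        case ETH_P_IP:
                bpf_tail_call_static(skb, &jmp_table, 0);
                break;
        case ETH_P_IPV6:
                bpf_tail_call_static(skb, &jmp_table, 1);
                break;
        default:
                /* proto is constant at every call site, so this branch must
                 * be compiled out; if a call site ever passes an uncovered
                 * value, __builtin_trap() has no BPF lowering and the build
                 * fails instead of silently falling through.
                 */
                __bpf_unreachable();
        }
}

SEC("classifier")
int entry(struct __sk_buff *skb)
{
        /* Constant argument lets the compiler fold the switch and
         * eliminate the default case entirely.
         */
        tail_dispatch(skb, ETH_P_IP);
        return 0;
}

char __license[] SEC("license") = "GPL";

The bpf_flow.c and tailcall selftest conversions below use the same bpf_tail_call_static() call shape, just without the build-time guard.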
diff --git a/tools/testing/selftests/bpf/progs/bpf_flow.c b/tools/testing/selftests/bpf/progs/bpf_flow.c
index de6de9221518..5a65f6b51377 100644
--- a/tools/testing/selftests/bpf/progs/bpf_flow.c
+++ b/tools/testing/selftests/bpf/progs/bpf_flow.c
@@ -118,18 +118,18 @@ static __always_inline int parse_eth_proto(struct __sk_buff *skb, __be16 proto)
switch (proto) {
case bpf_htons(ETH_P_IP):
- bpf_tail_call(skb, &jmp_table, IP);
+ bpf_tail_call_static(skb, &jmp_table, IP);
break;
case bpf_htons(ETH_P_IPV6):
- bpf_tail_call(skb, &jmp_table, IPV6);
+ bpf_tail_call_static(skb, &jmp_table, IPV6);
break;
case bpf_htons(ETH_P_MPLS_MC):
case bpf_htons(ETH_P_MPLS_UC):
- bpf_tail_call(skb, &jmp_table, MPLS);
+ bpf_tail_call_static(skb, &jmp_table, MPLS);
break;
case bpf_htons(ETH_P_8021Q):
case bpf_htons(ETH_P_8021AD):
- bpf_tail_call(skb, &jmp_table, VLAN);
+ bpf_tail_call_static(skb, &jmp_table, VLAN);
break;
default:
/* Protocol not supported */
@@ -246,10 +246,10 @@ static __always_inline int parse_ipv6_proto(struct __sk_buff *skb, __u8 nexthdr)
switch (nexthdr) {
case IPPROTO_HOPOPTS:
case IPPROTO_DSTOPTS:
- bpf_tail_call(skb, &jmp_table, IPV6OP);
+ bpf_tail_call_static(skb, &jmp_table, IPV6OP);
break;
case IPPROTO_FRAGMENT:
- bpf_tail_call(skb, &jmp_table, IPV6FR);
+ bpf_tail_call_static(skb, &jmp_table, IPV6FR);
break;
default:
return parse_ip_proto(skb, nexthdr);
diff --git a/tools/testing/selftests/bpf/progs/tailcall1.c b/tools/testing/selftests/bpf/progs/tailcall1.c
index 1f407e65ae52..7115bcefbe8a 100644
--- a/tools/testing/selftests/bpf/progs/tailcall1.c
+++ b/tools/testing/selftests/bpf/progs/tailcall1.c
@@ -26,20 +26,20 @@ int entry(struct __sk_buff *skb)
/* Multiple locations to make sure we patch
* all of them.
*/
- bpf_tail_call(skb, &jmp_table, 0);
- bpf_tail_call(skb, &jmp_table, 0);
- bpf_tail_call(skb, &jmp_table, 0);
- bpf_tail_call(skb, &jmp_table, 0);
-
- bpf_tail_call(skb, &jmp_table, 1);
- bpf_tail_call(skb, &jmp_table, 1);
- bpf_tail_call(skb, &jmp_table, 1);
- bpf_tail_call(skb, &jmp_table, 1);
-
- bpf_tail_call(skb, &jmp_table, 2);
- bpf_tail_call(skb, &jmp_table, 2);
- bpf_tail_call(skb, &jmp_table, 2);
- bpf_tail_call(skb, &jmp_table, 2);
+ bpf_tail_call_static(skb, &jmp_table, 0);
+ bpf_tail_call_static(skb, &jmp_table, 0);
+ bpf_tail_call_static(skb, &jmp_table, 0);
+ bpf_tail_call_static(skb, &jmp_table, 0);
+
+ bpf_tail_call_static(skb, &jmp_table, 1);
+ bpf_tail_call_static(skb, &jmp_table, 1);
+ bpf_tail_call_static(skb, &jmp_table, 1);
+ bpf_tail_call_static(skb, &jmp_table, 1);
+
+ bpf_tail_call_static(skb, &jmp_table, 2);
+ bpf_tail_call_static(skb, &jmp_table, 2);
+ bpf_tail_call_static(skb, &jmp_table, 2);
+ bpf_tail_call_static(skb, &jmp_table, 2);
return 3;
}
diff --git a/tools/testing/selftests/bpf/progs/tailcall2.c b/tools/testing/selftests/bpf/progs/tailcall2.c
index a093e739cf0e..0431e4fe7efd 100644
--- a/tools/testing/selftests/bpf/progs/tailcall2.c
+++ b/tools/testing/selftests/bpf/progs/tailcall2.c
@@ -13,14 +13,14 @@ struct {
SEC("classifier/0")
int bpf_func_0(struct __sk_buff *skb)
{
- bpf_tail_call(skb, &jmp_table, 1);
+ bpf_tail_call_static(skb, &jmp_table, 1);
return 0;
}
SEC("classifier/1")
int bpf_func_1(struct __sk_buff *skb)
{
- bpf_tail_call(skb, &jmp_table, 2);
+ bpf_tail_call_static(skb, &jmp_table, 2);
return 1;
}
@@ -33,25 +33,25 @@ int bpf_func_2(struct __sk_buff *skb)
SEC("classifier/3")
int bpf_func_3(struct __sk_buff *skb)
{
- bpf_tail_call(skb, &jmp_table, 4);
+ bpf_tail_call_static(skb, &jmp_table, 4);
return 3;
}
SEC("classifier/4")
int bpf_func_4(struct __sk_buff *skb)
{
- bpf_tail_call(skb, &jmp_table, 3);
+ bpf_tail_call_static(skb, &jmp_table, 3);
return 4;
}
SEC("classifier")
int entry(struct __sk_buff *skb)
{
- bpf_tail_call(skb, &jmp_table, 0);
+ bpf_tail_call_static(skb, &jmp_table, 0);
/* Check multi-prog update. */
- bpf_tail_call(skb, &jmp_table, 2);
+ bpf_tail_call_static(skb, &jmp_table, 2);
/* Check tail call limit. */
- bpf_tail_call(skb, &jmp_table, 3);
+ bpf_tail_call_static(skb, &jmp_table, 3);
return 3;
}
diff --git a/tools/testing/selftests/bpf/progs/tailcall3.c b/tools/testing/selftests/bpf/progs/tailcall3.c
index cabda877cf0a..739dc2a51e74 100644
--- a/tools/testing/selftests/bpf/progs/tailcall3.c
+++ b/tools/testing/selftests/bpf/progs/tailcall3.c
@@ -16,14 +16,14 @@ SEC("classifier/0")
int bpf_func_0(struct __sk_buff *skb)
{
count++;
- bpf_tail_call(skb, &jmp_table, 0);
+ bpf_tail_call_static(skb, &jmp_table, 0);
return 1;
}
SEC("classifier")
int entry(struct __sk_buff *skb)
{
- bpf_tail_call(skb, &jmp_table, 0);
+ bpf_tail_call_static(skb, &jmp_table, 0);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf1.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf1.c
index b5d9c8e778ae..0103f3dd9f02 100644
--- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf1.c
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf1.c
@@ -21,7 +21,7 @@ TAIL_FUNC(1)
static __noinline
int subprog_tail(struct __sk_buff *skb)
{
- bpf_tail_call(skb, &jmp_table, 0);
+ bpf_tail_call_static(skb, &jmp_table, 0);
return skb->len * 2;
}
@@ -29,7 +29,7 @@ int subprog_tail(struct __sk_buff *skb)
SEC("classifier")
int entry(struct __sk_buff *skb)
{
- bpf_tail_call(skb, &jmp_table, 1);
+ bpf_tail_call_static(skb, &jmp_table, 1);
return subprog_tail(skb);
}
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c
index a004ab28ce28..7b1c04183824 100644
--- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c
@@ -14,9 +14,9 @@ static __noinline
int subprog_tail(struct __sk_buff *skb)
{
if (load_byte(skb, 0))
- bpf_tail_call(skb, &jmp_table, 1);
+ bpf_tail_call_static(skb, &jmp_table, 1);
else
- bpf_tail_call(skb, &jmp_table, 0);
+ bpf_tail_call_static(skb, &jmp_table, 0);
return 1;
}
@@ -32,7 +32,7 @@ int bpf_func_0(struct __sk_buff *skb)
SEC("classifier")
int entry(struct __sk_buff *skb)
{
- bpf_tail_call(skb, &jmp_table, 0);
+ bpf_tail_call_static(skb, &jmp_table, 0);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c
index 96dbef2b6b7c..0d5482bea6c9 100644
--- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c
@@ -16,9 +16,9 @@ int subprog_tail2(struct __sk_buff *skb)
volatile char arr[64] = {};
if (load_word(skb, 0) || load_half(skb, 0))
- bpf_tail_call(skb, &jmp_table, 10);
+ bpf_tail_call_static(skb, &jmp_table, 10);
else
- bpf_tail_call(skb, &jmp_table, 1);
+ bpf_tail_call_static(skb, &jmp_table, 1);
return skb->len;
}
@@ -28,7 +28,7 @@ int subprog_tail(struct __sk_buff *skb)
{
volatile char arr[64] = {};
- bpf_tail_call(skb, &jmp_table, 0);
+ bpf_tail_call_static(skb, &jmp_table, 0);
return skb->len * 2;
}
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c
index 98b40a95bc67..9a1b166b7fbe 100644
--- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c
@@ -14,21 +14,21 @@ static volatile int count;
__noinline
int subprog_tail_2(struct __sk_buff *skb)
{
- bpf_tail_call(skb, &jmp_table, 2);
+ bpf_tail_call_static(skb, &jmp_table, 2);
return skb->len * 3;
}
__noinline
int subprog_tail_1(struct __sk_buff *skb)
{
- bpf_tail_call(skb, &jmp_table, 1);
+ bpf_tail_call_static(skb, &jmp_table, 1);
return skb->len * 2;
}
__noinline
int subprog_tail(struct __sk_buff *skb)
{
- bpf_tail_call(skb, &jmp_table, 0);
+ bpf_tail_call_static(skb, &jmp_table, 0);
return skb->len;
}
diff --git a/tools/testing/selftests/bpf/progs/test_tc_neigh.c b/tools/testing/selftests/bpf/progs/test_tc_neigh.c
new file mode 100644
index 000000000000..889a72c3024f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_tc_neigh.c
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdint.h>
+#include <stdbool.h>
+
+#include <linux/bpf.h>
+#include <linux/stddef.h>
+#include <linux/pkt_cls.h>
+#include <linux/if_ether.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#ifndef barrier_data
+# define barrier_data(ptr) asm volatile("": :"r"(ptr) :"memory")
+#endif
+
+#ifndef ctx_ptr
+# define ctx_ptr(field) (void *)(long)(field)
+#endif
+
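+/* Placeholder ifindex values: test_tc_neigh.sh rewrites these byte patterns
+ * in the compiled object (via xxd/sed) with the real veth ifindexes in
+ * network byte order, and the programs below convert with bpf_ntohl().
+ */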
+#define dst_to_src_tmp 0xeeddddeeU
+#define src_to_dst_tmp 0xeeffffeeU
+
+#define ip4_src 0xac100164 /* 172.16.1.100 */
+#define ip4_dst 0xac100264 /* 172.16.2.100 */
+
+#define ip6_src { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
+ 0x00, 0x01, 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe }
+#define ip6_dst { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
+ 0x00, 0x02, 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe }
+
+#ifndef v6_equal
+# define v6_equal(a, b) (a.s6_addr32[0] == b.s6_addr32[0] && \
+ a.s6_addr32[1] == b.s6_addr32[1] && \
+ a.s6_addr32[2] == b.s6_addr32[2] && \
+ a.s6_addr32[3] == b.s6_addr32[3])
+#endif
+
+static __always_inline bool is_remote_ep_v4(struct __sk_buff *skb,
+ __be32 addr)
+{
+ void *data_end = ctx_ptr(skb->data_end);
+ void *data = ctx_ptr(skb->data);
+ struct iphdr *ip4h;
+
+ if (data + sizeof(struct ethhdr) > data_end)
+ return false;
+
+ ip4h = (struct iphdr *)(data + sizeof(struct ethhdr));
+ if ((void *)(ip4h + 1) > data_end)
+ return false;
+
+ return ip4h->daddr == addr;
+}
+
+static __always_inline bool is_remote_ep_v6(struct __sk_buff *skb,
+ struct in6_addr addr)
+{
+ void *data_end = ctx_ptr(skb->data_end);
+ void *data = ctx_ptr(skb->data);
+ struct ipv6hdr *ip6h;
+
+ if (data + sizeof(struct ethhdr) > data_end)
+ return false;
+
+ ip6h = (struct ipv6hdr *)(data + sizeof(struct ethhdr));
+ if ((void *)(ip6h + 1) > data_end)
+ return false;
+
+ return v6_equal(ip6h->daddr, addr);
+}
+
+SEC("chk_neigh") int tc_chk(struct __sk_buff *skb)
+{
+ void *data_end = ctx_ptr(skb->data_end);
+ void *data = ctx_ptr(skb->data);
+ __u32 *raw = data;
+
+ if (data + sizeof(struct ethhdr) > data_end)
+ return TC_ACT_SHOT;
+
+ return !raw[0] && !raw[1] && !raw[2] ? TC_ACT_SHOT : TC_ACT_OK;
+}
+
+SEC("dst_ingress") int tc_dst(struct __sk_buff *skb)
+{
+ int idx = dst_to_src_tmp;
+ __u8 zero[ETH_ALEN * 2];
+ bool redirect = false;
+
+ switch (skb->protocol) {
+ case __bpf_constant_htons(ETH_P_IP):
+ redirect = is_remote_ep_v4(skb, __bpf_constant_htonl(ip4_src));
+ break;
+ case __bpf_constant_htons(ETH_P_IPV6):
+ redirect = is_remote_ep_v6(skb, (struct in6_addr)ip6_src);
+ break;
+ }
+
+ if (!redirect)
+ return TC_ACT_OK;
+
+ barrier_data(&idx);
+ idx = bpf_ntohl(idx);
+
+ __builtin_memset(&zero, 0, sizeof(zero));
+ if (bpf_skb_store_bytes(skb, 0, &zero, sizeof(zero), 0) < 0)
+ return TC_ACT_SHOT;
+
+ return bpf_redirect_neigh(idx, 0);
+}
+
+SEC("src_ingress") int tc_src(struct __sk_buff *skb)
+{
+ int idx = src_to_dst_tmp;
+ __u8 zero[ETH_ALEN * 2];
+ bool redirect = false;
+
+ switch (skb->protocol) {
+ case __bpf_constant_htons(ETH_P_IP):
+ redirect = is_remote_ep_v4(skb, __bpf_constant_htonl(ip4_dst));
+ break;
+ case __bpf_constant_htons(ETH_P_IPV6):
+ redirect = is_remote_ep_v6(skb, (struct in6_addr)ip6_dst);
+ break;
+ }
+
+ if (!redirect)
+ return TC_ACT_OK;
+
+ barrier_data(&idx);
+ idx = bpf_ntohl(idx);
+
+ __builtin_memset(&zero, 0, sizeof(zero));
+ if (bpf_skb_store_bytes(skb, 0, &zero, sizeof(zero), 0) < 0)
+ return TC_ACT_SHOT;
+
+ return bpf_redirect_neigh(idx, 0);
+}
+
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_tc_neigh.sh b/tools/testing/selftests/bpf/test_tc_neigh.sh
new file mode 100755
index 000000000000..31d8c3df8b24
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tc_neigh.sh
@@ -0,0 +1,168 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This test sets up 3 netns (src <-> fwd <-> dst). There is no direct veth link
+# between src and dst. The netns fwd has veth links to each src and dst. The
+# client is in src and server in dst. The test installs a TC BPF program to each
+# host facing veth in fwd which calls into bpf_redirect_neigh() to perform the
+# neigh addr population and redirect; it also installs a dropper prog on the
+# egress side to drop skbs if neigh addrs were not populated.
+
+if [[ $EUID -ne 0 ]]; then
+ echo "This script must be run as root"
+ echo "FAIL"
+ exit 1
+fi
+
+# check that nc, dd, ping, ping6 and timeout are present
+command -v nc >/dev/null 2>&1 || \
+ { echo >&2 "nc is not available"; exit 1; }
+command -v dd >/dev/null 2>&1 || \
+ { echo >&2 "dd is not available"; exit 1; }
+command -v timeout >/dev/null 2>&1 || \
+ { echo >&2 "timeout is not available"; exit 1; }
+command -v ping >/dev/null 2>&1 || \
+ { echo >&2 "ping is not available"; exit 1; }
+command -v ping6 >/dev/null 2>&1 || \
+ { echo >&2 "ping6 is not available"; exit 1; }
+
+readonly GREEN='\033[0;92m'
+readonly RED='\033[0;31m'
+readonly NC='\033[0m' # No Color
+
+readonly PING_ARG="-c 3 -w 10 -q"
+
+readonly TIMEOUT=10
+
+readonly NS_SRC="ns-src-$(mktemp -u XXXXXX)"
+readonly NS_FWD="ns-fwd-$(mktemp -u XXXXXX)"
+readonly NS_DST="ns-dst-$(mktemp -u XXXXXX)"
+
+readonly IP4_SRC="172.16.1.100"
+readonly IP4_DST="172.16.2.100"
+
+readonly IP6_SRC="::1:dead:beef:cafe"
+readonly IP6_DST="::2:dead:beef:cafe"
+
+readonly IP4_SLL="169.254.0.1"
+readonly IP4_DLL="169.254.0.2"
+readonly IP4_NET="169.254.0.0"
+
+cleanup()
+{
+ ip netns del ${NS_SRC}
+ ip netns del ${NS_FWD}
+ ip netns del ${NS_DST}
+}
+
+trap cleanup EXIT
+
+set -e
+
+ip netns add "${NS_SRC}"
+ip netns add "${NS_FWD}"
+ip netns add "${NS_DST}"
+
+ip link add veth_src type veth peer name veth_src_fwd
+ip link add veth_dst type veth peer name veth_dst_fwd
+
+ip link set veth_src netns ${NS_SRC}
+ip link set veth_src_fwd netns ${NS_FWD}
+
+ip link set veth_dst netns ${NS_DST}
+ip link set veth_dst_fwd netns ${NS_FWD}
+
+ip -netns ${NS_SRC} addr add ${IP4_SRC}/32 dev veth_src
+ip -netns ${NS_DST} addr add ${IP4_DST}/32 dev veth_dst
+
+# The fwd netns automatically gets a v6 LL address / routes, but also needs a v4
+# one in order to start ARP probing. IP4_NET route is added to the endpoints
+# so that the ARP processing will reply.
+
+ip -netns ${NS_FWD} addr add ${IP4_SLL}/32 dev veth_src_fwd
+ip -netns ${NS_FWD} addr add ${IP4_DLL}/32 dev veth_dst_fwd
+
+ip -netns ${NS_SRC} addr add ${IP6_SRC}/128 dev veth_src nodad
+ip -netns ${NS_DST} addr add ${IP6_DST}/128 dev veth_dst nodad
+
+ip -netns ${NS_SRC} link set dev veth_src up
+ip -netns ${NS_FWD} link set dev veth_src_fwd up
+
+ip -netns ${NS_DST} link set dev veth_dst up
+ip -netns ${NS_FWD} link set dev veth_dst_fwd up
+
+ip -netns ${NS_SRC} route add ${IP4_DST}/32 dev veth_src scope global
+ip -netns ${NS_SRC} route add ${IP4_NET}/16 dev veth_src scope global
+ip -netns ${NS_FWD} route add ${IP4_SRC}/32 dev veth_src_fwd scope global
+
+ip -netns ${NS_SRC} route add ${IP6_DST}/128 dev veth_src scope global
+ip -netns ${NS_FWD} route add ${IP6_SRC}/128 dev veth_src_fwd scope global
+
+ip -netns ${NS_DST} route add ${IP4_SRC}/32 dev veth_dst scope global
+ip -netns ${NS_DST} route add ${IP4_NET}/16 dev veth_dst scope global
+ip -netns ${NS_FWD} route add ${IP4_DST}/32 dev veth_dst_fwd scope global
+
+ip -netns ${NS_DST} route add ${IP6_SRC}/128 dev veth_dst scope global
+ip -netns ${NS_FWD} route add ${IP6_DST}/128 dev veth_dst_fwd scope global
+
+fmac_src=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_src_fwd/address)
+fmac_dst=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_dst_fwd/address)
+
+ip -netns ${NS_SRC} neigh add ${IP4_DST} dev veth_src lladdr $fmac_src
+ip -netns ${NS_DST} neigh add ${IP4_SRC} dev veth_dst lladdr $fmac_dst
+
+ip -netns ${NS_SRC} neigh add ${IP6_DST} dev veth_src lladdr $fmac_src
+ip -netns ${NS_DST} neigh add ${IP6_SRC} dev veth_dst lladdr $fmac_dst
+
+veth_dst=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_dst_fwd/ifindex | awk '{printf "%08x\n", $1}')
+veth_src=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_src_fwd/ifindex | awk '{printf "%08x\n", $1}')
+
+xxd -p < test_tc_neigh.o | sed "s/eeddddee/$veth_src/g" | xxd -r -p > test_tc_neigh.x.o
+xxd -p < test_tc_neigh.x.o | sed "s/eeffffee/$veth_dst/g" | xxd -r -p > test_tc_neigh.y.o
+
+ip netns exec ${NS_FWD} tc qdisc add dev veth_src_fwd clsact
+ip netns exec ${NS_FWD} tc filter add dev veth_src_fwd ingress bpf da obj test_tc_neigh.y.o sec src_ingress
+ip netns exec ${NS_FWD} tc filter add dev veth_src_fwd egress bpf da obj test_tc_neigh.y.o sec chk_neigh
+
+ip netns exec ${NS_FWD} tc qdisc add dev veth_dst_fwd clsact
+ip netns exec ${NS_FWD} tc filter add dev veth_dst_fwd ingress bpf da obj test_tc_neigh.y.o sec dst_ingress
+ip netns exec ${NS_FWD} tc filter add dev veth_dst_fwd egress bpf da obj test_tc_neigh.y.o sec chk_neigh
+
+rm -f test_tc_neigh.x.o test_tc_neigh.y.o
+
+ip netns exec ${NS_DST} bash -c "nc -4 -l -p 9004 &"
+ip netns exec ${NS_DST} bash -c "nc -6 -l -p 9006 &"
+
+set +e
+
+TEST="TCPv4 connectivity test"
+ip netns exec ${NS_SRC} bash -c "timeout ${TIMEOUT} dd if=/dev/zero bs=1000 count=100 > /dev/tcp/${IP4_DST}/9004"
+if [ $? -ne 0 ]; then
+ echo -e "${TEST}: ${RED}FAIL${NC}"
+ exit 1
+fi
+echo -e "${TEST}: ${GREEN}PASS${NC}"
+
+TEST="TCPv6 connectivity test"
+ip netns exec ${NS_SRC} bash -c "timeout ${TIMEOUT} dd if=/dev/zero bs=1000 count=100 > /dev/tcp/${IP6_DST}/9006"
+if [ $? -ne 0 ]; then
+ echo -e "${TEST}: ${RED}FAIL${NC}"
+ exit 1
+fi
+echo -e "${TEST}: ${GREEN}PASS${NC}"
+
+TEST="ICMPv4 connectivity test"
+ip netns exec ${NS_SRC} ping $PING_ARG ${IP4_DST}
+if [ $? -ne 0 ]; then
+ echo -e "${TEST}: ${RED}FAIL${NC}"
+ exit 1
+fi
+echo -e "${TEST}: ${GREEN}PASS${NC}"
+
+TEST="ICMPv6 connectivity test"
+ip netns exec ${NS_SRC} ping6 $PING_ARG ${IP6_DST}
+if [ $? -ne 0 ]; then
+ echo -e "${TEST}: ${RED}FAIL${NC}"
+ exit 1
+fi
+echo -e "${TEST}: ${GREEN}PASS${NC}"