aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--etc/iproute2/bpf_pinning6
-rw-r--r--examples/bpf/README13
-rw-r--r--examples/bpf/bpf_cyclic.c30
-rw-r--r--examples/bpf/bpf_funcs.h58
-rw-r--r--examples/bpf/bpf_graft.c67
-rw-r--r--examples/bpf/bpf_prog.c33
-rw-r--r--examples/bpf/bpf_shared.c48
-rw-r--r--examples/bpf/bpf_shared.h6
-rw-r--r--examples/bpf/bpf_tailcall.c99
-rw-r--r--include/bpf_api.h225
-rw-r--r--include/bpf_elf.h6
-rw-r--r--include/utils.h7
-rw-r--r--lib/rt_names.c5
-rw-r--r--tc/e_bpf.c46
-rw-r--r--tc/f_bpf.c131
-rw-r--r--tc/m_bpf.c158
-rw-r--r--tc/tc_bpf.c1619
-rw-r--r--tc/tc_bpf.h74
18 files changed, 1947 insertions, 684 deletions
diff --git a/etc/iproute2/bpf_pinning b/etc/iproute2/bpf_pinning
new file mode 100644
index 0000000..2b39c70
--- /dev/null
+++ b/etc/iproute2/bpf_pinning
@@ -0,0 +1,6 @@
+#
+# subpath mappings from mount point for pinning
+#
+#3 tracing
+#4 foo/bar
+#5 tc/cls1
diff --git a/examples/bpf/README b/examples/bpf/README
new file mode 100644
index 0000000..4247257
--- /dev/null
+++ b/examples/bpf/README
@@ -0,0 +1,13 @@
+eBPF toy code examples (running in kernel) to familiarize yourself
+with syntax and features:
+
+ - bpf_prog.c -> Classifier examples with using maps
+ - bpf_shared.c -> Ingress/egress map sharing example
+ - bpf_tailcall.c -> Using tail call chains
+ - bpf_cyclic.c -> Simple cycle as tail calls
+ - bpf_graft.c -> Demo on altering runtime behaviour
+
+User space code example:
+
+ - bpf_agent.c -> Counterpart to bpf_prog.c for user
+ space to transfer/read out map data
diff --git a/examples/bpf/bpf_cyclic.c b/examples/bpf/bpf_cyclic.c
new file mode 100644
index 0000000..c66cbec
--- /dev/null
+++ b/examples/bpf/bpf_cyclic.c
@@ -0,0 +1,30 @@
+#include "../../include/bpf_api.h"
+
+/* Cyclic dependency example to test the kernel's runtime upper
+ * bound on loops. Also demonstrates on how to use direct-actions,
+ * loaded as: tc filter add [...] bpf da obj [...]
+ */
+#define JMP_MAP_ID 0xabccba
+
+BPF_PROG_ARRAY(jmp_tc, JMP_MAP_ID, PIN_OBJECT_NS, 1);
+
+__section_tail(JMP_MAP_ID, 0)
+int cls_loop(struct __sk_buff *skb)
+{
+ char fmt[] = "cb: %u\n";
+
+ trace_printk(fmt, sizeof(fmt), skb->cb[0]++);
+ tail_call(skb, &jmp_tc, 0);
+
+ skb->tc_classid = TC_H_MAKE(1, 42);
+ return TC_ACT_OK;
+}
+
+__section_cls_entry
+int cls_entry(struct __sk_buff *skb)
+{
+ tail_call(skb, &jmp_tc, 0);
+ return TC_ACT_SHOT;
+}
+
+BPF_LICENSE("GPL");
diff --git a/examples/bpf/bpf_funcs.h b/examples/bpf/bpf_funcs.h
deleted file mode 100644
index 1545fa9..0000000
--- a/examples/bpf/bpf_funcs.h
+++ /dev/null
@@ -1,58 +0,0 @@
-#ifndef __BPF_FUNCS__
-#define __BPF_FUNCS__
-
-/* Misc macros. */
-#ifndef __maybe_unused
-# define __maybe_unused __attribute__ ((__unused__))
-#endif
-
-#ifndef __section
-# define __section(NAME) __attribute__((section(NAME), used))
-#endif
-
-#ifndef offsetof
-# define offsetof __builtin_offsetof
-#endif
-
-#ifndef htons
-# define htons(x) __constant_htons((x))
-#endif
-
-#ifndef likely
-# define likely(x) __builtin_expect(!!(x), 1)
-#endif
-
-#ifndef unlikely
-# define unlikely(x) __builtin_expect(!!(x), 0)
-#endif
-
-/* The verifier will translate them to actual function calls. */
-static void *(*bpf_map_lookup_elem)(void *map, void *key) __maybe_unused =
- (void *) BPF_FUNC_map_lookup_elem;
-
-static int (*bpf_map_update_elem)(void *map, void *key, void *value,
- unsigned long long flags) __maybe_unused =
- (void *) BPF_FUNC_map_update_elem;
-
-static int (*bpf_map_delete_elem)(void *map, void *key) __maybe_unused =
- (void *) BPF_FUNC_map_delete_elem;
-
-static unsigned int (*get_smp_processor_id)(void) __maybe_unused =
- (void *) BPF_FUNC_get_smp_processor_id;
-
-static unsigned int (*get_prandom_u32)(void) __maybe_unused =
- (void *) BPF_FUNC_get_prandom_u32;
-
-/* LLVM built-in functions that an eBPF C program may use to emit
- * BPF_LD_ABS and BPF_LD_IND instructions.
- */
-unsigned long long load_byte(void *skb, unsigned long long off)
- asm ("llvm.bpf.load.byte");
-
-unsigned long long load_half(void *skb, unsigned long long off)
- asm ("llvm.bpf.load.half");
-
-unsigned long long load_word(void *skb, unsigned long long off)
- asm ("llvm.bpf.load.word");
-
-#endif /* __BPF_FUNCS__ */
diff --git a/examples/bpf/bpf_graft.c b/examples/bpf/bpf_graft.c
new file mode 100644
index 0000000..f48fd02
--- /dev/null
+++ b/examples/bpf/bpf_graft.c
@@ -0,0 +1,67 @@
+#include "../../include/bpf_api.h"
+
+/* This example demonstrates how classifier run-time behaviour
+ * can be altered with tail calls. We start out with an empty
+ * jmp_tc array, then add section aaa to the array slot 0, and
+ * later on atomically replace it with section bbb. Note that
+ * as shown in other examples, the tc loader can prepopulate
+ * tail called sections, here we start out with an empty one
+ * on purpose to show it can also be done this way.
+ *
+ * tc filter add dev foo parent ffff: bpf obj graft.o
+ * tc exec bpf dbg
+ * [...]
+ * Socket Thread-20229 [001] ..s. 138993.003923: : fallthrough
+ * <idle>-0 [001] ..s. 138993.202265: : fallthrough
+ * Socket Thread-20229 [001] ..s. 138994.004149: : fallthrough
+ * [...]
+ *
+ * tc exec bpf graft m:globals/jmp_tc key 0 obj graft.o sec aaa
+ * tc exec bpf dbg
+ * [...]
+ * Socket Thread-19818 [002] ..s. 139012.053587: : aaa
+ * <idle>-0 [002] ..s. 139012.172359: : aaa
+ * Socket Thread-19818 [001] ..s. 139012.173556: : aaa
+ * [...]
+ *
+ * tc exec bpf graft m:globals/jmp_tc key 0 obj graft.o sec bbb
+ * tc exec bpf dbg
+ * [...]
+ * Socket Thread-19818 [002] ..s. 139022.102967: : bbb
+ * <idle>-0 [002] ..s. 139022.155640: : bbb
+ * Socket Thread-19818 [001] ..s. 139022.156730: : bbb
+ * [...]
+ */
+
+BPF_PROG_ARRAY(jmp_tc, 0, PIN_GLOBAL_NS, 1);
+
+__section("aaa")
+int cls_aaa(struct __sk_buff *skb)
+{
+ char fmt[] = "aaa\n";
+
+ trace_printk(fmt, sizeof(fmt));
+ return TC_H_MAKE(1, 42);
+}
+
+__section("bbb")
+int cls_bbb(struct __sk_buff *skb)
+{
+ char fmt[] = "bbb\n";
+
+ trace_printk(fmt, sizeof(fmt));
+ return TC_H_MAKE(1, 43);
+}
+
+__section_cls_entry
+int cls_entry(struct __sk_buff *skb)
+{
+ char fmt[] = "fallthrough\n";
+
+ tail_call(skb, &jmp_tc, 0);
+ trace_printk(fmt, sizeof(fmt));
+
+ return BPF_H_DEFAULT;
+}
+
+BPF_LICENSE("GPL");
diff --git a/examples/bpf/bpf_prog.c b/examples/bpf/bpf_prog.c
index 009febd..4728049 100644
--- a/examples/bpf/bpf_prog.c
+++ b/examples/bpf/bpf_prog.c
@@ -168,8 +168,8 @@
/* Common, shared definitions with ebpf_agent.c. */
#include "bpf_shared.h"
-/* Selection of BPF helper functions for our example. */
-#include "bpf_funcs.h"
+/* BPF helper functions for our example. */
+#include "../../include/bpf_api.h"
/* Could be defined here as well, or included from the header. */
#define TC_ACT_UNSPEC (-1)
@@ -387,10 +387,10 @@ static inline void cls_update_proto_map(const struct __sk_buff *skb,
uint8_t proto = flow->ip_proto;
struct count_tuple *ct, _ct;
- ct = bpf_map_lookup_elem(&map_proto, &proto);
+ ct = map_lookup_elem(&map_proto, &proto);
if (likely(ct)) {
- __sync_fetch_and_add(&ct->packets, 1);
- __sync_fetch_and_add(&ct->bytes, skb->len);
+ lock_xadd(&ct->packets, 1);
+ lock_xadd(&ct->bytes, skb->len);
return;
}
@@ -398,7 +398,7 @@ static inline void cls_update_proto_map(const struct __sk_buff *skb,
_ct.packets = 1;
_ct.bytes = skb->len;
- bpf_map_update_elem(&map_proto, &proto, &_ct, BPF_ANY);
+ map_update_elem(&map_proto, &proto, &_ct, BPF_ANY);
}
static inline void cls_update_queue_map(const struct __sk_buff *skb)
@@ -409,11 +409,11 @@ static inline void cls_update_queue_map(const struct __sk_buff *skb)
mismatch = skb->queue_mapping != get_smp_processor_id();
- cq = bpf_map_lookup_elem(&map_queue, &queue);
+ cq = map_lookup_elem(&map_queue, &queue);
if (likely(cq)) {
- __sync_fetch_and_add(&cq->total, 1);
+ lock_xadd(&cq->total, 1);
if (mismatch)
- __sync_fetch_and_add(&cq->mismatch, 1);
+ lock_xadd(&cq->mismatch, 1);
return;
}
@@ -421,7 +421,7 @@ static inline void cls_update_queue_map(const struct __sk_buff *skb)
_cq.total = 1;
_cq.mismatch = mismatch ? 1 : 0;
- bpf_map_update_elem(&map_queue, &queue, &_cq, BPF_ANY);
+ map_update_elem(&map_queue, &queue, &_cq, BPF_ANY);
}
/* eBPF program definitions, placed in various sections, which can
@@ -439,7 +439,8 @@ static inline void cls_update_queue_map(const struct __sk_buff *skb)
* It is however not required to have multiple programs sharing
* a file.
*/
-__section("classifier") int cls_main(struct __sk_buff *skb)
+__section("classifier")
+int cls_main(struct __sk_buff *skb)
{
struct flow_keys flow;
@@ -456,13 +457,14 @@ static inline void act_update_drop_map(void)
{
uint32_t *count, cpu = get_smp_processor_id();
- count = bpf_map_lookup_elem(&map_drops, &cpu);
+ count = map_lookup_elem(&map_drops, &cpu);
if (count)
/* Only this cpu is accessing this element. */
(*count)++;
}
-__section("action-mark") int act_mark_main(struct __sk_buff *skb)
+__section("action-mark")
+int act_mark_main(struct __sk_buff *skb)
{
/* You could also mangle skb data here with the helper function
* BPF_FUNC_skb_store_bytes, etc. Or, alternatively you could
@@ -479,7 +481,8 @@ __section("action-mark") int act_mark_main(struct __sk_buff *skb)
return TC_ACT_UNSPEC;
}
-__section("action-rand") int act_rand_main(struct __sk_buff *skb)
+__section("action-rand")
+int act_rand_main(struct __sk_buff *skb)
{
/* Sorry, we're near event horizon ... */
if ((get_prandom_u32() & 3) == 0) {
@@ -493,4 +496,4 @@ __section("action-rand") int act_rand_main(struct __sk_buff *skb)
/* Last but not least, the file contains a license. Some future helper
* functions may only be available with a GPL license.
*/
-char __license[] __section("license") = "GPL";
+BPF_LICENSE("GPL");
diff --git a/examples/bpf/bpf_shared.c b/examples/bpf/bpf_shared.c
new file mode 100644
index 0000000..accc0ad
--- /dev/null
+++ b/examples/bpf/bpf_shared.c
@@ -0,0 +1,48 @@
+#include "../../include/bpf_api.h"
+
+/* Minimal, stand-alone toy map pinning example:
+ *
+ * clang -target bpf -O2 [...] -o bpf_shared.o -c bpf_shared.c
+ * tc filter add dev foo parent 1: bpf obj bpf_shared.o sec egress
+ * tc filter add dev foo parent ffff: bpf obj bpf_shared.o sec ingress
+ *
+ * Both classifier will share the very same map instance in this example,
+ * so map content can be accessed from ingress *and* egress side!
+ *
+ * This example has a pinning of PIN_OBJECT_NS, so it's private and
+ * thus shared among various program sections within the object.
+ *
+ * A setting of PIN_GLOBAL_NS would place it into a global namespace,
+ * so that it can be shared among different object files. A setting
+ * of PIN_NONE (= 0) means no sharing, so with each tc invocation a new
+ * map instance is created.
+ */
+
+BPF_ARRAY4(map_sh, 0, PIN_OBJECT_NS, 1); /* or PIN_GLOBAL_NS, or PIN_NONE */
+
+__section("egress")
+int emain(struct __sk_buff *skb)
+{
+ int key = 0, *val;
+
+ val = map_lookup_elem(&map_sh, &key);
+ if (val)
+ lock_xadd(val, 1);
+
+ return BPF_H_DEFAULT;
+}
+
+__section("ingress")
+int imain(struct __sk_buff *skb)
+{
+ char fmt[] = "map val: %d\n";
+ int key = 0, *val;
+
+ val = map_lookup_elem(&map_sh, &key);
+ if (val)
+ trace_printk(fmt, sizeof(fmt), *val);
+
+ return BPF_H_DEFAULT;
+}
+
+BPF_LICENSE("GPL");
diff --git a/examples/bpf/bpf_shared.h b/examples/bpf/bpf_shared.h
index 46423ec..a24038d 100644
--- a/examples/bpf/bpf_shared.h
+++ b/examples/bpf/bpf_shared.h
@@ -1,10 +1,6 @@
#ifndef __BPF_SHARED__
#define __BPF_SHARED__
-#include <stdint.h>
-
-#include "../../include/bpf_elf.h"
-
enum {
BPF_MAP_ID_PROTO,
BPF_MAP_ID_QUEUE,
@@ -14,7 +10,7 @@ enum {
};
struct count_tuple {
- long packets; /* type long for __sync_fetch_and_add() */
+ long packets; /* type long for lock_xadd() */
long bytes;
};
diff --git a/examples/bpf/bpf_tailcall.c b/examples/bpf/bpf_tailcall.c
new file mode 100644
index 0000000..040790d
--- /dev/null
+++ b/examples/bpf/bpf_tailcall.c
@@ -0,0 +1,99 @@
+#include "../../include/bpf_api.h"
+
+#define ENTRY_INIT 3
+#define ENTRY_0 0
+#define ENTRY_1 1
+#define MAX_JMP_SIZE 2
+
+#define FOO 42
+#define BAR 43
+
+/* This example doesn't really do anything useful, but its purpose is to
+ * demonstrate eBPF tail calls on a very simple example.
+ *
+ * cls_entry() is our classifier entry point, from there we jump based on
+ * skb->hash into cls_case1() or cls_case2(). They are both part of the
+ * program array jmp_tc. Indicated via __section_tail(), the tc loader
+ * populates the program arrays with the loaded file descriptors already.
+ *
+ * To demonstrate nested jumps, cls_case2() jumps within the same jmp_tc
+ * array to cls_case1(). And whenever we arrive at cls_case1(), we jump
+ * into cls_exit(), part of the jump array jmp_ex.
+ *
+ * Also, to show it's possible, all programs share map_sh and dump the value
+ * that the entry point incremented. The sections that are loaded into a
+ * program array can be atomically replaced during run-time, e.g. to change
+ * classifier behaviour.
+ */
+
+BPF_PROG_ARRAY(jmp_tc, FOO, PIN_OBJECT_NS, MAX_JMP_SIZE);
+BPF_PROG_ARRAY(jmp_ex, BAR, PIN_OBJECT_NS, 1);
+
+BPF_ARRAY4(map_sh, 0, PIN_OBJECT_NS, 1);
+
+__section_tail(FOO, ENTRY_0)
+int cls_case1(struct __sk_buff *skb)
+{
+ char fmt[] = "case1: map-val: %d from:%u\n";
+ int key = 0, *val;
+
+ val = map_lookup_elem(&map_sh, &key);
+ if (val)
+ trace_printk(fmt, sizeof(fmt), *val, skb->cb[0]);
+
+ skb->cb[0] = ENTRY_0;
+ tail_call(skb, &jmp_ex, ENTRY_0);
+
+ return BPF_H_DEFAULT;
+}
+
+__section_tail(FOO, ENTRY_1)
+int cls_case2(struct __sk_buff *skb)
+{
+ char fmt[] = "case2: map-val: %d from:%u\n";
+ int key = 0, *val;
+
+ val = map_lookup_elem(&map_sh, &key);
+ if (val)
+ trace_printk(fmt, sizeof(fmt), *val, skb->cb[0]);
+
+ skb->cb[0] = ENTRY_1;
+ tail_call(skb, &jmp_tc, ENTRY_0);
+
+ return BPF_H_DEFAULT;
+}
+
+__section_tail(BAR, ENTRY_0)
+int cls_exit(struct __sk_buff *skb)
+{
+ char fmt[] = "exit: map-val: %d from:%u\n";
+ int key = 0, *val;
+
+ val = map_lookup_elem(&map_sh, &key);
+ if (val)
+ trace_printk(fmt, sizeof(fmt), *val, skb->cb[0]);
+
+ /* Termination point. */
+ return BPF_H_DEFAULT;
+}
+
+__section_cls_entry
+int cls_entry(struct __sk_buff *skb)
+{
+ char fmt[] = "fallthrough\n";
+ int key = 0, *val;
+
+ /* For transferring state, we can use skb->cb[0] ... skb->cb[4]. */
+ val = map_lookup_elem(&map_sh, &key);
+ if (val) {
+ lock_xadd(val, 1);
+
+ skb->cb[0] = ENTRY_INIT;
+ tail_call(skb, &jmp_tc, skb->hash & (MAX_JMP_SIZE - 1));
+ }
+
+ trace_printk(fmt, sizeof(fmt));
+ return BPF_H_DEFAULT;
+}
+
+BPF_LICENSE("GPL");
diff --git a/include/bpf_api.h b/include/bpf_api.h
new file mode 100644
index 0000000..8503b9a
--- /dev/null
+++ b/include/bpf_api.h
@@ -0,0 +1,225 @@
+#ifndef __BPF_API__
+#define __BPF_API__
+
+/* Note:
+ *
+ * This file can be included into eBPF kernel programs. It contains
+ * a couple of useful helper functions, map/section ABI (bpf_elf.h),
+ * misc macros and some eBPF specific LLVM built-ins.
+ */
+
+#include <stdint.h>
+
+#include <linux/pkt_cls.h>
+#include <linux/bpf.h>
+#include <linux/filter.h>
+
+#include <asm/byteorder.h>
+
+#include "bpf_elf.h"
+
+/** Misc macros. */
+
+#ifndef __stringify
+# define __stringify(X) #X
+#endif
+
+#ifndef __maybe_unused
+# define __maybe_unused __attribute__((__unused__))
+#endif
+
+#ifndef offsetof
+# define offsetof(TYPE, MEMBER) __builtin_offsetof(TYPE, MEMBER)
+#endif
+
+#ifndef likely
+# define likely(X) __builtin_expect(!!(X), 1)
+#endif
+
+#ifndef unlikely
+# define unlikely(X) __builtin_expect(!!(X), 0)
+#endif
+
+#ifndef htons
+# define htons(X) __constant_htons((X))
+#endif
+
+#ifndef ntohs
+# define ntohs(X) __constant_ntohs((X))
+#endif
+
+#ifndef htonl
+# define htonl(X) __constant_htonl((X))
+#endif
+
+#ifndef ntohl
+# define ntohl(X) __constant_ntohl((X))
+#endif
+
+/** Section helper macros. */
+
+#ifndef __section
+# define __section(NAME) \
+ __attribute__((section(NAME), used))
+#endif
+
+#ifndef __section_tail
+# define __section_tail(ID, KEY) \
+ __section(__stringify(ID) "/" __stringify(KEY))
+#endif
+
+#ifndef __section_cls_entry
+# define __section_cls_entry \
+ __section(ELF_SECTION_CLASSIFIER)
+#endif
+
+#ifndef __section_act_entry
+# define __section_act_entry \
+ __section(ELF_SECTION_ACTION)
+#endif
+
+#ifndef __section_license
+# define __section_license \
+ __section(ELF_SECTION_LICENSE)
+#endif
+
+#ifndef __section_maps
+# define __section_maps \
+ __section(ELF_SECTION_MAPS)
+#endif
+
+/** Declaration helper macros. */
+
+#ifndef BPF_LICENSE
+# define BPF_LICENSE(NAME) \
+ char ____license[] __section_license = NAME
+#endif
+
+#ifndef __BPF_MAP
+# define __BPF_MAP(NAME, TYPE, ID, SIZE_KEY, SIZE_VALUE, PIN, MAX_ELEM) \
+ struct bpf_elf_map __section_maps NAME = { \
+ .type = (TYPE), \
+ .id = (ID), \
+ .size_key = (SIZE_KEY), \
+ .size_value = (SIZE_VALUE), \
+ .pinning = (PIN), \
+ .max_elem = (MAX_ELEM), \
+ }
+#endif
+
+#ifndef BPF_HASH
+# define BPF_HASH(NAME, ID, SIZE_KEY, SIZE_VALUE, PIN, MAX_ELEM) \
+ __BPF_MAP(NAME, BPF_MAP_TYPE_HASH, ID, SIZE_KEY, SIZE_VALUE, \
+ PIN, MAX_ELEM)
+#endif
+
+#ifndef BPF_ARRAY
+# define BPF_ARRAY(NAME, ID, SIZE_VALUE, PIN, MAX_ELEM) \
+ __BPF_MAP(NAME, BPF_MAP_TYPE_ARRAY, ID, sizeof(uint32_t), \
+ SIZE_VALUE, PIN, MAX_ELEM)
+#endif
+
+#ifndef BPF_ARRAY2
+# define BPF_ARRAY2(NAME, ID, PIN, MAX_ELEM) \
+ BPF_ARRAY(NAME, ID, sizeof(uint16_t), PIN, MAX_ELEM)
+#endif
+
+#ifndef BPF_ARRAY4
+# define BPF_ARRAY4(NAME, ID, PIN, MAX_ELEM) \
+ BPF_ARRAY(NAME, ID, sizeof(uint32_t), PIN, MAX_ELEM)
+#endif
+
+#ifndef BPF_ARRAY8
+# define BPF_ARRAY8(NAME, ID, PIN, MAX_ELEM) \
+ BPF_ARRAY(NAME, ID, sizeof(uint64_t), PIN, MAX_ELEM)
+#endif
+
+#ifndef BPF_PROG_ARRAY
+# define BPF_PROG_ARRAY(NAME, ID, PIN, MAX_ELEM) \
+ __BPF_MAP(NAME, BPF_MAP_TYPE_PROG_ARRAY, ID, sizeof(uint32_t), \
+ sizeof(uint32_t), PIN, MAX_ELEM)
+#endif
+
+/** Classifier helper */
+
+#ifndef BPF_H_DEFAULT
+# define BPF_H_DEFAULT -1
+#endif
+
+/** BPF helper functions for tc. */
+
+#ifndef BPF_FUNC
+# define BPF_FUNC(NAME, ...) \
+ (* NAME)(__VA_ARGS__) __maybe_unused = (void *) BPF_FUNC_##NAME
+#endif
+
+/* Map access/manipulation */
+static void *BPF_FUNC(map_lookup_elem, void *map, const void *key);
+static int BPF_FUNC(map_update_elem, void *map, const void *key,
+ const void *value, uint32_t flags);
+static int BPF_FUNC(map_delete_elem, void *map, const void *key);
+
+/* Time access */
+static uint64_t BPF_FUNC(ktime_get_ns);
+
+/* Debugging */
+static void BPF_FUNC(trace_printk, const char *fmt, int fmt_size, ...);
+
+/* Random numbers */
+static uint32_t BPF_FUNC(get_prandom_u32);
+
+/* Tail calls */
+static void BPF_FUNC(tail_call, struct __sk_buff *skb, void *map,
+ uint32_t index);
+
+/* System helpers */
+static uint32_t BPF_FUNC(get_smp_processor_id);
+
+/* Packet misc meta data */
+static uint32_t BPF_FUNC(get_cgroup_classid, struct __sk_buff *skb);
+static uint32_t BPF_FUNC(get_route_realm, struct __sk_buff *skb);
+
+/* Packet redirection */
+static int BPF_FUNC(redirect, int ifindex, uint32_t flags);
+static int BPF_FUNC(clone_redirect, struct __sk_buff *skb, int ifindex,
+ uint32_t flags);
+
+/* Packet manipulation */
+#define BPF_PSEUDO_HDR 0x10
+#define BPF_HAS_PSEUDO_HDR(flags) ((flags) & BPF_PSEUDO_HDR)
+#define BPF_HDR_FIELD_SIZE(flags) ((flags) & 0x0f)
+
+static int BPF_FUNC(skb_store_bytes, struct __sk_buff *skb, uint32_t off,
+ void *from, uint32_t len, uint32_t flags);
+static int BPF_FUNC(l3_csum_replace, struct __sk_buff *skb, uint32_t off,
+ uint32_t from, uint32_t to, uint32_t flags);
+static int BPF_FUNC(l4_csum_replace, struct __sk_buff *skb, uint32_t off,
+ uint32_t from, uint32_t to, uint32_t flags);
+
+/* Packet vlan encap/decap */
+static int BPF_FUNC(skb_vlan_push, struct __sk_buff *skb, uint16_t proto,
+ uint16_t vlan_tci);
+static int BPF_FUNC(skb_vlan_pop, struct __sk_buff *skb);
+
+/* Packet tunnel encap/decap */
+static int BPF_FUNC(skb_get_tunnel_key, struct __sk_buff *skb,
+ struct bpf_tunnel_key *to, uint32_t size, uint32_t flags);
+static int BPF_FUNC(skb_set_tunnel_key, struct __sk_buff *skb,
+ struct bpf_tunnel_key *from, uint32_t size, uint32_t flags);
+
+/** LLVM built-ins */
+
+#ifndef lock_xadd
+# define lock_xadd(ptr, val) ((void) __sync_fetch_and_add(ptr, val))
+#endif
+
+unsigned long long load_byte(void *skb, unsigned long long off)
+ asm ("llvm.bpf.load.byte");
+
+unsigned long long load_half(void *skb, unsigned long long off)
+ asm ("llvm.bpf.load.half");
+
+unsigned long long load_word(void *skb, unsigned long long off)
+ asm ("llvm.bpf.load.word");
+
+#endif /* __BPF_API__ */
diff --git a/include/bpf_elf.h b/include/bpf_elf.h
index 4bd6bb0..31a8974 100644
--- a/include/bpf_elf.h
+++ b/include/bpf_elf.h
@@ -21,6 +21,11 @@
#define ELF_MAX_MAPS 64
#define ELF_MAX_LICENSE_LEN 128
+/* Object pinning settings */
+#define PIN_NONE 0
+#define PIN_OBJECT_NS 1
+#define PIN_GLOBAL_NS 2
+
/* ELF map definition */
struct bpf_elf_map {
__u32 type;
@@ -28,6 +33,7 @@ struct bpf_elf_map {
__u32 size_value;
__u32 max_elem;
__u32 id;
+ __u32 pinning;
};
#endif /* __BPF_ELF__ */
diff --git a/include/utils.h b/include/utils.h
index cc821e8..7310f4e 100644
--- a/include/utils.h
+++ b/include/utils.h
@@ -40,6 +40,10 @@ extern bool do_all;
#define IPSEC_PROTO_ANY 255
#endif
+#ifndef CONFDIR
+#define CONFDIR "/etc/iproute2"
+#endif
+
#define SPRINT_BSIZE 64
#define SPRINT_BUF(x) char x[SPRINT_BSIZE]
@@ -196,6 +200,9 @@ void print_nlmsg_timestamp(FILE *fp, const struct nlmsghdr *n);
__attribute__ ((format (printf, (pos_str), (pos_args))))
#endif
+#define _textify(x) #x
+#define textify(x) _textify(x)
+
#define htonll(x) ((1==htonl(1)) ? (x) : ((uint64_t)htonl((x) & 0xFFFFFFFF) << 32) | htonl((x) >> 32))
#define ntohll(x) ((1==ntohl(1)) ? (x) : ((uint64_t)ntohl((x) & 0xFFFFFFFF) << 32) | ntohl((x) >> 32))
diff --git a/lib/rt_names.c b/lib/rt_names.c
index 1071a93..f6d17c0 100644
--- a/lib/rt_names.c
+++ b/lib/rt_names.c
@@ -23,10 +23,7 @@
#include <linux/rtnetlink.h>
#include "rt_names.h"
-
-#ifndef CONFDIR
-#define CONFDIR "/etc/iproute2"
-#endif
+#include "utils.h"
#define NAME_MAX_LEN 512
diff --git a/tc/e_bpf.c b/tc/e_bpf.c
index 218ba40..2d650a4 100644
--- a/tc/e_bpf.c
+++ b/tc/e_bpf.c
@@ -26,10 +26,19 @@ static char *argv_default[] = { BPF_DEFAULT_CMD, NULL };
static void explain(void)
{
- fprintf(stderr, "Usage: ... bpf [ import UDS_FILE ] [ run CMD ]\n\n");
+ fprintf(stderr, "Usage: ... bpf [ import UDS_FILE ] [ run CMD ]\n");
+ fprintf(stderr, " ... bpf [ debug ]\n");
+ fprintf(stderr, " ... bpf [ graft MAP_FILE ] [ key KEY ]\n");
+ fprintf(stderr, " `... [ object-file OBJ_FILE ] [ type TYPE ] [ section NAME ] [ verbose ]\n");
+ fprintf(stderr, " `... [ object-pinned PROG_FILE ]\n");
+ fprintf(stderr, "\n");
fprintf(stderr, "Where UDS_FILE provides the name of a unix domain socket file\n");
fprintf(stderr, "to import eBPF maps and the optional CMD denotes the command\n");
fprintf(stderr, "to be executed (default: \'%s\').\n", BPF_DEFAULT_CMD);
+ fprintf(stderr, "Where MAP_FILE points to a pinned map, OBJ_FILE to an object file\n");
+ fprintf(stderr, "and PROG_FILE to a pinned program. TYPE can be {cls, act}, where\n");
+ fprintf(stderr, "\'cls\' is default. KEY is optional and can be inferred from the\n");
+ fprintf(stderr, "section name, otherwise it needs to be provided.\n");
}
static int bpf_num_env_entries(void)
@@ -58,17 +67,40 @@ static int parse_bpf(struct exec_util *eu, int argc, char **argv)
NEXT_ARG();
argv_run = argv;
break;
- } else if (matches(*argv, "import") == 0 ||
- matches(*argv, "imp") == 0) {
+ } else if (matches(*argv, "import") == 0) {
NEXT_ARG();
bpf_uds_name = *argv;
+ } else if (matches(*argv, "debug") == 0 ||
+ matches(*argv, "dbg") == 0) {
+ if (bpf_trace_pipe())
+ fprintf(stderr,
+ "No trace pipe, tracefs not mounted?\n");
+ return -1;
+ } else if (matches(*argv, "graft") == 0) {
+ const char *bpf_map_path;
+ bool has_key = false;
+ uint32_t key;
+
+ NEXT_ARG();
+ bpf_map_path = *argv;
+ NEXT_ARG();
+ if (matches(*argv, "key") == 0) {
+ NEXT_ARG();
+ if (get_unsigned(&key, *argv, 0)) {
+ fprintf(stderr, "Illegal \"key\"\n");
+ return -1;
+ }
+ has_key = true;
+ NEXT_ARG();
+ }
+ return bpf_graft_map(bpf_map_path, has_key ?
+ &key : NULL, argc, argv);
} else {
explain();
return -1;
}
- argc--;
- argv++;
+ NEXT_ARG_FWD();
}
if (!bpf_uds_name) {
@@ -142,6 +174,6 @@ err:
}
struct exec_util bpf_exec_util = {
- .id = "bpf",
- .parse_eopt = parse_bpf,
+ .id = "bpf",
+ .parse_eopt = parse_bpf,
};
diff --git a/tc/f_bpf.c b/tc/f_bpf.c
index ac77af5..afc2e58 100644
--- a/tc/f_bpf.c
+++ b/tc/f_bpf.c
@@ -11,19 +11,8 @@
#include <stdio.h>
#include <stdlib.h>
-#include <unistd.h>
-#include <syslog.h>
-#include <fcntl.h>
-#include <libgen.h>
-#include <sys/socket.h>
-#include <netinet/in.h>
-#include <arpa/inet.h>
-#include <string.h>
-#include <stdbool.h>
-#include <errno.h>
-#include <limits.h>
-#include <linux/filter.h>
-#include <linux/if.h>
+
+#include <linux/bpf.h>
#include "utils.h"
#include "tc_util.h"
@@ -31,6 +20,13 @@
static const enum bpf_prog_type bpf_type = BPF_PROG_TYPE_SCHED_CLS;
+static const int nla_tbl[BPF_NLA_MAX] = {
+ [BPF_NLA_OPS_LEN] = TCA_BPF_OPS_LEN,
+ [BPF_NLA_OPS] = TCA_BPF_OPS,
+ [BPF_NLA_FD] = TCA_BPF_FD,
+ [BPF_NLA_NAME] = TCA_BPF_NAME,
+};
+
static void explain(void)
{
fprintf(stderr, "Usage: ... bpf ...\n");
@@ -42,6 +38,7 @@ static void explain(void)
fprintf(stderr, "eBPF use case:\n");
fprintf(stderr, " object-file FILE [ section CLS_NAME ] [ export UDS_FILE ]");
fprintf(stderr, " [ verbose ] [ direct-action ]\n");
+ fprintf(stderr, " object-pinned FILE [ direct-action ]\n");
fprintf(stderr, "\n");
fprintf(stderr, "Common remaining options:\n");
fprintf(stderr, " [ action ACTION_SPEC ]\n");
@@ -51,7 +48,8 @@ static void explain(void)
fprintf(stderr, "c,t,f,k and s are decimals; s denotes number of 4-tuples\n");
fprintf(stderr, "\n");
fprintf(stderr, "Where FILE points to a file containing the BPF_BYTECODE string,\n");
- fprintf(stderr, "an ELF file containing eBPF map definitions and bytecode.\n");
+ fprintf(stderr, "an ELF file containing eBPF map definitions and bytecode, or a\n");
+ fprintf(stderr, "pinned eBPF program.\n");
fprintf(stderr, "\n");
fprintf(stderr, "Where CLS_NAME refers to the section name containing the\n");
fprintf(stderr, "classifier (default \'%s\').\n", bpf_default_section(bpf_type));
@@ -66,119 +64,38 @@ static void explain(void)
static int bpf_parse_opt(struct filter_util *qu, char *handle,
int argc, char **argv, struct nlmsghdr *n)
{
+ const char *bpf_obj = NULL, *bpf_uds_name = NULL;
struct tcmsg *t = NLMSG_DATA(n);
- const char *bpf_uds_name = NULL;
- const char *bpf_sec_name = NULL;
unsigned int bpf_flags = 0;
- char *bpf_obj = NULL;
- struct rtattr *tail;
bool seen_run = false;
- long h = 0;
+ struct rtattr *tail;
int ret = 0;
if (argc == 0)
return 0;
if (handle) {
- h = strtol(handle, NULL, 0);
- if (h == LONG_MIN || h == LONG_MAX) {
- fprintf(stderr, "Illegal handle \"%s\", must be "
- "numeric.\n", handle);
+ if (get_u32(&t->tcm_handle, handle, 0)) {
+ fprintf(stderr, "Illegal \"handle\"\n");
return -1;
}
}
- t->tcm_handle = h;
-
tail = (struct rtattr *)(((void *)n) + NLMSG_ALIGN(n->nlmsg_len));
addattr_l(n, MAX_MSG, TCA_OPTIONS, NULL, 0);
while (argc > 0) {
if (matches(*argv, "run") == 0) {
- struct sock_filter bpf_ops[BPF_MAXINSNS];
- bool from_file, ebpf, bpf_verbose;
- int ret;
-
NEXT_ARG();
opt_bpf:
- bpf_sec_name = bpf_default_section(bpf_type);
- bpf_verbose = false;
- ebpf = false;
seen_run = true;
-
- if (strcmp(*argv, "bytecode-file") == 0 ||
- strcmp(*argv, "bcf") == 0) {
- from_file = true;
- } else if (strcmp(*argv, "bytecode") == 0 ||
- strcmp(*argv, "bc") == 0) {
- from_file = false;
- } else if (strcmp(*argv, "object-file") == 0 ||
- strcmp(*argv, "obj") == 0) {
- ebpf = true;
- } else {
- fprintf(stderr, "What is \"%s\"?\n", *argv);
- explain();
- return -1;
- }
-
- NEXT_ARG();
- if (ebpf) {
- bpf_uds_name = getenv(BPF_ENV_UDS);
- bpf_obj = *argv;
-
- NEXT_ARG_FWD();
-
- if (argc > 0 &&
- (strcmp(*argv, "section") == 0 ||
- strcmp(*argv, "sec") == 0)) {
- NEXT_ARG();
- bpf_sec_name = *argv;
- NEXT_ARG_FWD();
- }
- if (argc > 0 && !bpf_uds_name &&
- (strcmp(*argv, "export") == 0 ||
- strcmp(*argv, "exp") == 0)) {
- NEXT_ARG();
- bpf_uds_name = *argv;
- NEXT_ARG_FWD();
- }
- if (argc > 0 &&
- (strcmp(*argv, "verbose") == 0 ||
- strcmp(*argv, "verb") == 0)) {
- bpf_verbose = true;
- NEXT_ARG_FWD();
- }
-
- PREV_ARG();
- }
-
- ret = ebpf ? bpf_open_object(bpf_obj, bpf_type, bpf_sec_name,
- bpf_verbose) :
- bpf_parse_ops(argc, argv, bpf_ops, from_file);
- if (ret < 0) {
- fprintf(stderr, "%s\n", ebpf ?
- "Could not load object" :
- "Illegal \"bytecode\"");
+ if (bpf_parse_common(&argc, &argv, nla_tbl, bpf_type,
+ &bpf_obj, &bpf_uds_name, n)) {
+ fprintf(stderr, "Failed to retrieve (e)BPF data!\n");
return -1;
}
-
- if (ebpf) {
- char bpf_name[256];
-
- bpf_obj = basename(bpf_obj);
-
- snprintf(bpf_name, sizeof(bpf_name), "%s:[%s]",
- bpf_obj, bpf_sec_name);
-
- addattr32(n, MAX_MSG, TCA_BPF_FD, ret);
- addattrstrz(n, MAX_MSG, TCA_BPF_NAME, bpf_name);
- } else {
- addattr16(n, MAX_MSG, TCA_BPF_OPS_LEN, ret);
- addattr_l(n, MAX_MSG, TCA_BPF_OPS, &bpf_ops,
- ret * sizeof(struct sock_filter));
- }
} else if (matches(*argv, "classid") == 0 ||
- strcmp(*argv, "flowid") == 0) {
+ matches(*argv, "flowid") == 0) {
unsigned int handle;
NEXT_ARG();
@@ -204,7 +121,7 @@ opt_bpf:
return -1;
}
continue;
- } else if (strcmp(*argv, "help") == 0) {
+ } else if (matches(*argv, "help") == 0) {
explain();
return -1;
} else {
@@ -280,7 +197,7 @@ static int bpf_print_opt(struct filter_util *qu, FILE *f,
}
struct filter_util bpf_filter_util = {
- .id = "bpf",
- .parse_fopt = bpf_parse_opt,
- .print_fopt = bpf_print_opt,
+ .id = "bpf",
+ .parse_fopt = bpf_parse_opt,
+ .print_fopt = bpf_print_opt,
};
diff --git a/tc/m_bpf.c b/tc/m_bpf.c
index fb4c3c7..c5e2fa5 100644
--- a/tc/m_bpf.c
+++ b/tc/m_bpf.c
@@ -12,20 +12,23 @@
#include <stdio.h>
#include <stdlib.h>
-#include <unistd.h>
-#include <string.h>
-#include <stdbool.h>
-#include <libgen.h>
+
#include <linux/bpf.h>
#include <linux/tc_act/tc_bpf.h>
#include "utils.h"
-#include "rt_names.h"
#include "tc_util.h"
#include "tc_bpf.h"
static const enum bpf_prog_type bpf_type = BPF_PROG_TYPE_SCHED_ACT;
+static const int nla_tbl[BPF_NLA_MAX] = {
+ [BPF_NLA_OPS_LEN] = TCA_ACT_BPF_OPS_LEN,
+ [BPF_NLA_OPS] = TCA_ACT_BPF_OPS,
+ [BPF_NLA_FD] = TCA_ACT_BPF_FD,
+ [BPF_NLA_NAME] = TCA_ACT_BPF_NAME,
+};
+
static void explain(void)
{
fprintf(stderr, "Usage: ... bpf ... [ index INDEX ]\n");
@@ -37,12 +40,14 @@ static void explain(void)
fprintf(stderr, "eBPF use case:\n");
fprintf(stderr, " object-file FILE [ section ACT_NAME ] [ export UDS_FILE ]");
fprintf(stderr, " [ verbose ]\n");
+ fprintf(stderr, " object-pinned FILE\n");
fprintf(stderr, "\n");
fprintf(stderr, "Where BPF_BYTECODE := \'s,c t f k,c t f k,c t f k,...\'\n");
fprintf(stderr, "c,t,f,k and s are decimals; s denotes number of 4-tuples\n");
fprintf(stderr, "\n");
fprintf(stderr, "Where FILE points to a file containing the BPF_BYTECODE string,\n");
- fprintf(stderr, "an ELF file containing eBPF map definitions and bytecode.\n");
+ fprintf(stderr, "an ELF file containing eBPF map definitions and bytecode, or a\n");
+ fprintf(stderr, "pinned eBPF program.\n");
fprintf(stderr, "\n");
fprintf(stderr, "Where ACT_NAME refers to the section name containing the\n");
fprintf(stderr, "action (default \'%s\').\n", bpf_default_section(bpf_type));
@@ -54,114 +59,40 @@ static void explain(void)
fprintf(stderr, "explicitly specifies an action index upon creation.\n");
}
-static void usage(void)
+static int bpf_parse_opt(struct action_util *a, int *ptr_argc, char ***ptr_argv,
+ int tca_id, struct nlmsghdr *n)
{
- explain();
- exit(-1);
-}
-
-static int parse_bpf(struct action_util *a, int *argc_p, char ***argv_p,
- int tca_id, struct nlmsghdr *n)
-{
- char **argv = *argv_p, bpf_name[256];
+ const char *bpf_obj = NULL, *bpf_uds_name = NULL;
+ struct tc_act_bpf parm;
+ bool seen_run = false;
struct rtattr *tail;
- struct tc_act_bpf parm = { 0 };
- struct sock_filter bpf_ops[BPF_MAXINSNS];
- bool ebpf_fill = false, bpf_fill = false;
- bool ebpf = false, seen_run = false;
- const char *bpf_uds_name = NULL;
- const char *bpf_sec_name = NULL;
- char *bpf_obj = NULL;
- int argc = *argc_p, ret = 0;
- __u16 bpf_len = 0;
- __u32 bpf_fd = 0;
+ int argc, ret = 0;
+ char **argv;
+
+ argv = *ptr_argv;
+ argc = *ptr_argc;
if (matches(*argv, "bpf") != 0)
return -1;
NEXT_ARG();
+ tail = NLMSG_TAIL(n);
+ addattr_l(n, MAX_MSG, tca_id, NULL, 0);
+
while (argc > 0) {
if (matches(*argv, "run") == 0) {
- bool from_file, bpf_verbose;
- int ret;
-
NEXT_ARG();
opt_bpf:
- bpf_sec_name = bpf_default_section(bpf_type);
- bpf_verbose = false;
seen_run = true;
-
- if (strcmp(*argv, "bytecode-file") == 0 ||
- strcmp(*argv, "bcf") == 0) {
- from_file = true;
- } else if (strcmp(*argv, "bytecode") == 0 ||
- strcmp(*argv, "bc") == 0) {
- from_file = false;
- } else if (strcmp(*argv, "object-file") == 0 ||
- strcmp(*argv, "obj") == 0) {
- ebpf = true;
- } else {
- fprintf(stderr, "unexpected \"%s\"\n", *argv);
- explain();
+ if (bpf_parse_common(&argc, &argv, nla_tbl, bpf_type,
+ &bpf_obj, &bpf_uds_name, n)) {
+ fprintf(stderr, "Failed to retrieve (e)BPF data!\n");
return -1;
}
-
- NEXT_ARG();
- if (ebpf) {
- bpf_uds_name = getenv(BPF_ENV_UDS);
- bpf_obj = *argv;
-
- NEXT_ARG_FWD();
-
- if (argc > 0 &&
- (strcmp(*argv, "section") == 0 ||
- strcmp(*argv, "sec") == 0)) {
- NEXT_ARG();
- bpf_sec_name = *argv;
- NEXT_ARG_FWD();
- }
- if (argc > 0 && !bpf_uds_name &&
- (strcmp(*argv, "export") == 0 ||
- strcmp(*argv, "exp") == 0)) {
- NEXT_ARG();
- bpf_uds_name = *argv;
- NEXT_ARG_FWD();
- }
- if (argc > 0 &&
- (strcmp(*argv, "verbose") == 0 ||
- strcmp(*argv, "verb") == 0)) {
- bpf_verbose = true;
- NEXT_ARG_FWD();
- }
-
- PREV_ARG();
- }
-
- ret = ebpf ? bpf_open_object(bpf_obj, bpf_type, bpf_sec_name,
- bpf_verbose) :
- bpf_parse_ops(argc, argv, bpf_ops, from_file);
- if (ret < 0) {
- fprintf(stderr, "%s\n", ebpf ?
- "Could not load object" :
- "Illegal \"bytecode\"");
- return -1;
- }
-
- if (ebpf) {
- bpf_obj = basename(bpf_obj);
-
- snprintf(bpf_name, sizeof(bpf_name), "%s:[%s]",
- bpf_obj, bpf_sec_name);
-
- bpf_fd = ret;
- ebpf_fill = true;
- } else {
- bpf_len = ret;
- bpf_fill = true;
- }
} else if (matches(*argv, "help") == 0) {
- usage();
+ explain();
+ return -1;
} else if (matches(*argv, "index") == 0) {
break;
} else {
@@ -173,7 +104,9 @@ opt_bpf:
NEXT_ARG_FWD();
}
+ memset(&parm, 0, sizeof(parm));
parm.action = TC_ACT_PIPE;
+
if (argc) {
if (matches(*argv, "reclassify") == 0) {
parm.action = TC_ACT_RECLASSIFY;
@@ -207,32 +140,19 @@ opt_bpf:
}
}
- tail = NLMSG_TAIL(n);
-
- addattr_l(n, MAX_MSG, tca_id, NULL, 0);
addattr_l(n, MAX_MSG, TCA_ACT_BPF_PARMS, &parm, sizeof(parm));
-
- if (ebpf_fill) {
- addattr32(n, MAX_MSG, TCA_ACT_BPF_FD, bpf_fd);
- addattrstrz(n, MAX_MSG, TCA_ACT_BPF_NAME, bpf_name);
- } else if (bpf_fill) {
- addattr16(n, MAX_MSG, TCA_ACT_BPF_OPS_LEN, bpf_len);
- addattr_l(n, MAX_MSG, TCA_ACT_BPF_OPS, &bpf_ops,
- bpf_len * sizeof(struct sock_filter));
- }
-
tail->rta_len = (char *)NLMSG_TAIL(n) - (char *)tail;
- *argc_p = argc;
- *argv_p = argv;
-
if (bpf_uds_name)
ret = bpf_send_map_fds(bpf_uds_name, bpf_obj);
+ *ptr_argc = argc;
+ *ptr_argv = argv;
+
return ret;
}
-static int print_bpf(struct action_util *au, FILE *f, struct rtattr *arg)
+static int bpf_print_opt(struct action_util *au, FILE *f, struct rtattr *arg)
{
struct rtattr *tb[TCA_ACT_BPF_MAX + 1];
struct tc_act_bpf *parm;
@@ -249,7 +169,6 @@ static int print_bpf(struct action_util *au, FILE *f, struct rtattr *arg)
}
parm = RTA_DATA(tb[TCA_ACT_BPF_PARMS]);
-
fprintf(f, "bpf ");
if (tb[TCA_ACT_BPF_NAME])
@@ -276,12 +195,11 @@ static int print_bpf(struct action_util *au, FILE *f, struct rtattr *arg)
}
fprintf(f, "\n ");
-
return 0;
}
struct action_util bpf_action_util = {
- .id = "bpf",
- .parse_aopt = parse_bpf,
- .print_aopt = print_bpf,
+ .id = "bpf",
+ .parse_aopt = bpf_parse_opt,
+ .print_aopt = bpf_print_opt,
};
diff --git a/tc/tc_bpf.c b/tc/tc_bpf.c
index 276871a..beb74be 100644
--- a/tc/tc_bpf.c
+++ b/tc/tc_bpf.c
@@ -20,18 +20,25 @@
#include <errno.h>
#include <fcntl.h>
#include <stdarg.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/un.h>
-#include <linux/filter.h>
-#include <linux/netlink.h>
-#include <linux/rtnetlink.h>
#ifdef HAVE_ELF
#include <libelf.h>
#include <gelf.h>
#endif
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/un.h>
+#include <sys/vfs.h>
+#include <sys/mount.h>
+#include <sys/syscall.h>
+#include <sys/sendfile.h>
+#include <sys/resource.h>
+
+#include <linux/bpf.h>
+#include <linux/filter.h>
+#include <linux/if_alg.h>
+
#include "utils.h"
#include "bpf_elf.h"
@@ -40,9 +47,51 @@
#include "tc_util.h"
#include "tc_bpf.h"
-int bpf_parse_string(char *arg, bool from_file, __u16 *bpf_len,
- char **bpf_string, bool *need_release,
- const char separator)
+#ifdef HAVE_ELF
+static int bpf_obj_open(const char *path, enum bpf_prog_type type,
+ const char *sec, bool verbose);
+#else
+static int bpf_obj_open(const char *path, enum bpf_prog_type type,
+ const char *sec, bool verbose)
+{
+ fprintf(stderr, "No ELF library support compiled in.\n");
+ errno = ENOSYS;
+ return -1;
+}
+#endif
+
+static inline __u64 bpf_ptr_to_u64(const void *ptr)
+{
+ return (__u64)(unsigned long)ptr;
+}
+
+static int bpf(int cmd, union bpf_attr *attr, unsigned int size)
+{
+#ifdef __NR_bpf
+ return syscall(__NR_bpf, cmd, attr, size);
+#else
+ fprintf(stderr, "No bpf syscall, kernel headers too old?\n");
+ errno = ENOSYS;
+ return -1;
+#endif
+}
+
+static int bpf_map_update(int fd, const void *key, const void *value,
+ uint64_t flags)
+{
+ union bpf_attr attr = {
+ .map_fd = fd,
+ .key = bpf_ptr_to_u64(key),
+ .value = bpf_ptr_to_u64(value),
+ .flags = flags,
+ };
+
+ return bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
+}
+
+static int bpf_parse_string(char *arg, bool from_file, __u16 *bpf_len,
+ char **bpf_string, bool *need_release,
+ const char separator)
{
char sp;
@@ -90,8 +139,8 @@ int bpf_parse_string(char *arg, bool from_file, __u16 *bpf_len,
return 0;
}
-int bpf_parse_ops(int argc, char **argv, struct sock_filter *bpf_ops,
- bool from_file)
+static int bpf_ops_parse(int argc, char **argv, struct sock_filter *bpf_ops,
+ bool from_file)
{
char *bpf_string, *token, separator = ',';
int ret = 0, i = 0;
@@ -135,7 +184,6 @@ int bpf_parse_ops(int argc, char **argv, struct sock_filter *bpf_ops,
goto out;
}
ret = bpf_len;
-
out:
if (need_release)
free(bpf_string);
@@ -161,6 +209,246 @@ void bpf_print_ops(FILE *f, struct rtattr *bpf_ops, __u16 len)
ops[i].jf, ops[i].k);
}
+static int bpf_map_selfcheck_pinned(int fd, const struct bpf_elf_map *map,
+ int length)
+{
+ char file[PATH_MAX], buff[4096];
+ struct bpf_elf_map tmp, zero;
+ unsigned int val;
+ FILE *fp;
+
+ snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd);
+
+ fp = fopen(file, "r");
+ if (!fp) {
+ fprintf(stderr, "No procfs support?!\n");
+ return -EIO;
+ }
+
+ memset(&tmp, 0, sizeof(tmp));
+ while (fgets(buff, sizeof(buff), fp)) {
+ if (sscanf(buff, "map_type:\t%u", &val) == 1)
+ tmp.type = val;
+ else if (sscanf(buff, "key_size:\t%u", &val) == 1)
+ tmp.size_key = val;
+ else if (sscanf(buff, "value_size:\t%u", &val) == 1)
+ tmp.size_value = val;
+ else if (sscanf(buff, "max_entries:\t%u", &val) == 1)
+ tmp.max_elem = val;
+ }
+
+ fclose(fp);
+
+ if (!memcmp(&tmp, map, length)) {
+ return 0;
+ } else {
+ memset(&zero, 0, sizeof(zero));
+ /* If kernel doesn't have eBPF-related fdinfo, we cannot do much,
+ * so just accept it. We know we do have an eBPF fd and in this
+ * case, everything is 0. It is guaranteed that no such map exists
+ * since map type of 0 is unloadable BPF_MAP_TYPE_UNSPEC.
+ */
+ if (!memcmp(&tmp, &zero, length))
+ return 0;
+
+ fprintf(stderr, "Map specs from pinned file differ!\n");
+ return -EINVAL;
+ }
+}
+
+static int bpf_mnt_fs(const char *target)
+{
+ bool bind_done = false;
+
+ while (mount("", target, "none", MS_PRIVATE | MS_REC, NULL)) {
+ if (errno != EINVAL || bind_done) {
+ fprintf(stderr, "mount --make-private %s failed: %s\n",
+ target, strerror(errno));
+ return -1;
+ }
+
+ if (mount(target, target, "none", MS_BIND, NULL)) {
+ fprintf(stderr, "mount --bind %s %s failed: %s\n",
+ target, target, strerror(errno));
+ return -1;
+ }
+
+ bind_done = true;
+ }
+
+ if (mount("bpf", target, "bpf", 0, NULL)) {
+ fprintf(stderr, "mount -t bpf bpf %s failed: %s\n",
+ target, strerror(errno));
+ return -1;
+ }
+
+ return 0;
+}
+
+static int bpf_valid_mntpt(const char *mnt, unsigned long magic)
+{
+ struct statfs st_fs;
+
+ if (statfs(mnt, &st_fs) < 0)
+ return -ENOENT;
+ if ((unsigned long)st_fs.f_type != magic)
+ return -ENOENT;
+
+ return 0;
+}
+
+static const char *bpf_find_mntpt(const char *fstype, unsigned long magic,
+ char *mnt, int len,
+ const char * const *known_mnts)
+{
+ const char * const *ptr;
+ char type[100];
+ FILE *fp;
+
+ if (known_mnts) {
+ ptr = known_mnts;
+ while (*ptr) {
+ if (bpf_valid_mntpt(*ptr, magic) == 0) {
+ strncpy(mnt, *ptr, len - 1);
+ mnt[len - 1] = 0;
+ return mnt;
+ }
+ ptr++;
+ }
+ }
+
+ fp = fopen("/proc/mounts", "r");
+ if (fp == NULL || len != PATH_MAX)
+ return NULL;
+
+ while (fscanf(fp, "%*s %" textify(PATH_MAX) "s %99s %*s %*d %*d\n",
+ mnt, type) == 2) {
+ if (strcmp(type, fstype) == 0)
+ break;
+ }
+
+ fclose(fp);
+ if (strcmp(type, fstype) != 0)
+ return NULL;
+
+ return mnt;
+}
+
+int bpf_trace_pipe(void)
+{
+ char tracefs_mnt[PATH_MAX] = TRACE_DIR_MNT;
+ static const char * const tracefs_known_mnts[] = {
+ TRACE_DIR_MNT,
+ "/sys/kernel/debug/tracing",
+ "/tracing",
+ "/trace",
+ 0,
+ };
+ char tpipe[PATH_MAX];
+ const char *mnt;
+ int fd;
+
+ mnt = bpf_find_mntpt("tracefs", TRACEFS_MAGIC, tracefs_mnt,
+ sizeof(tracefs_mnt), tracefs_known_mnts);
+ if (!mnt) {
+ fprintf(stderr, "tracefs not mounted?\n");
+ return -1;
+ }
+
+ snprintf(tpipe, sizeof(tpipe), "%s/trace_pipe", mnt);
+
+ fd = open(tpipe, O_RDONLY);
+ if (fd < 0)
+ return -1;
+
+ fprintf(stderr, "Running! Hang up with ^C!\n\n");
+ while (1) {
+ static char buff[4096];
+ ssize_t ret;
+
+ ret = read(fd, buff, sizeof(buff) - 1);
+ if (ret > 0) {
+ write(2, buff, ret);
+ fflush(stderr);
+ }
+ }
+
+ return 0;
+}
+
+static const char *bpf_get_tc_dir(void)
+{
+ static bool bpf_mnt_cached = false;
+ static char bpf_tc_dir[PATH_MAX];
+ static const char *mnt;
+ static const char * const bpf_known_mnts[] = {
+ BPF_DIR_MNT,
+ 0,
+ };
+ char bpf_mnt[PATH_MAX] = BPF_DIR_MNT;
+ char bpf_glo_dir[PATH_MAX];
+ int ret;
+
+ if (bpf_mnt_cached)
+ goto done;
+
+ mnt = bpf_find_mntpt("bpf", BPF_FS_MAGIC, bpf_mnt, sizeof(bpf_mnt),
+ bpf_known_mnts);
+ if (!mnt) {
+ mnt = getenv(BPF_ENV_MNT);
+ if (!mnt)
+ mnt = BPF_DIR_MNT;
+ ret = bpf_mnt_fs(mnt);
+ if (ret) {
+ mnt = NULL;
+ goto out;
+ }
+ }
+
+ snprintf(bpf_tc_dir, sizeof(bpf_tc_dir), "%s/%s", mnt, BPF_DIR_TC);
+ ret = mkdir(bpf_tc_dir, S_IRWXU);
+ if (ret && errno != EEXIST) {
+ fprintf(stderr, "mkdir %s failed: %s\n", bpf_tc_dir,
+ strerror(errno));
+ mnt = NULL;
+ goto out;
+ }
+
+ snprintf(bpf_glo_dir, sizeof(bpf_glo_dir), "%s/%s",
+ bpf_tc_dir, BPF_DIR_GLOBALS);
+ ret = mkdir(bpf_glo_dir, S_IRWXU);
+ if (ret && errno != EEXIST) {
+ fprintf(stderr, "mkdir %s failed: %s\n", bpf_glo_dir,
+ strerror(errno));
+ mnt = NULL;
+ goto out;
+ }
+
+ mnt = bpf_tc_dir;
+out:
+ bpf_mnt_cached = true;
+done:
+ return mnt;
+}
+
+static int bpf_obj_get(const char *pathname)
+{
+ union bpf_attr attr;
+ char tmp[PATH_MAX];
+
+ if (strlen(pathname) > 2 && pathname[0] == 'm' &&
+ pathname[1] == ':' && bpf_get_tc_dir()) {
+ snprintf(tmp, sizeof(tmp), "%s/%s",
+ bpf_get_tc_dir(), pathname + 2);
+ pathname = tmp;
+ }
+
+ memset(&attr, 0, sizeof(attr));
+ attr.pathname = bpf_ptr_to_u64(pathname);
+
+ return bpf(BPF_OBJ_GET, &attr, sizeof(attr));
+}
+
const char *bpf_default_section(const enum bpf_prog_type type)
{
switch (type) {
@@ -173,18 +461,262 @@ const char *bpf_default_section(const enum bpf_prog_type type)
}
}
+enum bpf_mode {
+ CBPF_BYTECODE = 0,
+ CBPF_FILE,
+ EBPF_OBJECT,
+ EBPF_PINNED,
+ __BPF_MODE_MAX,
+#define BPF_MODE_MAX __BPF_MODE_MAX
+};
+
+static int bpf_parse(int *ptr_argc, char ***ptr_argv, const bool *opt_tbl,
+ enum bpf_prog_type *type, enum bpf_mode *mode,
+ const char **ptr_object, const char **ptr_section,
+ const char **ptr_uds_name, struct sock_filter *opcodes)
+{
+ const char *file, *section, *uds_name;
+ bool verbose = false;
+ int ret, argc;
+ char **argv;
+
+ argv = *ptr_argv;
+ argc = *ptr_argc;
+
+ if (opt_tbl[CBPF_BYTECODE] &&
+ (matches(*argv, "bytecode") == 0 ||
+ strcmp(*argv, "bc") == 0)) {
+ *mode = CBPF_BYTECODE;
+ } else if (opt_tbl[CBPF_FILE] &&
+ (matches(*argv, "bytecode-file") == 0 ||
+ strcmp(*argv, "bcf") == 0)) {
+ *mode = CBPF_FILE;
+ } else if (opt_tbl[EBPF_OBJECT] &&
+ (matches(*argv, "object-file") == 0 ||
+ strcmp(*argv, "obj") == 0)) {
+ *mode = EBPF_OBJECT;
+ } else if (opt_tbl[EBPF_PINNED] &&
+ (matches(*argv, "object-pinned") == 0 ||
+ matches(*argv, "pinned") == 0 ||
+ matches(*argv, "fd") == 0)) {
+ *mode = EBPF_PINNED;
+ } else {
+ fprintf(stderr, "What mode is \"%s\"?\n", *argv);
+ return -1;
+ }
+
+ NEXT_ARG();
+ file = section = uds_name = NULL;
+ if (*mode == EBPF_OBJECT || *mode == EBPF_PINNED) {
+ file = *argv;
+ NEXT_ARG_FWD();
+
+ if (*type == BPF_PROG_TYPE_UNSPEC) {
+ if (argc > 0 && matches(*argv, "type") == 0) {
+ NEXT_ARG();
+ if (matches(*argv, "cls") == 0) {
+ *type = BPF_PROG_TYPE_SCHED_CLS;
+ } else if (matches(*argv, "act") == 0) {
+ *type = BPF_PROG_TYPE_SCHED_ACT;
+ } else {
+ fprintf(stderr, "What type is \"%s\"?\n",
+ *argv);
+ return -1;
+ }
+ NEXT_ARG_FWD();
+ } else {
+ *type = BPF_PROG_TYPE_SCHED_CLS;
+ }
+ }
+
+ section = bpf_default_section(*type);
+ if (argc > 0 && matches(*argv, "section") == 0) {
+ NEXT_ARG();
+ section = *argv;
+ NEXT_ARG_FWD();
+ }
+
+ uds_name = getenv(BPF_ENV_UDS);
+ if (argc > 0 && !uds_name &&
+ matches(*argv, "export") == 0) {
+ NEXT_ARG();
+ uds_name = *argv;
+ NEXT_ARG_FWD();
+ }
+
+ if (argc > 0 && matches(*argv, "verbose") == 0) {
+ verbose = true;
+ NEXT_ARG_FWD();
+ }
+
+ PREV_ARG();
+ }
+
+ if (*mode == CBPF_BYTECODE || *mode == CBPF_FILE)
+ ret = bpf_ops_parse(argc, argv, opcodes, *mode == CBPF_FILE);
+ else if (*mode == EBPF_OBJECT)
+ ret = bpf_obj_open(file, *type, section, verbose);
+ else if (*mode == EBPF_PINNED)
+ ret = bpf_obj_get(file);
+ else
+ return -1;
+
+ if (ptr_object)
+ *ptr_object = file;
+ if (ptr_section)
+ *ptr_section = section;
+ if (ptr_uds_name)
+ *ptr_uds_name = uds_name;
+
+ *ptr_argc = argc;
+ *ptr_argv = argv;
+
+ return ret;
+}
+
+int bpf_parse_common(int *ptr_argc, char ***ptr_argv, const int *nla_tbl,
+ enum bpf_prog_type type, const char **ptr_object,
+ const char **ptr_uds_name, struct nlmsghdr *n)
+{
+ struct sock_filter opcodes[BPF_MAXINSNS];
+ const bool opt_tbl[BPF_MODE_MAX] = {
+ [CBPF_BYTECODE] = true,
+ [CBPF_FILE] = true,
+ [EBPF_OBJECT] = true,
+ [EBPF_PINNED] = true,
+ };
+ char annotation[256];
+ const char *section;
+ enum bpf_mode mode;
+ int ret;
+
+ ret = bpf_parse(ptr_argc, ptr_argv, opt_tbl, &type, &mode,
+ ptr_object, &section, ptr_uds_name, opcodes);
+ if (ret < 0)
+ return ret;
+
+ if (mode == CBPF_BYTECODE || mode == CBPF_FILE) {
+ addattr16(n, MAX_MSG, nla_tbl[BPF_NLA_OPS_LEN], ret);
+ addattr_l(n, MAX_MSG, nla_tbl[BPF_NLA_OPS], opcodes,
+ ret * sizeof(struct sock_filter));
+ }
+
+ if (mode == EBPF_OBJECT || mode == EBPF_PINNED) {
+ snprintf(annotation, sizeof(annotation), "%s:[%s]",
+ basename(*ptr_object), mode == EBPF_PINNED ?
+ "*fsobj" : section);
+
+ addattr32(n, MAX_MSG, nla_tbl[BPF_NLA_FD], ret);
+ addattrstrz(n, MAX_MSG, nla_tbl[BPF_NLA_NAME], annotation);
+ }
+
+ return 0;
+}
+
+int bpf_graft_map(const char *map_path, uint32_t *key, int argc, char **argv)
+{
+ enum bpf_prog_type type = BPF_PROG_TYPE_UNSPEC;
+ const bool opt_tbl[BPF_MODE_MAX] = {
+ [CBPF_BYTECODE] = false,
+ [CBPF_FILE] = false,
+ [EBPF_OBJECT] = true,
+ [EBPF_PINNED] = true,
+ };
+ const struct bpf_elf_map test = {
+ .type = BPF_MAP_TYPE_PROG_ARRAY,
+ .size_key = sizeof(int),
+ .size_value = sizeof(int),
+ };
+ int ret, prog_fd, map_fd;
+ const char *section;
+ enum bpf_mode mode;
+ uint32_t map_key;
+
+ prog_fd = bpf_parse(&argc, &argv, opt_tbl, &type, &mode,
+ NULL, &section, NULL, NULL);
+ if (prog_fd < 0)
+ return prog_fd;
+ if (key) {
+ map_key = *key;
+ } else {
+ ret = sscanf(section, "%*i/%i", &map_key);
+ if (ret != 1) {
+ fprintf(stderr, "Couldn\'t infer map key from section "
+ "name! Please provide \'key\' argument!\n");
+ ret = -EINVAL;
+ goto out_prog;
+ }
+ }
+
+ map_fd = bpf_obj_get(map_path);
+ if (map_fd < 0) {
+ fprintf(stderr, "Couldn\'t retrieve pinned map \'%s\': %s\n",
+ map_path, strerror(errno));
+ ret = map_fd;
+ goto out_prog;
+ }
+
+ ret = bpf_map_selfcheck_pinned(map_fd, &test,
+ offsetof(struct bpf_elf_map, max_elem));
+ if (ret < 0) {
+ fprintf(stderr, "Map \'%s\' self-check failed!\n", map_path);
+ goto out_map;
+ }
+
+ ret = bpf_map_update(map_fd, &map_key, &prog_fd, BPF_ANY);
+ if (ret < 0)
+ fprintf(stderr, "Map update failed: %s\n", strerror(errno));
+out_map:
+ close(map_fd);
+out_prog:
+ close(prog_fd);
+ return ret;
+}
+
#ifdef HAVE_ELF
+struct bpf_elf_prog {
+ enum bpf_prog_type type;
+ const struct bpf_insn *insns;
+ size_t size;
+ const char *license;
+};
+
+struct bpf_hash_entry {
+ unsigned int pinning;
+ const char *subpath;
+ struct bpf_hash_entry *next;
+};
+
+struct bpf_elf_ctx {
+ Elf *elf_fd;
+ GElf_Ehdr elf_hdr;
+ Elf_Data *sym_tab;
+ Elf_Data *str_tab;
+ int obj_fd;
+ int map_fds[ELF_MAX_MAPS];
+ struct bpf_elf_map maps[ELF_MAX_MAPS];
+ int sym_num;
+ int map_num;
+ bool *sec_done;
+ int sec_maps;
+ char license[ELF_MAX_LICENSE_LEN];
+ enum bpf_prog_type type;
+ bool verbose;
+ struct bpf_elf_st stat;
+ struct bpf_hash_entry *ht[256];
+};
+
struct bpf_elf_sec_data {
- GElf_Shdr sec_hdr;
- char *sec_name;
- Elf_Data *sec_data;
+ GElf_Shdr sec_hdr;
+ Elf_Data *sec_data;
+ const char *sec_name;
};
struct bpf_map_data {
- int *fds;
- const char *obj;
- struct bpf_elf_st *st;
- struct bpf_elf_map *ent;
+ int *fds;
+ const char *obj;
+ struct bpf_elf_st *st;
+ struct bpf_elf_map *ent;
};
/* If we provide a small buffer with log level enabled, the kernel
@@ -193,15 +725,8 @@ struct bpf_map_data {
* verifier we still want to hand something descriptive to the user.
*/
static char bpf_log_buf[65536];
-static bool bpf_verbose;
-static struct bpf_elf_st bpf_st;
-
-static int map_fds[ELF_MAX_MAPS];
-static struct bpf_elf_map map_ent[ELF_MAX_MAPS];
-
-static void bpf_dump_error(const char *format, ...) __check_format_string(1, 2);
-static void bpf_dump_error(const char *format, ...)
+static __check_format_string(1, 2) void bpf_dump_error(const char *format, ...)
{
va_list vl;
@@ -215,187 +740,431 @@ static void bpf_dump_error(const char *format, ...)
}
}
-static void bpf_save_finfo(int file_fd)
+static int bpf_map_create(enum bpf_map_type type, unsigned int size_key,
+ unsigned int size_value, unsigned int max_elem)
{
- struct stat st;
- int ret;
+ union bpf_attr attr = {
+ .map_type = type,
+ .key_size = size_key,
+ .value_size = size_value,
+ .max_entries = max_elem,
+ };
- memset(&bpf_st, 0, sizeof(bpf_st));
+ return bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
+}
- ret = fstat(file_fd, &st);
- if (ret < 0) {
- fprintf(stderr, "Stat of elf file failed: %s\n",
- strerror(errno));
- return;
+static int bpf_prog_load(enum bpf_prog_type type, const struct bpf_insn *insns,
+ size_t size, const char *license)
+{
+ union bpf_attr attr = {
+ .prog_type = type,
+ .insns = bpf_ptr_to_u64(insns),
+ .insn_cnt = size / sizeof(struct bpf_insn),
+ .license = bpf_ptr_to_u64(license),
+ .log_buf = bpf_ptr_to_u64(bpf_log_buf),
+ .log_size = sizeof(bpf_log_buf),
+ .log_level = 1,
+ };
+
+ if (getenv(BPF_ENV_NOLOG)) {
+ attr.log_buf = 0;
+ attr.log_size = 0;
+ attr.log_level = 0;
}
- bpf_st.st_dev = st.st_dev;
- bpf_st.st_ino = st.st_ino;
+ return bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
}
-static void bpf_clear_finfo(void)
+static int bpf_obj_pin(int fd, const char *pathname)
{
- memset(&bpf_st, 0, sizeof(bpf_st));
+ union bpf_attr attr = {
+ .pathname = bpf_ptr_to_u64(pathname),
+ .bpf_fd = fd,
+ };
+
+ return bpf(BPF_OBJ_PIN, &attr, sizeof(attr));
}
-static bool bpf_may_skip_map_creation(int file_fd)
+static int bpf_obj_hash(const char *object, uint8_t *out, size_t len)
{
- struct stat st;
- int ret;
+ struct sockaddr_alg alg = {
+ .salg_family = AF_ALG,
+ .salg_type = "hash",
+ .salg_name = "sha1",
+ };
+ int ret, cfd, ofd, ffd;
+ struct stat stbuff;
+ ssize_t size;
+
+ if (!object || len != 20)
+ return -EINVAL;
- ret = fstat(file_fd, &st);
+ cfd = socket(AF_ALG, SOCK_SEQPACKET, 0);
+ if (cfd < 0) {
+ fprintf(stderr, "Cannot get AF_ALG socket: %s\n",
+ strerror(errno));
+ return cfd;
+ }
+
+ ret = bind(cfd, (struct sockaddr *)&alg, sizeof(alg));
if (ret < 0) {
- fprintf(stderr, "Stat of elf file failed: %s\n",
+ fprintf(stderr, "Error binding socket: %s\n", strerror(errno));
+ goto out_cfd;
+ }
+
+ ofd = accept(cfd, NULL, 0);
+ if (ofd < 0) {
+ fprintf(stderr, "Error accepting socket: %s\n",
strerror(errno));
- return false;
+ ret = ofd;
+ goto out_cfd;
+ }
+
+ ffd = open(object, O_RDONLY);
+ if (ffd < 0) {
+ fprintf(stderr, "Error opening object %s: %s\n",
+ object, strerror(errno));
+ ret = ffd;
+ goto out_ofd;
+ }
+
+ ret = fstat(ffd, &stbuff);
+ if (ret < 0) {
+ fprintf(stderr, "Error doing fstat: %s\n",
+ strerror(errno));
+ goto out_ffd;
}
- return (bpf_st.st_dev == st.st_dev) &&
- (bpf_st.st_ino == st.st_ino);
+ size = sendfile(ofd, ffd, NULL, stbuff.st_size);
+ if (size != stbuff.st_size) {
+ fprintf(stderr, "Error from sendfile (%zd vs %zu bytes): %s\n",
+ size, stbuff.st_size, strerror(errno));
+ ret = -1;
+ goto out_ffd;
+ }
+
+ size = read(ofd, out, len);
+ if (size != len) {
+ fprintf(stderr, "Error from read (%zd vs %zu bytes): %s\n",
+ size, len, strerror(errno));
+ ret = -1;
+ } else {
+ ret = 0;
+ }
+out_ffd:
+ close(ffd);
+out_ofd:
+ close(ofd);
+out_cfd:
+ close(cfd);
+ return ret;
}
-static int bpf_create_map(enum bpf_map_type type, unsigned int size_key,
- unsigned int size_value, unsigned int max_elem)
+static const char *bpf_get_obj_uid(const char *pathname)
{
- union bpf_attr attr = {
- .map_type = type,
- .key_size = size_key,
- .value_size = size_value,
- .max_entries = max_elem,
- };
+ static bool bpf_uid_cached = false;
+ static char bpf_uid[64];
+ uint8_t tmp[20];
+ int ret;
- return bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
+ if (bpf_uid_cached)
+ goto done;
+
+ ret = bpf_obj_hash(pathname, tmp, sizeof(tmp));
+ if (ret) {
+ fprintf(stderr, "Object hashing failed!\n");
+ return NULL;
+ }
+
+ hexstring_n2a(tmp, sizeof(tmp), bpf_uid, sizeof(bpf_uid));
+ bpf_uid_cached = true;
+done:
+ return bpf_uid;
}
-static int bpf_update_map(int fd, const void *key, const void *value,
- uint64_t flags)
+static int bpf_init_env(const char *pathname)
{
- union bpf_attr attr = {
- .map_fd = fd,
- .key = bpf_ptr_to_u64(key),
- .value = bpf_ptr_to_u64(value),
- .flags = flags,
+ struct rlimit limit = {
+ .rlim_cur = RLIM_INFINITY,
+ .rlim_max = RLIM_INFINITY,
};
- return bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
+ /* Don't bother in case we fail! */
+ setrlimit(RLIMIT_MEMLOCK, &limit);
+
+ if (!bpf_get_tc_dir()) {
+ fprintf(stderr, "Continuing without mounted eBPF fs. "
+ "Too old kernel?\n");
+ return 0;
+ }
+
+ if (!bpf_get_obj_uid(pathname))
+ return -1;
+
+ return 0;
}
-static int bpf_prog_load(enum bpf_prog_type type, const struct bpf_insn *insns,
- unsigned int len, const char *license)
+static const char *bpf_custom_pinning(const struct bpf_elf_ctx *ctx,
+ uint32_t pinning)
{
- union bpf_attr attr = {
- .prog_type = type,
- .insns = bpf_ptr_to_u64(insns),
- .insn_cnt = len / sizeof(struct bpf_insn),
- .license = bpf_ptr_to_u64(license),
- .log_buf = bpf_ptr_to_u64(bpf_log_buf),
- .log_size = sizeof(bpf_log_buf),
- .log_level = 1,
- };
+ struct bpf_hash_entry *entry;
- return bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
+ entry = ctx->ht[pinning & (ARRAY_SIZE(ctx->ht) - 1)];
+ while (entry && entry->pinning != pinning)
+ entry = entry->next;
+
+ return entry ? entry->subpath : NULL;
}
-static int bpf_prog_attach(enum bpf_prog_type type, const char *sec,
- const struct bpf_insn *insns, unsigned int size,
- const char *license)
+static bool bpf_no_pinning(const struct bpf_elf_ctx *ctx,
+ uint32_t pinning)
{
- int prog_fd = bpf_prog_load(type, insns, size, license);
+ switch (pinning) {
+ case PIN_OBJECT_NS:
+ case PIN_GLOBAL_NS:
+ return false;
+ case PIN_NONE:
+ return true;
+ default:
+ return !bpf_custom_pinning(ctx, pinning);
+ }
+}
- if (prog_fd < 0 || bpf_verbose) {
- bpf_dump_error("%s (section \'%s\'): %s\n", prog_fd < 0 ?
- "BPF program rejected" :
- "BPF program verification",
- sec, strerror(errno));
+static void bpf_make_pathname(char *pathname, size_t len, const char *name,
+ const struct bpf_elf_ctx *ctx, uint32_t pinning)
+{
+ switch (pinning) {
+ case PIN_OBJECT_NS:
+ snprintf(pathname, len, "%s/%s/%s", bpf_get_tc_dir(),
+ bpf_get_obj_uid(NULL), name);
+ break;
+ case PIN_GLOBAL_NS:
+ snprintf(pathname, len, "%s/%s/%s", bpf_get_tc_dir(),
+ BPF_DIR_GLOBALS, name);
+ break;
+ default:
+ snprintf(pathname, len, "%s/../%s/%s", bpf_get_tc_dir(),
+ bpf_custom_pinning(ctx, pinning), name);
+ break;
}
+}
+
+static int bpf_probe_pinned(const char *name, const struct bpf_elf_ctx *ctx,
+ uint32_t pinning)
+{
+ char pathname[PATH_MAX];
- return prog_fd;
+ if (bpf_no_pinning(ctx, pinning) || !bpf_get_tc_dir())
+ return 0;
+
+ bpf_make_pathname(pathname, sizeof(pathname), name, ctx, pinning);
+ return bpf_obj_get(pathname);
}
-static int bpf_map_attach(enum bpf_map_type type, unsigned int size_key,
- unsigned int size_value, unsigned int max_elem)
+static int bpf_make_obj_path(void)
{
- int map_fd = bpf_create_map(type, size_key, size_value, max_elem);
+ char tmp[PATH_MAX];
+ int ret;
- if (map_fd < 0)
- bpf_dump_error("BPF map rejected: %s\n", strerror(errno));
+ snprintf(tmp, sizeof(tmp), "%s/%s", bpf_get_tc_dir(),
+ bpf_get_obj_uid(NULL));
- return map_fd;
+ ret = mkdir(tmp, S_IRWXU);
+ if (ret && errno != EEXIST) {
+ fprintf(stderr, "mkdir %s failed: %s\n", tmp, strerror(errno));
+ return ret;
+ }
+
+ return 0;
}
-static void bpf_maps_init(void)
+static int bpf_make_custom_path(const char *todo)
{
- int i;
+ char tmp[PATH_MAX], rem[PATH_MAX], *sub;
+ int ret;
+
+ snprintf(tmp, sizeof(tmp), "%s/../", bpf_get_tc_dir());
+ snprintf(rem, sizeof(rem), "%s/", todo);
+ sub = strtok(rem, "/");
- memset(map_ent, 0, sizeof(map_ent));
- for (i = 0; i < ARRAY_SIZE(map_fds); i++)
- map_fds[i] = -1;
+ while (sub) {
+ if (strlen(tmp) + strlen(sub) + 2 > PATH_MAX)
+ return -EINVAL;
+
+ strcat(tmp, sub);
+ strcat(tmp, "/");
+
+ ret = mkdir(tmp, S_IRWXU);
+ if (ret && errno != EEXIST) {
+ fprintf(stderr, "mkdir %s failed: %s\n", tmp,
+ strerror(errno));
+ return ret;
+ }
+
+ sub = strtok(NULL, "/");
+ }
+
+ return 0;
}
-static int bpf_maps_count(void)
+static int bpf_place_pinned(int fd, const char *name,
+ const struct bpf_elf_ctx *ctx, uint32_t pinning)
{
- int i, count = 0;
+ char pathname[PATH_MAX];
+ const char *tmp;
+ int ret = 0;
- for (i = 0; i < ARRAY_SIZE(map_fds); i++) {
- if (map_fds[i] < 0)
- break;
- count++;
+ if (bpf_no_pinning(ctx, pinning) || !bpf_get_tc_dir())
+ return 0;
+
+ if (pinning == PIN_OBJECT_NS)
+ ret = bpf_make_obj_path();
+ else if ((tmp = bpf_custom_pinning(ctx, pinning)))
+ ret = bpf_make_custom_path(tmp);
+ if (ret < 0)
+ return ret;
+
+ bpf_make_pathname(pathname, sizeof(pathname), name, ctx, pinning);
+ return bpf_obj_pin(fd, pathname);
+}
+
+static int bpf_prog_attach(const char *section,
+ const struct bpf_elf_prog *prog, bool verbose)
+{
+ int fd;
+
+ /* We can add pinning here later as well, same as bpf_map_attach(). */
+ errno = 0;
+ fd = bpf_prog_load(prog->type, prog->insns, prog->size,
+ prog->license);
+ if (fd < 0 || verbose) {
+ bpf_dump_error("Prog section \'%s\' (type:%u insns:%zu "
+ "license:\'%s\') %s%s (%d)!\n\n",
+ section, prog->type,
+ prog->size / sizeof(struct bpf_insn),
+ prog->license, fd < 0 ? "rejected :" :
+ "loaded", fd < 0 ? strerror(errno) : "",
+ fd < 0 ? errno : fd);
}
- return count;
+ return fd;
}
-static void bpf_maps_destroy(void)
+static int bpf_map_attach(const char *name, const struct bpf_elf_map *map,
+ const struct bpf_elf_ctx *ctx, bool verbose)
{
+ int fd, ret;
+
+ fd = bpf_probe_pinned(name, ctx, map->pinning);
+ if (fd > 0) {
+ ret = bpf_map_selfcheck_pinned(fd, map,
+ offsetof(struct bpf_elf_map,
+ id));
+ if (ret < 0) {
+ close(fd);
+ fprintf(stderr, "Map \'%s\' self-check failed!\n",
+ name);
+ return ret;
+ }
+ if (verbose)
+ fprintf(stderr, "Map \'%s\' loaded as pinned!\n",
+ name);
+ return fd;
+ }
+
+ errno = 0;
+ fd = bpf_map_create(map->type, map->size_key, map->size_value,
+ map->max_elem);
+ if (fd < 0 || verbose) {
+ bpf_dump_error("Map \'%s\' (type:%u id:%u pinning:%u "
+ "ksize:%u vsize:%u max-elems:%u) %s%s (%d)!\n",
+ name, map->type, map->id, map->pinning,
+ map->size_key, map->size_value, map->max_elem,
+ fd < 0 ? "rejected: " : "loaded", fd < 0 ?
+ strerror(errno) : "", fd < 0 ? errno : fd);
+ if (fd < 0)
+ return fd;
+ }
+
+ ret = bpf_place_pinned(fd, name, ctx, map->pinning);
+ if (ret < 0 && errno != EEXIST) {
+ fprintf(stderr, "Could not pin %s map: %s\n", name,
+ strerror(errno));
+ close(fd);
+ return ret;
+ }
+
+ return fd;
+}
+
+#define __ELF_ST_BIND(x) ((x) >> 4)
+#define __ELF_ST_TYPE(x) (((unsigned int) x) & 0xf)
+
+static const char *bpf_str_tab_name(const struct bpf_elf_ctx *ctx,
+ const GElf_Sym *sym)
+{
+ return ctx->str_tab->d_buf + sym->st_name;
+}
+
+static const char *bpf_map_fetch_name(struct bpf_elf_ctx *ctx, int which)
+{
+ GElf_Sym sym;
int i;
- memset(map_ent, 0, sizeof(map_ent));
- for (i = 0; i < ARRAY_SIZE(map_fds); i++) {
- if (map_fds[i] >= 0)
- close(map_fds[i]);
+ for (i = 0; i < ctx->sym_num; i++) {
+ if (gelf_getsym(ctx->sym_tab, i, &sym) != &sym)
+ continue;
+
+ if (__ELF_ST_BIND(sym.st_info) != STB_GLOBAL ||
+ __ELF_ST_TYPE(sym.st_info) != STT_NOTYPE ||
+ sym.st_shndx != ctx->sec_maps ||
+ sym.st_value / sizeof(struct bpf_elf_map) != which)
+ continue;
+
+ return bpf_str_tab_name(ctx, &sym);
}
+
+ return NULL;
}
-static int bpf_maps_attach(struct bpf_elf_map *maps, unsigned int num_maps)
+static int bpf_maps_attach_all(struct bpf_elf_ctx *ctx)
{
- int i, ret;
+ const char *map_name;
+ int i, fd;
- for (i = 0; (i < num_maps) && (num_maps <= ARRAY_SIZE(map_fds)); i++) {
- struct bpf_elf_map *map = &maps[i];
+ for (i = 0; i < ctx->map_num; i++) {
+ map_name = bpf_map_fetch_name(ctx, i);
+ if (!map_name)
+ return -EIO;
- ret = bpf_map_attach(map->type, map->size_key,
- map->size_value, map->max_elem);
- if (ret < 0)
- goto err_unwind;
+ fd = bpf_map_attach(map_name, &ctx->maps[i], ctx,
+ ctx->verbose);
+ if (fd < 0)
+ return fd;
- map_fds[i] = ret;
+ ctx->map_fds[i] = fd;
}
return 0;
-
-err_unwind:
- bpf_maps_destroy();
- return ret;
}
-static int bpf_fill_section_data(Elf *elf_fd, GElf_Ehdr *elf_hdr, int sec_index,
- struct bpf_elf_sec_data *sec_data)
+static int bpf_fill_section_data(struct bpf_elf_ctx *ctx, int section,
+ struct bpf_elf_sec_data *data)
{
+ Elf_Data *sec_edata;
GElf_Shdr sec_hdr;
Elf_Scn *sec_fd;
- Elf_Data *sec_edata;
char *sec_name;
- memset(sec_data, 0, sizeof(*sec_data));
+ memset(data, 0, sizeof(*data));
- sec_fd = elf_getscn(elf_fd, sec_index);
+ sec_fd = elf_getscn(ctx->elf_fd, section);
if (!sec_fd)
return -EINVAL;
-
if (gelf_getshdr(sec_fd, &sec_hdr) != &sec_hdr)
return -EIO;
- sec_name = elf_strptr(elf_fd, elf_hdr->e_shstrndx,
+ sec_name = elf_strptr(ctx->elf_fd, ctx->elf_hdr.e_shstrndx,
sec_hdr.sh_name);
if (!sec_name || !sec_hdr.sh_size)
return -ENOENT;
@@ -404,16 +1173,131 @@ static int bpf_fill_section_data(Elf *elf_fd, GElf_Ehdr *elf_hdr, int sec_index,
if (!sec_edata || elf_getdata(sec_fd, sec_edata))
return -EIO;
- memcpy(&sec_data->sec_hdr, &sec_hdr, sizeof(sec_hdr));
- sec_data->sec_name = sec_name;
- sec_data->sec_data = sec_edata;
+ memcpy(&data->sec_hdr, &sec_hdr, sizeof(sec_hdr));
+
+ data->sec_name = sec_name;
+ data->sec_data = sec_edata;
+ return 0;
+}
+
+static int bpf_fetch_maps(struct bpf_elf_ctx *ctx, int section,
+ struct bpf_elf_sec_data *data)
+{
+ if (data->sec_data->d_size % sizeof(struct bpf_elf_map) != 0)
+ return -EINVAL;
+
+ ctx->map_num = data->sec_data->d_size / sizeof(struct bpf_elf_map);
+ ctx->sec_maps = section;
+ ctx->sec_done[section] = true;
+
+ if (ctx->map_num > ARRAY_SIZE(ctx->map_fds)) {
+ fprintf(stderr, "Too many BPF maps in ELF section!\n");
+ return -ENOMEM;
+ }
+
+ memcpy(ctx->maps, data->sec_data->d_buf, data->sec_data->d_size);
+ return 0;
+}
+
+static int bpf_fetch_license(struct bpf_elf_ctx *ctx, int section,
+ struct bpf_elf_sec_data *data)
+{
+ if (data->sec_data->d_size > sizeof(ctx->license))
+ return -ENOMEM;
+
+ memcpy(ctx->license, data->sec_data->d_buf, data->sec_data->d_size);
+ ctx->sec_done[section] = true;
+ return 0;
+}
+static int bpf_fetch_symtab(struct bpf_elf_ctx *ctx, int section,
+ struct bpf_elf_sec_data *data)
+{
+ ctx->sym_tab = data->sec_data;
+ ctx->sym_num = data->sec_hdr.sh_size / data->sec_hdr.sh_entsize;
+ ctx->sec_done[section] = true;
return 0;
}
-static int bpf_apply_relo_data(struct bpf_elf_sec_data *data_relo,
- struct bpf_elf_sec_data *data_insn,
- Elf_Data *sym_tab)
+static int bpf_fetch_strtab(struct bpf_elf_ctx *ctx, int section,
+ struct bpf_elf_sec_data *data)
+{
+ ctx->str_tab = data->sec_data;
+ ctx->sec_done[section] = true;
+ return 0;
+}
+
+static int bpf_fetch_ancillary(struct bpf_elf_ctx *ctx)
+{
+ struct bpf_elf_sec_data data;
+ int i, ret = -1;
+
+ for (i = 1; i < ctx->elf_hdr.e_shnum; i++) {
+ ret = bpf_fill_section_data(ctx, i, &data);
+ if (ret < 0)
+ continue;
+
+ if (!strcmp(data.sec_name, ELF_SECTION_MAPS))
+ ret = bpf_fetch_maps(ctx, i, &data);
+ else if (!strcmp(data.sec_name, ELF_SECTION_LICENSE))
+ ret = bpf_fetch_license(ctx, i, &data);
+ else if (data.sec_hdr.sh_type == SHT_SYMTAB)
+ ret = bpf_fetch_symtab(ctx, i, &data);
+ else if (data.sec_hdr.sh_type == SHT_STRTAB &&
+ i != ctx->elf_hdr.e_shstrndx)
+ ret = bpf_fetch_strtab(ctx, i, &data);
+ if (ret < 0) {
+			fprintf(stderr, "Error parsing section %d! Perhaps "
+				"check with readelf -a?\n", i);
+ break;
+ }
+ }
+
+ if (ctx->sym_tab && ctx->str_tab && ctx->sec_maps) {
+ ret = bpf_maps_attach_all(ctx);
+ if (ret < 0) {
+ fprintf(stderr, "Error loading maps into kernel!\n");
+ return ret;
+ }
+ }
+
+ return ret;
+}
+
+static int bpf_fetch_prog(struct bpf_elf_ctx *ctx, const char *section)
+{
+ struct bpf_elf_sec_data data;
+ struct bpf_elf_prog prog;
+ int ret, i, fd = -1;
+
+ for (i = 1; i < ctx->elf_hdr.e_shnum; i++) {
+ if (ctx->sec_done[i])
+ continue;
+
+ ret = bpf_fill_section_data(ctx, i, &data);
+ if (ret < 0 || strcmp(data.sec_name, section))
+ continue;
+
+ memset(&prog, 0, sizeof(prog));
+ prog.type = ctx->type;
+ prog.insns = data.sec_data->d_buf;
+ prog.size = data.sec_data->d_size;
+ prog.license = ctx->license;
+
+ fd = bpf_prog_attach(section, &prog, ctx->verbose);
+ if (fd < 0)
+ continue;
+
+ ctx->sec_done[i] = true;
+ break;
+ }
+
+ return fd;
+}
+
+static int bpf_apply_relo_data(struct bpf_elf_ctx *ctx,
+ struct bpf_elf_sec_data *data_relo,
+ struct bpf_elf_sec_data *data_insn)
{
Elf_Data *idata = data_insn->sec_data;
GElf_Shdr *rhdr = &data_relo->sec_hdr;
@@ -422,7 +1306,7 @@ static int bpf_apply_relo_data(struct bpf_elf_sec_data *data_relo,
unsigned int num_insns = idata->d_size / sizeof(*insns);
for (relo_ent = 0; relo_ent < relo_num; relo_ent++) {
- unsigned int ioff, fnum;
+ unsigned int ioff, rmap;
GElf_Rel relo;
GElf_Sym sym;
@@ -430,291 +1314,367 @@ static int bpf_apply_relo_data(struct bpf_elf_sec_data *data_relo,
return -EIO;
ioff = relo.r_offset / sizeof(struct bpf_insn);
- if (ioff >= num_insns)
- return -EINVAL;
- if (insns[ioff].code != (BPF_LD | BPF_IMM | BPF_DW))
+ if (ioff >= num_insns ||
+ insns[ioff].code != (BPF_LD | BPF_IMM | BPF_DW))
return -EINVAL;
- if (gelf_getsym(sym_tab, GELF_R_SYM(relo.r_info), &sym) != &sym)
+ if (gelf_getsym(ctx->sym_tab, GELF_R_SYM(relo.r_info), &sym) != &sym)
return -EIO;
- fnum = sym.st_value / sizeof(struct bpf_elf_map);
- if (fnum >= ARRAY_SIZE(map_fds))
+ rmap = sym.st_value / sizeof(struct bpf_elf_map);
+ if (rmap >= ARRAY_SIZE(ctx->map_fds))
return -EINVAL;
- if (map_fds[fnum] < 0)
+ if (!ctx->map_fds[rmap])
return -EINVAL;
+ if (ctx->verbose)
+ fprintf(stderr, "Map \'%s\' (%d) injected into prog "
+ "section \'%s\' at offset %u!\n",
+ bpf_str_tab_name(ctx, &sym), ctx->map_fds[rmap],
+ data_insn->sec_name, ioff);
+
insns[ioff].src_reg = BPF_PSEUDO_MAP_FD;
- insns[ioff].imm = map_fds[fnum];
+ insns[ioff].imm = ctx->map_fds[rmap];
}
return 0;
}
-static int bpf_fetch_ancillary(int file_fd, Elf *elf_fd, GElf_Ehdr *elf_hdr,
- bool *sec_done, char *license, unsigned int lic_len,
- Elf_Data **sym_tab)
+static int bpf_fetch_prog_relo(struct bpf_elf_ctx *ctx, const char *section)
{
- int sec_index, ret = -1;
+ struct bpf_elf_sec_data data_relo, data_insn;
+ struct bpf_elf_prog prog;
+ int ret, idx, i, fd = -1;
- for (sec_index = 1; sec_index < elf_hdr->e_shnum; sec_index++) {
- struct bpf_elf_sec_data data_anc;
+ for (i = 1; i < ctx->elf_hdr.e_shnum; i++) {
+ ret = bpf_fill_section_data(ctx, i, &data_relo);
+ if (ret < 0 || data_relo.sec_hdr.sh_type != SHT_REL)
+ continue;
- ret = bpf_fill_section_data(elf_fd, elf_hdr, sec_index,
- &data_anc);
+ idx = data_relo.sec_hdr.sh_info;
+ ret = bpf_fill_section_data(ctx, idx, &data_insn);
+ if (ret < 0 || strcmp(data_insn.sec_name, section))
+ continue;
+
+ ret = bpf_apply_relo_data(ctx, &data_relo, &data_insn);
if (ret < 0)
continue;
- /* Extract and load eBPF map fds. */
- if (!strcmp(data_anc.sec_name, ELF_SECTION_MAPS) &&
- !bpf_may_skip_map_creation(file_fd)) {
- struct bpf_elf_map *maps;
- unsigned int maps_num;
+ memset(&prog, 0, sizeof(prog));
+ prog.type = ctx->type;
+ prog.insns = data_insn.sec_data->d_buf;
+ prog.size = data_insn.sec_data->d_size;
+ prog.license = ctx->license;
- if (data_anc.sec_data->d_size % sizeof(*maps) != 0)
- return -EINVAL;
+ fd = bpf_prog_attach(section, &prog, ctx->verbose);
+ if (fd < 0)
+ continue;
- maps = data_anc.sec_data->d_buf;
- maps_num = data_anc.sec_data->d_size / sizeof(*maps);
- memcpy(map_ent, maps, data_anc.sec_data->d_size);
+ ctx->sec_done[i] = true;
+ ctx->sec_done[idx] = true;
+ break;
+ }
- ret = bpf_maps_attach(maps, maps_num);
- if (ret < 0)
- return ret;
+ return fd;
+}
- sec_done[sec_index] = true;
- }
- /* Extract eBPF license. */
- else if (!strcmp(data_anc.sec_name, ELF_SECTION_LICENSE)) {
- if (data_anc.sec_data->d_size > lic_len)
- return -ENOMEM;
-
- sec_done[sec_index] = true;
- memcpy(license, data_anc.sec_data->d_buf,
- data_anc.sec_data->d_size);
- }
- /* Extract symbol table for relocations (map fd fixups). */
- else if (data_anc.sec_hdr.sh_type == SHT_SYMTAB) {
- sec_done[sec_index] = true;
- *sym_tab = data_anc.sec_data;
- }
- }
+static int bpf_fetch_prog_sec(struct bpf_elf_ctx *ctx, const char *section)
+{
+ int ret = -1;
+
+ if (ctx->sym_tab)
+ ret = bpf_fetch_prog_relo(ctx, section);
+ if (ret < 0)
+ ret = bpf_fetch_prog(ctx, section);
return ret;
}
-static int bpf_fetch_prog_relo(Elf *elf_fd, GElf_Ehdr *elf_hdr, bool *sec_done,
- enum bpf_prog_type type, const char *sec,
- const char *license, Elf_Data *sym_tab)
+static int bpf_find_map_by_id(struct bpf_elf_ctx *ctx, uint32_t id)
{
- int sec_index, prog_fd = -1;
+ int i;
- for (sec_index = 1; sec_index < elf_hdr->e_shnum; sec_index++) {
- struct bpf_elf_sec_data data_relo, data_insn;
- int ins_index, ret;
+ for (i = 0; i < ARRAY_SIZE(ctx->map_fds); i++)
+ if (ctx->map_fds[i] && ctx->maps[i].id == id &&
+ ctx->maps[i].type == BPF_MAP_TYPE_PROG_ARRAY)
+ return i;
+ return -1;
+}
- /* Attach eBPF programs with relocation data (maps). */
- ret = bpf_fill_section_data(elf_fd, elf_hdr, sec_index,
- &data_relo);
- if (ret < 0 || data_relo.sec_hdr.sh_type != SHT_REL)
- continue;
+static int bpf_fill_prog_arrays(struct bpf_elf_ctx *ctx)
+{
+ struct bpf_elf_sec_data data;
+ uint32_t map_id, key_id;
+ int fd, i, ret, idx;
- ins_index = data_relo.sec_hdr.sh_info;
+ for (i = 1; i < ctx->elf_hdr.e_shnum; i++) {
+ if (ctx->sec_done[i])
+ continue;
- ret = bpf_fill_section_data(elf_fd, elf_hdr, ins_index,
- &data_insn);
+ ret = bpf_fill_section_data(ctx, i, &data);
if (ret < 0)
continue;
- if (strcmp(data_insn.sec_name, sec))
- continue;
- ret = bpf_apply_relo_data(&data_relo, &data_insn, sym_tab);
- if (ret < 0)
+ ret = sscanf(data.sec_name, "%i/%i", &map_id, &key_id);
+ if (ret != 2)
continue;
- prog_fd = bpf_prog_attach(type, sec, data_insn.sec_data->d_buf,
- data_insn.sec_data->d_size, license);
- if (prog_fd < 0)
+ idx = bpf_find_map_by_id(ctx, map_id);
+ if (idx < 0)
continue;
- sec_done[sec_index] = true;
- sec_done[ins_index] = true;
- break;
+ fd = bpf_fetch_prog_sec(ctx, data.sec_name);
+ if (fd < 0)
+ return -EIO;
+
+ ret = bpf_map_update(ctx->map_fds[idx], &key_id,
+ &fd, BPF_ANY);
+ if (ret < 0)
+ return -ENOENT;
+
+ ctx->sec_done[i] = true;
}
- return prog_fd;
+ return 0;
}
-static int bpf_fetch_prog(Elf *elf_fd, GElf_Ehdr *elf_hdr, bool *sec_done,
- enum bpf_prog_type type, const char *sec,
- const char *license)
+static void bpf_save_finfo(struct bpf_elf_ctx *ctx)
{
- int sec_index, prog_fd = -1;
+ struct stat st;
+ int ret;
- for (sec_index = 1; sec_index < elf_hdr->e_shnum; sec_index++) {
- struct bpf_elf_sec_data data_insn;
- int ret;
+ memset(&ctx->stat, 0, sizeof(ctx->stat));
- /* Attach eBPF programs without relocation data. */
- if (sec_done[sec_index])
- continue;
+ ret = fstat(ctx->obj_fd, &st);
+ if (ret < 0) {
+ fprintf(stderr, "Stat of elf file failed: %s\n",
+ strerror(errno));
+ return;
+ }
- ret = bpf_fill_section_data(elf_fd, elf_hdr, sec_index,
- &data_insn);
- if (ret < 0)
- continue;
- if (strcmp(data_insn.sec_name, sec))
- continue;
+ ctx->stat.st_dev = st.st_dev;
+ ctx->stat.st_ino = st.st_ino;
+}
- prog_fd = bpf_prog_attach(type, sec, data_insn.sec_data->d_buf,
- data_insn.sec_data->d_size, license);
- if (prog_fd < 0)
+static int bpf_read_pin_mapping(FILE *fp, uint32_t *id, char *path)
+{
+ char buff[PATH_MAX];
+
+ while (fgets(buff, sizeof(buff), fp)) {
+ char *ptr = buff;
+
+ while (*ptr == ' ' || *ptr == '\t')
+ ptr++;
+
+ if (*ptr == '#' || *ptr == '\n' || *ptr == 0)
continue;
- sec_done[sec_index] = true;
- break;
+ if (sscanf(ptr, "%i %s\n", id, path) != 2 &&
+ sscanf(ptr, "%i %s #", id, path) != 2) {
+ strcpy(path, ptr);
+ return -1;
+ }
+
+ return 1;
}
- return prog_fd;
+ return 0;
}
-static int bpf_fetch_prog_sec(Elf *elf_fd, GElf_Ehdr *elf_hdr, bool *sec_done,
- enum bpf_prog_type type, const char *sec,
- const char *license, Elf_Data *sym_tab)
+static bool bpf_pinning_reserved(uint32_t pinning)
{
- int ret = -1;
-
- if (sym_tab)
- ret = bpf_fetch_prog_relo(elf_fd, elf_hdr, sec_done, type,
- sec, license, sym_tab);
- if (ret < 0)
- ret = bpf_fetch_prog(elf_fd, elf_hdr, sec_done, type, sec,
- license);
- return ret;
+ switch (pinning) {
+ case PIN_NONE:
+ case PIN_OBJECT_NS:
+ case PIN_GLOBAL_NS:
+ return true;
+ default:
+ return false;
+ }
}
-static int bpf_fill_prog_arrays(Elf *elf_fd, GElf_Ehdr *elf_hdr, bool *sec_done,
- enum bpf_prog_type type, const char *license,
- Elf_Data *sym_tab)
+static void bpf_hash_init(struct bpf_elf_ctx *ctx, const char *db_file)
{
- int sec_index;
+ struct bpf_hash_entry *entry;
+ char subpath[PATH_MAX];
+ uint32_t pinning;
+ FILE *fp;
+ int ret;
- for (sec_index = 1; sec_index < elf_hdr->e_shnum; sec_index++) {
- struct bpf_elf_sec_data data_insn;
- int ret, map_id, key_id, prog_fd;
+ fp = fopen(db_file, "r");
+ if (!fp)
+ return;
- if (sec_done[sec_index])
+ memset(subpath, 0, sizeof(subpath));
+ while ((ret = bpf_read_pin_mapping(fp, &pinning, subpath))) {
+ if (ret == -1) {
+ fprintf(stderr, "Database %s is corrupted at: %s\n",
+ db_file, subpath);
+ fclose(fp);
+ return;
+ }
+
+ if (bpf_pinning_reserved(pinning)) {
+ fprintf(stderr, "Database %s, id %u is reserved - "
+ "ignoring!\n", db_file, pinning);
continue;
+ }
- ret = bpf_fill_section_data(elf_fd, elf_hdr, sec_index,
- &data_insn);
- if (ret < 0)
+ entry = malloc(sizeof(*entry));
+ if (!entry) {
+ fprintf(stderr, "No memory left for db entry!\n");
continue;
+ }
- ret = sscanf(data_insn.sec_name, "%i/%i", &map_id, &key_id);
- if (ret != 2)
+ entry->pinning = pinning;
+ entry->subpath = strdup(subpath);
+ if (!entry->subpath) {
+ fprintf(stderr, "No memory left for db entry!\n");
+ free(entry);
continue;
+ }
- if (map_id >= ARRAY_SIZE(map_fds) || map_fds[map_id] < 0)
- return -ENOENT;
- if (map_ent[map_id].type != BPF_MAP_TYPE_PROG_ARRAY ||
- map_ent[map_id].max_elem <= key_id)
- return -EINVAL;
+ entry->next = ctx->ht[pinning & (ARRAY_SIZE(ctx->ht) - 1)];
+ ctx->ht[pinning & (ARRAY_SIZE(ctx->ht) - 1)] = entry;
+ }
- prog_fd = bpf_fetch_prog_sec(elf_fd, elf_hdr, sec_done,
- type, data_insn.sec_name,
- license, sym_tab);
- if (prog_fd < 0)
- return -EIO;
+ fclose(fp);
+}
- ret = bpf_update_map(map_fds[map_id], &key_id, &prog_fd,
- BPF_ANY);
- if (ret < 0)
- return -ENOENT;
+static void bpf_hash_destroy(struct bpf_elf_ctx *ctx)
+{
+ struct bpf_hash_entry *entry;
+ int i;
- sec_done[sec_index] = true;
+ for (i = 0; i < ARRAY_SIZE(ctx->ht); i++) {
+ while ((entry = ctx->ht[i]) != NULL) {
+ ctx->ht[i] = entry->next;
+ free((char *)entry->subpath);
+ free(entry);
+ }
}
-
- return 0;
}
-int bpf_open_object(const char *path, enum bpf_prog_type type,
- const char *sec, bool verbose)
+static int bpf_elf_ctx_init(struct bpf_elf_ctx *ctx, const char *pathname,
+ enum bpf_prog_type type, bool verbose)
{
- char license[ELF_MAX_LICENSE_LEN];
- int file_fd, prog_fd = -1, ret;
- Elf_Data *sym_tab = NULL;
- GElf_Ehdr elf_hdr;
- bool *sec_done;
- Elf *elf_fd;
+ int ret = -EINVAL;
- if (elf_version(EV_CURRENT) == EV_NONE)
- return -EINVAL;
+ if (elf_version(EV_CURRENT) == EV_NONE ||
+ bpf_init_env(pathname))
+ return ret;
+
+ memset(ctx, 0, sizeof(*ctx));
+ ctx->verbose = verbose;
+ ctx->type = type;
- file_fd = open(path, O_RDONLY, 0);
- if (file_fd < 0)
- return -errno;
+ ctx->obj_fd = open(pathname, O_RDONLY);
+ if (ctx->obj_fd < 0)
+ return ctx->obj_fd;
- elf_fd = elf_begin(file_fd, ELF_C_READ, NULL);
- if (!elf_fd) {
+ ctx->elf_fd = elf_begin(ctx->obj_fd, ELF_C_READ, NULL);
+ if (!ctx->elf_fd) {
ret = -EINVAL;
- goto out;
+ goto out_fd;
}
- if (gelf_getehdr(elf_fd, &elf_hdr) != &elf_hdr) {
+ if (gelf_getehdr(ctx->elf_fd, &ctx->elf_hdr) !=
+ &ctx->elf_hdr) {
ret = -EIO;
goto out_elf;
}
- sec_done = calloc(elf_hdr.e_shnum, sizeof(*sec_done));
- if (!sec_done) {
+ ctx->sec_done = calloc(ctx->elf_hdr.e_shnum,
+ sizeof(*(ctx->sec_done)));
+ if (!ctx->sec_done) {
ret = -ENOMEM;
goto out_elf;
}
- memset(license, 0, sizeof(license));
- bpf_verbose = verbose;
+ bpf_save_finfo(ctx);
+ bpf_hash_init(ctx, CONFDIR "/bpf_pinning");
- if (!bpf_may_skip_map_creation(file_fd))
- bpf_maps_init();
+ return 0;
+out_elf:
+ elf_end(ctx->elf_fd);
+out_fd:
+ close(ctx->obj_fd);
+ return ret;
+}
- ret = bpf_fetch_ancillary(file_fd, elf_fd, &elf_hdr, sec_done,
- license, sizeof(license), &sym_tab);
- if (ret < 0)
- goto out_maps;
+static int bpf_maps_count(struct bpf_elf_ctx *ctx)
+{
+ int i, count = 0;
- prog_fd = bpf_fetch_prog_sec(elf_fd, &elf_hdr, sec_done, type,
- sec, license, sym_tab);
- if (prog_fd < 0)
- goto out_maps;
+ for (i = 0; i < ARRAY_SIZE(ctx->map_fds); i++) {
+ if (!ctx->map_fds[i])
+ break;
+ count++;
+ }
- if (!bpf_may_skip_map_creation(file_fd)) {
- ret = bpf_fill_prog_arrays(elf_fd, &elf_hdr, sec_done,
- type, license, sym_tab);
- if (ret < 0)
- goto out_prog;
+ return count;
+}
+
+static void bpf_maps_teardown(struct bpf_elf_ctx *ctx)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(ctx->map_fds); i++) {
+ if (ctx->map_fds[i])
+ close(ctx->map_fds[i]);
}
+}
- bpf_save_finfo(file_fd);
+static void bpf_elf_ctx_destroy(struct bpf_elf_ctx *ctx, bool failure)
+{
+ if (failure)
+ bpf_maps_teardown(ctx);
- free(sec_done);
+ bpf_hash_destroy(ctx);
+ free(ctx->sec_done);
+ elf_end(ctx->elf_fd);
+ close(ctx->obj_fd);
+}
- elf_end(elf_fd);
- close(file_fd);
+static struct bpf_elf_ctx __ctx;
- return prog_fd;
+static int bpf_obj_open(const char *pathname, enum bpf_prog_type type,
+ const char *section, bool verbose)
+{
+ struct bpf_elf_ctx *ctx = &__ctx;
+ int fd = 0, ret;
-out_prog:
- close(prog_fd);
-out_maps:
- bpf_maps_destroy();
- free(sec_done);
-out_elf:
- elf_end(elf_fd);
+ ret = bpf_elf_ctx_init(ctx, pathname, type, verbose);
+ if (ret < 0) {
+ fprintf(stderr, "Cannot initialize ELF context!\n");
+ return ret;
+ }
+
+ ret = bpf_fetch_ancillary(ctx);
+ if (ret < 0) {
+ fprintf(stderr, "Error fetching ELF ancillary data!\n");
+ goto out;
+ }
+
+ fd = bpf_fetch_prog_sec(ctx, section);
+ if (fd < 0) {
+ fprintf(stderr, "Error fetching program/map!\n");
+ ret = fd;
+ goto out;
+ }
+
+ ret = bpf_fill_prog_arrays(ctx);
+ if (ret < 0)
+ fprintf(stderr, "Error filling program arrays!\n");
out:
- close(file_fd);
- bpf_clear_finfo();
- return prog_fd;
+ bpf_elf_ctx_destroy(ctx, ret < 0);
+ if (ret < 0) {
+ if (fd)
+ close(fd);
+ return ret;
+ }
+
+ return fd;
}
static int
@@ -803,6 +1763,7 @@ bpf_map_set_recv(int fd, int *fds, struct bpf_map_aux *aux,
int bpf_send_map_fds(const char *path, const char *obj)
{
+ struct bpf_elf_ctx *ctx = &__ctx;
struct sockaddr_un addr;
struct bpf_map_data bpf_aux;
int fd, ret;
@@ -827,18 +1788,18 @@ int bpf_send_map_fds(const char *path, const char *obj)
memset(&bpf_aux, 0, sizeof(bpf_aux));
- bpf_aux.fds = map_fds;
- bpf_aux.ent = map_ent;
-
+ bpf_aux.fds = ctx->map_fds;
+ bpf_aux.ent = ctx->maps;
+ bpf_aux.st = &ctx->stat;
bpf_aux.obj = obj;
- bpf_aux.st = &bpf_st;
ret = bpf_map_set_send(fd, &addr, sizeof(addr), &bpf_aux,
- bpf_maps_count());
+ bpf_maps_count(ctx));
if (ret < 0)
fprintf(stderr, "Cannot send fds to %s: %s\n",
path, strerror(errno));
+ bpf_maps_teardown(ctx);
close(fd);
return ret;
}
diff --git a/tc/tc_bpf.h b/tc/tc_bpf.h
index 2ad8812..526d0b1 100644
--- a/tc/tc_bpf.h
+++ b/tc/tc_bpf.h
@@ -13,61 +13,57 @@
#ifndef _TC_BPF_H_
#define _TC_BPF_H_ 1
-#include <linux/filter.h>
#include <linux/netlink.h>
-#include <linux/rtnetlink.h>
#include <linux/bpf.h>
-#include <sys/syscall.h>
-#include <errno.h>
-#include <stdio.h>
-#include <stdint.h>
+#include <linux/magic.h>
#include "utils.h"
#include "bpf_scm.h"
+enum {
+ BPF_NLA_OPS_LEN = 0,
+ BPF_NLA_OPS,
+ BPF_NLA_FD,
+ BPF_NLA_NAME,
+ __BPF_NLA_MAX,
+};
+
+#define BPF_NLA_MAX __BPF_NLA_MAX
+
#define BPF_ENV_UDS "TC_BPF_UDS"
+#define BPF_ENV_MNT "TC_BPF_MNT"
+#define BPF_ENV_NOLOG "TC_BPF_NOLOG"
-int bpf_parse_string(char *arg, bool from_file, __u16 *bpf_len,
- char **bpf_string, bool *need_release,
- const char separator);
-int bpf_parse_ops(int argc, char **argv, struct sock_filter *bpf_ops,
- bool from_file);
-void bpf_print_ops(FILE *f, struct rtattr *bpf_ops, __u16 len);
+#ifndef BPF_FS_MAGIC
+# define BPF_FS_MAGIC 0xcafe4a11
+#endif
+#define BPF_DIR_MNT "/sys/fs/bpf"
+
+#define BPF_DIR_TC "tc"
+#define BPF_DIR_GLOBALS "globals"
+
+#ifndef TRACEFS_MAGIC
+# define TRACEFS_MAGIC 0x74726163
+#endif
+
+#define TRACE_DIR_MNT "/sys/kernel/tracing"
+
+int bpf_trace_pipe(void);
const char *bpf_default_section(const enum bpf_prog_type type);
-#ifdef HAVE_ELF
-int bpf_open_object(const char *path, enum bpf_prog_type type,
- const char *sec, bool verbose);
+int bpf_parse_common(int *ptr_argc, char ***ptr_argv, const int *nla_tbl,
+ enum bpf_prog_type type, const char **ptr_object,
+ const char **ptr_uds_name, struct nlmsghdr *n);
+int bpf_graft_map(const char *map_path, uint32_t *key, int argc, char **argv);
+void bpf_print_ops(FILE *f, struct rtattr *bpf_ops, __u16 len);
+
+#ifdef HAVE_ELF
int bpf_send_map_fds(const char *path, const char *obj);
int bpf_recv_map_fds(const char *path, int *fds, struct bpf_map_aux *aux,
unsigned int entries);
-
-static inline __u64 bpf_ptr_to_u64(const void *ptr)
-{
- return (__u64) (unsigned long) ptr;
-}
-
-static inline int bpf(int cmd, union bpf_attr *attr, unsigned int size)
-{
-#ifdef __NR_bpf
- return syscall(__NR_bpf, cmd, attr, size);
#else
- fprintf(stderr, "No bpf syscall, kernel headers too old?\n");
- errno = ENOSYS;
- return -1;
-#endif
-}
-#else
-static inline int bpf_open_object(const char *path, enum bpf_prog_type type,
- const char *sec, bool verbose)
-{
- fprintf(stderr, "No ELF library support compiled in.\n");
- errno = ENOSYS;
- return -1;
-}
-
static inline int bpf_send_map_fds(const char *path, const char *obj)
{
return 0;