aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorElliott Hughes <enh@google.com>2016-02-05 13:08:01 -0800
committerElliott Hughes <enh@google.com>2016-02-05 13:08:01 -0800
commit572478960c5715509389837396f5b929fe3f0a6c (patch)
treeb3be93330e50bb3480000999dd0f4915a6b65b5b
parent0146a363ec4ee6a98910edbb97f0032e87375989 (diff)
parentfb2594c183fbedbe8f91fe7b1f7fed1331bb3194 (diff)
downloadplatform_external_iproute2-572478960c5715509389837396f5b929fe3f0a6c.tar.gz
platform_external_iproute2-572478960c5715509389837396f5b929fe3f0a6c.tar.bz2
platform_external_iproute2-572478960c5715509389837396f5b929fe3f0a6c.zip
Merge remote-tracking branch 'aosp/upstream-master' into mymerge
Change-Id: Ibc952037986c546d20e75479fd2983d07111ff88
-rw-r--r--Makefile29
-rw-r--r--README.iproute2+tc6
-rw-r--r--bridge/bridge.c93
-rw-r--r--bridge/fdb.c17
-rw-r--r--bridge/link.c2
-rw-r--r--bridge/mdb.c69
-rw-r--r--bridge/monitor.c1
-rw-r--r--bridge/vlan.c2
-rwxr-xr-xconfigure74
-rw-r--r--doc/ip-cref.tex47
-rw-r--r--etc/iproute2/bpf_pinning6
-rw-r--r--etc/iproute2/rt_tables.d/README3
-rw-r--r--examples/bpf/README13
-rw-r--r--examples/bpf/bpf_agent.c258
-rw-r--r--examples/bpf/bpf_cyclic.c30
-rw-r--r--examples/bpf/bpf_graft.c67
-rw-r--r--examples/bpf/bpf_prog.c499
-rw-r--r--examples/bpf/bpf_shared.c48
-rw-r--r--examples/bpf/bpf_shared.h22
-rw-r--r--examples/bpf/bpf_sys.h23
-rw-r--r--examples/bpf/bpf_tailcall.c99
-rw-r--r--examples/cbq.init-v0.7.38
-rw-r--r--genl/ctrl.c21
-rw-r--r--genl/genl.c2
-rw-r--r--include/SNAPSHOT.h2
-rw-r--r--include/bpf_api.h225
-rw-r--r--include/bpf_elf.h39
-rw-r--r--include/bpf_scm.h75
-rw-r--r--include/color.h16
-rw-r--r--include/dlfcn.h2
-rw-r--r--include/ip6tables.h141
-rw-r--r--include/iptables.h186
-rw-r--r--include/iptables/internal.h13
-rw-r--r--include/json_writer.h61
-rw-r--r--include/libiptc/ipt_kernel_headers.h13
-rw-r--r--include/libiptc/libip6tc.h127
-rw-r--r--include/libiptc/libiptc.h128
-rw-r--r--include/libiptc/libxtc.h33
-rw-r--r--include/libiptc/xtcshared.h20
-rw-r--r--include/libnetlink.h107
-rw-r--r--include/linux/bpf.h326
-rw-r--r--include/linux/can.h6
-rw-r--r--include/linux/fib_rules.h2
-rw-r--r--include/linux/filter.h10
-rw-r--r--include/linux/fou.h1
-rw-r--r--include/linux/if_addr.h2
-rw-r--r--include/linux/if_bridge.h2
-rw-r--r--include/linux/if_ether.h1
-rw-r--r--include/linux/if_link.h119
-rw-r--r--include/linux/if_tun.h6
-rw-r--r--include/linux/if_tunnel.h1
-rw-r--r--include/linux/ila.h37
-rw-r--r--include/linux/in.h299
-rw-r--r--include/linux/in6.h1
-rw-r--r--include/linux/inet_diag.h8
-rw-r--r--include/linux/libc-compat.h22
-rw-r--r--include/linux/lwtunnel.h43
-rw-r--r--include/linux/mpls.h46
-rw-r--r--include/linux/mpls_iptunnel.h28
-rw-r--r--include/linux/neighbour.h2
-rw-r--r--include/linux/netconf.h1
-rw-r--r--include/linux/netfilter.h9
-rw-r--r--include/linux/netlink.h18
-rw-r--r--include/linux/pkt_cls.h107
-rw-r--r--include/linux/pkt_sched.h11
-rw-r--r--include/linux/rtnetlink.h37
-rw-r--r--include/linux/sock_diag.h11
-rw-r--r--include/linux/tc_act/tc_bpf.h2
-rw-r--r--include/linux/tc_act/tc_connmark.h22
-rw-r--r--include/linux/tcp.h7
-rw-r--r--include/linux/tipc.h232
-rw-r--r--include/linux/tipc_netlink.h254
-rw-r--r--include/linux/xfrm.h2
-rw-r--r--include/ll_map.h17
-rw-r--r--include/namespace.h7
-rw-r--r--include/rt_names.h2
-rw-r--r--include/rtm_map.h3
-rw-r--r--include/utils.h123
-rw-r--r--include/xtables.h567
-rw-r--r--ip/Android.mk5
-rw-r--r--ip/Makefile5
-rw-r--r--ip/ip.c85
-rw-r--r--ip/ip6tunnel.c104
-rw-r--r--ip/ip_common.h109
-rw-r--r--ip/ipaddress.c371
-rw-r--r--ip/ipaddrlabel.c11
-rw-r--r--ip/ipfou.c5
-rw-r--r--ip/ipl2tp.c13
-rw-r--r--ip/iplink.c282
-rw-r--r--ip/iplink_bond.c151
-rw-r--r--ip/iplink_bond_slave.c9
-rw-r--r--ip/iplink_bridge.c99
-rw-r--r--ip/iplink_bridge_slave.c16
-rw-r--r--ip/iplink_geneve.c173
-rw-r--r--ip/iplink_macvlan.c68
-rw-r--r--ip/iplink_macvtap.c105
-rw-r--r--ip/iplink_vrf.c79
-rw-r--r--ip/iplink_vxlan.c42
-rw-r--r--ip/ipmaddr.c2
-rw-r--r--ip/ipmonitor.c87
-rw-r--r--ip/ipmroute.c31
-rw-r--r--ip/ipneigh.c53
-rw-r--r--ip/ipnetconf.c11
-rw-r--r--ip/ipnetns.c221
-rw-r--r--ip/ipntable.c6
-rw-r--r--ip/ipprefix.c5
-rw-r--r--ip/iproute.c269
-rw-r--r--ip/iproute_lwtunnel.c367
-rw-r--r--ip/iproute_lwtunnel.h8
-rw-r--r--ip/iprule.c122
-rw-r--r--ip/iptoken.c8
-rw-r--r--ip/iptunnel.c331
-rw-r--r--ip/ipxfrm.c19
-rw-r--r--ip/link_gre.c15
-rw-r--r--ip/link_gre6.c2
-rw-r--r--ip/link_ip6tnl.c4
-rw-r--r--ip/link_iptnl.c2
-rw-r--r--ip/link_vti.c2
-rw-r--r--ip/link_vti6.c2
-rw-r--r--ip/rtm_map.c10
-rw-r--r--ip/rtmon.c12
-rw-r--r--ip/tcp_metrics.c5
-rw-r--r--ip/tunnel.c67
-rw-r--r--ip/tunnel.h2
-rw-r--r--ip/xfrm_monitor.c32
-rw-r--r--ip/xfrm_policy.c144
-rw-r--r--ip/xfrm_state.c25
-rw-r--r--lib/Android.mk4
-rw-r--r--lib/Makefile5
-rw-r--r--lib/color.c64
-rw-r--r--lib/coverity_model.c19
-rw-r--r--lib/json_writer.c312
-rw-r--r--lib/libgenl.c2
-rw-r--r--lib/libnetlink.c153
-rw-r--r--lib/ll_addr.c2
-rw-r--r--lib/ll_map.c2
-rw-r--r--lib/mpls_ntop.c48
-rw-r--r--lib/mpls_pton.c58
-rw-r--r--lib/namespace.c13
-rw-r--r--lib/rt_names.c143
-rw-r--r--lib/utils.c178
-rw-r--r--man/man3/libnetlink.37
-rw-r--r--man/man8/Makefile11
-rw-r--r--man/man8/bridge.899
-rw-r--r--man/man8/genl.877
-rw-r--r--man/man8/ifcfg.848
-rw-r--r--man/man8/ifstat.859
-rw-r--r--man/man8/ip-address.8.in133
-rw-r--r--man/man8/ip-addrlabel.85
-rw-r--r--man/man8/ip-link.8.in364
-rw-r--r--man/man8/ip-monitor.838
-rw-r--r--man/man8/ip-neighbour.84
-rw-r--r--man/man8/ip-netns.810
-rw-r--r--man/man8/ip-ntable.84
-rw-r--r--man/man8/ip-route.8.in131
-rw-r--r--man/man8/ip-rule.830
-rw-r--r--man/man8/ip-tunnel.824
-rw-r--r--man/man8/ip-xfrm.884
-rw-r--r--man/man8/ip.839
-rw-r--r--man/man8/lnstat.8197
-rw-r--r--man/man8/routel.820
-rw-r--r--man/man8/rtacct.81
-rw-r--r--man/man8/rtmon.88
-rw-r--r--man/man8/rtpr.825
-rw-r--r--man/man8/ss.813
-rw-r--r--man/man8/tc-basic.834
-rw-r--r--man/man8/tc-bfifo.832
-rw-r--r--man/man8/tc-bpf.8924
-rw-r--r--man/man8/tc-cbq-details.8178
-rw-r--r--man/man8/tc-cbq.8158
-rw-r--r--man/man8/tc-cgroup.880
-rw-r--r--man/man8/tc-drr.81
-rw-r--r--man/man8/tc-flow.8265
-rw-r--r--man/man8/tc-flower.8113
-rw-r--r--man/man8/tc-fq.892
-rw-r--r--man/man8/tc-fw.866
-rw-r--r--man/man8/tc-htb.882
-rw-r--r--man/man8/tc-mqprio.82
-rw-r--r--man/man8/tc-netem.822
-rw-r--r--man/man8/tc-pfifo_fast.814
-rw-r--r--man/man8/tc-prio.828
-rw-r--r--man/man8/tc-red.862
-rw-r--r--man/man8/tc-route.874
-rw-r--r--man/man8/tc-sfq.840
-rw-r--r--man/man8/tc-tbf.848
-rw-r--r--man/man8/tc-tcindex.858
-rw-r--r--man/man8/tc-u32.8663
-rw-r--r--man/man8/tc.8155
-rw-r--r--man/man8/tipc-bearer.8231
-rw-r--r--man/man8/tipc-link.8226
-rw-r--r--man/man8/tipc-media.887
-rw-r--r--man/man8/tipc-nametable.8100
-rw-r--r--man/man8/tipc-node.872
-rw-r--r--man/man8/tipc-peer.852
-rw-r--r--man/man8/tipc-socket.859
-rw-r--r--man/man8/tipc.8100
-rw-r--r--misc/Makefile12
-rw-r--r--misc/arpd.c2
-rw-r--r--misc/ifstat.c109
-rw-r--r--misc/lnstat.c40
-rw-r--r--misc/lnstat_util.c37
-rw-r--r--misc/nstat.c65
-rw-r--r--misc/ss.c652
-rw-r--r--misc/ssfilter.h1
-rw-r--r--tc/Android.mk2
-rw-r--r--tc/Makefile15
-rw-r--r--tc/README.last2
-rw-r--r--tc/e_bpf.c179
-rw-r--r--tc/emp_ematch.y1
-rw-r--r--tc/f_basic.c7
-rw-r--r--tc/f_bpf.c147
-rw-r--r--tc/f_flower.c519
-rw-r--r--tc/f_route.c2
-rw-r--r--tc/f_rsvp.c2
-rw-r--r--tc/f_u32.c56
-rw-r--r--tc/m_action.c9
-rw-r--r--tc/m_bpf.c175
-rw-r--r--tc/m_connmark.c166
-rw-r--r--tc/m_ipt.c1
-rw-r--r--tc/m_pedit.c32
-rw-r--r--tc/m_simple.c2
-rw-r--r--tc/m_xt_old.c1
-rw-r--r--tc/p_tcp.c2
-rw-r--r--tc/p_udp.c1
-rw-r--r--tc/q_cbq.c1
-rw-r--r--tc/q_clsact.c34
-rw-r--r--tc/q_codel.c33
-rw-r--r--tc/q_fq.c41
-rw-r--r--tc/q_fq_codel.c31
-rw-r--r--tc/q_gred.c204
-rw-r--r--tc/q_ingress.c43
-rw-r--r--tc/q_netem.c1
-rw-r--r--tc/q_prio.c1
-rw-r--r--tc/q_red.c27
-rw-r--r--tc/q_tbf.c1
-rw-r--r--tc/tc.c15
-rw-r--r--tc/tc_bpf.c1764
-rw-r--r--tc/tc_bpf.h69
-rw-r--r--tc/tc_class.c2
-rw-r--r--tc/tc_common.h3
-rw-r--r--tc/tc_exec.c109
-rw-r--r--tc/tc_filter.c51
-rw-r--r--tc/tc_monitor.c16
-rw-r--r--tc/tc_qdisc.c34
-rw-r--r--tc/tc_stab.c21
-rw-r--r--tc/tc_util.c25
-rw-r--r--tc/tc_util.h16
-rw-r--r--testsuite/Makefile11
-rw-r--r--testsuite/lib/generic.sh80
-rwxr-xr-xtestsuite/tests/ip/link/new_link.t4
-rwxr-xr-xtestsuite/tests/ip/route/add_default_route.t33
-rwxr-xr-xtestsuite/tests/ip/tunnel/add_tunnel.t14
-rw-r--r--tipc/.gitignore1
-rw-r--r--tipc/Makefile27
-rw-r--r--tipc/README63
-rw-r--r--tipc/bearer.c725
-rw-r--r--tipc/bearer.h22
-rw-r--r--tipc/cmdl.c127
-rw-r--r--tipc/cmdl.h46
-rw-r--r--tipc/link.c520
-rw-r--r--tipc/link.h21
-rw-r--r--tipc/media.c260
-rw-r--r--tipc/media.h21
-rw-r--r--tipc/misc.c35
-rw-r--r--tipc/misc.h19
-rw-r--r--tipc/msg.c170
-rw-r--r--tipc/msg.h20
-rw-r--r--tipc/nametable.c109
-rw-r--r--tipc/nametable.h21
-rw-r--r--tipc/node.c267
-rw-r--r--tipc/node.h21
-rw-r--r--tipc/peer.c93
-rw-r--r--tipc/peer.h21
-rw-r--r--tipc/socket.c140
-rw-r--r--tipc/socket.h21
-rw-r--r--tipc/tipc.c99
276 files changed, 19895 insertions, 3066 deletions
diff --git a/Makefile b/Makefile
index 9dbb29f3..67176bef 100644
--- a/Makefile
+++ b/Makefile
@@ -1,15 +1,15 @@
-ROOTDIR=$(DESTDIR)
-PREFIX=/usr
-LIBDIR=$(PREFIX)/lib
-SBINDIR=/sbin
-CONFDIR=/etc/iproute2
-DATADIR=$(PREFIX)/share
-DOCDIR=$(DATADIR)/doc/iproute2
-MANDIR=$(DATADIR)/man
-ARPDDIR=/var/lib/arpd
+PREFIX?=/usr
+LIBDIR?=$(PREFIX)/lib
+SBINDIR?=/sbin
+CONFDIR?=/etc/iproute2
+DATADIR?=$(PREFIX)/share
+DOCDIR?=$(DATADIR)/doc/iproute2
+MANDIR?=$(DATADIR)/man
+ARPDDIR?=/var/lib/arpd
+KERNEL_INCLUDE?=/usr/include
# Path to db_185.h include
-DBM_INCLUDE:=$(ROOTDIR)/usr/include
+DBM_INCLUDE:=$(DESTDIR)/usr/include
SHARED_LIBS = y
@@ -26,17 +26,22 @@ ADDLIB+=dnet_ntop.o dnet_pton.o
#options for ipx
ADDLIB+=ipx_ntop.o ipx_pton.o
+#options for mpls
+ADDLIB+=mpls_ntop.o mpls_pton.o
+
CC = gcc
HOSTCC = gcc
DEFINES += -D_GNU_SOURCE
+# Turn on transparent support for LFS
+DEFINES += -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE
CCOPTS = -O2
WFLAGS := -Wall -Wstrict-prototypes -Wmissing-prototypes
WFLAGS += -Wmissing-declarations -Wold-style-definition -Wformat=2
-CFLAGS = $(WFLAGS) $(CCOPTS) -I../include $(DEFINES)
+CFLAGS := $(WFLAGS) $(CCOPTS) -I../include $(DEFINES) $(CFLAGS)
YACCFLAGS = -d -t -v
-SUBDIRS=lib ip tc bridge misc netem genl man
+SUBDIRS=lib ip tc bridge misc netem genl tipc man
LIBNETLINK=../lib/libnetlink.a ../lib/libutil.a
LDLIBS += $(LIBNETLINK)
diff --git a/README.iproute2+tc b/README.iproute2+tc
index 6aa5d184..2a5638da 100644
--- a/README.iproute2+tc
+++ b/README.iproute2+tc
@@ -72,12 +72,16 @@ ip route add 10.11.12.0/24 dev eth1 via whatever realm 1
etc. The same thing can be made with rules.
I still did not test ipchains, but they should work too.
+
+Setup and code example of BPF classifier and action can be found under
+examples/bpf/, which should explain everything for getting started.
+
+
Setup of rsvp and u32 classifiers is more hairy.
If you read RSVP specs, you will understand how rsvp classifier
works easily. What's about u32... That's example:
-
#! /bin/sh
TC=/home/root/tc
diff --git a/bridge/bridge.c b/bridge/bridge.c
index 88469ca2..72f153f2 100644
--- a/bridge/bridge.c
+++ b/bridge/bridge.c
@@ -9,6 +9,7 @@
#include <unistd.h>
#include <sys/socket.h>
#include <string.h>
+#include <errno.h>
#include "SNAPSHOT.h"
#include "utils.h"
@@ -18,12 +19,14 @@
struct rtnl_handle rth = { .fd = -1 };
int preferred_family = AF_UNSPEC;
int resolve_hosts;
-int oneline = 0;
+int oneline;
int show_stats;
int show_details;
int compress_vlans;
int timestamp;
-char * _SL_ = NULL;
+char *batch_file;
+int force;
+const char *_SL_;
static void usage(void) __attribute__((noreturn));
@@ -31,10 +34,11 @@ static void usage(void)
{
fprintf(stderr,
"Usage: bridge [ OPTIONS ] OBJECT { COMMAND | help }\n"
-"where OBJECT := { link | fdb | mdb | vlan | monitor }\n"
-" OPTIONS := { -V[ersion] | -s[tatistics] | -d[etails] |\n"
-" -o[neline] | -t[imestamp] | -n[etns] name |\n"
-" -c[ompressvlans] }\n");
+" bridge [ -force ] -batch filename\n"
+"where OBJECT := { link | fdb | mdb | vlan | monitor }\n"
+" OPTIONS := { -V[ersion] | -s[tatistics] | -d[etails] |\n"
+" -o[neline] | -t[imestamp] | -n[etns] name |\n"
+" -c[ompressvlans] }\n");
exit(-1);
}
@@ -48,9 +52,9 @@ static const struct cmd {
const char *cmd;
int (*func)(int argc, char **argv);
} cmds[] = {
- { "link", do_link },
- { "fdb", do_fdb },
- { "mdb", do_mdb },
+ { "link", do_link },
+ { "fdb", do_fdb },
+ { "mdb", do_mdb },
{ "vlan", do_vlan },
{ "monitor", do_monitor },
{ "help", do_help },
@@ -66,16 +70,62 @@ static int do_cmd(const char *argv0, int argc, char **argv)
return c->func(argc-1, argv+1);
}
- fprintf(stderr, "Object \"%s\" is unknown, try \"bridge help\".\n", argv0);
+ fprintf(stderr,
+ "Object \"%s\" is unknown, try \"bridge help\".\n", argv0);
return -1;
}
+static int batch(const char *name)
+{
+ char *line = NULL;
+ size_t len = 0;
+ int ret = EXIT_SUCCESS;
+
+ if (name && strcmp(name, "-") != 0) {
+ if (freopen(name, "r", stdin) == NULL) {
+ fprintf(stderr,
+ "Cannot open file \"%s\" for reading: %s\n",
+ name, strerror(errno));
+ return EXIT_FAILURE;
+ }
+ }
+
+ if (rtnl_open(&rth, 0) < 0) {
+ fprintf(stderr, "Cannot open rtnetlink\n");
+ return EXIT_FAILURE;
+ }
+
+ cmdlineno = 0;
+ while (getcmdline(&line, &len, stdin) != -1) {
+ char *largv[100];
+ int largc;
+
+ largc = makeargs(line, largv, 100);
+ if (largc == 0)
+ continue; /* blank line */
+
+ if (do_cmd(largv[0], largc, largv)) {
+ fprintf(stderr, "Command failed %s:%d\n",
+ name, cmdlineno);
+ ret = EXIT_FAILURE;
+ if (!force)
+ break;
+ }
+ }
+ if (line)
+ free(line);
+
+ rtnl_close(&rth);
+ return ret;
+}
+
int
main(int argc, char **argv)
{
while (argc > 1) {
- char *opt = argv[1];
- if (strcmp(opt,"--") == 0) {
+ const char *opt = argv[1];
+
+ if (strcmp(opt, "--") == 0) {
argc--; argv++;
break;
}
@@ -98,7 +148,7 @@ main(int argc, char **argv)
++oneline;
} else if (matches(opt, "-timestamp") == 0) {
++timestamp;
- } else if (matches(opt, "-family") == 0) {
+ } else if (matches(opt, "-family") == 0) {
argc--;
argv++;
if (argc <= 1)
@@ -121,14 +171,27 @@ main(int argc, char **argv)
exit(-1);
} else if (matches(opt, "-compressvlans") == 0) {
++compress_vlans;
+ } else if (matches(opt, "-force") == 0) {
+ ++force;
+ } else if (matches(opt, "-batch") == 0) {
+ argc--;
+ argv++;
+ if (argc <= 1)
+ usage();
+ batch_file = argv[1];
} else {
- fprintf(stderr, "Option \"%s\" is unknown, try \"bridge help\".\n", opt);
+ fprintf(stderr,
+ "Option \"%s\" is unknown, try \"bridge help\".\n",
+ opt);
exit(-1);
}
argc--; argv++;
}
- _SL_ = oneline ? "\\" : "\n" ;
+ _SL_ = oneline ? "\\" : "\n";
+
+ if (batch_file)
+ return batch(batch_file);
if (rtnl_open(&rth, 0) < 0)
exit(1);
diff --git a/bridge/fdb.c b/bridge/fdb.c
index 3c33e228..4d109251 100644
--- a/bridge/fdb.c
+++ b/bridge/fdb.c
@@ -31,10 +31,11 @@ static unsigned int filter_index;
static void usage(void)
{
- fprintf(stderr, "Usage: bridge fdb { add | append | del | replace } ADDR dev DEV {self|master} [ temp ]\n"
- " [router] [ dst IPADDR] [ vlan VID ]\n"
- " [ port PORT] [ vni VNI ] [via DEV]\n");
- fprintf(stderr, " bridge fdb {show} [ br BRDEV ] [ brport DEV ]\n");
+ fprintf(stderr, "Usage: bridge fdb { add | append | del | replace } ADDR dev DEV\n"
+ " [ self ] [ master ] [ use ] [ router ]\n"
+ " [ local | temp ] [ dst IPADDR ] [ vlan VID ]\n"
+ " [ port PORT] [ vni VNI ] [ via DEV ]\n");
+ fprintf(stderr, " bridge fdb [ show [ br BRDEV ] [ brport DEV ] ]\n");
exit(-1);
}
@@ -159,9 +160,11 @@ int print_fdb(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
if (r->ndm_flags & NTF_ROUTER)
fprintf(fp, "router ");
if (r->ndm_flags & NTF_EXT_LEARNED)
- fprintf(fp, "external ");
+ fprintf(fp, "offload ");
fprintf(fp, "%s\n", state_n2a(r->ndm_state));
+ fflush(fp);
+
return 0;
}
@@ -305,6 +308,8 @@ static int fdb_modify(int cmd, int flags, int argc, char **argv)
duparg2("vlan", *argv);
NEXT_ARG();
vid = atoi(*argv);
+ } else if (matches(*argv, "use") == 0) {
+ req.ndm.ndm_flags |= NTF_USE;
} else {
if (strcmp(*argv, "to") == 0) {
NEXT_ARG();
@@ -362,7 +367,7 @@ static int fdb_modify(int cmd, int flags, int argc, char **argv)
return -1;
}
- if (rtnl_talk(&rth, &req.n, 0, 0, NULL) < 0)
+ if (rtnl_talk(&rth, &req.n, NULL, 0) < 0)
return -1;
return 0;
diff --git a/bridge/link.c b/bridge/link.c
index 1af1cf33..a9b1262d 100644
--- a/bridge/link.c
+++ b/bridge/link.c
@@ -415,7 +415,7 @@ static int brlink_modify(int argc, char **argv)
addattr_nest_end(&req.n, nest);
}
- if (rtnl_talk(&rth, &req.n, 0, 0, NULL) < 0)
+ if (rtnl_talk(&rth, &req.n, NULL, 0) < 0)
return -1;
return 0;
diff --git a/bridge/mdb.c b/bridge/mdb.c
index a6b28827..24c49035 100644
--- a/bridge/mdb.c
+++ b/bridge/mdb.c
@@ -28,7 +28,7 @@ static unsigned int filter_index;
static void usage(void)
{
- fprintf(stderr, "Usage: bridge mdb { add | del } dev DEV port PORT grp GROUP [permanent | temp]\n");
+ fprintf(stderr, "Usage: bridge mdb { add | del } dev DEV port PORT grp GROUP [permanent | temp] [vid VID]\n");
fprintf(stderr, " bridge mdb {show} [ dev DEV ]\n");
exit(-1);
}
@@ -48,23 +48,29 @@ static void br_print_router_ports(FILE *f, struct rtattr *attr)
fprintf(f, "\n");
}
-static void print_mdb_entry(FILE *f, int ifindex, struct br_mdb_entry *e)
+static void print_mdb_entry(FILE *f, int ifindex, struct br_mdb_entry *e,
+ struct nlmsghdr *n)
{
SPRINT_BUF(abuf);
-
- if (e->addr.proto == htons(ETH_P_IP))
- fprintf(f, "dev %s port %s grp %s %s\n", ll_index_to_name(ifindex),
- ll_index_to_name(e->ifindex),
- inet_ntop(AF_INET, &e->addr.u.ip4, abuf, sizeof(abuf)),
- (e->state & MDB_PERMANENT) ? "permanent" : "temp");
- else
- fprintf(f, "dev %s port %s grp %s %s\n", ll_index_to_name(ifindex),
- ll_index_to_name(e->ifindex),
- inet_ntop(AF_INET6, &e->addr.u.ip6, abuf, sizeof(abuf)),
- (e->state & MDB_PERMANENT) ? "permanent" : "temp");
+ const void *src;
+ int af;
+
+ af = e->addr.proto == htons(ETH_P_IP) ? AF_INET : AF_INET6;
+ src = af == AF_INET ? (const void *)&e->addr.u.ip4 :
+ (const void *)&e->addr.u.ip6;
+ if (n->nlmsg_type == RTM_DELMDB)
+ fprintf(f, "Deleted ");
+ fprintf(f, "dev %s port %s grp %s %s", ll_index_to_name(ifindex),
+ ll_index_to_name(e->ifindex),
+ inet_ntop(af, src, abuf, sizeof(abuf)),
+ (e->state & MDB_PERMANENT) ? "permanent" : "temp");
+ if (e->vid)
+ fprintf(f, " vid %hu", e->vid);
+ fprintf(f, "\n");
}
-static void br_print_mdb_entry(FILE *f, int ifindex, struct rtattr *attr)
+static void br_print_mdb_entry(FILE *f, int ifindex, struct rtattr *attr,
+ struct nlmsghdr *n)
{
struct rtattr *i;
int rem;
@@ -73,7 +79,7 @@ static void br_print_mdb_entry(FILE *f, int ifindex, struct rtattr *attr)
rem = RTA_PAYLOAD(attr);
for (i = RTA_DATA(attr); RTA_OK(i, rem); i = RTA_NEXT(i, rem)) {
e = RTA_DATA(i);
- print_mdb_entry(f, ifindex, e);
+ print_mdb_entry(f, ifindex, e, n);
}
}
@@ -82,7 +88,7 @@ int print_mdb(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
FILE *fp = arg;
struct br_port_msg *r = NLMSG_DATA(n);
int len = n->nlmsg_len;
- struct rtattr * tb[MDBA_MAX+1];
+ struct rtattr *tb[MDBA_MAX+1], *i;
if (n->nlmsg_type != RTM_GETMDB && n->nlmsg_type != RTM_NEWMDB && n->nlmsg_type != RTM_DELMDB) {
fprintf(stderr, "Not RTM_GETMDB, RTM_NEWMDB or RTM_DELMDB: %08x %08x %08x\n",
@@ -103,20 +109,34 @@ int print_mdb(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
parse_rtattr(tb, MDBA_MAX, MDBA_RTA(r), n->nlmsg_len - NLMSG_LENGTH(sizeof(*r)));
if (tb[MDBA_MDB]) {
- struct rtattr *i;
int rem = RTA_PAYLOAD(tb[MDBA_MDB]);
for (i = RTA_DATA(tb[MDBA_MDB]); RTA_OK(i, rem); i = RTA_NEXT(i, rem))
- br_print_mdb_entry(fp, r->ifindex, i);
+ br_print_mdb_entry(fp, r->ifindex, i, n);
}
if (tb[MDBA_ROUTER]) {
- if (show_details) {
- fprintf(fp, "router ports on %s: ", ll_index_to_name(r->ifindex));
- br_print_router_ports(fp, tb[MDBA_ROUTER]);
+ if (n->nlmsg_type == RTM_GETMDB) {
+ if (show_details) {
+ fprintf(fp, "router ports on %s: ",
+ ll_index_to_name(r->ifindex));
+ br_print_router_ports(fp, tb[MDBA_ROUTER]);
+ }
+ } else {
+ uint32_t *port_ifindex;
+
+ i = RTA_DATA(tb[MDBA_ROUTER]);
+ port_ifindex = RTA_DATA(i);
+ if (n->nlmsg_type == RTM_DELMDB)
+ fprintf(fp, "Deleted ");
+ fprintf(fp, "router port dev %s master %s\n",
+ ll_index_to_name(*port_ifindex),
+ ll_index_to_name(r->ifindex));
}
}
+ fflush(fp);
+
return 0;
}
@@ -165,6 +185,7 @@ static int mdb_modify(int cmd, int flags, int argc, char **argv)
} req;
struct br_mdb_entry entry;
char *d = NULL, *p = NULL, *grp = NULL;
+ short vid = 0;
memset(&req, 0, sizeof(req));
memset(&entry, 0, sizeof(entry));
@@ -189,6 +210,9 @@ static int mdb_modify(int cmd, int flags, int argc, char **argv)
entry.state |= MDB_PERMANENT;
} else if (strcmp(*argv, "temp") == 0) {
;/* nothing */
+ } else if (strcmp(*argv, "vid") == 0) {
+ NEXT_ARG();
+ vid = atoi(*argv);
} else {
if (matches(*argv, "help") == 0)
usage();
@@ -222,9 +246,10 @@ static int mdb_modify(int cmd, int flags, int argc, char **argv)
} else
entry.addr.proto = htons(ETH_P_IP);
+ entry.vid = vid;
addattr_l(&req.n, sizeof(req), MDBA_SET_ENTRY, &entry, sizeof(entry));
- if (rtnl_talk(&rth, &req.n, 0, 0, NULL) < 0)
+ if (rtnl_talk(&rth, &req.n, NULL, 0) < 0)
return -1;
return 0;
diff --git a/bridge/monitor.c b/bridge/monitor.c
index 9e1ed48c..d8341ec5 100644
--- a/bridge/monitor.c
+++ b/bridge/monitor.c
@@ -36,6 +36,7 @@ static void usage(void)
}
static int accept_msg(const struct sockaddr_nl *who,
+ struct rtnl_ctrl_data *ctrl,
struct nlmsghdr *n, void *arg)
{
FILE *fp = arg;
diff --git a/bridge/vlan.c b/bridge/vlan.c
index 2ae739cf..ac2f5231 100644
--- a/bridge/vlan.c
+++ b/bridge/vlan.c
@@ -131,7 +131,7 @@ static int vlan_modify(int cmd, int argc, char **argv)
addattr_nest_end(&req.n, afspec);
- if (rtnl_talk(&rth, &req.n, 0, 0, NULL) < 0)
+ if (rtnl_talk(&rth, &req.n, NULL, 0) < 0)
return -1;
return 0;
diff --git a/configure b/configure
index c3dacdba..d2540b0d 100755
--- a/configure
+++ b/configure
@@ -201,7 +201,7 @@ check_setns()
{
cat >$TMPDIR/setnstest.c <<EOF
#include <sched.h>
-int main(int argc, char **argv)
+int main(int argc, char **argv)
{
(void)setns(0,0);
return 0;
@@ -249,6 +249,29 @@ EOF
rm -f $TMPDIR/ipsettest.c $TMPDIR/ipsettest
}
+check_elf()
+{
+ cat >$TMPDIR/elftest.c <<EOF
+#include <libelf.h>
+#include <gelf.h>
+int main(void)
+{
+ Elf_Scn *scn;
+ GElf_Shdr shdr;
+ return elf_version(EV_CURRENT);
+}
+EOF
+
+ if $CC -I$INCLUDE -o $TMPDIR/elftest $TMPDIR/elftest.c -lelf >/dev/null 2>&1
+ then
+ echo "TC_CONFIG_ELF:=y" >>Config
+ echo "yes"
+ else
+ echo "no"
+ fi
+ rm -f $TMPDIR/elftest.c $TMPDIR/elftest
+}
+
check_selinux()
# SELinux is a compile time option in the ss utility
{
@@ -261,6 +284,39 @@ check_selinux()
fi
}
+check_mnl()
+{
+ if ${PKG_CONFIG} libmnl --exists
+ then
+ echo "HAVE_MNL:=y" >>Config
+ echo "yes"
+ else
+ echo "no"
+ fi
+}
+
+check_berkeley_db()
+{
+ cat >$TMPDIR/dbtest.c <<EOF
+#include <fcntl.h>
+#include <stdlib.h>
+#include <db_185.h>
+int main(int argc, char **argv) {
+ dbopen("/tmp/xxx_test_db.db", O_CREAT|O_RDWR, 0644, DB_HASH, NULL);
+ return 0;
+}
+EOF
+ $CC -I$INCLUDE -o $TMPDIR/dbtest $TMPDIR/dbtest.c -ldb >/dev/null 2>&1
+ if [ $? -eq 0 ]
+ then
+ echo "HAVE_BERKELEY_DB:=y" >>Config
+ echo "yes"
+ else
+ echo "no"
+ fi
+ rm -f $TMPDIR/dbtest.c $TMPDIR/dbtest
+}
+
echo "# Generated config based on" $INCLUDE >Config
check_toolchain
@@ -278,7 +334,8 @@ check_ipt
echo -n " IPSET "
check_ipset
-echo -n -e "\niptables modules directory: "
+echo
+echo -n "iptables modules directory: "
check_ipt_lib_dir
echo -n "libc has setns: "
@@ -287,5 +344,16 @@ check_setns
echo -n "SELinux support: "
check_selinux
-echo -e "\nDocs"
+echo -n "ELF support: "
+check_elf
+
+echo -n "libmnl support: "
+check_mnl
+
+echo -n "Berkeley DB: "
+check_berkeley_db
+
+echo
+echo -n "docs:"
check_docs
+echo
diff --git a/doc/ip-cref.tex b/doc/ip-cref.tex
index e7a79a5d..67094c95 100644
--- a/doc/ip-cref.tex
+++ b/doc/ip-cref.tex
@@ -1432,6 +1432,17 @@ database.
even if it does not match any interface prefix. One application of this
option may be found in~\cite{IP-TUNNELS}.
+\item \verb|pref PREF|
+
+--- the IPv6 route preference.
+\verb|PREF| is a string specifying the route preference as defined in
+RFC4191 for Router Discovery messages. Namely:
+\begin{itemize}
+\item \verb|low| --- the route has the lowest priority.
+\item \verb|medium| --- the route has the default priority.
+\item \verb|high| --- the route has the highest priority.
+\end{itemize}
+
\end{itemize}
@@ -2235,6 +2246,42 @@ Besides that, the host 193.233.7.83 is translated into
another prefix to look like 192.203.80.144 when talking
to the outer world.
+\subsection{{\tt ip rule save} -- save rules tables}
+\label{IP-RULE-SAVE}
+
+\paragraph{Description:} this command saves the contents of the rules
+tables or the rule(s) selected by some criteria to standard output.
+
+\paragraph{Arguments:} \verb|ip rule save| has the same arguments as
+\verb|ip rule show|.
+
+\paragraph{Example:} This saves all the rules to the {\tt saved\_rules}
+file:
+\begin{verbatim}
+dan@caffeine:~ # ip rule save > saved_rules
+\end{verbatim}
+
+\paragraph{Output format:} The format of the data stream provided by
+\verb|ip rule save| is that of \verb|rtnetlink|. See
+\verb|rtnetlink(7)| for more information.
+
+\subsection{{\tt ip rule restore} -- restore rules tables}
+\label{IP-RULE-RESTORE}
+
+\paragraph{Description:} this command restores the contents of the rules
+tables according to a data stream as provided by \verb|ip rule save| via
+standard input. Note that any rules already in the table are left unchanged,
+and duplicates are not ignored.
+
+\paragraph{Arguments:} This command takes no arguments.
+
+\paragraph{Example:} This restores all rules that were saved to the
+{\tt saved\_rules} file:
+
+\begin{verbatim}
+dan@caffeine:~ # ip rule restore < saved_rules
+\end{verbatim}
+
\section{{\tt ip maddress} --- multicast addresses management}
diff --git a/etc/iproute2/bpf_pinning b/etc/iproute2/bpf_pinning
new file mode 100644
index 00000000..2b39c709
--- /dev/null
+++ b/etc/iproute2/bpf_pinning
@@ -0,0 +1,6 @@
+#
+# subpath mappings from mount point for pinning
+#
+#3 tracing
+#4 foo/bar
+#5 tc/cls1
diff --git a/etc/iproute2/rt_tables.d/README b/etc/iproute2/rt_tables.d/README
new file mode 100644
index 00000000..79386f89
--- /dev/null
+++ b/etc/iproute2/rt_tables.d/README
@@ -0,0 +1,3 @@
+Each file in this directory is an rt_tables configuration file. iproute2
+commands scan this directory processing all files that end in '.conf'.
+
diff --git a/examples/bpf/README b/examples/bpf/README
new file mode 100644
index 00000000..42472578
--- /dev/null
+++ b/examples/bpf/README
@@ -0,0 +1,13 @@
+eBPF toy code examples (running in kernel) to familiarize yourself
+with syntax and features:
+
+ - bpf_prog.c -> Classifier examples using maps
+ - bpf_shared.c -> Ingress/egress map sharing example
+ - bpf_tailcall.c -> Using tail call chains
+ - bpf_cyclic.c -> Simple cycle as tail calls
+ - bpf_graft.c -> Demo on altering runtime behaviour
+
+User space code example:
+
+ - bpf_agent.c -> Counterpart to bpf_prog.c for user
+ space to transfer/read out map data
diff --git a/examples/bpf/bpf_agent.c b/examples/bpf/bpf_agent.c
new file mode 100644
index 00000000..f9b9ce3c
--- /dev/null
+++ b/examples/bpf/bpf_agent.c
@@ -0,0 +1,258 @@
+/*
+ * eBPF user space agent part
+ *
+ * Simple, _self-contained_ user space agent for the eBPF kernel
+ * bpf_prog.c program, which gets all map fds passed from tc via unix
+ * domain socket in one transaction and can thus keep referencing
+ * them from user space in order to read out (or possibly modify)
+ * map data. Here, just as a minimal example to display counters.
+ *
+ * The agent only uses the bpf(2) syscall API to read or possibly
+ * write to eBPF maps, it doesn't need to be aware of the low-level
+ * bytecode parts and/or ELF parsing bits.
+ *
+ * ! For more details, see header comment in bpf_prog.c !
+ *
+ * gcc bpf_agent.c -o bpf_agent -Wall -O2
+ *
+ * For example, a more complex user space agent could run on each
+ * host, reading and writing into eBPF maps used by tc classifier
+ * and actions. It would thus allow for implementing a distributed
+ * tc architecture, for example, which would push down central
+ * policies into eBPF maps, and thus altering run-time behaviour.
+ *
+ * -- Happy eBPF hacking! ;)
+ */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <assert.h>
+
+#include <sys/un.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+
+/* Just some misc macros as min(), offsetof(), etc. */
+#include "../../include/utils.h"
+/* Common code from fd passing. */
+#include "../../include/bpf_scm.h"
+/* Common, shared definitions with bpf_prog.c */
+#include "bpf_shared.h"
+/* Mini syscall wrapper */
+#include "bpf_sys.h"
+
+static void bpf_dump_drops(int fd)
+{
+ int cpu, max;
+
+ max = sysconf(_SC_NPROCESSORS_ONLN);
+
+ printf(" `- number of drops:");
+ for (cpu = 0; cpu < max; cpu++) {
+ long drops;
+
+ assert(bpf_lookup_elem(fd, &cpu, &drops) == 0);
+ printf("\tcpu%d: %5ld", cpu, drops);
+ }
+ printf("\n");
+}
+
+static void bpf_dump_queue(int fd)
+{
+ /* Just for the sake of the example. */
+ int max_queue = 4, i;
+
+ printf(" | nic queues:");
+ for (i = 0; i < max_queue; i++) {
+ struct count_queue cq;
+ int ret;
+
+ memset(&cq, 0, sizeof(cq));
+ ret = bpf_lookup_elem(fd, &i, &cq);
+ assert(ret == 0 || (ret < 0 && errno == ENOENT));
+
+ printf("\tq%d:[pkts: %ld, mis: %ld]",
+ i, cq.total, cq.mismatch);
+ }
+ printf("\n");
+}
+
+static void bpf_dump_proto(int fd)
+{
+ uint8_t protos[] = { IPPROTO_TCP, IPPROTO_UDP, IPPROTO_ICMP };
+ char *names[] = { "tcp", "udp", "icmp" };
+ int i;
+
+ printf(" ` protos:");
+ for (i = 0; i < ARRAY_SIZE(protos); i++) {
+ struct count_tuple ct;
+ int ret;
+
+ memset(&ct, 0, sizeof(ct));
+ ret = bpf_lookup_elem(fd, &protos[i], &ct);
+ assert(ret == 0 || (ret < 0 && errno == ENOENT));
+
+ printf("\t%s:[pkts: %ld, bytes: %ld]",
+ names[i], ct.packets, ct.bytes);
+ }
+ printf("\n");
+}
+
+static void bpf_dump_map_data(int *tfd)
+{
+ int i;
+
+ for (i = 0; i < 30; i++) {
+ const int period = 5;
+
+ printf("data, period: %dsec\n", period);
+
+ bpf_dump_drops(tfd[BPF_MAP_ID_DROPS]);
+ bpf_dump_queue(tfd[BPF_MAP_ID_QUEUE]);
+ bpf_dump_proto(tfd[BPF_MAP_ID_PROTO]);
+
+ sleep(period);
+ }
+}
+
+static void bpf_info_loop(int *fds, struct bpf_map_aux *aux)
+{
+ int i, tfd[BPF_MAP_ID_MAX];
+
+ printf("ver: %d\nobj: %s\ndev: %lu\nino: %lu\nmaps: %u\n",
+ aux->uds_ver, aux->obj_name, aux->obj_st.st_dev,
+ aux->obj_st.st_ino, aux->num_ent);
+
+ for (i = 0; i < aux->num_ent; i++) {
+ printf("map%d:\n", i);
+ printf(" `- fd: %u\n", fds[i]);
+ printf(" | serial: %u\n", aux->ent[i].id);
+ printf(" | type: %u\n", aux->ent[i].type);
+ printf(" | max elem: %u\n", aux->ent[i].max_elem);
+ printf(" | size key: %u\n", aux->ent[i].size_key);
+ printf(" ` size val: %u\n", aux->ent[i].size_value);
+
+ tfd[aux->ent[i].id] = fds[i];
+ }
+
+ bpf_dump_map_data(tfd);
+}
+
+static void bpf_map_get_from_env(int *tfd)
+{
+ char key[64], *val;
+ int i;
+
+ for (i = 0; i < BPF_MAP_ID_MAX; i++) {
+ memset(key, 0, sizeof(key));
+ snprintf(key, sizeof(key), "BPF_MAP%d", i);
+
+ val = getenv(key);
+ assert(val != NULL);
+
+ tfd[i] = atoi(val);
+ }
+}
+
+static int bpf_map_set_recv(int fd, int *fds, struct bpf_map_aux *aux,
+ unsigned int entries)
+{
+ struct bpf_map_set_msg msg;
+ int *cmsg_buf, min_fd, i;
+ char *amsg_buf, *mmsg_buf;
+
+ cmsg_buf = bpf_map_set_init(&msg, NULL, 0);
+ amsg_buf = (char *)msg.aux.ent;
+ mmsg_buf = (char *)&msg.aux;
+
+ for (i = 0; i < entries; i += min_fd) {
+ struct cmsghdr *cmsg;
+ int ret;
+
+ min_fd = min(BPF_SCM_MAX_FDS * 1U, entries - i);
+
+ bpf_map_set_init_single(&msg, min_fd);
+
+ ret = recvmsg(fd, &msg.hdr, 0);
+ if (ret <= 0)
+ return ret ? : -1;
+
+ cmsg = CMSG_FIRSTHDR(&msg.hdr);
+ if (!cmsg || cmsg->cmsg_type != SCM_RIGHTS)
+ return -EINVAL;
+ if (msg.hdr.msg_flags & MSG_CTRUNC)
+ return -EIO;
+
+ min_fd = (cmsg->cmsg_len - sizeof(*cmsg)) / sizeof(fd);
+ if (min_fd > entries || min_fd <= 0)
+ return -1;
+
+ memcpy(&fds[i], cmsg_buf, sizeof(fds[0]) * min_fd);
+ memcpy(&aux->ent[i], amsg_buf, sizeof(aux->ent[0]) * min_fd);
+ memcpy(aux, mmsg_buf, offsetof(struct bpf_map_aux, ent));
+
+ if (i + min_fd == aux->num_ent)
+ break;
+ }
+
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ int fds[BPF_SCM_MAX_FDS];
+ struct bpf_map_aux aux;
+ struct sockaddr_un addr;
+ int fd, ret, i;
+
+ /* When arguments are being passed, we take it as a path
+ * to a Unix domain socket, otherwise we grab the fds
+ * from the environment to demonstrate both possibilities.
+ */
+ if (argc == 1) {
+ int tfd[BPF_MAP_ID_MAX];
+
+ bpf_map_get_from_env(tfd);
+ bpf_dump_map_data(tfd);
+
+ return 0;
+ }
+
+ fd = socket(AF_UNIX, SOCK_DGRAM, 0);
+ if (fd < 0) {
+ fprintf(stderr, "Cannot open socket: %s\n",
+ strerror(errno));
+ exit(1);
+ }
+
+ memset(&addr, 0, sizeof(addr));
+ addr.sun_family = AF_UNIX;
+ strncpy(addr.sun_path, argv[argc - 1], sizeof(addr.sun_path));
+
+ ret = bind(fd, (struct sockaddr *)&addr, sizeof(addr));
+ if (ret < 0) {
+ fprintf(stderr, "Cannot bind to socket: %s\n",
+ strerror(errno));
+ exit(1);
+ }
+
+ memset(fds, 0, sizeof(fds));
+ memset(&aux, 0, sizeof(aux));
+
+ ret = bpf_map_set_recv(fd, fds, &aux, BPF_SCM_MAX_FDS);
+ if (ret >= 0)
+ bpf_info_loop(fds, &aux);
+
+ for (i = 0; i < aux.num_ent; i++)
+ close(fds[i]);
+
+ close(fd);
+ return 0;
+}
diff --git a/examples/bpf/bpf_cyclic.c b/examples/bpf/bpf_cyclic.c
new file mode 100644
index 00000000..c66cbecc
--- /dev/null
+++ b/examples/bpf/bpf_cyclic.c
@@ -0,0 +1,30 @@
+#include "../../include/bpf_api.h"
+
+/* Cyclic dependency example to test the kernel's runtime upper
+ * bound on loops. Also demonstrates how to use direct-actions,
+ * loaded as: tc filter add [...] bpf da obj [...]
+ */
+#define JMP_MAP_ID 0xabccba
+
+BPF_PROG_ARRAY(jmp_tc, JMP_MAP_ID, PIN_OBJECT_NS, 1);
+
+/* Slot 0 of jmp_tc: prints the current value of skb->cb[0], bumps
+ * it by one and tail calls into the very same slot again. The code
+ * behind tail_call() sets the classid and accepts the packet.
+ */
+__section_tail(JMP_MAP_ID, 0)
+int cls_loop(struct __sk_buff *skb)
+{
+	char msg[] = "cb: %u\n";
+	uint32_t round = skb->cb[0];
+
+	skb->cb[0] = round + 1;
+	trace_printk(msg, sizeof(msg), round);
+
+	tail_call(skb, &jmp_tc, 0);
+
+	skb->tc_classid = TC_H_MAKE(1, 42);
+	return TC_ACT_OK;
+}
+
+/* Classifier entry point: tail calls into slot 0 of jmp_tc, which is
+ * where cls_loop() is placed. TC_ACT_SHOT is returned by the code
+ * following tail_call(), i.e. presumably only when the jump is not
+ * taken.
+ */
+__section_cls_entry
+int cls_entry(struct __sk_buff *skb)
+{
+ tail_call(skb, &jmp_tc, 0);
+ return TC_ACT_SHOT;
+}
+
+BPF_LICENSE("GPL");
diff --git a/examples/bpf/bpf_graft.c b/examples/bpf/bpf_graft.c
new file mode 100644
index 00000000..f48fd028
--- /dev/null
+++ b/examples/bpf/bpf_graft.c
@@ -0,0 +1,67 @@
+#include "../../include/bpf_api.h"
+
+/* This example demonstrates how classifier run-time behaviour
+ * can be altered with tail calls. We start out with an empty
+ * jmp_tc array, then add section aaa to the array slot 0, and
+ * later on atomically replace it with section bbb. Note that
+ * as shown in other examples, the tc loader can prepopulate
+ * tail called sections, here we start out with an empty one
+ * on purpose to show it can also be done this way.
+ *
+ * tc filter add dev foo parent ffff: bpf obj graft.o
+ * tc exec bpf dbg
+ * [...]
+ * Socket Thread-20229 [001] ..s. 138993.003923: : fallthrough
+ * <idle>-0 [001] ..s. 138993.202265: : fallthrough
+ * Socket Thread-20229 [001] ..s. 138994.004149: : fallthrough
+ * [...]
+ *
+ * tc exec bpf graft m:globals/jmp_tc key 0 obj graft.o sec aaa
+ * tc exec bpf dbg
+ * [...]
+ * Socket Thread-19818 [002] ..s. 139012.053587: : aaa
+ * <idle>-0 [002] ..s. 139012.172359: : aaa
+ * Socket Thread-19818 [001] ..s. 139012.173556: : aaa
+ * [...]
+ *
+ * tc exec bpf graft m:globals/jmp_tc key 0 obj graft.o sec bbb
+ * tc exec bpf dbg
+ * [...]
+ * Socket Thread-19818 [002] ..s. 139022.102967: : bbb
+ * <idle>-0 [002] ..s. 139022.155640: : bbb
+ * Socket Thread-19818 [001] ..s. 139022.156730: : bbb
+ * [...]
+ */
+
+BPF_PROG_ARRAY(jmp_tc, 0, PIN_GLOBAL_NS, 1);
+
+/* Graftable section "aaa": emits its own name via trace_printk()
+ * and returns the classid TC_H_MAKE(1, 42).
+ */
+__section("aaa")
+int cls_aaa(struct __sk_buff *skb)
+{
+	char tag[] = "aaa\n";
+	int classid = TC_H_MAKE(1, 42);
+
+	trace_printk(tag, sizeof(tag));
+
+	return classid;
+}
+
+/* Graftable section "bbb": emits its own name via trace_printk()
+ * and returns the classid TC_H_MAKE(1, 43).
+ */
+__section("bbb")
+int cls_bbb(struct __sk_buff *skb)
+{
+	char tag[] = "bbb\n";
+	int classid = TC_H_MAKE(1, 43);
+
+	trace_printk(tag, sizeof(tag));
+
+	return classid;
+}
+
+/* Classifier entry point: attempts a tail call into slot 0 of
+ * jmp_tc. The statements after tail_call() print "fallthrough" and
+ * return BPF_H_DEFAULT; per the sample traces at the top of this
+ * file that is what is seen while the slot is still empty.
+ */
+__section_cls_entry
+int cls_entry(struct __sk_buff *skb)
+{
+ char fmt[] = "fallthrough\n";
+
+ tail_call(skb, &jmp_tc, 0);
+ trace_printk(fmt, sizeof(fmt));
+
+ return BPF_H_DEFAULT;
+}
+
+BPF_LICENSE("GPL");
diff --git a/examples/bpf/bpf_prog.c b/examples/bpf/bpf_prog.c
new file mode 100644
index 00000000..47280492
--- /dev/null
+++ b/examples/bpf/bpf_prog.c
@@ -0,0 +1,499 @@
+/*
+ * eBPF kernel space program part
+ *
+ * Toy eBPF program for demonstration purposes, some parts derived from
+ * kernel tree's samples/bpf/sockex2_kern.c example.
+ *
+ * More background on eBPF, kernel tree: Documentation/networking/filter.txt
+ *
+ * Note, this file is rather large, and most classifier and actions are
+ * likely smaller to accomplish one specific use-case and are tailored
+ * for high performance. For performance reasons, you might also have the
+ * classifier and action already merged inside the classifier.
+ *
+ * In order to show various features it serves as a bigger programming
+ * example, which you should feel free to rip apart and experiment with.
+ *
+ * Compilation, configuration example:
+ *
+ * Note: as long as the BPF backend in LLVM is still experimental,
+ * you need to build LLVM with --enable-experimental-targets=BPF
+ * Also, make sure your 4.1+ kernel is compiled with CONFIG_BPF_SYSCALL=y,
+ * and you have libelf.h and gelf.h headers and can link tc against -lelf.
+ *
+ * In case you need to sync kernel headers, go to your kernel source tree:
+ * # make headers_install INSTALL_HDR_PATH=/usr/
+ *
+ * $ export PATH=/home/<...>/llvm/Debug+Asserts/bin/:$PATH
+ * $ clang -O2 -emit-llvm -c bpf_prog.c -o - | llc -march=bpf -filetype=obj -o bpf.o
+ * $ objdump -h bpf.o
+ * [...]
+ * 3 classifier 000007f8 0000000000000000 0000000000000000 00000040 2**3
+ * CONTENTS, ALLOC, LOAD, RELOC, READONLY, CODE
+ * 4 action-mark 00000088 0000000000000000 0000000000000000 00000838 2**3
+ * CONTENTS, ALLOC, LOAD, RELOC, READONLY, CODE
+ * 5 action-rand 00000098 0000000000000000 0000000000000000 000008c0 2**3
+ * CONTENTS, ALLOC, LOAD, RELOC, READONLY, CODE
+ * 6 maps 00000030 0000000000000000 0000000000000000 00000958 2**2
+ * CONTENTS, ALLOC, LOAD, DATA
+ * 7 license 00000004 0000000000000000 0000000000000000 00000988 2**0
+ * CONTENTS, ALLOC, LOAD, DATA
+ * [...]
+ * # echo 1 > /proc/sys/net/core/bpf_jit_enable
+ * $ gcc bpf_agent.c -o bpf_agent -Wall -O2
+ * # ./bpf_agent /tmp/bpf-uds (e.g. on a different terminal)
+ * # tc filter add dev em1 parent 1: bpf obj bpf.o exp /tmp/bpf-uds flowid 1:1 \
+ * action bpf obj bpf.o sec action-mark \
+ * action bpf obj bpf.o sec action-rand ok
+ * # tc filter show dev em1
+ * filter parent 1: protocol all pref 49152 bpf
+ * filter parent 1: protocol all pref 49152 bpf handle 0x1 flowid 1:1 bpf.o:[classifier]
+ * action order 1: bpf bpf.o:[action-mark] default-action pipe
+ * index 52 ref 1 bind 1
+ *
+ * action order 2: bpf bpf.o:[action-rand] default-action pipe
+ * index 53 ref 1 bind 1
+ *
+ * action order 3: gact action pass
+ * random type none pass val 0
+ * index 38 ref 1 bind 1
+ *
+ * The same program can also be installed on ingress side (as opposed to above
+ * egress configuration), e.g.:
+ *
+ * # tc qdisc add dev em1 handle ffff: ingress
+ * # tc filter add dev em1 parent ffff: bpf obj ...
+ *
+ * Notes on BPF agent:
+ *
+ * In the above example, the bpf_agent creates the unix domain socket
+ * natively. "tc exec" can also spawn a shell and hold the sockets there:
+ *
+ * # tc exec bpf imp /tmp/bpf-uds
+ * # tc filter add dev em1 parent 1: bpf obj bpf.o exp /tmp/bpf-uds flowid 1:1 \
+ * action bpf obj bpf.o sec action-mark \
+ * action bpf obj bpf.o sec action-rand ok
+ * sh-4.2# (shell spawned from tc exec)
+ * sh-4.2# bpf_agent
+ * [...]
+ *
+ * This will read out fds over environment and produce the same data dump
+ * as below. This has the advantage that the spawned shell owns the fds
+ * and thus if the agent is restarted, it can reattach to the same fds, also
+ * various programs can easily read/modify the data simultaneously from user
+ * space side.
+ *
+ * If the shell is unnecessary, the agent can also just be spawned directly
+ * via tc exec:
+ *
+ * # tc exec bpf imp /tmp/bpf-uds run bpf_agent
+ * # tc filter add dev em1 parent 1: bpf obj bpf.o exp /tmp/bpf-uds flowid 1:1 \
+ * action bpf obj bpf.o sec action-mark \
+ * action bpf obj bpf.o sec action-rand ok
+ *
+ * BPF agent example output:
+ *
+ * ver: 1
+ * obj: bpf.o
+ * dev: 64770
+ * ino: 6045133
+ * maps: 3
+ * map0:
+ * `- fd: 4
+ * | serial: 1
+ * | type: 1
+ * | max elem: 256
+ * | size key: 1
+ * ` size val: 16
+ * map1:
+ * `- fd: 5
+ * | serial: 2
+ * | type: 1
+ * | max elem: 1024
+ * | size key: 4
+ * ` size val: 16
+ * map2:
+ * `- fd: 6
+ * | serial: 3
+ * | type: 2
+ * | max elem: 64
+ * | size key: 4
+ * ` size val: 8
+ * data, period: 5sec
+ * `- number of drops: cpu0: 0 cpu1: 0 cpu2: 0 cpu3: 0
+ * | nic queues: q0:[pkts: 0, mis: 0] q1:[pkts: 0, mis: 0] q2:[pkts: 0, mis: 0] q3:[pkts: 0, mis: 0]
+ * ` protos: tcp:[pkts: 0, bytes: 0] udp:[pkts: 0, bytes: 0] icmp:[pkts: 0, bytes: 0]
+ * data, period: 5sec
+ * `- number of drops: cpu0: 5 cpu1: 0 cpu2: 0 cpu3: 1
+ * | nic queues: q0:[pkts: 0, mis: 0] q1:[pkts: 0, mis: 0] q2:[pkts: 24, mis: 14] q3:[pkts: 0, mis: 0]
+ * ` protos: tcp:[pkts: 13, bytes: 1989] udp:[pkts: 10, bytes: 710] icmp:[pkts: 0, bytes: 0]
+ * data, period: 5sec
+ * `- number of drops: cpu0: 5 cpu1: 0 cpu2: 3 cpu3: 3
+ * | nic queues: q0:[pkts: 0, mis: 0] q1:[pkts: 0, mis: 0] q2:[pkts: 39, mis: 21] q3:[pkts: 0, mis: 0]
+ * ` protos: tcp:[pkts: 20, bytes: 3549] udp:[pkts: 18, bytes: 1278] icmp:[pkts: 0, bytes: 0]
+ * [...]
+ *
+ * This now means, the below classifier and action pipeline has been loaded
+ * as eBPF bytecode into the kernel, the kernel has verified that the
+ * execution of the bytecode is "safe", and it has JITed the programs
+ * afterwards, so that upon invocation they're running on native speed. tc
+ * has transferred all map file descriptors to the bpf_agent via IPC and
+ * even after tc exits, the agent can read out or modify all map data.
+ *
+ * Note that the export to the uds is done only once in the classifier and
+ * not in the action. It's enough to export the (here) shared descriptors
+ * once.
+ *
+ * If you need to disassemble the generated JIT image (echo with 2), the
+ * kernel tree has under tools/net/ a small helper, you can invoke e.g.
+ * `bpf_jit_disasm -o`.
+ *
+ * Please find in the code below further comments.
+ *
+ * -- Happy eBPF hacking! ;)
+ */
+#include <stdint.h>
+#include <stdbool.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <asm/types.h>
+#include <linux/in.h>
+#include <linux/if.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/if_tunnel.h>
+#include <linux/filter.h>
+#include <linux/bpf.h>
+
+/* Common, shared definitions with bpf_agent.c. */
+#include "bpf_shared.h"
+/* BPF helper functions for our example. */
+#include "../../include/bpf_api.h"
+
+/* Could be defined here as well, or included from the header. */
+#define TC_ACT_UNSPEC (-1)
+#define TC_ACT_OK 0
+#define TC_ACT_RECLASSIFY 1
+#define TC_ACT_SHOT 2
+#define TC_ACT_PIPE 3
+#define TC_ACT_STOLEN 4
+#define TC_ACT_QUEUED 5
+#define TC_ACT_REPEAT 6
+
+/* Other, misc stuff. */
+#define IP_MF 0x2000
+#define IP_OFFSET 0x1FFF
+
+/* eBPF map definitions, all placed in section "maps". */
+/* Per IP protocol packet/byte counters: keyed by the one byte
+ * protocol number, updated from cls_update_proto_map().
+ */
+struct bpf_elf_map __section("maps") map_proto = {
+ .type = BPF_MAP_TYPE_HASH,
+ .id = BPF_MAP_ID_PROTO,
+ .size_key = sizeof(uint8_t),
+ .size_value = sizeof(struct count_tuple),
+ .max_elem = 256,
+};
+
+/* Per queue counters: keyed by skb->queue_mapping, updated from
+ * cls_update_queue_map().
+ */
+struct bpf_elf_map __section("maps") map_queue = {
+ .type = BPF_MAP_TYPE_HASH,
+ .id = BPF_MAP_ID_QUEUE,
+ .size_key = sizeof(uint32_t),
+ .size_value = sizeof(struct count_queue),
+ .max_elem = 1024,
+};
+
+/* Drop counters: array indexed by the id of the executing cpu, see
+ * act_update_drop_map(). Each value is sizeof(long) bytes wide.
+ */
+struct bpf_elf_map __section("maps") map_drops = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .id = BPF_MAP_ID_DROPS,
+ .size_key = sizeof(uint32_t),
+ .size_value = sizeof(long),
+ .max_elem = 64,
+};
+
+/* Helper functions and definitions for the flow dissector used by the
+ * example classifier. This resembles the kernel's flow dissector to
+ * some extent and is just used as an example to show what's possible
+ * with eBPF.
+ */
+struct sockaddr;
+
+/* Layout of a VLAN tag as it is skipped by the flow dissector; only
+ * used here for offsetof()/sizeof() computations.
+ */
+struct vlan_hdr {
+ __be16 h_vlan_TCI;
+ __be16 h_vlan_encapsulated_proto; /* protocol of the encapsulated payload */
+};
+
+/* Result of flow_dissector(). */
+struct flow_keys {
+ __u32 src; /* IPv4 source address, or XOR-folded IPv6 source address */
+ __u32 dst; /* IPv4 destination address, or XOR-folded IPv6 one */
+ union {
+ __u32 ports; /* word loaded at th_off */
+ __u16 port16[2];
+ };
+ __s32 th_off; /* packet offset the ports word was loaded from */
+ __u8 ip_proto; /* protocol number found by the dissector */
+};
+
+/* Returns the offset, relative to the start of the header following
+ * the IP header, at which the ports word is read: 4 for IPPROTO_AH
+ * and 0 for every other protocol.
+ */
+static inline int flow_ports_offset(__u8 ip_proto)
+{
+	if (ip_proto == IPPROTO_AH)
+		return 4;
+
+	/* TCP, UDP, DCCP, ESP, SCTP, UDPLITE and anything unknown. */
+	return 0;
+}
+
+/* Tells whether the IPv4 header at nh_off has the IP_MF flag or a
+ * non-zero fragment offset set in its frag_off field.
+ */
+static inline bool flow_is_frag(struct __sk_buff *skb, int nh_off)
+{
+	__u16 frag_off = load_half(skb, nh_off +
+				   offsetof(struct iphdr, frag_off));
+
+	return (frag_off & (IP_MF | IP_OFFSET)) != 0;
+}
+
+/* Parses the IPv4 header located at nh_off.
+ *
+ * *ip_proto is set to the header's protocol field, or to 0 when
+ * flow_is_frag() reports a fragment. Unless the protocol is GRE,
+ * flow->src and flow->dst are filled from the address fields.
+ *
+ * Returns the offset right behind the IPv4 header, derived from the
+ * header length nibble.
+ */
+static inline int flow_parse_ipv4(struct __sk_buff *skb, int nh_off,
+				  __u8 *ip_proto, struct flow_keys *flow)
+{
+	__u8 vhl;
+
+	if (likely(!flow_is_frag(skb, nh_off)))
+		*ip_proto = load_byte(skb, nh_off + offsetof(struct iphdr,
+							     protocol));
+	else
+		*ip_proto = 0;
+
+	if (*ip_proto != IPPROTO_GRE) {
+		flow->src = load_word(skb, nh_off + offsetof(struct iphdr, saddr));
+		flow->dst = load_word(skb, nh_off + offsetof(struct iphdr, daddr));
+	}
+
+	/* First header byte: version in the high, ihl in the low nibble. */
+	vhl = load_byte(skb, nh_off + 0 /* offsetof(struct iphdr, ihl) */);
+
+	/* Fast path for the common 20 byte header without options. */
+	if (likely(vhl == 0x45))
+		return nh_off + 20;
+
+	return nh_off + ((vhl & 0xF) << 2);
+}
+
+/* Folds the 16 bytes starting at off into 32 bits by XOR-ing the
+ * four consecutive words.
+ */
+static inline __u32 flow_addr_hash_ipv6(struct __sk_buff *skb, int off)
+{
+	__u32 folded;
+
+	folded  = load_word(skb, off);
+	folded ^= load_word(skb, off + sizeof(__u32));
+	folded ^= load_word(skb, off + sizeof(__u32) * 2);
+	folded ^= load_word(skb, off + sizeof(__u32) * 3);
+
+	return folded;
+}
+
+/* Parses the IPv6 header located at nh_off: stores its nexthdr field
+ * in *ip_proto and the XOR-folded source/destination addresses in
+ * flow. Returns the offset right behind the fixed IPv6 header.
+ */
+static inline int flow_parse_ipv6(struct __sk_buff *skb, int nh_off,
+				  __u8 *ip_proto, struct flow_keys *flow)
+{
+	int off_next = nh_off + offsetof(struct ipv6hdr, nexthdr);
+	int off_src = nh_off + offsetof(struct ipv6hdr, saddr);
+	int off_dst = nh_off + offsetof(struct ipv6hdr, daddr);
+
+	*ip_proto = load_byte(skb, off_next);
+
+	flow->src = flow_addr_hash_ipv6(skb, off_src);
+	flow->dst = flow_addr_hash_ipv6(skb, off_dst);
+
+	return nh_off + sizeof(struct ipv6hdr);
+}
+
+/* Dissects the packet in skb and fills in flow.
+ *
+ * Up to one 802.1ad and one 802.1Q tag are skipped, then the IPv4 or
+ * IPv6 header is parsed. GRE, IPIP and IPv6-in-IP payloads are
+ * followed one level deep. Finally the word at the (possibly
+ * adjusted) transport header offset is stored as ports.
+ *
+ * Returns true when flow was filled in, false for non-IP packets
+ * and for GRE payloads that are neither IPv4 nor IPv6.
+ */
+static inline bool flow_dissector(struct __sk_buff *skb,
+				  struct flow_keys *flow)
+{
+	int nh_off = BPF_LL_OFF + ETH_HLEN;
+	__be16 proto = skb->protocol;
+	__u8 ip_proto;
+
+	/* proto is kept in network byte order and compared against
+	 * htons() constants, whereas load_half() yields a host order
+	 * value. Convert what was loaded, through a temporary so that
+	 * the htons() macro does not expand the load more than once.
+	 */
+
+	/* TODO: check for skb->vlan_tci, skb->vlan_proto first */
+	if (proto == htons(ETH_P_8021AD)) {
+		__u16 inner = load_half(skb, nh_off +
+					offsetof(struct vlan_hdr, h_vlan_encapsulated_proto));
+
+		proto = htons(inner);
+		nh_off += sizeof(struct vlan_hdr);
+	}
+	if (proto == htons(ETH_P_8021Q)) {
+		__u16 inner = load_half(skb, nh_off +
+					offsetof(struct vlan_hdr, h_vlan_encapsulated_proto));
+
+		proto = htons(inner);
+		nh_off += sizeof(struct vlan_hdr);
+	}
+
+	if (likely(proto == htons(ETH_P_IP)))
+		nh_off = flow_parse_ipv4(skb, nh_off, &ip_proto, flow);
+	else if (proto == htons(ETH_P_IPV6))
+		nh_off = flow_parse_ipv6(skb, nh_off, &ip_proto, flow);
+	else
+		return false;
+
+	switch (ip_proto) {
+	case IPPROTO_GRE: {
+		struct gre_hdr {
+			__be16 flags;
+			__be16 proto;
+		};
+
+		/* NOTE(review): the GRE_* flag constants come from
+		 * <linux/if_tunnel.h>; confirm they use the same byte
+		 * order as the value returned by load_half().
+		 */
+		__u16 gre_flags = load_half(skb, nh_off +
+					    offsetof(struct gre_hdr, flags));
+		__u16 gre_proto = load_half(skb, nh_off +
+					    offsetof(struct gre_hdr, proto));
+
+		if (gre_flags & (GRE_VERSION | GRE_ROUTING))
+			break;
+
+		/* Base header plus one word per optional field. */
+		nh_off += 4;
+		if (gre_flags & GRE_CSUM)
+			nh_off += 4;
+		if (gre_flags & GRE_KEY)
+			nh_off += 4;
+		if (gre_flags & GRE_SEQ)
+			nh_off += 4;
+
+		/* gre_proto stays in host order, hence no htons() here. */
+		if (gre_proto == ETH_P_8021Q) {
+			gre_proto = load_half(skb, nh_off +
+					      offsetof(struct vlan_hdr,
+						       h_vlan_encapsulated_proto));
+			nh_off += sizeof(struct vlan_hdr);
+		}
+		if (gre_proto == ETH_P_IP)
+			nh_off = flow_parse_ipv4(skb, nh_off, &ip_proto, flow);
+		else if (gre_proto == ETH_P_IPV6)
+			nh_off = flow_parse_ipv6(skb, nh_off, &ip_proto, flow);
+		else
+			return false;
+		break;
+	}
+	case IPPROTO_IPIP:
+		nh_off = flow_parse_ipv4(skb, nh_off, &ip_proto, flow);
+		break;
+	case IPPROTO_IPV6:
+		nh_off = flow_parse_ipv6(skb, nh_off, &ip_proto, flow);
+		break;
+	default:
+		break;
+	}
+
+	nh_off += flow_ports_offset(ip_proto);
+
+	flow->ports = load_word(skb, nh_off);
+	flow->th_off = nh_off;
+	flow->ip_proto = ip_proto;
+
+	return true;
+}
+
+/* Accounts the packet in map_proto under the flow's IP protocol:
+ * an existing entry gets its packet and byte counters bumped
+ * atomically, otherwise a fresh entry is inserted.
+ */
+static inline void cls_update_proto_map(const struct __sk_buff *skb,
+					const struct flow_keys *flow)
+{
+	struct count_tuple *hit, fresh;
+	uint8_t key = flow->ip_proto;
+
+	hit = map_lookup_elem(&map_proto, &key);
+	if (unlikely(!hit)) {
+		/* First packet of this protocol, create the entry. */
+		fresh.packets = 1;
+		fresh.bytes = skb->len;
+
+		map_update_elem(&map_proto, &key, &fresh, BPF_ANY);
+		return;
+	}
+
+	lock_xadd(&hit->packets, 1);
+	lock_xadd(&hit->bytes, skb->len);
+}
+
+/* Accounts the packet in map_queue under skb->queue_mapping. Besides
+ * the total, a separate counter tracks packets whose queue mapping
+ * differs from the id of the processing cpu.
+ */
+static inline void cls_update_queue_map(const struct __sk_buff *skb)
+{
+	struct count_queue *hit, fresh;
+	uint32_t key = skb->queue_mapping;
+	bool off_cpu;
+
+	off_cpu = skb->queue_mapping != get_smp_processor_id();
+
+	hit = map_lookup_elem(&map_queue, &key);
+	if (unlikely(!hit)) {
+		/* First packet on this queue, create the entry. */
+		fresh.total = 1;
+		fresh.mismatch = off_cpu ? 1 : 0;
+
+		map_update_elem(&map_queue, &key, &fresh, BPF_ANY);
+		return;
+	}
+
+	lock_xadd(&hit->total, 1);
+	if (off_cpu)
+		lock_xadd(&hit->mismatch, 1);
+}
+
+/* eBPF program definitions, placed in various sections, which can
+ * have custom section names. If custom names are in use, it's
+ * required to point tc to the correct section, e.g.
+ *
+ * tc filter add [...] bpf obj cls.o sec cls-tos [...]
+ *
+ * in case the program resides in __section("cls-tos").
+ *
+ * Default section for cls_bpf is: "classifier", for act_bpf is:
+ * "action". Naturally, if for example multiple actions are present
+ * in the same file, they need to have distinct section names.
+ *
+ * It is however not required to have multiple programs sharing
+ * a file.
+ */
+/* Classifier: dissects the packet, updates the protocol and queue
+ * statistics and returns the IP protocol number that was found.
+ * Packets the dissector rejects yield 0.
+ */
+__section("classifier")
+int cls_main(struct __sk_buff *skb)
+{
+	struct flow_keys keys;
+
+	if (flow_dissector(skb, &keys)) {
+		cls_update_proto_map(skb, &keys);
+		cls_update_queue_map(skb);
+
+		return keys.ip_proto;
+	}
+
+	return 0; /* No match in cls_bpf. */
+}
+
+/* Increments the drop counter of the current cpu in map_drops.
+ *
+ * The counter is accessed as long since map_drops is declared with
+ * .size_value = sizeof(long); going through a narrower type would
+ * only touch part of the stored value.
+ */
+static inline void act_update_drop_map(void)
+{
+	uint32_t cpu = get_smp_processor_id();
+	long *count;
+
+	count = map_lookup_elem(&map_drops, &cpu);
+	if (count)
+		/* Only this cpu is accessing this element. */
+		(*count)++;
+}
+
+/* Action: drops (and accounts) packets whose mark equals 0xcafe and
+ * returns TC_ACT_UNSPEC for everything else.
+ */
+__section("action-mark")
+int act_mark_main(struct __sk_buff *skb)
+{
+	/* skb data could be mangled at this point as well, e.g. with
+	 * the helper function BPF_FUNC_skb_store_bytes, or it could
+	 * already be done by the classifier itself when classifier
+	 * and action are merged into one program.
+	 */
+
+	if (skb->mark != 0xcafe)
+		return TC_ACT_UNSPEC; /* Default configured tc opcode. */
+
+	act_update_drop_map();
+	return TC_ACT_SHOT;
+}
+
+/* Action: drops (and accounts) a packet whenever the two low bits of
+ * a pseudo-random number are both zero, otherwise returns
+ * TC_ACT_UNSPEC.
+ */
+__section("action-rand")
+int act_rand_main(struct __sk_buff *skb)
+{
+	if ((get_prandom_u32() & 3) != 0)
+		return TC_ACT_UNSPEC;
+
+	act_update_drop_map();
+	return TC_ACT_SHOT;
+}
+
+/* Last but not least, the file contains a license. Some future helper
+ * functions may only be available with a GPL license.
+ */
+BPF_LICENSE("GPL");
diff --git a/examples/bpf/bpf_shared.c b/examples/bpf/bpf_shared.c
new file mode 100644
index 00000000..accc0adf
--- /dev/null
+++ b/examples/bpf/bpf_shared.c
@@ -0,0 +1,48 @@
+#include "../../include/bpf_api.h"
+
+/* Minimal, stand-alone toy map pinning example:
+ *
+ * clang -target bpf -O2 [...] -o bpf_shared.o -c bpf_shared.c
+ * tc filter add dev foo parent 1: bpf obj bpf_shared.o sec egress
+ * tc filter add dev foo parent ffff: bpf obj bpf_shared.o sec ingress
+ *
+ * Both classifiers will share the very same map instance in this example,
+ * so map content can be accessed from ingress *and* egress side!
+ *
+ * This example has a pinning of PIN_OBJECT_NS, so it's private and
+ * thus shared among various program sections within the object.
+ *
+ * A setting of PIN_GLOBAL_NS would place it into a global namespace,
+ * so that it can be shared among different object files. A setting
+ * of PIN_NONE (= 0) means no sharing, so each tc invocation a new map
+ * instance is being created.
+ */
+
+BPF_ARRAY4(map_sh, 0, PIN_OBJECT_NS, 1); /* or PIN_GLOBAL_NS, or PIN_NONE */
+
+/* Egress side: atomically increments element 0 of the shared map. */
+__section("egress")
+int emain(struct __sk_buff *skb)
+{
+	int slot = 0;
+	int *counter = map_lookup_elem(&map_sh, &slot);
+
+	if (counter)
+		lock_xadd(counter, 1);
+
+	return BPF_H_DEFAULT;
+}
+
+/* Ingress side: prints the current value of element 0 of the shared
+ * map via trace_printk().
+ */
+__section("ingress")
+int imain(struct __sk_buff *skb)
+{
+	char msg[] = "map val: %d\n";
+	int slot = 0;
+	int *counter = map_lookup_elem(&map_sh, &slot);
+
+	if (counter)
+		trace_printk(msg, sizeof(msg), *counter);
+
+	return BPF_H_DEFAULT;
+}
+
+BPF_LICENSE("GPL");
diff --git a/examples/bpf/bpf_shared.h b/examples/bpf/bpf_shared.h
new file mode 100644
index 00000000..a24038dd
--- /dev/null
+++ b/examples/bpf/bpf_shared.h
@@ -0,0 +1,22 @@
+#ifndef __BPF_SHARED__
+#define __BPF_SHARED__
+
+/* Identifiers of the example maps, used as .id in the map
+ * definitions of bpf_prog.c. BPF_MAP_ID_MAX is the number of
+ * identifiers and is used to size fd arrays.
+ */
+enum {
+ BPF_MAP_ID_PROTO,
+ BPF_MAP_ID_QUEUE,
+ BPF_MAP_ID_DROPS,
+ __BPF_MAP_ID_MAX,
+#define BPF_MAP_ID_MAX __BPF_MAP_ID_MAX
+};
+
+/* Packet and byte counters; the value type of map_proto in
+ * bpf_prog.c.
+ */
+struct count_tuple {
+ long packets; /* type long for lock_xadd() */
+ long bytes;
+};
+
+/* Per queue counters; the value type of map_queue in bpf_prog.c. */
+struct count_queue {
+ long total; /* all packets seen for the queue */
+ long mismatch; /* packets whose queue mapping differed from the cpu id */
+};
+
+#endif /* __BPF_SHARED__ */
diff --git a/examples/bpf/bpf_sys.h b/examples/bpf/bpf_sys.h
new file mode 100644
index 00000000..6e4f09e2
--- /dev/null
+++ b/examples/bpf/bpf_sys.h
@@ -0,0 +1,23 @@
+#ifndef __BPF_SYS__
+#define __BPF_SYS__
+
+#include <sys/syscall.h>
+#include <linux/bpf.h>
+
+/* Converts a pointer into the 64 bit integer form in which addresses
+ * are stored in union bpf_attr.
+ */
+static inline __u64 bpf_ptr_to_u64(const void *ptr)
+{
+	unsigned long addr = (unsigned long) ptr;
+
+	return (__u64) addr;
+}
+
+/* Issues a BPF_MAP_LOOKUP_ELEM command for the map referred to by
+ * fd, passing key and value as the key/value addresses of the
+ * request. Returns the result of the bpf syscall.
+ */
+static inline int bpf_lookup_elem(int fd, void *key, void *value)
+{
+	union bpf_attr lookup = {
+		.map_fd	= fd,
+		.key	= bpf_ptr_to_u64(key),
+		.value	= bpf_ptr_to_u64(value),
+	};
+	long rc;
+
+	rc = syscall(__NR_bpf, BPF_MAP_LOOKUP_ELEM, &lookup, sizeof(lookup));
+
+	return rc;
+}
+
+#endif /* __BPF_SYS__ */
diff --git a/examples/bpf/bpf_tailcall.c b/examples/bpf/bpf_tailcall.c
new file mode 100644
index 00000000..040790d0
--- /dev/null
+++ b/examples/bpf/bpf_tailcall.c
@@ -0,0 +1,99 @@
+#include "../../include/bpf_api.h"
+
+#define ENTRY_INIT 3
+#define ENTRY_0 0
+#define ENTRY_1 1
+#define MAX_JMP_SIZE 2
+
+#define FOO 42
+#define BAR 43
+
+/* This example doesn't really do anything useful, but its purpose is to
+ * demonstrate eBPF tail calls on a very simple example.
+ *
+ * cls_entry() is our classifier entry point, from there we jump based on
+ * skb->hash into cls_case1() or cls_case2(). They are both part of the
+ * program array jmp_tc. Indicated via __section_tail(), the tc loader
+ * populates the program arrays with the loaded file descriptors already.
+ *
+ * To demonstrate nested jumps, cls_case2() jumps within the same jmp_tc
+ * array to cls_case1(). And whenever we arrive at cls_case1(), we jump
+ * into cls_exit(), part of the jump array jmp_ex.
+ *
+ * Also, to show it's possible, all programs share map_sh and dump the value
+ * that the entry point incremented. The sections that are loaded into a
+ * program array can be atomically replaced during run-time, e.g. to change
+ * classifier behaviour.
+ */
+
+BPF_PROG_ARRAY(jmp_tc, FOO, PIN_OBJECT_NS, MAX_JMP_SIZE);
+BPF_PROG_ARRAY(jmp_ex, BAR, PIN_OBJECT_NS, 1);
+
+BPF_ARRAY4(map_sh, 0, PIN_OBJECT_NS, 1);
+
+/* jmp_tc slot ENTRY_0: prints the shared map value together with
+ * the origin recorded in skb->cb[0], records itself as the new
+ * origin and tail calls into jmp_ex slot ENTRY_0.
+ */
+__section_tail(FOO, ENTRY_0)
+int cls_case1(struct __sk_buff *skb)
+{
+	char msg[] = "case1: map-val: %d from:%u\n";
+	int slot = 0;
+	int *counter = map_lookup_elem(&map_sh, &slot);
+
+	if (counter)
+		trace_printk(msg, sizeof(msg), *counter, skb->cb[0]);
+
+	skb->cb[0] = ENTRY_0;
+	tail_call(skb, &jmp_ex, ENTRY_0);
+
+	return BPF_H_DEFAULT;
+}
+
+/* jmp_tc slot ENTRY_1: prints the shared map value together with
+ * the origin recorded in skb->cb[0], records itself as the new
+ * origin and tail calls into jmp_tc slot ENTRY_0.
+ */
+__section_tail(FOO, ENTRY_1)
+int cls_case2(struct __sk_buff *skb)
+{
+	char msg[] = "case2: map-val: %d from:%u\n";
+	int slot = 0;
+	int *counter = map_lookup_elem(&map_sh, &slot);
+
+	if (counter)
+		trace_printk(msg, sizeof(msg), *counter, skb->cb[0]);
+
+	skb->cb[0] = ENTRY_1;
+	tail_call(skb, &jmp_tc, ENTRY_0);
+
+	return BPF_H_DEFAULT;
+}
+
+/* jmp_ex slot ENTRY_0: prints the shared map value together with
+ * the origin recorded in skb->cb[0]; no further tail call is made
+ * from here.
+ */
+__section_tail(BAR, ENTRY_0)
+int cls_exit(struct __sk_buff *skb)
+{
+	char msg[] = "exit: map-val: %d from:%u\n";
+	int slot = 0;
+	int *counter = map_lookup_elem(&map_sh, &slot);
+
+	if (counter)
+		trace_printk(msg, sizeof(msg), *counter, skb->cb[0]);
+
+	/* End of the chain. */
+	return BPF_H_DEFAULT;
+}
+
+/* Classifier entry point: increments the shared map value, records
+ * ENTRY_INIT as origin and tail calls into the jmp_tc slot selected
+ * by the low bits of skb->hash. The trailing statements print
+ * "fallthrough" and return BPF_H_DEFAULT.
+ */
+__section_cls_entry
+int cls_entry(struct __sk_buff *skb)
+{
+	char msg[] = "fallthrough\n";
+	int slot = 0;
+	int *counter;
+
+	/* State is handed over to the callees in skb->cb[0] ... skb->cb[4]. */
+	counter = map_lookup_elem(&map_sh, &slot);
+	if (counter) {
+		lock_xadd(counter, 1);
+
+		skb->cb[0] = ENTRY_INIT;
+		tail_call(skb, &jmp_tc, skb->hash & (MAX_JMP_SIZE - 1));
+	}
+
+	trace_printk(msg, sizeof(msg));
+	return BPF_H_DEFAULT;
+}
+
+BPF_LICENSE("GPL");
diff --git a/examples/cbq.init-v0.7.3 b/examples/cbq.init-v0.7.3
index 35a0a05e..1bc0d446 100644
--- a/examples/cbq.init-v0.7.3
+++ b/examples/cbq.init-v0.7.3
@@ -578,14 +578,14 @@ cbq_show () {
### Check configuration and load DEVICES, DEVFIELDS and CLASSLIST from $1
cbq_init () {
### Get a list of configured classes
- CLASSLIST=`find $1 \( -type f -or -type l \) -name 'cbq-*' \
- -not -name '*~' -maxdepth 1 -printf "%f\n"| sort`
+ CLASSLIST=`find $1 -maxdepth 1 \( -type f -or -type l \) -name 'cbq-*' \
+ -not -name '*~' -printf "%f\n"| sort`
[ -z "$CLASSLIST" ] &&
cbq_failure "no configuration files found in $1!"
### Gather all DEVICE fields from $1/cbq-*
- DEVFIELDS=`find $1 \( -type f -or -type l \) -name 'cbq-*' \
- -not -name '*~' -maxdepth 1| xargs sed -n 's/#.*//; \
+ DEVFIELDS=`find $1 -maxdepth 1 \( -type f -or -type l \) -name 'cbq-*' \
+ -not -name '*~' | xargs sed -n 's/#.*//; \
s/[[:space:]]//g; /^DEVICE=[^,]*,[^,]*\(,[^,]*\)\?/ \
{ s/.*=//; p; }'| sort -u`
[ -z "$DEVFIELDS" ] &&
diff --git a/genl/ctrl.c b/genl/ctrl.c
index 35461290..b7a8878c 100644
--- a/genl/ctrl.c
+++ b/genl/ctrl.c
@@ -67,7 +67,7 @@ int genl_ctrl_resolve_family(const char *family)
addattr_l(nlh, 128, CTRL_ATTR_FAMILY_NAME, family, strlen(family) + 1);
- if (rtnl_talk(&rth, nlh, 0, 0, nlh) < 0) {
+ if (rtnl_talk(&rth, nlh, nlh, sizeof(req)) < 0) {
fprintf(stderr, "Error talking to the kernel\n");
goto errout;
}
@@ -177,8 +177,9 @@ static int print_ctrl_grp(FILE *fp, struct rtattr *arg, __u32 ctrl_ver)
/*
* The controller sends one nlmsg per family
*/
-static int print_ctrl(const struct sockaddr_nl *who, struct nlmsghdr *n,
- void *arg)
+static int print_ctrl(const struct sockaddr_nl *who,
+ struct rtnl_ctrl_data *ctrl,
+ struct nlmsghdr *n, void *arg)
{
struct rtattr *tb[CTRL_ATTR_MAX + 1];
struct genlmsghdr *ghdr = NLMSG_DATA(n);
@@ -281,6 +282,12 @@ static int print_ctrl(const struct sockaddr_nl *who, struct nlmsghdr *n,
return 0;
}
+/* Three-argument adapter around print_ctrl(): forwards who, n and
+ * arg unchanged and passes NULL for the ctrl argument.
+ */
+static int print_ctrl2(const struct sockaddr_nl *who,
+ struct nlmsghdr *n, void *arg)
+{
+ return print_ctrl(who, NULL, n, arg);
+}
+
static int ctrl_list(int cmd, int argc, char **argv)
{
struct rtnl_handle rth;
@@ -334,12 +341,12 @@ static int ctrl_list(int cmd, int argc, char **argv)
goto ctrl_done;
}
- if (rtnl_talk(&rth, nlh, 0, 0, nlh) < 0) {
+ if (rtnl_talk(&rth, nlh, nlh, sizeof(req)) < 0) {
fprintf(stderr, "Error talking to the kernel\n");
goto ctrl_done;
}
- if (print_ctrl(NULL, nlh, (void *) stdout) < 0) {
+ if (print_ctrl2(NULL, nlh, (void *) stdout) < 0) {
fprintf(stderr, "Dump terminated\n");
goto ctrl_done;
}
@@ -355,7 +362,7 @@ static int ctrl_list(int cmd, int argc, char **argv)
goto ctrl_done;
}
- rtnl_dump_filter(&rth, print_ctrl, stdout);
+ rtnl_dump_filter(&rth, print_ctrl2, stdout);
}
@@ -408,5 +415,5 @@ static int parse_ctrl(struct genl_util *a, int argc, char **argv)
struct genl_util ctrl_genl_util = {
.name = "ctrl",
.parse_genlopt = parse_ctrl,
- .print_genlopt = print_ctrl,
+ .print_genlopt = print_ctrl2,
};
diff --git a/genl/genl.c b/genl/genl.c
index 49b65960..e33fafdf 100644
--- a/genl/genl.c
+++ b/genl/genl.c
@@ -54,7 +54,7 @@ static int parse_nofopt(struct genl_util *f, int argc, char **argv)
return 0;
}
-static struct genl_util *get_genl_kind(char *str)
+static struct genl_util *get_genl_kind(const char *str)
{
void *dlh;
char buf[256];
diff --git a/include/SNAPSHOT.h b/include/SNAPSHOT.h
index 8bd0c561..58d36327 100644
--- a/include/SNAPSHOT.h
+++ b/include/SNAPSHOT.h
@@ -1 +1 @@
-static const char SNAPSHOT[] = "150413";
+static const char SNAPSHOT[] = "160111";
diff --git a/include/bpf_api.h b/include/bpf_api.h
new file mode 100644
index 00000000..0666a312
--- /dev/null
+++ b/include/bpf_api.h
@@ -0,0 +1,225 @@
+#ifndef __BPF_API__
+#define __BPF_API__
+
+/* Note:
+ *
+ * This file can be included into eBPF kernel programs. It contains
+ * a couple of useful helper functions, map/section ABI (bpf_elf.h),
+ * misc macros and some eBPF specific LLVM built-ins.
+ */
+
+#include <stdint.h>
+
+#include <linux/pkt_cls.h>
+#include <linux/bpf.h>
+#include <linux/filter.h>
+
+#include <asm/byteorder.h>
+
+#include "bpf_elf.h"
+
+/** Misc macros. */
+
+#ifndef __stringify
+# define __stringify(X) #X
+#endif
+
+#ifndef __maybe_unused
+# define __maybe_unused __attribute__((__unused__))
+#endif
+
+#ifndef offsetof
+# define offsetof(TYPE, MEMBER) __builtin_offsetof(TYPE, MEMBER)
+#endif
+
+#ifndef likely
+# define likely(X) __builtin_expect(!!(X), 1)
+#endif
+
+#ifndef unlikely
+# define unlikely(X) __builtin_expect(!!(X), 0)
+#endif
+
+#ifndef htons
+# define htons(X) __constant_htons((X))
+#endif
+
+#ifndef ntohs
+# define ntohs(X) __constant_ntohs((X))
+#endif
+
+#ifndef htonl
+# define htonl(X) __constant_htonl((X))
+#endif
+
+#ifndef ntohl
+# define ntohl(X) __constant_ntohl((X))
+#endif
+
+/** Section helper macros. */
+
+#ifndef __section
+# define __section(NAME) \
+ __attribute__((section(NAME), used))
+#endif
+
+#ifndef __section_tail
+# define __section_tail(ID, KEY) \
+ __section(__stringify(ID) "/" __stringify(KEY))
+#endif
+
+#ifndef __section_cls_entry
+# define __section_cls_entry \
+ __section(ELF_SECTION_CLASSIFIER)
+#endif
+
+#ifndef __section_act_entry
+# define __section_act_entry \
+ __section(ELF_SECTION_ACTION)
+#endif
+
+#ifndef __section_license
+# define __section_license \
+ __section(ELF_SECTION_LICENSE)
+#endif
+
+#ifndef __section_maps
+# define __section_maps \
+ __section(ELF_SECTION_MAPS)
+#endif
+
+/** Declaration helper macros. */
+
+#ifndef BPF_LICENSE
+# define BPF_LICENSE(NAME) \
+ char ____license[] __section_license = NAME
+#endif
+
+#ifndef __BPF_MAP
+# define __BPF_MAP(NAME, TYPE, ID, SIZE_KEY, SIZE_VALUE, PIN, MAX_ELEM) \
+ struct bpf_elf_map __section_maps NAME = { \
+ .type = (TYPE), \
+ .id = (ID), \
+ .size_key = (SIZE_KEY), \
+ .size_value = (SIZE_VALUE), \
+ .pinning = (PIN), \
+ .max_elem = (MAX_ELEM), \
+ }
+#endif
+
+#ifndef BPF_HASH
+# define BPF_HASH(NAME, ID, SIZE_KEY, SIZE_VALUE, PIN, MAX_ELEM) \
+ __BPF_MAP(NAME, BPF_MAP_TYPE_HASH, ID, SIZE_KEY, SIZE_VALUE, \
+ PIN, MAX_ELEM)
+#endif
+
+#ifndef BPF_ARRAY
+# define BPF_ARRAY(NAME, ID, SIZE_VALUE, PIN, MAX_ELEM) \
+ __BPF_MAP(NAME, BPF_MAP_TYPE_ARRAY, ID, sizeof(uint32_t), \
+ SIZE_VALUE, PIN, MAX_ELEM)
+#endif
+
+#ifndef BPF_ARRAY2
+# define BPF_ARRAY2(NAME, ID, PIN, MAX_ELEM) \
+ BPF_ARRAY(NAME, ID, sizeof(uint16_t), PIN, MAX_ELEM)
+#endif
+
+#ifndef BPF_ARRAY4
+# define BPF_ARRAY4(NAME, ID, PIN, MAX_ELEM) \
+ BPF_ARRAY(NAME, ID, sizeof(uint32_t), PIN, MAX_ELEM)
+#endif
+
+#ifndef BPF_ARRAY8
+# define BPF_ARRAY8(NAME, ID, PIN, MAX_ELEM) \
+ BPF_ARRAY(NAME, ID, sizeof(uint64_t), PIN, MAX_ELEM)
+#endif
+
+#ifndef BPF_PROG_ARRAY
+# define BPF_PROG_ARRAY(NAME, ID, PIN, MAX_ELEM) \
+ __BPF_MAP(NAME, BPF_MAP_TYPE_PROG_ARRAY, ID, sizeof(uint32_t), \
+ sizeof(uint32_t), PIN, MAX_ELEM)
+#endif
+
+/** Classifier helper */
+
+#ifndef BPF_H_DEFAULT
+/* Parenthesized so that the negative constant always expands as a
+ * single operand, whatever expression it is used in.
+ */
+# define BPF_H_DEFAULT	(-1)
+#endif
+
+/** BPF helper functions for tc. */
+
+#ifndef BPF_FUNC
+/* Expands to the declarator of a function pointer called NAME that
+ * takes the given arguments and is initialized with the constant
+ * BPF_FUNC_<NAME> cast to a pointer. The return type (and storage
+ * class) is written in front of the macro at the use site.
+ */
+# define BPF_FUNC(NAME, ...) \
+ (* NAME)(__VA_ARGS__) __maybe_unused = (void *) BPF_FUNC_##NAME
+#endif
+
+/* Map access/manipulation */
+static void *BPF_FUNC(map_lookup_elem, void *map, const void *key);
+static int BPF_FUNC(map_update_elem, void *map, const void *key,
+ const void *value, uint32_t flags);
+static int BPF_FUNC(map_delete_elem, void *map, const void *key);
+
+/* Time access */
+static uint64_t BPF_FUNC(ktime_get_ns);
+
+/* Debugging */
+static void BPF_FUNC(trace_printk, const char *fmt, int fmt_size, ...);
+
+/* Random numbers */
+static uint32_t BPF_FUNC(get_prandom_u32);
+
+/* Tail calls */
+static void BPF_FUNC(tail_call, struct __sk_buff *skb, void *map,
+ uint32_t index);
+
+/* System helpers */
+static uint32_t BPF_FUNC(get_smp_processor_id);
+
+/* Packet misc meta data */
+static uint32_t BPF_FUNC(get_cgroup_classid, struct __sk_buff *skb);
+static uint32_t BPF_FUNC(get_route_realm, struct __sk_buff *skb);
+
+/* Packet redirection */
+static int BPF_FUNC(redirect, int ifindex, uint32_t flags);
+static int BPF_FUNC(clone_redirect, struct __sk_buff *skb, int ifindex,
+ uint32_t flags);
+
+/* Packet manipulation */
+#define BPF_PSEUDO_HDR 0x10
+#define BPF_HAS_PSEUDO_HDR(flags) ((flags) & BPF_PSEUDO_HDR)
+#define BPF_HDR_FIELD_SIZE(flags) ((flags) & 0x0f)
+
+static int BPF_FUNC(skb_store_bytes, struct __sk_buff *skb, uint32_t off,
+ void *from, uint32_t len, uint32_t flags);
+static int BPF_FUNC(l3_csum_replace, struct __sk_buff *skb, uint32_t off,
+ uint32_t from, uint32_t to, uint32_t flags);
+static int BPF_FUNC(l4_csum_replace, struct __sk_buff *skb, uint32_t off,
+ uint32_t from, uint32_t to, uint32_t flags);
+
+/* Packet vlan encap/decap */
+static int BPF_FUNC(skb_vlan_push, struct __sk_buff *skb, uint16_t proto,
+ uint16_t vlan_tci);
+static int BPF_FUNC(skb_vlan_pop, struct __sk_buff *skb);
+
+/* Packet tunnel encap/decap */
+static int BPF_FUNC(skb_get_tunnel_key, struct __sk_buff *skb,
+ struct bpf_tunnel_key *to, uint32_t size, uint32_t flags);
+static int BPF_FUNC(skb_set_tunnel_key, struct __sk_buff *skb,
+ struct bpf_tunnel_key *from, uint32_t size, uint32_t flags);
+
+/** LLVM built-ins */
+
+#ifndef lock_xadd
+# define lock_xadd(ptr, val) ((void) __sync_fetch_and_add(ptr, val))
+#endif
+
+unsigned long long load_byte(void *skb, unsigned long long off)
+ asm ("llvm.bpf.load.byte");
+
+unsigned long long load_half(void *skb, unsigned long long off)
+ asm ("llvm.bpf.load.half");
+
+unsigned long long load_word(void *skb, unsigned long long off)
+ asm ("llvm.bpf.load.word");
+
+#endif /* __BPF_API__ */
diff --git a/include/bpf_elf.h b/include/bpf_elf.h
new file mode 100644
index 00000000..31a89743
--- /dev/null
+++ b/include/bpf_elf.h
@@ -0,0 +1,39 @@
+#ifndef __BPF_ELF__
+#define __BPF_ELF__
+
+#include <asm/types.h>
+
+/* Note:
+ *
+ * Below ELF section names and bpf_elf_map structure definition
+ * are not (!) kernel ABI. It's rather a "contract" between the
+ * application and the BPF loader in tc. For compatibility, the
+ * section names should stay as-is. Introducing aliases, if needed,
+ * is a possibility, though.
+ */
+
+/* ELF section names, etc */
+#define ELF_SECTION_LICENSE "license"
+#define ELF_SECTION_MAPS "maps"
+#define ELF_SECTION_CLASSIFIER "classifier"
+#define ELF_SECTION_ACTION "action"
+
+#define ELF_MAX_MAPS 64
+#define ELF_MAX_LICENSE_LEN 128
+
+/* Object pinning settings */
+#define PIN_NONE 0
+#define PIN_OBJECT_NS 1
+#define PIN_GLOBAL_NS 2
+
+/* ELF map definition
+ *
+ * Six 32-bit fields describing one map. As the note near the top of
+ * this header states, the layout is a contract between the application
+ * and the BPF loader in tc, not kernel ABI.
+ */
+struct bpf_elf_map {
+ __u32 type; /* NOTE(review): presumably a kernel BPF map type id -- confirm */
+ __u32 size_key; /* NOTE(review): presumably key size in bytes -- confirm */
+ __u32 size_value; /* NOTE(review): presumably value size in bytes -- confirm */
+ __u32 max_elem; /* NOTE(review): presumably the maximum number of entries -- confirm */
+ __u32 id;
+ __u32 pinning; /* NOTE(review): presumably one of the PIN_* values above -- confirm */
+};
+
+#endif /* __BPF_ELF__ */
diff --git a/include/bpf_scm.h b/include/bpf_scm.h
new file mode 100644
index 00000000..35117d11
--- /dev/null
+++ b/include/bpf_scm.h
@@ -0,0 +1,75 @@
+#ifndef __BPF_SCM__
+#define __BPF_SCM__
+
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#include "utils.h"
+#include "bpf_elf.h"
+
+#define BPF_SCM_AUX_VER 1
+#define BPF_SCM_MAX_FDS ELF_MAX_MAPS
+#define BPF_SCM_MSG_SIZE 1024
+
+struct bpf_elf_st { /* device/inode pair; embedded in bpf_map_aux as obj_st */
+ dev_t st_dev; /* NOTE(review): presumably st_dev of the object file as reported by stat() -- confirm */
+ ino_t st_ino; /* NOTE(review): presumably the matching st_ino -- confirm */
+};
+
+struct bpf_map_aux { /* data payload: bpf_map_set_init() points the iovec at this member of the message */
+ unsigned short uds_ver; /* NOTE(review): presumably set to BPF_SCM_AUX_VER -- confirm */
+ unsigned short num_ent; /* NOTE(review): presumably the number of ent[] slots in use -- confirm */
+ char obj_name[64];
+ struct bpf_elf_st obj_st;
+ struct bpf_elf_map ent[BPF_SCM_MAX_FDS]; /* last member: bpf_map_set_init_single() trims iov_len to offsetof(ent) plus num entries */
+};
+
+struct bpf_map_set_msg { /* message header plus the buffers it references; wired up by bpf_map_set_init() */
+ struct msghdr hdr; /* msg_iov -> iov, msg_control -> msg_buf, msg_name -> caller's address */
+ struct iovec iov; /* single data segment covering aux */
+ char msg_buf[BPF_SCM_MSG_SIZE]; /* control buffer holding the SCM_RIGHTS header and descriptors */
+ struct bpf_map_aux aux; /* data payload */
+};
+
+/*
+ * bpf_map_set_init - wire up @msg for passing descriptors via SCM_RIGHTS
+ * @msg:      message container to initialize
+ * @addr:     peer address, stored as given in msg_name
+ * @addr_len: length of @addr, stored in msg_namelen
+ *
+ * Points the single iovec at the whole msg->aux payload, uses
+ * msg->msg_buf as the control buffer and prepares one SOL_SOCKET /
+ * SCM_RIGHTS control header sized for BPF_SCM_MAX_FDS descriptors.
+ *
+ * The control length is computed with CMSG_LEN() so that it accounts
+ * for the cmsghdr itself; the bare payload size would describe room
+ * for fewer than BPF_SCM_MAX_FDS descriptors and would disagree with
+ * bpf_map_set_init_single(), which also uses CMSG_LEN().
+ *
+ * Returns a pointer to the descriptor array inside the control buffer.
+ */
+static inline int *bpf_map_set_init(struct bpf_map_set_msg *msg,
+				    struct sockaddr_un *addr,
+				    unsigned int addr_len)
+{
+	const unsigned int cmsg_ctl_len =
+		CMSG_LEN(sizeof(int) * BPF_SCM_MAX_FDS);
+	struct cmsghdr *cmsg;
+
+	/* Data part: one segment spanning the complete aux structure. */
+	msg->iov.iov_base = &msg->aux;
+	msg->iov.iov_len = sizeof(msg->aux);
+
+	msg->hdr.msg_iov = &msg->iov;
+	msg->hdr.msg_iovlen = 1;
+
+	msg->hdr.msg_name = (struct sockaddr *)addr;
+	msg->hdr.msg_namelen = addr_len;
+
+	/* Control part: the header plus all descriptors must fit msg_buf. */
+	BUILD_BUG_ON(sizeof(msg->msg_buf) < cmsg_ctl_len);
+	msg->hdr.msg_control = &msg->msg_buf;
+	msg->hdr.msg_controllen = cmsg_ctl_len;
+
+	cmsg = CMSG_FIRSTHDR(&msg->hdr);
+	cmsg->cmsg_len = msg->hdr.msg_controllen;
+	cmsg->cmsg_level = SOL_SOCKET;
+	cmsg->cmsg_type = SCM_RIGHTS;
+
+	return (int *)CMSG_DATA(cmsg);
+}
+
+/*
+ * bpf_map_set_init_single - size @msg for a transfer of @num entries
+ * @msg: message previously prepared with bpf_map_set_init()
+ * @num: number of map entries / descriptors carried by this message
+ *
+ * Shrinks the data segment to the fixed part of struct bpf_map_aux plus
+ * @num map entries, and sets both the control buffer length and the
+ * first control header's length to CMSG_LEN() of @num descriptors.
+ */
+static inline void bpf_map_set_init_single(struct bpf_map_set_msg *msg,
+					   int num)
+{
+	struct cmsghdr *first;
+
+	/* Payload: everything before ent[] followed by num entries. */
+	msg->iov.iov_len = offsetof(struct bpf_map_aux, ent) +
+			   sizeof(struct bpf_elf_map) * num;
+
+	/* Control data: one header carrying num descriptors. */
+	msg->hdr.msg_controllen = CMSG_LEN(sizeof(int) * num);
+
+	first = CMSG_FIRSTHDR(&msg->hdr);
+	first->cmsg_len = msg->hdr.msg_controllen;
+}
+
+#endif /* __BPF_SCM__ */
diff --git a/include/color.h b/include/color.h
new file mode 100644
index 00000000..b85003ae
--- /dev/null
+++ b/include/color.h
@@ -0,0 +1,16 @@
+#ifndef __COLOR_H__
+#define __COLOR_H__ 1
+
+/* FILE appears in the color_fprintf() prototype below; pull in its
+ * declaration so this header can be included on its own.
+ */
+#include <stdio.h>
+
+/* Attribute selector passed to color_fprintf(). */
+enum color_attr {
+	COLOR_IFNAME,
+	COLOR_MAC,
+	COLOR_INET,
+	COLOR_INET6,
+	COLOR_OPERSTATE_UP,
+	COLOR_OPERSTATE_DOWN
+};
+
+/* NOTE(review): presumably switches color_fprintf() to colored output;
+ * the implementation is not part of this header -- confirm there.
+ */
+void enable_color(void);
+
+/* Formatted output to @fp for the given @attr; @fmt and the variadic
+ * arguments follow the printf-style calling shape.
+ * NOTE(review): presumably returns a character count like fprintf() --
+ * confirm in the implementation.
+ */
+int color_fprintf(FILE *fp, enum color_attr attr, const char *fmt, ...);
+
+#endif
diff --git a/include/dlfcn.h b/include/dlfcn.h
index c54f8d8e..f15bc2c7 100644
--- a/include/dlfcn.h
+++ b/include/dlfcn.h
@@ -19,7 +19,7 @@ static inline void *dlopen(const char *file, int flag)
return NULL;
}
-extern void *_dlsym(const char *sym);
+void *_dlsym(const char *sym);
static inline void *dlsym(void *handle, const char *sym)
{
if (handle != _FAKE_DLFCN_HDL)
diff --git a/include/ip6tables.h b/include/ip6tables.h
index 1050593a..5f1c5b65 100644
--- a/include/ip6tables.h
+++ b/include/ip6tables.h
@@ -1,141 +1,20 @@
#ifndef _IP6TABLES_USER_H
#define _IP6TABLES_USER_H
-#include "iptables_common.h"
-#include "libiptc/libip6tc.h"
-
-struct ip6tables_rule_match
-{
- struct ip6tables_rule_match *next;
-
- struct ip6tables_match *match;
-};
-
-/* Include file for additions: new matches and targets. */
-struct ip6tables_match
-{
- struct ip6tables_match *next;
-
- ip6t_chainlabel name;
-
- const char *version;
-
- /* Size of match data. */
- size_t size;
-
- /* Size of match data relevent for userspace comparison purposes */
- size_t userspacesize;
-
- /* Function which prints out usage message. */
- void (*help)(void);
-
- /* Initialize the match. */
- void (*init)(struct ip6t_entry_match *m, unsigned int *nfcache);
-
- /* Function which parses command options; returns true if it
- ate an option */
- int (*parse)(int c, char **argv, int invert, unsigned int *flags,
- const struct ip6t_entry *entry,
- unsigned int *nfcache,
- struct ip6t_entry_match **match);
-
- /* Final check; exit if not ok. */
- void (*final_check)(unsigned int flags);
-
- /* Prints out the match iff non-NULL: put space at end */
- void (*print)(const struct ip6t_ip6 *ip,
- const struct ip6t_entry_match *match, int numeric);
-
- /* Saves the union ipt_matchinfo in parsable form to stdout. */
- void (*save)(const struct ip6t_ip6 *ip,
- const struct ip6t_entry_match *match);
-
- /* Pointer to list of extra command-line options */
- const struct option *extra_opts;
-
- /* Ignore these men behind the curtain: */
- unsigned int option_offset;
- struct ip6t_entry_match *m;
- unsigned int mflags;
-#ifdef NO_SHARED_LIBS
- unsigned int loaded; /* simulate loading so options are merged properly */
-#endif
-};
-
-struct ip6tables_target
-{
- struct ip6tables_target *next;
-
- ip6t_chainlabel name;
-
- const char *version;
-
- /* Size of target data. */
- size_t size;
-
- /* Size of target data relevent for userspace comparison purposes */
- size_t userspacesize;
-
- /* Function which prints out usage message. */
- void (*help)(void);
-
- /* Initialize the target. */
- void (*init)(struct ip6t_entry_target *t, unsigned int *nfcache);
-
- /* Function which parses command options; returns true if it
- ate an option */
- int (*parse)(int c, char **argv, int invert, unsigned int *flags,
- const struct ip6t_entry *entry,
- struct ip6t_entry_target **target);
-
- /* Final check; exit if not ok. */
- void (*final_check)(unsigned int flags);
-
- /* Prints out the target iff non-NULL: put space at end */
- void (*print)(const struct ip6t_ip6 *ip,
- const struct ip6t_entry_target *target, int numeric);
-
- /* Saves the targinfo in parsable form to stdout. */
- void (*save)(const struct ip6t_ip6 *ip,
- const struct ip6t_entry_target *target);
-
- /* Pointer to list of extra command-line options */
- struct option *extra_opts;
-
- /* Ignore these men behind the curtain: */
- unsigned int option_offset;
- struct ip6t_entry_target *t;
- unsigned int tflags;
- unsigned int used;
-#ifdef NO_SHARED_LIBS
- unsigned int loaded; /* simulate loading so options are merged properly */
-#endif
-};
-
-extern int line;
+#include <netinet/ip.h>
+#include <xtables.h>
+#include <libiptc/libip6tc.h>
+#include <iptables/internal.h>
/* Your shared library should call one of these. */
-extern void register_match6(struct ip6tables_match *me);
-extern void register_target6(struct ip6tables_target *me);
-
extern int do_command6(int argc, char *argv[], char **table,
- ip6tc_handle_t *handle);
-/* Keeping track of external matches and targets: linked lists. */
-extern struct ip6tables_match *ip6tables_matches;
-extern struct ip6tables_target *ip6tables_targets;
-
-enum ip6t_tryload {
- DONT_LOAD,
- TRY_LOAD,
- LOAD_MUST_SUCCEED
-};
+ struct xtc_handle **handle, bool restore);
-extern struct ip6tables_target *find_target(const char *name, enum ip6t_tryload);
-extern struct ip6tables_match *find_match(const char *name, enum ip6t_tryload, struct ip6tables_rule_match **match);
+extern int for_each_chain6(int (*fn)(const xt_chainlabel, int, struct xtc_handle *), int verbose, int builtinstoo, struct xtc_handle *handle);
+extern int flush_entries6(const xt_chainlabel chain, int verbose, struct xtc_handle *handle);
+extern int delete_chain6(const xt_chainlabel chain, int verbose, struct xtc_handle *handle);
+void print_rule6(const struct ip6t_entry *e, struct xtc_handle *h, const char *chain, int counters);
-extern int for_each_chain(int (*fn)(const ip6t_chainlabel, int, ip6tc_handle_t *), int verbose, int builtinstoo, ip6tc_handle_t *handle);
-extern int flush_entries(const ip6t_chainlabel chain, int verbose, ip6tc_handle_t *handle);
-extern int delete_chain(const ip6t_chainlabel chain, int verbose, ip6tc_handle_t *handle);
-extern int ip6tables_insmod(const char *modname, const char *modprobe);
+extern struct xtables_globals ip6tables_globals;
#endif /*_IP6TABLES_USER_H*/
diff --git a/include/iptables.h b/include/iptables.h
index f1e62e23..78c10abd 100644
--- a/include/iptables.h
+++ b/include/iptables.h
@@ -1,179 +1,25 @@
#ifndef _IPTABLES_USER_H
#define _IPTABLES_USER_H
-#include "iptables_common.h"
-#include "libiptc/libiptc.h"
-
-#ifndef IPT_LIB_DIR
-#define IPT_LIB_DIR "/usr/local/lib/iptables"
-#endif
-
-#ifndef IPPROTO_SCTP
-#define IPPROTO_SCTP 132
-#endif
-
-#ifndef IPT_SO_GET_REVISION_MATCH /* Old kernel source. */
-#define IPT_SO_GET_REVISION_MATCH (IPT_BASE_CTL + 2)
-#define IPT_SO_GET_REVISION_TARGET (IPT_BASE_CTL + 3)
-
-struct ipt_get_revision
-{
- char name[IPT_FUNCTION_MAXNAMELEN-1];
-
- u_int8_t revision;
-};
-#endif /* IPT_SO_GET_REVISION_MATCH Old kernel source */
-
-struct iptables_rule_match
-{
- struct iptables_rule_match *next;
-
- struct iptables_match *match;
-};
-
-/* Include file for additions: new matches and targets. */
-struct iptables_match
-{
- struct iptables_match *next;
-
- ipt_chainlabel name;
-
- /* Revision of match (0 by default). */
- u_int8_t revision;
-
- const char *version;
-
- /* Size of match data. */
- size_t size;
-
- /* Size of match data relevent for userspace comparison purposes */
- size_t userspacesize;
-
- /* Function which prints out usage message. */
- void (*help)(void);
-
- /* Initialize the match. */
- void (*init)(struct ipt_entry_match *m, unsigned int *nfcache);
-
- /* Function which parses command options; returns true if it
- ate an option */
- int (*parse)(int c, char **argv, int invert, unsigned int *flags,
- const struct ipt_entry *entry,
- unsigned int *nfcache,
- struct ipt_entry_match **match);
-
- /* Final check; exit if not ok. */
- void (*final_check)(unsigned int flags);
-
- /* Prints out the match iff non-NULL: put space at end */
- void (*print)(const struct ipt_ip *ip,
- const struct ipt_entry_match *match, int numeric);
-
- /* Saves the match info in parsable form to stdout. */
- void (*save)(const struct ipt_ip *ip,
- const struct ipt_entry_match *match);
-
- /* Pointer to list of extra command-line options */
- const struct option *extra_opts;
-
- /* Ignore these men behind the curtain: */
- unsigned int option_offset;
- struct ipt_entry_match *m;
- unsigned int mflags;
-#ifdef NO_SHARED_LIBS
- unsigned int loaded; /* simulate loading so options are merged properly */
-#endif
-};
-
-struct iptables_target
-{
- struct iptables_target *next;
-
- ipt_chainlabel name;
-
- /* Revision of target (0 by default). */
- u_int8_t revision;
-
- const char *version;
-
- /* Size of target data. */
- size_t size;
-
- /* Size of target data relevent for userspace comparison purposes */
- size_t userspacesize;
-
- /* Function which prints out usage message. */
- void (*help)(void);
-
- /* Initialize the target. */
- void (*init)(struct ipt_entry_target *t, unsigned int *nfcache);
-
- /* Function which parses command options; returns true if it
- ate an option */
- int (*parse)(int c, char **argv, int invert, unsigned int *flags,
- const struct ipt_entry *entry,
- struct ipt_entry_target **target);
-
- /* Final check; exit if not ok. */
- void (*final_check)(unsigned int flags);
-
- /* Prints out the target iff non-NULL: put space at end */
- void (*print)(const struct ipt_ip *ip,
- const struct ipt_entry_target *target, int numeric);
-
- /* Saves the targinfo in parsable form to stdout. */
- void (*save)(const struct ipt_ip *ip,
- const struct ipt_entry_target *target);
-
- /* Pointer to list of extra command-line options */
- struct option *extra_opts;
-
- /* Ignore these men behind the curtain: */
- unsigned int option_offset;
- struct ipt_entry_target *t;
- unsigned int tflags;
- unsigned int used;
-#ifdef NO_SHARED_LIBS
- unsigned int loaded; /* simulate loading so options are merged properly */
-#endif
-};
-
-extern int line;
+#include <netinet/ip.h>
+#include <xtables.h>
+#include <libiptc/libiptc.h>
+#include <iptables/internal.h>
/* Your shared library should call one of these. */
-extern void register_match(struct iptables_match *me);
-extern void register_target(struct iptables_target *me);
-extern void xtables_register_target(struct iptables_target *me);
-extern int build_st(struct iptables_target *target, struct ipt_entry_target *t);
-
-extern struct in_addr *dotted_to_addr(const char *dotted);
-extern char *addr_to_dotted(const struct in_addr *addrp);
-extern char *addr_to_anyname(const struct in_addr *addr);
-extern char *mask_to_dotted(const struct in_addr *mask);
-
-extern void parse_hostnetworkmask(const char *name, struct in_addr **addrpp,
- struct in_addr *maskp, unsigned int *naddrs);
-extern u_int16_t parse_protocol(const char *s);
-
-extern int do_command(int argc, char *argv[], char **table,
- iptc_handle_t *handle);
-/* Keeping track of external matches and targets: linked lists. */
-extern struct iptables_match *iptables_matches;
-extern struct iptables_target *iptables_targets;
+extern int do_command4(int argc, char *argv[], char **table,
+ struct xtc_handle **handle, bool restore);
+extern int delete_chain4(const xt_chainlabel chain, int verbose,
+ struct xtc_handle *handle);
+extern int flush_entries4(const xt_chainlabel chain, int verbose,
+ struct xtc_handle *handle);
+extern int for_each_chain4(int (*fn)(const xt_chainlabel, int, struct xtc_handle *),
+ int verbose, int builtinstoo, struct xtc_handle *handle);
+extern void print_rule4(const struct ipt_entry *e,
+ struct xtc_handle *handle, const char *chain, int counters);
-enum ipt_tryload {
- DONT_LOAD,
- TRY_LOAD,
- LOAD_MUST_SUCCEED
-};
+extern struct xtables_globals iptables_globals;
-extern struct iptables_target *find_target(const char *name, enum ipt_tryload);
-extern struct iptables_match *find_match(const char *name, enum ipt_tryload, struct iptables_rule_match **match);
+extern struct xtables_globals xtables_globals;
-extern int delete_chain(const ipt_chainlabel chain, int verbose,
- iptc_handle_t *handle);
-extern int flush_entries(const ipt_chainlabel chain, int verbose,
- iptc_handle_t *handle);
-extern int for_each_chain(int (*fn)(const ipt_chainlabel, int, iptc_handle_t *),
- int verbose, int builtinstoo, iptc_handle_t *handle);
#endif /*_IPTABLES_USER_H*/
diff --git a/include/iptables/internal.h b/include/iptables/internal.h
new file mode 100644
index 00000000..62a8ecb9
--- /dev/null
+++ b/include/iptables/internal.h
@@ -0,0 +1,13 @@
+#ifndef IPTABLES_INTERNAL_H
+#define IPTABLES_INTERNAL_H 1
+
+#define IPTABLES_VERSION "1.6.0"
+
+/**
+ * Program's own name and version.
+ */
+extern const char *program_name, *program_version;
+
+extern int line;
+
+#endif /* IPTABLES_INTERNAL_H */
diff --git a/include/json_writer.h b/include/json_writer.h
new file mode 100644
index 00000000..ab9a008a
--- /dev/null
+++ b/include/json_writer.h
@@ -0,0 +1,61 @@
+/*
+ * Simple streaming JSON writer
+ *
+ * This takes care of the annoying bits of JSON syntax like the commas
+ * after elements
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Stephen Hemminger <stephen@networkplumber.org>
+ */
+
+#ifndef _JSON_WRITER_H_
+#define _JSON_WRITER_H_
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>	/* FILE, used by jsonw_new() */
+
+/* Opaque class structure: only the tag is declared here, so users can
+ * hold pointers to a writer but cannot access its members.
+ */
+typedef struct json_writer json_writer_t;
+
+/* Create a new JSON stream that writes to @f.
+ * NOTE(review): the failure behaviour (e.g. a NULL return) is not
+ * visible from this header -- confirm in json_writer.c.
+ */
+json_writer_t *jsonw_new(FILE *f);
+/* End output to JSON stream. Takes the address of the writer pointer.
+ * NOTE(review): presumably frees the writer and clears *self_p --
+ * confirm in json_writer.c.
+ */
+void jsonw_destroy(json_writer_t **self_p);
+
+/* Cause output to have pretty whitespace */
+void jsonw_pretty(json_writer_t *self, bool on);
+
+/* Add property name */
+void jsonw_name(json_writer_t *self, const char *name);
+
+/* Add value */
+void jsonw_string(json_writer_t *self, const char *value);
+void jsonw_bool(json_writer_t *self, bool value);
+void jsonw_float(json_writer_t *self, double number);
+void jsonw_uint(json_writer_t *self, uint64_t number);
+void jsonw_int(json_writer_t *self, int64_t number);
+void jsonw_null(json_writer_t *self);
+
+/* Useful Combinations of name and value
+ *
+ * One helper per value type declared above; each takes the property
+ * name followed by the value (jsonw_null_field takes only the name).
+ * NOTE(review): presumably each is equivalent to jsonw_name(self, prop)
+ * followed by the matching value call -- confirm in json_writer.c.
+ */
+void jsonw_string_field(json_writer_t *self, const char *prop, const char *val);
+void jsonw_bool_field(json_writer_t *self, const char *prop, bool value);
+void jsonw_float_field(json_writer_t *self, const char *prop, double num);
+void jsonw_uint_field(json_writer_t *self, const char *prop, uint64_t num);
+void jsonw_int_field(json_writer_t *self, const char *prop, int64_t num);
+void jsonw_null_field(json_writer_t *self, const char *prop);
+
+/* Collections */
+void jsonw_start_object(json_writer_t *self);
+void jsonw_end_object(json_writer_t *self);
+
+void jsonw_start_array(json_writer_t *self);
+void jsonw_end_array(json_writer_t *self);
+
+/* Override default exception handling
+ *
+ * Function type for a callback taking a const char * (presumably the
+ * error message) and returning nothing.
+ * NOTE(review): this header declares no function that accepts a
+ * jsonw_err_handler_fn, so no way to install such a handler is exposed
+ * here.
+ */
+typedef void (jsonw_err_handler_fn)(const char *);
+
+#endif /* _JSON_WRITER_H_ */
diff --git a/include/libiptc/ipt_kernel_headers.h b/include/libiptc/ipt_kernel_headers.h
index 7e878284..a5963e94 100644
--- a/include/libiptc/ipt_kernel_headers.h
+++ b/include/libiptc/ipt_kernel_headers.h
@@ -5,22 +5,11 @@
#include <limits.h>
-#if defined(__GLIBC__) && __GLIBC__ == 2
#include <netinet/ip.h>
#include <netinet/in.h>
#include <netinet/ip_icmp.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>
+#include <net/if.h>
#include <sys/types.h>
-#else /* libc5 */
-#include <sys/socket.h>
-#include <linux/ip.h>
-#include <linux/in.h>
-#include <linux/if.h>
-#include <linux/icmp.h>
-#include <linux/tcp.h>
-#include <linux/udp.h>
-#include <linux/types.h>
-#include <linux/in6.h>
-#endif
#endif
diff --git a/include/libiptc/libip6tc.h b/include/libiptc/libip6tc.h
index 7a247c46..9aed80a0 100644
--- a/include/libiptc/libip6tc.h
+++ b/include/libiptc/libip6tc.h
@@ -2,153 +2,160 @@
#define _LIBIP6TC_H
/* Library which manipulates firewall rules. Version 0.2. */
+#include <linux/types.h>
#include <libiptc/ipt_kernel_headers.h>
-#include <linux/netfilter_ipv6/ip6_tables.h>
-
-#ifndef IP6T_MIN_ALIGN
-#define IP6T_MIN_ALIGN (__alignof__(struct ip6t_entry))
+#ifdef __cplusplus
+# include <climits>
+#else
+# include <limits.h> /* INT_MAX in ip6_tables.h */
#endif
-#define IP6T_ALIGN(s) (((s) + (IP6T_MIN_ALIGN-1)) & ~(IP6T_MIN_ALIGN-1))
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <libiptc/xtcshared.h>
-typedef char ip6t_chainlabel[32];
+#define ip6tc_handle xtc_handle
+#define ip6t_chainlabel xt_chainlabel
#define IP6TC_LABEL_ACCEPT "ACCEPT"
#define IP6TC_LABEL_DROP "DROP"
#define IP6TC_LABEL_QUEUE "QUEUE"
#define IP6TC_LABEL_RETURN "RETURN"
-/* Transparent handle type. */
-typedef struct ip6tc_handle *ip6tc_handle_t;
-
/* Does this chain exist? */
-int ip6tc_is_chain(const char *chain, const ip6tc_handle_t handle);
+int ip6tc_is_chain(const char *chain, struct xtc_handle *const handle);
/* Take a snapshot of the rules. Returns NULL on error. */
-ip6tc_handle_t ip6tc_init(const char *tablename);
+struct xtc_handle *ip6tc_init(const char *tablename);
/* Cleanup after ip6tc_init(). */
-void ip6tc_free(ip6tc_handle_t *h);
+void ip6tc_free(struct xtc_handle *h);
/* Iterator functions to run through the chains. Returns NULL at end. */
-const char *ip6tc_first_chain(ip6tc_handle_t *handle);
-const char *ip6tc_next_chain(ip6tc_handle_t *handle);
+const char *ip6tc_first_chain(struct xtc_handle *handle);
+const char *ip6tc_next_chain(struct xtc_handle *handle);
/* Get first rule in the given chain: NULL for empty chain. */
const struct ip6t_entry *ip6tc_first_rule(const char *chain,
- ip6tc_handle_t *handle);
+ struct xtc_handle *handle);
/* Returns NULL when rules run out. */
const struct ip6t_entry *ip6tc_next_rule(const struct ip6t_entry *prev,
- ip6tc_handle_t *handle);
+ struct xtc_handle *handle);
/* Returns a pointer to the target name of this position. */
const char *ip6tc_get_target(const struct ip6t_entry *e,
- ip6tc_handle_t *handle);
+ struct xtc_handle *handle);
/* Is this a built-in chain? */
-int ip6tc_builtin(const char *chain, const ip6tc_handle_t handle);
+int ip6tc_builtin(const char *chain, struct xtc_handle *const handle);
/* Get the policy of a given built-in chain */
const char *ip6tc_get_policy(const char *chain,
- struct ip6t_counters *counters,
- ip6tc_handle_t *handle);
+ struct xt_counters *counters,
+ struct xtc_handle *handle);
/* These functions return TRUE for OK or 0 and set errno. If errno ==
0, it means there was a version error (ie. upgrade libiptc). */
/* Rule numbers start at 1 for the first rule. */
/* Insert the entry `fw' in chain `chain' into position `rulenum'. */
-int ip6tc_insert_entry(const ip6t_chainlabel chain,
+int ip6tc_insert_entry(const xt_chainlabel chain,
const struct ip6t_entry *e,
unsigned int rulenum,
- ip6tc_handle_t *handle);
+ struct xtc_handle *handle);
/* Atomically replace rule `rulenum' in `chain' with `fw'. */
-int ip6tc_replace_entry(const ip6t_chainlabel chain,
+int ip6tc_replace_entry(const xt_chainlabel chain,
const struct ip6t_entry *e,
unsigned int rulenum,
- ip6tc_handle_t *handle);
+ struct xtc_handle *handle);
/* Append entry `fw' to chain `chain'. Equivalent to insert with
rulenum = length of chain. */
-int ip6tc_append_entry(const ip6t_chainlabel chain,
+int ip6tc_append_entry(const xt_chainlabel chain,
const struct ip6t_entry *e,
- ip6tc_handle_t *handle);
+ struct xtc_handle *handle);
+
+/* Check whether a matching rule exists */
+int ip6tc_check_entry(const xt_chainlabel chain,
+ const struct ip6t_entry *origfw,
+ unsigned char *matchmask,
+ struct xtc_handle *handle);
/* Delete the first rule in `chain' which matches `fw'. */
-int ip6tc_delete_entry(const ip6t_chainlabel chain,
+int ip6tc_delete_entry(const xt_chainlabel chain,
const struct ip6t_entry *origfw,
unsigned char *matchmask,
- ip6tc_handle_t *handle);
+ struct xtc_handle *handle);
/* Delete the rule in position `rulenum' in `chain'. */
-int ip6tc_delete_num_entry(const ip6t_chainlabel chain,
+int ip6tc_delete_num_entry(const xt_chainlabel chain,
unsigned int rulenum,
- ip6tc_handle_t *handle);
+ struct xtc_handle *handle);
/* Check the packet `fw' on chain `chain'. Returns the verdict, or
NULL and sets errno. */
-const char *ip6tc_check_packet(const ip6t_chainlabel chain,
+const char *ip6tc_check_packet(const xt_chainlabel chain,
struct ip6t_entry *,
- ip6tc_handle_t *handle);
+ struct xtc_handle *handle);
/* Flushes the entries in the given chain (ie. empties chain). */
-int ip6tc_flush_entries(const ip6t_chainlabel chain,
- ip6tc_handle_t *handle);
+int ip6tc_flush_entries(const xt_chainlabel chain,
+ struct xtc_handle *handle);
/* Zeroes the counters in a chain. */
-int ip6tc_zero_entries(const ip6t_chainlabel chain,
- ip6tc_handle_t *handle);
+int ip6tc_zero_entries(const xt_chainlabel chain,
+ struct xtc_handle *handle);
/* Creates a new chain. */
-int ip6tc_create_chain(const ip6t_chainlabel chain,
- ip6tc_handle_t *handle);
+int ip6tc_create_chain(const xt_chainlabel chain,
+ struct xtc_handle *handle);
/* Deletes a chain. */
-int ip6tc_delete_chain(const ip6t_chainlabel chain,
- ip6tc_handle_t *handle);
+int ip6tc_delete_chain(const xt_chainlabel chain,
+ struct xtc_handle *handle);
/* Renames a chain. */
-int ip6tc_rename_chain(const ip6t_chainlabel oldname,
- const ip6t_chainlabel newname,
- ip6tc_handle_t *handle);
+int ip6tc_rename_chain(const xt_chainlabel oldname,
+ const xt_chainlabel newname,
+ struct xtc_handle *handle);
/* Sets the policy on a built-in chain. */
-int ip6tc_set_policy(const ip6t_chainlabel chain,
- const ip6t_chainlabel policy,
- struct ip6t_counters *counters,
- ip6tc_handle_t *handle);
+int ip6tc_set_policy(const xt_chainlabel chain,
+ const xt_chainlabel policy,
+ struct xt_counters *counters,
+ struct xtc_handle *handle);
/* Get the number of references to this chain */
-int ip6tc_get_references(unsigned int *ref, const ip6t_chainlabel chain,
- ip6tc_handle_t *handle);
+int ip6tc_get_references(unsigned int *ref, const xt_chainlabel chain,
+ struct xtc_handle *handle);
/* read packet and byte counters for a specific rule */
-struct ip6t_counters *ip6tc_read_counter(const ip6t_chainlabel chain,
+struct xt_counters *ip6tc_read_counter(const xt_chainlabel chain,
unsigned int rulenum,
- ip6tc_handle_t *handle);
+ struct xtc_handle *handle);
/* zero packet and byte counters for a specific rule */
-int ip6tc_zero_counter(const ip6t_chainlabel chain,
+int ip6tc_zero_counter(const xt_chainlabel chain,
unsigned int rulenum,
- ip6tc_handle_t *handle);
+ struct xtc_handle *handle);
/* set packet and byte counters for a specific rule */
-int ip6tc_set_counter(const ip6t_chainlabel chain,
+int ip6tc_set_counter(const xt_chainlabel chain,
unsigned int rulenum,
- struct ip6t_counters *counters,
- ip6tc_handle_t *handle);
+ struct xt_counters *counters,
+ struct xtc_handle *handle);
/* Makes the actual changes. */
-int ip6tc_commit(ip6tc_handle_t *handle);
+int ip6tc_commit(struct xtc_handle *handle);
/* Get raw socket. */
-int ip6tc_get_raw_socket();
+int ip6tc_get_raw_socket(void);
/* Translates errno numbers into more human-readable form than strerror. */
const char *ip6tc_strerror(int err);
-/* Return prefix length, or -1 if not contiguous */
-int ipv6_prefix_length(const struct in6_addr *a);
+extern void dump_entries6(struct xtc_handle *const);
+
+extern const struct xtc_ops ip6tc_ops;
#endif /* _LIBIP6TC_H */
diff --git a/include/libiptc/libiptc.h b/include/libiptc/libiptc.h
index 7628bda6..24cdbdb7 100644
--- a/include/libiptc/libiptc.h
+++ b/include/libiptc/libiptc.h
@@ -2,155 +2,157 @@
#define _LIBIPTC_H
/* Library which manipulates filtering rules. */
+#include <linux/types.h>
#include <libiptc/ipt_kernel_headers.h>
+#ifdef __cplusplus
+# include <climits>
+#else
+# include <limits.h> /* INT_MAX in ip_tables.h */
+#endif
#include <linux/netfilter_ipv4/ip_tables.h>
+#include <libiptc/xtcshared.h>
#ifdef __cplusplus
extern "C" {
#endif
-#ifndef IPT_MIN_ALIGN
-/* ipt_entry has pointers and u_int64_t's in it, so if you align to
- it, you'll also align to any crazy matches and targets someone
- might write */
-#define IPT_MIN_ALIGN (__alignof__(struct ipt_entry))
-#endif
-
-#define IPT_ALIGN(s) (((s) + ((IPT_MIN_ALIGN)-1)) & ~((IPT_MIN_ALIGN)-1))
-
-typedef char ipt_chainlabel[32];
+#define iptc_handle xtc_handle
+#define ipt_chainlabel xt_chainlabel
#define IPTC_LABEL_ACCEPT "ACCEPT"
#define IPTC_LABEL_DROP "DROP"
#define IPTC_LABEL_QUEUE "QUEUE"
#define IPTC_LABEL_RETURN "RETURN"
-/* Transparent handle type. */
-typedef struct iptc_handle *iptc_handle_t;
-
/* Does this chain exist? */
-int iptc_is_chain(const char *chain, const iptc_handle_t handle);
+int iptc_is_chain(const char *chain, struct xtc_handle *const handle);
/* Take a snapshot of the rules. Returns NULL on error. */
-iptc_handle_t iptc_init(const char *tablename);
+struct xtc_handle *iptc_init(const char *tablename);
/* Cleanup after iptc_init(). */
-void iptc_free(iptc_handle_t *h);
+void iptc_free(struct xtc_handle *h);
/* Iterator functions to run through the chains. Returns NULL at end. */
-const char *iptc_first_chain(iptc_handle_t *handle);
-const char *iptc_next_chain(iptc_handle_t *handle);
+const char *iptc_first_chain(struct xtc_handle *handle);
+const char *iptc_next_chain(struct xtc_handle *handle);
/* Get first rule in the given chain: NULL for empty chain. */
const struct ipt_entry *iptc_first_rule(const char *chain,
- iptc_handle_t *handle);
+ struct xtc_handle *handle);
/* Returns NULL when rules run out. */
const struct ipt_entry *iptc_next_rule(const struct ipt_entry *prev,
- iptc_handle_t *handle);
+ struct xtc_handle *handle);
/* Returns a pointer to the target name of this entry. */
const char *iptc_get_target(const struct ipt_entry *e,
- iptc_handle_t *handle);
+ struct xtc_handle *handle);
/* Is this a built-in chain? */
-int iptc_builtin(const char *chain, const iptc_handle_t handle);
+int iptc_builtin(const char *chain, struct xtc_handle *const handle);
/* Get the policy of a given built-in chain */
const char *iptc_get_policy(const char *chain,
- struct ipt_counters *counter,
- iptc_handle_t *handle);
+ struct xt_counters *counter,
+ struct xtc_handle *handle);
/* These functions return TRUE for OK or 0 and set errno. If errno ==
0, it means there was a version error (ie. upgrade libiptc). */
/* Rule numbers start at 1 for the first rule. */
/* Insert the entry `e' in chain `chain' into position `rulenum'. */
-int iptc_insert_entry(const ipt_chainlabel chain,
+int iptc_insert_entry(const xt_chainlabel chain,
const struct ipt_entry *e,
unsigned int rulenum,
- iptc_handle_t *handle);
+ struct xtc_handle *handle);
/* Atomically replace rule `rulenum' in `chain' with `e'. */
-int iptc_replace_entry(const ipt_chainlabel chain,
+int iptc_replace_entry(const xt_chainlabel chain,
const struct ipt_entry *e,
unsigned int rulenum,
- iptc_handle_t *handle);
+ struct xtc_handle *handle);
/* Append entry `e' to chain `chain'. Equivalent to insert with
rulenum = length of chain. */
-int iptc_append_entry(const ipt_chainlabel chain,
+int iptc_append_entry(const xt_chainlabel chain,
const struct ipt_entry *e,
- iptc_handle_t *handle);
+ struct xtc_handle *handle);
+
+/* Check whether a matching rule exists */
+int iptc_check_entry(const xt_chainlabel chain,
+ const struct ipt_entry *origfw,
+ unsigned char *matchmask,
+ struct xtc_handle *handle);
/* Delete the first rule in `chain' which matches `e', subject to
matchmask (array of length == origfw) */
-int iptc_delete_entry(const ipt_chainlabel chain,
+int iptc_delete_entry(const xt_chainlabel chain,
const struct ipt_entry *origfw,
unsigned char *matchmask,
- iptc_handle_t *handle);
+ struct xtc_handle *handle);
/* Delete the rule in position `rulenum' in `chain'. */
-int iptc_delete_num_entry(const ipt_chainlabel chain,
+int iptc_delete_num_entry(const xt_chainlabel chain,
unsigned int rulenum,
- iptc_handle_t *handle);
+ struct xtc_handle *handle);
/* Check the packet `e' on chain `chain'. Returns the verdict, or
NULL and sets errno. */
-const char *iptc_check_packet(const ipt_chainlabel chain,
+const char *iptc_check_packet(const xt_chainlabel chain,
struct ipt_entry *entry,
- iptc_handle_t *handle);
+ struct xtc_handle *handle);
/* Flushes the entries in the given chain (ie. empties chain). */
-int iptc_flush_entries(const ipt_chainlabel chain,
- iptc_handle_t *handle);
+int iptc_flush_entries(const xt_chainlabel chain,
+ struct xtc_handle *handle);
/* Zeroes the counters in a chain. */
-int iptc_zero_entries(const ipt_chainlabel chain,
- iptc_handle_t *handle);
+int iptc_zero_entries(const xt_chainlabel chain,
+ struct xtc_handle *handle);
/* Creates a new chain. */
-int iptc_create_chain(const ipt_chainlabel chain,
- iptc_handle_t *handle);
+int iptc_create_chain(const xt_chainlabel chain,
+ struct xtc_handle *handle);
/* Deletes a chain. */
-int iptc_delete_chain(const ipt_chainlabel chain,
- iptc_handle_t *handle);
+int iptc_delete_chain(const xt_chainlabel chain,
+ struct xtc_handle *handle);
/* Renames a chain. */
-int iptc_rename_chain(const ipt_chainlabel oldname,
- const ipt_chainlabel newname,
- iptc_handle_t *handle);
+int iptc_rename_chain(const xt_chainlabel oldname,
+ const xt_chainlabel newname,
+ struct xtc_handle *handle);
/* Sets the policy on a built-in chain. */
-int iptc_set_policy(const ipt_chainlabel chain,
- const ipt_chainlabel policy,
- struct ipt_counters *counters,
- iptc_handle_t *handle);
+int iptc_set_policy(const xt_chainlabel chain,
+ const xt_chainlabel policy,
+ struct xt_counters *counters,
+ struct xtc_handle *handle);
/* Get the number of references to this chain */
int iptc_get_references(unsigned int *ref,
- const ipt_chainlabel chain,
- iptc_handle_t *handle);
+ const xt_chainlabel chain,
+ struct xtc_handle *handle);
/* read packet and byte counters for a specific rule */
-struct ipt_counters *iptc_read_counter(const ipt_chainlabel chain,
+struct xt_counters *iptc_read_counter(const xt_chainlabel chain,
unsigned int rulenum,
- iptc_handle_t *handle);
+ struct xtc_handle *handle);
/* zero packet and byte counters for a specific rule */
-int iptc_zero_counter(const ipt_chainlabel chain,
+int iptc_zero_counter(const xt_chainlabel chain,
unsigned int rulenum,
- iptc_handle_t *handle);
+ struct xtc_handle *handle);
/* set packet and byte counters for a specific rule */
-int iptc_set_counter(const ipt_chainlabel chain,
+int iptc_set_counter(const xt_chainlabel chain,
unsigned int rulenum,
- struct ipt_counters *counters,
- iptc_handle_t *handle);
+ struct xt_counters *counters,
+ struct xtc_handle *handle);
/* Makes the actual changes. */
-int iptc_commit(iptc_handle_t *handle);
+int iptc_commit(struct xtc_handle *handle);
/* Get raw socket. */
int iptc_get_raw_socket(void);
@@ -158,6 +160,10 @@ int iptc_get_raw_socket(void);
/* Translates errno numbers into more human-readable form than strerror. */
const char *iptc_strerror(int err);
+extern void dump_entries(struct xtc_handle *const);
+
+extern const struct xtc_ops iptc_ops;
+
#ifdef __cplusplus
}
#endif
diff --git a/include/libiptc/libxtc.h b/include/libiptc/libxtc.h
new file mode 100644
index 00000000..37010188
--- /dev/null
+++ b/include/libiptc/libxtc.h
@@ -0,0 +1,33 @@
+#ifndef _LIBXTC_H
+#define _LIBXTC_H
+/* Library which manipulates filtering rules. */
+
+#include <libiptc/ipt_kernel_headers.h>
+#include <linux/netfilter/x_tables.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef XT_MIN_ALIGN
+/* xt_entry has pointers and u_int64_t's in it, so if you align to
+ it, you'll also align to any crazy matches and targets someone
+ might write */
+#define XT_MIN_ALIGN (__alignof__(struct xt_entry))
+#endif
+
+#ifndef XT_ALIGN
+#define XT_ALIGN(s) (((s) + ((XT_MIN_ALIGN)-1)) & ~((XT_MIN_ALIGN)-1))
+#endif
+
+#define XTC_LABEL_ACCEPT "ACCEPT"
+#define XTC_LABEL_DROP "DROP"
+#define XTC_LABEL_QUEUE "QUEUE"
+#define XTC_LABEL_RETURN "RETURN"
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LIBXTC_H */
diff --git a/include/libiptc/xtcshared.h b/include/libiptc/xtcshared.h
new file mode 100644
index 00000000..773ebc4c
--- /dev/null
+++ b/include/libiptc/xtcshared.h
@@ -0,0 +1,20 @@
+#ifndef _LIBXTC_SHARED_H
+#define _LIBXTC_SHARED_H 1
+
+typedef char xt_chainlabel[32];
+struct xtc_handle;
+struct xt_counters;
+
+struct xtc_ops {
+ int (*commit)(struct xtc_handle *);
+ void (*free)(struct xtc_handle *);
+ int (*builtin)(const char *, struct xtc_handle *const);
+ int (*is_chain)(const char *, struct xtc_handle *const);
+ int (*flush_entries)(const xt_chainlabel, struct xtc_handle *);
+ int (*create_chain)(const xt_chainlabel, struct xtc_handle *);
+ int (*set_policy)(const xt_chainlabel, const xt_chainlabel,
+ struct xt_counters *, struct xtc_handle *);
+ const char *(*strerror)(int);
+};
+
+#endif /* _LIBXTC_SHARED_H */
diff --git a/include/libnetlink.h b/include/libnetlink.h
index 898275b8..431189e2 100644
--- a/include/libnetlink.h
+++ b/include/libnetlink.h
@@ -20,70 +20,100 @@ struct rtnl_handle
__u32 dump;
int proto;
FILE *dump_fp;
+#define RTNL_HANDLE_F_LISTEN_ALL_NSID 0x01
+ int flags;
};
extern int rcvbuf;
-extern int rtnl_open(struct rtnl_handle *rth, unsigned subscriptions)
+int rtnl_open(struct rtnl_handle *rth, unsigned subscriptions)
__attribute__((warn_unused_result));
-extern int rtnl_open_byproto(struct rtnl_handle *rth, unsigned subscriptions,
+int rtnl_open_byproto(struct rtnl_handle *rth, unsigned subscriptions,
int protocol)
__attribute__((warn_unused_result));
-extern void rtnl_close(struct rtnl_handle *rth);
-extern int rtnl_wilddump_request(struct rtnl_handle *rth, int fam, int type)
+void rtnl_close(struct rtnl_handle *rth);
+int rtnl_wilddump_request(struct rtnl_handle *rth, int fam, int type)
__attribute__((warn_unused_result));
-extern int rtnl_wilddump_req_filter(struct rtnl_handle *rth, int fam, int type,
+int rtnl_wilddump_req_filter(struct rtnl_handle *rth, int fam, int type,
__u32 filt_mask)
__attribute__((warn_unused_result));
-extern int rtnl_dump_request(struct rtnl_handle *rth, int type, void *req,
+int rtnl_dump_request(struct rtnl_handle *rth, int type, void *req,
int len)
__attribute__((warn_unused_result));
+int rtnl_dump_request_n(struct rtnl_handle *rth, struct nlmsghdr *n)
+ __attribute__((warn_unused_result));
+
+struct rtnl_ctrl_data {
+ int nsid;
+};
typedef int (*rtnl_filter_t)(const struct sockaddr_nl *,
struct nlmsghdr *n, void *);
+typedef int (*rtnl_listen_filter_t)(const struct sockaddr_nl *,
+ struct rtnl_ctrl_data *,
+ struct nlmsghdr *n, void *);
+
struct rtnl_dump_filter_arg
{
rtnl_filter_t filter;
void *arg1;
+ __u16 nc_flags;
};
-extern int rtnl_dump_filter_l(struct rtnl_handle *rth,
+int rtnl_dump_filter_l(struct rtnl_handle *rth,
const struct rtnl_dump_filter_arg *arg);
-extern int rtnl_dump_filter(struct rtnl_handle *rth, rtnl_filter_t filter,
- void *arg);
-extern int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n, pid_t peer,
- unsigned groups, struct nlmsghdr *answer)
+int rtnl_dump_filter_nc(struct rtnl_handle *rth,
+ rtnl_filter_t filter,
+ void *arg, __u16 nc_flags);
+#define rtnl_dump_filter(rth, filter, arg) \
+ rtnl_dump_filter_nc(rth, filter, arg, 0)
+int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n,
+ struct nlmsghdr *answer, size_t len)
__attribute__((warn_unused_result));
-extern int rtnl_send(struct rtnl_handle *rth, const void *buf, int)
+int rtnl_send(struct rtnl_handle *rth, const void *buf, int)
__attribute__((warn_unused_result));
-extern int rtnl_send_check(struct rtnl_handle *rth, const void *buf, int)
+int rtnl_send_check(struct rtnl_handle *rth, const void *buf, int)
__attribute__((warn_unused_result));
-extern int addattr(struct nlmsghdr *n, int maxlen, int type);
-extern int addattr8(struct nlmsghdr *n, int maxlen, int type, __u8 data);
-extern int addattr16(struct nlmsghdr *n, int maxlen, int type, __u16 data);
-extern int addattr32(struct nlmsghdr *n, int maxlen, int type, __u32 data);
-extern int addattr64(struct nlmsghdr *n, int maxlen, int type, __u64 data);
-extern int addattrstrz(struct nlmsghdr *n, int maxlen, int type, const char *data);
-
-extern int addattr_l(struct nlmsghdr *n, int maxlen, int type, const void *data, int alen);
-extern int addraw_l(struct nlmsghdr *n, int maxlen, const void *data, int len);
-extern struct rtattr *addattr_nest(struct nlmsghdr *n, int maxlen, int type);
-extern int addattr_nest_end(struct nlmsghdr *n, struct rtattr *nest);
-extern struct rtattr *addattr_nest_compat(struct nlmsghdr *n, int maxlen, int type, const void *data, int len);
-extern int addattr_nest_compat_end(struct nlmsghdr *n, struct rtattr *nest);
-extern int rta_addattr32(struct rtattr *rta, int maxlen, int type, __u32 data);
-extern int rta_addattr_l(struct rtattr *rta, int maxlen, int type, const void *data, int alen);
-
-extern int parse_rtattr(struct rtattr *tb[], int max, struct rtattr *rta, int len);
-extern int parse_rtattr_flags(struct rtattr *tb[], int max, struct rtattr *rta,
+int addattr(struct nlmsghdr *n, int maxlen, int type);
+int addattr8(struct nlmsghdr *n, int maxlen, int type, __u8 data);
+int addattr16(struct nlmsghdr *n, int maxlen, int type, __u16 data);
+int addattr32(struct nlmsghdr *n, int maxlen, int type, __u32 data);
+int addattr64(struct nlmsghdr *n, int maxlen, int type, __u64 data);
+int addattrstrz(struct nlmsghdr *n, int maxlen, int type, const char *data);
+
+int addattr_l(struct nlmsghdr *n, int maxlen, int type,
+ const void *data, int alen);
+int addraw_l(struct nlmsghdr *n, int maxlen, const void *data, int len);
+struct rtattr *addattr_nest(struct nlmsghdr *n, int maxlen, int type);
+int addattr_nest_end(struct nlmsghdr *n, struct rtattr *nest);
+struct rtattr *addattr_nest_compat(struct nlmsghdr *n, int maxlen, int type,
+ const void *data, int len);
+int addattr_nest_compat_end(struct nlmsghdr *n, struct rtattr *nest);
+int rta_addattr8(struct rtattr *rta, int maxlen, int type, __u8 data);
+int rta_addattr16(struct rtattr *rta, int maxlen, int type, __u16 data);
+int rta_addattr32(struct rtattr *rta, int maxlen, int type, __u32 data);
+int rta_addattr64(struct rtattr *rta, int maxlen, int type, __u64 data);
+int rta_addattr_l(struct rtattr *rta, int maxlen, int type,
+ const void *data, int alen);
+
+int parse_rtattr(struct rtattr *tb[], int max, struct rtattr *rta, int len);
+int parse_rtattr_flags(struct rtattr *tb[], int max, struct rtattr *rta,
int len, unsigned short flags);
-extern int parse_rtattr_byindex(struct rtattr *tb[], int max, struct rtattr *rta, int len);
-extern struct rtattr *parse_rtattr_one(int type, struct rtattr *rta, int len);
-extern int __parse_rtattr_nested_compat(struct rtattr *tb[], int max, struct rtattr *rta, int len);
+int parse_rtattr_byindex(struct rtattr *tb[], int max,
+ struct rtattr *rta, int len);
+struct rtattr *parse_rtattr_one(int type, struct rtattr *rta, int len);
+int __parse_rtattr_nested_compat(struct rtattr *tb[], int max, struct rtattr *rta, int len);
+
+struct rtattr *rta_nest(struct rtattr *rta, int maxlen, int type);
+int rta_nest_end(struct rtattr *rta, struct rtattr *nest);
+
+#define RTA_TAIL(rta) \
+ ((struct rtattr *) (((void *) (rta)) + \
+ RTA_ALIGN((rta)->rta_len)))
#define parse_rtattr_nested(tb, max, rta) \
(parse_rtattr((tb), (max), RTA_DATA(rta), RTA_PAYLOAD(rta)))
@@ -118,10 +148,11 @@ static inline const char *rta_getattr_str(const struct rtattr *rta)
return (const char *)RTA_DATA(rta);
}
-extern int rtnl_listen(struct rtnl_handle *, rtnl_filter_t handler,
- void *jarg);
-extern int rtnl_from_file(FILE *, rtnl_filter_t handler,
- void *jarg);
+int rtnl_listen_all_nsid(struct rtnl_handle *);
+int rtnl_listen(struct rtnl_handle *, rtnl_listen_filter_t handler,
+ void *jarg);
+int rtnl_from_file(FILE *, rtnl_listen_filter_t handler,
+ void *jarg);
#define NLMSG_TAIL(nmsg) \
((struct rtattr *) (((void *) (nmsg)) + NLMSG_ALIGN((nmsg)->nlmsg_len)))
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
new file mode 100644
index 00000000..f970f9db
--- /dev/null
+++ b/include/linux/bpf.h
@@ -0,0 +1,326 @@
+/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#ifndef __LINUX_BPF_H__
+#define __LINUX_BPF_H__
+
+#include <linux/types.h>
+#include <linux/bpf_common.h>
+
+/* Extended instruction set based on top of classic BPF */
+
+/* instruction classes */
+#define BPF_ALU64 0x07 /* alu mode in double word width */
+
+/* ld/ldx fields */
+#define BPF_DW 0x18 /* double word */
+#define BPF_XADD 0xc0 /* exclusive add */
+
+/* alu/jmp fields */
+#define BPF_MOV 0xb0 /* mov reg to reg */
+#define BPF_ARSH 0xc0 /* sign extending arithmetic shift right */
+
+/* change endianness of a register */
+#define BPF_END 0xd0 /* flags for endianness conversion: */
+#define BPF_TO_LE 0x00 /* convert to little-endian */
+#define BPF_TO_BE 0x08 /* convert to big-endian */
+#define BPF_FROM_LE BPF_TO_LE
+#define BPF_FROM_BE BPF_TO_BE
+
+#define BPF_JNE 0x50 /* jump != */
+#define BPF_JSGT 0x60 /* SGT is signed '>', GT in x86 */
+#define BPF_JSGE 0x70 /* SGE is signed '>=', GE in x86 */
+#define BPF_CALL 0x80 /* function call */
+#define BPF_EXIT 0x90 /* function return */
+
+/* Register numbers */
+enum {
+ BPF_REG_0 = 0,
+ BPF_REG_1,
+ BPF_REG_2,
+ BPF_REG_3,
+ BPF_REG_4,
+ BPF_REG_5,
+ BPF_REG_6,
+ BPF_REG_7,
+ BPF_REG_8,
+ BPF_REG_9,
+ BPF_REG_10,
+ __MAX_BPF_REG,
+};
+
+/* BPF has 10 general purpose 64-bit registers and stack frame. */
+#define MAX_BPF_REG __MAX_BPF_REG
+
+struct bpf_insn {
+ __u8 code; /* opcode */
+ __u8 dst_reg:4; /* dest register */
+ __u8 src_reg:4; /* source register */
+ __s16 off; /* signed offset */
+ __s32 imm; /* signed immediate constant */
+};
+
+/* BPF syscall commands, see bpf(2) man-page for details. */
+enum bpf_cmd {
+ BPF_MAP_CREATE,
+ BPF_MAP_LOOKUP_ELEM,
+ BPF_MAP_UPDATE_ELEM,
+ BPF_MAP_DELETE_ELEM,
+ BPF_MAP_GET_NEXT_KEY,
+ BPF_PROG_LOAD,
+ BPF_OBJ_PIN,
+ BPF_OBJ_GET,
+};
+
+enum bpf_map_type {
+ BPF_MAP_TYPE_UNSPEC,
+ BPF_MAP_TYPE_HASH,
+ BPF_MAP_TYPE_ARRAY,
+ BPF_MAP_TYPE_PROG_ARRAY,
+ BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+};
+
+enum bpf_prog_type {
+ BPF_PROG_TYPE_UNSPEC,
+ BPF_PROG_TYPE_SOCKET_FILTER,
+ BPF_PROG_TYPE_KPROBE,
+ BPF_PROG_TYPE_SCHED_CLS,
+ BPF_PROG_TYPE_SCHED_ACT,
+};
+
+#define BPF_PSEUDO_MAP_FD 1
+
+/* flags for BPF_MAP_UPDATE_ELEM command */
+#define BPF_ANY 0 /* create new element or update existing */
+#define BPF_NOEXIST 1 /* create new element if it didn't exist */
+#define BPF_EXIST 2 /* update existing element */
+
+union bpf_attr {
+ struct { /* anonymous struct used by BPF_MAP_CREATE command */
+ __u32 map_type; /* one of enum bpf_map_type */
+ __u32 key_size; /* size of key in bytes */
+ __u32 value_size; /* size of value in bytes */
+ __u32 max_entries; /* max number of entries in a map */
+ };
+
+ struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
+ __u32 map_fd;
+ __aligned_u64 key;
+ union {
+ __aligned_u64 value;
+ __aligned_u64 next_key;
+ };
+ __u64 flags;
+ };
+
+ struct { /* anonymous struct used by BPF_PROG_LOAD command */
+ __u32 prog_type; /* one of enum bpf_prog_type */
+ __u32 insn_cnt;
+ __aligned_u64 insns;
+ __aligned_u64 license;
+ __u32 log_level; /* verbosity level of verifier */
+ __u32 log_size; /* size of user buffer */
+ __aligned_u64 log_buf; /* user supplied buffer */
+ __u32 kern_version; /* checked when prog_type=kprobe */
+ };
+
+ struct { /* anonymous struct used by BPF_OBJ_* commands */
+ __aligned_u64 pathname;
+ __u32 bpf_fd;
+ };
+} __attribute__((aligned(8)));
+
+/* integer value in 'imm' field of BPF_CALL instruction selects which helper
+ * function eBPF program intends to call
+ */
+enum bpf_func_id {
+ BPF_FUNC_unspec,
+ BPF_FUNC_map_lookup_elem, /* void *map_lookup_elem(&map, &key) */
+ BPF_FUNC_map_update_elem, /* int map_update_elem(&map, &key, &value, flags) */
+ BPF_FUNC_map_delete_elem, /* int map_delete_elem(&map, &key) */
+ BPF_FUNC_probe_read, /* int bpf_probe_read(void *dst, int size, void *src) */
+ BPF_FUNC_ktime_get_ns, /* u64 bpf_ktime_get_ns(void) */
+ BPF_FUNC_trace_printk, /* int bpf_trace_printk(const char *fmt, int fmt_size, ...) */
+ BPF_FUNC_get_prandom_u32, /* u32 prandom_u32(void) */
+ BPF_FUNC_get_smp_processor_id, /* u32 raw_smp_processor_id(void) */
+
+ /**
+ * skb_store_bytes(skb, offset, from, len, flags) - store bytes into packet
+ * @skb: pointer to skb
+ * @offset: offset within packet from skb->mac_header
+ * @from: pointer where to copy bytes from
+ * @len: number of bytes to store into packet
+ * @flags: bit 0 - if true, recompute skb->csum
+ * other bits - reserved
+ * Return: 0 on success
+ */
+ BPF_FUNC_skb_store_bytes,
+
+ /**
+ * l3_csum_replace(skb, offset, from, to, flags) - recompute IP checksum
+ * @skb: pointer to skb
+ * @offset: offset within packet where IP checksum is located
+ * @from: old value of header field
+ * @to: new value of header field
+ * @flags: bits 0-3 - size of header field
+ * other bits - reserved
+ * Return: 0 on success
+ */
+ BPF_FUNC_l3_csum_replace,
+
+ /**
+ * l4_csum_replace(skb, offset, from, to, flags) - recompute TCP/UDP checksum
+ * @skb: pointer to skb
+ * @offset: offset within packet where TCP/UDP checksum is located
+ * @from: old value of header field
+ * @to: new value of header field
+ * @flags: bits 0-3 - size of header field
+ * bit 4 - is pseudo header
+ * other bits - reserved
+ * Return: 0 on success
+ */
+ BPF_FUNC_l4_csum_replace,
+
+ /**
+ * bpf_tail_call(ctx, prog_array_map, index) - jump into another BPF program
+ * @ctx: context pointer passed to next program
+ * @prog_array_map: pointer to map whose type is BPF_MAP_TYPE_PROG_ARRAY
+ * @index: index inside array that selects specific program to run
+ * Return: 0 on success
+ */
+ BPF_FUNC_tail_call,
+
+ /**
+ * bpf_clone_redirect(skb, ifindex, flags) - redirect to another netdev
+ * @skb: pointer to skb
+ * @ifindex: ifindex of the net device
+ * @flags: bit 0 - if set, redirect to ingress instead of egress
+ * other bits - reserved
+ * Return: 0 on success
+ */
+ BPF_FUNC_clone_redirect,
+
+ /**
+ * u64 bpf_get_current_pid_tgid(void)
+ * Return: current->tgid << 32 | current->pid
+ */
+ BPF_FUNC_get_current_pid_tgid,
+
+ /**
+ * u64 bpf_get_current_uid_gid(void)
+ * Return: current_gid << 32 | current_uid
+ */
+ BPF_FUNC_get_current_uid_gid,
+
+ /**
+ * bpf_get_current_comm(char *buf, int size_of_buf)
+ * stores current->comm into buf
+ * Return: 0 on success
+ */
+ BPF_FUNC_get_current_comm,
+
+ /**
+ * bpf_get_cgroup_classid(skb) - retrieve a proc's classid
+ * @skb: pointer to skb
+ * Return: classid if != 0
+ */
+ BPF_FUNC_get_cgroup_classid,
+ BPF_FUNC_skb_vlan_push, /* bpf_skb_vlan_push(skb, vlan_proto, vlan_tci) */
+ BPF_FUNC_skb_vlan_pop, /* bpf_skb_vlan_pop(skb) */
+
+ /**
+ * bpf_skb_[gs]et_tunnel_key(skb, key, size, flags)
+ * retrieve or populate tunnel metadata
+ * @skb: pointer to skb
+ * @key: pointer to 'struct bpf_tunnel_key'
+ * @size: size of 'struct bpf_tunnel_key'
+ * @flags: room for future extensions
+ * Return: 0 on success
+ */
+ BPF_FUNC_skb_get_tunnel_key,
+ BPF_FUNC_skb_set_tunnel_key,
+ BPF_FUNC_perf_event_read, /* u64 bpf_perf_event_read(&map, index) */
+ /**
+ * bpf_redirect(ifindex, flags) - redirect to another netdev
+ * @ifindex: ifindex of the net device
+ * @flags: bit 0 - if set, redirect to ingress instead of egress
+ * other bits - reserved
+ * Return: TC_ACT_REDIRECT
+ */
+ BPF_FUNC_redirect,
+
+ /**
+ * bpf_get_route_realm(skb) - retrieve a dst's tclassid
+ * @skb: pointer to skb
+ * Return: realm if != 0
+ */
+ BPF_FUNC_get_route_realm,
+
+ /**
+ * bpf_perf_event_output(ctx, map, index, data, size) - output perf raw sample
+ * @ctx: struct pt_regs*
+ * @map: pointer to perf_event_array map
+ * @index: index of event in the map
+ * @data: data on stack to be output as raw data
+ * @size: size of data
+ * Return: 0 on success
+ */
+ BPF_FUNC_perf_event_output,
+ BPF_FUNC_skb_load_bytes,
+ __BPF_FUNC_MAX_ID,
+};
+
+/* All flags used by eBPF helper functions, placed here. */
+
+/* BPF_FUNC_skb_store_bytes flags. */
+#define BPF_F_RECOMPUTE_CSUM (1ULL << 0)
+
+/* BPF_FUNC_l3_csum_replace and BPF_FUNC_l4_csum_replace flags.
+ * First 4 bits are for passing the header field size.
+ */
+#define BPF_F_HDR_FIELD_MASK 0xfULL
+
+/* BPF_FUNC_l4_csum_replace flags. */
+#define BPF_F_PSEUDO_HDR (1ULL << 4)
+
+/* BPF_FUNC_clone_redirect and BPF_FUNC_redirect flags. */
+#define BPF_F_INGRESS (1ULL << 0)
+
+/* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */
+#define BPF_F_TUNINFO_IPV6 (1ULL << 0)
+
+/* user accessible mirror of in-kernel sk_buff.
+ * new fields can only be added to the end of this structure
+ */
+struct __sk_buff {
+ __u32 len;
+ __u32 pkt_type;
+ __u32 mark;
+ __u32 queue_mapping;
+ __u32 protocol;
+ __u32 vlan_present;
+ __u32 vlan_tci;
+ __u32 vlan_proto;
+ __u32 priority;
+ __u32 ingress_ifindex;
+ __u32 ifindex;
+ __u32 tc_index;
+ __u32 cb[5];
+ __u32 hash;
+ __u32 tc_classid;
+};
+
+struct bpf_tunnel_key {
+ __u32 tunnel_id;
+ union {
+ __u32 remote_ipv4;
+ __u32 remote_ipv6[4];
+ };
+ __u8 tunnel_tos;
+ __u8 tunnel_ttl;
+};
+
+#endif /* __LINUX_BPF_H__ */
diff --git a/include/linux/can.h b/include/linux/can.h
index d9ba97f3..4af39b08 100644
--- a/include/linux/can.h
+++ b/include/linux/can.h
@@ -95,11 +95,17 @@ typedef __u32 can_err_mask_t;
* @can_dlc: frame payload length in byte (0 .. 8) aka data length code
* N.B. the DLC field from ISO 11898-1 Chapter 8.4.2.3 has a 1:1
* mapping of the 'data length code' to the real payload length
+ * @__pad: padding
+ * @__res0: reserved / padding
+ * @__res1: reserved / padding
* @data: CAN frame payload (up to 8 byte)
*/
struct can_frame {
canid_t can_id; /* 32 bit CAN_ID + EFF/RTR/ERR flags */
__u8 can_dlc; /* frame payload length in byte (0 .. CAN_MAX_DLEN) */
+ __u8 __pad; /* padding */
+ __u8 __res0; /* reserved / padding */
+ __u8 __res1; /* reserved / padding */
__u8 data[CAN_MAX_DLEN] __attribute__((aligned(8)));
};
diff --git a/include/linux/fib_rules.h b/include/linux/fib_rules.h
index e60291e1..6dcde64c 100644
--- a/include/linux/fib_rules.h
+++ b/include/linux/fib_rules.h
@@ -43,7 +43,7 @@ enum {
FRA_UNUSED5,
FRA_FWMARK, /* mark */
FRA_FLOW, /* flow/class id */
- FRA_UNUSED6,
+ FRA_TUN_ID,
FRA_SUPPRESS_IFGROUP,
FRA_SUPPRESS_PREFIXLEN,
FRA_TABLE, /* Extended table id */
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 8688a985..e4f2f74c 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -77,9 +77,13 @@ struct sock_fprog { /* Required for SO_ATTACH_FILTER. */
#define SKF_AD_VLAN_TAG_PRESENT 48
#define SKF_AD_PAY_OFFSET 52
#define SKF_AD_RANDOM 56
-#define SKF_AD_MAX 60
-#define SKF_NET_OFF (-0x100000)
-#define SKF_LL_OFF (-0x200000)
+#define SKF_AD_VLAN_TPID 60
+#define SKF_AD_MAX 64
+#define SKF_NET_OFF (-0x100000)
+#define SKF_LL_OFF (-0x200000)
+
+#define BPF_NET_OFF SKF_NET_OFF
+#define BPF_LL_OFF SKF_LL_OFF
#endif /* __LINUX_FILTER_H__ */
diff --git a/include/linux/fou.h b/include/linux/fou.h
index 13a78e41..744c3238 100644
--- a/include/linux/fou.h
+++ b/include/linux/fou.h
@@ -25,6 +25,7 @@ enum {
FOU_CMD_UNSPEC,
FOU_CMD_ADD,
FOU_CMD_DEL,
+ FOU_CMD_GET,
__FOU_CMD_MAX,
};
diff --git a/include/linux/if_addr.h b/include/linux/if_addr.h
index cc375e42..26f0ecff 100644
--- a/include/linux/if_addr.h
+++ b/include/linux/if_addr.h
@@ -50,6 +50,8 @@ enum {
#define IFA_F_PERMANENT 0x80
#define IFA_F_MANAGETEMPADDR 0x100
#define IFA_F_NOPREFIXROUTE 0x200
+#define IFA_F_MCAUTOJOIN 0x400
+#define IFA_F_STABLE_PRIVACY 0x800
struct ifa_cacheinfo {
__u32 ifa_prefered;
diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index 913bd8e3..ee197a37 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -127,6 +127,7 @@ enum {
#define BRIDGE_VLAN_INFO_UNTAGGED (1<<2) /* VLAN egresses untagged */
#define BRIDGE_VLAN_INFO_RANGE_BEGIN (1<<3) /* VLAN is start of vlan range */
#define BRIDGE_VLAN_INFO_RANGE_END (1<<4) /* VLAN is end of vlan range */
+#define BRIDGE_VLAN_INFO_BRENTRY (1<<5) /* Global bridge VLAN entry */
struct bridge_vlan_info {
__u16 flags;
@@ -182,6 +183,7 @@ struct br_mdb_entry {
#define MDB_TEMPORARY 0
#define MDB_PERMANENT 1
__u8 state;
+ __u16 vid;
struct {
union {
__be32 ip4;
diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h
index 4678e499..bf278d65 100644
--- a/include/linux/if_ether.h
+++ b/include/linux/if_ether.h
@@ -42,6 +42,7 @@
#define ETH_P_LOOP 0x0060 /* Ethernet Loopback packet */
#define ETH_P_PUP 0x0200 /* Xerox PUP packet */
#define ETH_P_PUPAT 0x0201 /* Xerox PUP Addr Trans packet */
+#define ETH_P_TSN 0x22F0 /* TSN (IEEE 1722) packet */
#define ETH_P_IP 0x0800 /* Internet Protocol packet */
#define ETH_P_X25 0x0805 /* CCITT X.25 */
#define ETH_P_ARP 0x0806 /* Address Resolution packet */
diff --git a/include/linux/if_link.h b/include/linux/if_link.h
index 3450c3fb..d91f2c97 100644
--- a/include/linux/if_link.h
+++ b/include/linux/if_link.h
@@ -147,6 +147,8 @@ enum {
IFLA_CARRIER_CHANGES,
IFLA_PHYS_SWITCH_ID,
IFLA_LINK_NETNSID,
+ IFLA_PHYS_PORT_NAME,
+ IFLA_PROTO_DOWN,
__IFLA_MAX
};
@@ -213,6 +215,8 @@ enum {
enum in6_addr_gen_mode {
IN6_ADDR_GEN_MODE_EUI64,
IN6_ADDR_GEN_MODE_NONE,
+ IN6_ADDR_GEN_MODE_STABLE_PRIVACY,
+ IN6_ADDR_GEN_MODE_RANDOM,
};
/* Bridge section */
@@ -222,11 +226,52 @@ enum {
IFLA_BR_FORWARD_DELAY,
IFLA_BR_HELLO_TIME,
IFLA_BR_MAX_AGE,
+ IFLA_BR_AGEING_TIME,
+ IFLA_BR_STP_STATE,
+ IFLA_BR_PRIORITY,
+ IFLA_BR_VLAN_FILTERING,
+ IFLA_BR_VLAN_PROTOCOL,
+ IFLA_BR_GROUP_FWD_MASK,
+ IFLA_BR_ROOT_ID,
+ IFLA_BR_BRIDGE_ID,
+ IFLA_BR_ROOT_PORT,
+ IFLA_BR_ROOT_PATH_COST,
+ IFLA_BR_TOPOLOGY_CHANGE,
+ IFLA_BR_TOPOLOGY_CHANGE_DETECTED,
+ IFLA_BR_HELLO_TIMER,
+ IFLA_BR_TCN_TIMER,
+ IFLA_BR_TOPOLOGY_CHANGE_TIMER,
+ IFLA_BR_GC_TIMER,
+ IFLA_BR_GROUP_ADDR,
+ IFLA_BR_FDB_FLUSH,
+ IFLA_BR_MCAST_ROUTER,
+ IFLA_BR_MCAST_SNOOPING,
+ IFLA_BR_MCAST_QUERY_USE_IFADDR,
+ IFLA_BR_MCAST_QUERIER,
+ IFLA_BR_MCAST_HASH_ELASTICITY,
+ IFLA_BR_MCAST_HASH_MAX,
+ IFLA_BR_MCAST_LAST_MEMBER_CNT,
+ IFLA_BR_MCAST_STARTUP_QUERY_CNT,
+ IFLA_BR_MCAST_LAST_MEMBER_INTVL,
+ IFLA_BR_MCAST_MEMBERSHIP_INTVL,
+ IFLA_BR_MCAST_QUERIER_INTVL,
+ IFLA_BR_MCAST_QUERY_INTVL,
+ IFLA_BR_MCAST_QUERY_RESPONSE_INTVL,
+ IFLA_BR_MCAST_STARTUP_QUERY_INTVL,
+ IFLA_BR_NF_CALL_IPTABLES,
+ IFLA_BR_NF_CALL_IP6TABLES,
+ IFLA_BR_NF_CALL_ARPTABLES,
+ IFLA_BR_VLAN_DEFAULT_PVID,
__IFLA_BR_MAX,
};
#define IFLA_BR_MAX (__IFLA_BR_MAX - 1)
+struct ifla_bridge_id {
+ __u8 prio[2];
+ __u8 addr[6]; /* ETH_ALEN */
+};
+
enum {
BRIDGE_MODE_UNSPEC,
BRIDGE_MODE_HAIRPIN,
@@ -245,6 +290,20 @@ enum {
IFLA_BRPORT_UNICAST_FLOOD, /* flood unicast traffic */
IFLA_BRPORT_PROXYARP, /* proxy ARP */
IFLA_BRPORT_LEARNING_SYNC, /* mac learning sync from device */
+ IFLA_BRPORT_PROXYARP_WIFI, /* proxy ARP for Wi-Fi */
+ IFLA_BRPORT_ROOT_ID, /* designated root */
+ IFLA_BRPORT_BRIDGE_ID, /* designated bridge */
+ IFLA_BRPORT_DESIGNATED_PORT,
+ IFLA_BRPORT_DESIGNATED_COST,
+ IFLA_BRPORT_ID,
+ IFLA_BRPORT_NO,
+ IFLA_BRPORT_TOPOLOGY_CHANGE_ACK,
+ IFLA_BRPORT_CONFIG_PENDING,
+ IFLA_BRPORT_MESSAGE_AGE_TIMER,
+ IFLA_BRPORT_FORWARD_DELAY_TIMER,
+ IFLA_BRPORT_HOLD_TIMER,
+ IFLA_BRPORT_FLUSH,
+ IFLA_BRPORT_MULTICAST_ROUTER,
__IFLA_BRPORT_MAX
};
#define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1)
@@ -331,6 +390,15 @@ enum macvlan_macaddr_mode {
#define MACVLAN_FLAG_NOPROMISC 1
+/* VRF section */
+enum {
+ IFLA_VRF_UNSPEC,
+ IFLA_VRF_TABLE,
+ __IFLA_VRF_MAX
+};
+
+#define IFLA_VRF_MAX (__IFLA_VRF_MAX - 1)
+
/* IPVLAN section */
enum {
IFLA_IPVLAN_UNSPEC,
@@ -373,6 +441,7 @@ enum {
IFLA_VXLAN_REMCSUM_RX,
IFLA_VXLAN_GBP,
IFLA_VXLAN_REMCSUM_NOPARTIAL,
+ IFLA_VXLAN_COLLECT_METADATA,
__IFLA_VXLAN_MAX
};
#define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1)
@@ -382,6 +451,23 @@ struct ifla_vxlan_port_range {
__be16 high;
};
+/* GENEVE section */
+enum {
+ IFLA_GENEVE_UNSPEC,
+ IFLA_GENEVE_ID,
+ IFLA_GENEVE_REMOTE,
+ IFLA_GENEVE_TTL,
+ IFLA_GENEVE_TOS,
+ IFLA_GENEVE_PORT, /* destination port */
+ IFLA_GENEVE_COLLECT_METADATA,
+ IFLA_GENEVE_REMOTE6,
+ IFLA_GENEVE_UDP_CSUM,
+ IFLA_GENEVE_UDP_ZERO_CSUM6_TX,
+ IFLA_GENEVE_UDP_ZERO_CSUM6_RX,
+ __IFLA_GENEVE_MAX
+};
+#define IFLA_GENEVE_MAX (__IFLA_GENEVE_MAX - 1)
+
/* Bonding section */
enum {
@@ -409,6 +495,10 @@ enum {
IFLA_BOND_AD_LACP_RATE,
IFLA_BOND_AD_SELECT,
IFLA_BOND_AD_INFO,
+ IFLA_BOND_AD_ACTOR_SYS_PRIO,
+ IFLA_BOND_AD_USER_PORT_KEY,
+ IFLA_BOND_AD_ACTOR_SYSTEM,
+ IFLA_BOND_TLB_DYNAMIC_LB,
__IFLA_BOND_MAX,
};
@@ -434,6 +524,8 @@ enum {
IFLA_BOND_SLAVE_PERM_HWADDR,
IFLA_BOND_SLAVE_QUEUE_ID,
IFLA_BOND_SLAVE_AD_AGGREGATOR_ID,
+ IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE,
+ IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE,
__IFLA_BOND_SLAVE_MAX,
};
@@ -457,6 +549,11 @@ enum {
IFLA_VF_SPOOFCHK, /* Spoof Checking on/off switch */
IFLA_VF_LINK_STATE, /* link state enable/disable/auto switch */
IFLA_VF_RATE, /* Min and Max TX Bandwidth Allocation */
+ IFLA_VF_RSS_QUERY_EN, /* RSS Redirection Table and Hash Key query
+ * on/off switch
+ */
+ IFLA_VF_STATS, /* network device statistics */
+ IFLA_VF_TRUST, /* Trust VF */
__IFLA_VF_MAX,
};
@@ -501,6 +598,28 @@ struct ifla_vf_link_state {
__u32 link_state;
};
+struct ifla_vf_rss_query_en {
+ __u32 vf;
+ __u32 setting;
+};
+
+enum {
+ IFLA_VF_STATS_RX_PACKETS,
+ IFLA_VF_STATS_TX_PACKETS,
+ IFLA_VF_STATS_RX_BYTES,
+ IFLA_VF_STATS_TX_BYTES,
+ IFLA_VF_STATS_BROADCAST,
+ IFLA_VF_STATS_MULTICAST,
+ __IFLA_VF_STATS_MAX,
+};
+
+#define IFLA_VF_STATS_MAX (__IFLA_VF_STATS_MAX - 1)
+
+struct ifla_vf_trust {
+ __u32 vf;
+ __u32 setting;
+};
+
/* VF ports management section
*
* Nested layout of set/get msg is:
diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h
index ffee5839..d5ecb425 100644
--- a/include/linux/if_tun.h
+++ b/include/linux/if_tun.h
@@ -50,6 +50,12 @@
#define TUNGETFILTER _IOR('T', 219, struct sock_fprog)
#define TUNSETVNETLE _IOW('T', 220, int)
#define TUNGETVNETLE _IOR('T', 221, int)
+/* The TUNSETVNETBE and TUNGETVNETBE ioctls are for cross-endian support on
+ * little-endian hosts. Not all kernel configurations support them, but all
+ * configurations that support SET also support GET.
+ */
+#define TUNSETVNETBE _IOW('T', 222, int)
+#define TUNGETVNETBE _IOR('T', 223, int)
/* TUNSETIFF ifr flags */
#define IFF_TUN 0x0001
diff --git a/include/linux/if_tunnel.h b/include/linux/if_tunnel.h
index 102ce7aa..f0201ca0 100644
--- a/include/linux/if_tunnel.h
+++ b/include/linux/if_tunnel.h
@@ -112,6 +112,7 @@ enum {
IFLA_GRE_ENCAP_FLAGS,
IFLA_GRE_ENCAP_SPORT,
IFLA_GRE_ENCAP_DPORT,
+ IFLA_GRE_COLLECT_METADATA,
__IFLA_GRE_MAX,
};
diff --git a/include/linux/ila.h b/include/linux/ila.h
new file mode 100644
index 00000000..4f9e1dea
--- /dev/null
+++ b/include/linux/ila.h
@@ -0,0 +1,37 @@
+/* ila.h - ILA Interface */
+
+#ifndef _LINUX_ILA_H
+#define _LINUX_ILA_H
+
+/* NETLINK_GENERIC related info */
+#define ILA_GENL_NAME "ila"
+#define ILA_GENL_VERSION 0x1
+
+enum {
+ ILA_ATTR_UNSPEC,
+ ILA_ATTR_LOCATOR, /* u64 */
+ ILA_ATTR_IDENTIFIER, /* u64 */
+ ILA_ATTR_LOCATOR_MATCH, /* u64 */
+ ILA_ATTR_IFINDEX, /* s32 */
+ ILA_ATTR_DIR, /* u32 */
+
+ __ILA_ATTR_MAX,
+};
+
+#define ILA_ATTR_MAX (__ILA_ATTR_MAX - 1)
+
+enum {
+ ILA_CMD_UNSPEC,
+ ILA_CMD_ADD,
+ ILA_CMD_DEL,
+ ILA_CMD_GET,
+
+ __ILA_CMD_MAX,
+};
+
+#define ILA_CMD_MAX (__ILA_CMD_MAX - 1)
+
+#define ILA_DIR_IN (1 << 0)
+#define ILA_DIR_OUT (1 << 1)
+
+#endif /* _LINUX_ILA_H */
diff --git a/include/linux/in.h b/include/linux/in.h
new file mode 100644
index 00000000..194b43be
--- /dev/null
+++ b/include/linux/in.h
@@ -0,0 +1,299 @@
+/*
+ * INET An implementation of the TCP/IP protocol suite for the LINUX
+ * operating system. INET is implemented using the BSD Socket
+ * interface as the means of communication with the user level.
+ *
+ * Definitions of the Internet Protocol.
+ *
+ * Version: @(#)in.h 1.0.1 04/21/93
+ *
+ * Authors: Original taken from the GNU Project <netinet/in.h> file.
+ * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#ifndef _LINUX_IN_H
+#define _LINUX_IN_H
+
+#include <linux/types.h>
+#include <linux/libc-compat.h>
+#include <linux/socket.h>
+
+#if __UAPI_DEF_IN_IPPROTO
+/* Standard well-defined IP protocols. */
+enum {
+ IPPROTO_IP = 0, /* Dummy protocol for TCP */
+#define IPPROTO_IP IPPROTO_IP
+ IPPROTO_ICMP = 1, /* Internet Control Message Protocol */
+#define IPPROTO_ICMP IPPROTO_ICMP
+ IPPROTO_IGMP = 2, /* Internet Group Management Protocol */
+#define IPPROTO_IGMP IPPROTO_IGMP
+ IPPROTO_IPIP = 4, /* IPIP tunnels (older KA9Q tunnels use 94) */
+#define IPPROTO_IPIP IPPROTO_IPIP
+ IPPROTO_TCP = 6, /* Transmission Control Protocol */
+#define IPPROTO_TCP IPPROTO_TCP
+ IPPROTO_EGP = 8, /* Exterior Gateway Protocol */
+#define IPPROTO_EGP IPPROTO_EGP
+ IPPROTO_PUP = 12, /* PUP protocol */
+#define IPPROTO_PUP IPPROTO_PUP
+ IPPROTO_UDP = 17, /* User Datagram Protocol */
+#define IPPROTO_UDP IPPROTO_UDP
+ IPPROTO_IDP = 22, /* XNS IDP protocol */
+#define IPPROTO_IDP IPPROTO_IDP
+ IPPROTO_TP = 29, /* ISO Transport Protocol Class 4 */
+#define IPPROTO_TP IPPROTO_TP
+ IPPROTO_DCCP = 33, /* Datagram Congestion Control Protocol */
+#define IPPROTO_DCCP IPPROTO_DCCP
+ IPPROTO_IPV6 = 41, /* IPv6-in-IPv4 tunnelling */
+#define IPPROTO_IPV6 IPPROTO_IPV6
+ IPPROTO_RSVP = 46, /* RSVP Protocol */
+#define IPPROTO_RSVP IPPROTO_RSVP
+ IPPROTO_GRE = 47, /* Cisco GRE tunnels (rfc 1701,1702) */
+#define IPPROTO_GRE IPPROTO_GRE
+ IPPROTO_ESP = 50, /* Encapsulation Security Payload protocol */
+#define IPPROTO_ESP IPPROTO_ESP
+ IPPROTO_AH = 51, /* Authentication Header protocol */
+#define IPPROTO_AH IPPROTO_AH
+ IPPROTO_MTP = 92, /* Multicast Transport Protocol */
+#define IPPROTO_MTP IPPROTO_MTP
+ IPPROTO_BEETPH = 94, /* IP option pseudo header for BEET */
+#define IPPROTO_BEETPH IPPROTO_BEETPH
+ IPPROTO_ENCAP = 98, /* Encapsulation Header */
+#define IPPROTO_ENCAP IPPROTO_ENCAP
+ IPPROTO_PIM = 103, /* Protocol Independent Multicast */
+#define IPPROTO_PIM IPPROTO_PIM
+ IPPROTO_COMP = 108, /* Compression Header Protocol */
+#define IPPROTO_COMP IPPROTO_COMP
+ IPPROTO_SCTP = 132, /* Stream Control Transport Protocol */
+#define IPPROTO_SCTP IPPROTO_SCTP
+ IPPROTO_UDPLITE = 136, /* UDP-Lite (RFC 3828) */
+#define IPPROTO_UDPLITE IPPROTO_UDPLITE
+ IPPROTO_MPLS = 137, /* MPLS in IP (RFC 4023) */
+#define IPPROTO_MPLS IPPROTO_MPLS
+ IPPROTO_RAW = 255, /* Raw IP packets */
+#define IPPROTO_RAW IPPROTO_RAW
+ IPPROTO_MAX
+};
+#endif
+
+#if __UAPI_DEF_IN_ADDR
+/* Internet address. */
+struct in_addr {
+ __be32 s_addr;
+};
+#endif
+
+#define IP_TOS 1
+#define IP_TTL 2
+#define IP_HDRINCL 3
+#define IP_OPTIONS 4
+#define IP_ROUTER_ALERT 5
+#define IP_RECVOPTS 6
+#define IP_RETOPTS 7
+#define IP_PKTINFO 8
+#define IP_PKTOPTIONS 9
+#define IP_MTU_DISCOVER 10
+#define IP_RECVERR 11
+#define IP_RECVTTL 12
+#define IP_RECVTOS 13
+#define IP_MTU 14
+#define IP_FREEBIND 15
+#define IP_IPSEC_POLICY 16
+#define IP_XFRM_POLICY 17
+#define IP_PASSSEC 18
+#define IP_TRANSPARENT 19
+
+/* BSD compatibility */
+#define IP_RECVRETOPTS IP_RETOPTS
+
+/* TProxy original addresses */
+#define IP_ORIGDSTADDR 20
+#define IP_RECVORIGDSTADDR IP_ORIGDSTADDR
+
+#define IP_MINTTL 21
+#define IP_NODEFRAG 22
+#define IP_CHECKSUM 23
+#define IP_BIND_ADDRESS_NO_PORT 24
+
+/* IP_MTU_DISCOVER values */
+#define IP_PMTUDISC_DONT 0 /* Never send DF frames */
+#define IP_PMTUDISC_WANT 1 /* Use per route hints */
+#define IP_PMTUDISC_DO 2 /* Always DF */
+#define IP_PMTUDISC_PROBE 3 /* Ignore dst pmtu */
+/* Always use interface mtu (ignores dst pmtu) but don't set DF flag.
+ * Also incoming ICMP frag_needed notifications will be ignored on
+ * this socket to prevent accepting spoofed ones.
+ */
+#define IP_PMTUDISC_INTERFACE 4
+/* weaker version of IP_PMTUDISC_INTERFACE, which allows packets to get
+ * fragmented if they exceed the interface mtu
+ */
+#define IP_PMTUDISC_OMIT 5
+
+#define IP_MULTICAST_IF 32
+#define IP_MULTICAST_TTL 33
+#define IP_MULTICAST_LOOP 34
+#define IP_ADD_MEMBERSHIP 35
+#define IP_DROP_MEMBERSHIP 36
+#define IP_UNBLOCK_SOURCE 37
+#define IP_BLOCK_SOURCE 38
+#define IP_ADD_SOURCE_MEMBERSHIP 39
+#define IP_DROP_SOURCE_MEMBERSHIP 40
+#define IP_MSFILTER 41
+#define MCAST_JOIN_GROUP 42
+#define MCAST_BLOCK_SOURCE 43
+#define MCAST_UNBLOCK_SOURCE 44
+#define MCAST_LEAVE_GROUP 45
+#define MCAST_JOIN_SOURCE_GROUP 46
+#define MCAST_LEAVE_SOURCE_GROUP 47
+#define MCAST_MSFILTER 48
+#define IP_MULTICAST_ALL 49
+#define IP_UNICAST_IF 50
+
+#define MCAST_EXCLUDE 0
+#define MCAST_INCLUDE 1
+
+/* These need to appear somewhere around here */
+#define IP_DEFAULT_MULTICAST_TTL 1
+#define IP_DEFAULT_MULTICAST_LOOP 1
+
+/* Request struct for multicast socket ops */
+
+#if __UAPI_DEF_IP_MREQ
+struct ip_mreq {
+ struct in_addr imr_multiaddr; /* IP multicast address of group */
+ struct in_addr imr_interface; /* local IP address of interface */
+};
+
+struct ip_mreqn {
+ struct in_addr imr_multiaddr; /* IP multicast address of group */
+ struct in_addr imr_address; /* local IP address of interface */
+ int imr_ifindex; /* Interface index */
+};
+
+struct ip_mreq_source {
+ __be32 imr_multiaddr;
+ __be32 imr_interface;
+ __be32 imr_sourceaddr;
+};
+
+struct ip_msfilter {
+ __be32 imsf_multiaddr;
+ __be32 imsf_interface;
+ __u32 imsf_fmode;
+ __u32 imsf_numsrc;
+ __be32 imsf_slist[1];
+};
+
+#define IP_MSFILTER_SIZE(numsrc) \
+ (sizeof(struct ip_msfilter) - sizeof(__u32) \
+ + (numsrc) * sizeof(__u32))
+
+struct group_req {
+ __u32 gr_interface; /* interface index */
+ struct __kernel_sockaddr_storage gr_group; /* group address */
+};
+
+struct group_source_req {
+ __u32 gsr_interface; /* interface index */
+ struct __kernel_sockaddr_storage gsr_group; /* group address */
+ struct __kernel_sockaddr_storage gsr_source; /* source address */
+};
+
+struct group_filter {
+ __u32 gf_interface; /* interface index */
+ struct __kernel_sockaddr_storage gf_group; /* multicast address */
+ __u32 gf_fmode; /* filter mode */
+ __u32 gf_numsrc; /* number of sources */
+ struct __kernel_sockaddr_storage gf_slist[1]; /* source addresses */
+};
+
+#define GROUP_FILTER_SIZE(numsrc) \
+ (sizeof(struct group_filter) - sizeof(struct __kernel_sockaddr_storage) \
+ + (numsrc) * sizeof(struct __kernel_sockaddr_storage))
+#endif
+
+#if __UAPI_DEF_IN_PKTINFO
+struct in_pktinfo {
+ int ipi_ifindex;
+ struct in_addr ipi_spec_dst;
+ struct in_addr ipi_addr;
+};
+#endif
+
+/* Structure describing an Internet (IP) socket address. */
+#if __UAPI_DEF_SOCKADDR_IN
+#define __SOCK_SIZE__ 16 /* sizeof(struct sockaddr) */
+struct sockaddr_in {
+ __kernel_sa_family_t sin_family; /* Address family */
+ __be16 sin_port; /* Port number */
+ struct in_addr sin_addr; /* Internet address */
+
+ /* Pad to size of `struct sockaddr'. */
+ unsigned char __pad[__SOCK_SIZE__ - sizeof(short int) -
+ sizeof(unsigned short int) - sizeof(struct in_addr)];
+};
+#define sin_zero __pad /* for BSD UNIX comp. -FvK */
+#endif
+
+#if __UAPI_DEF_IN_CLASS
+/*
+ * Definitions of the bits in an Internet address integer.
+ * On subnets, host and network parts are found according
+ * to the subnet mask, not these masks.
+ */
+#define IN_CLASSA(a) ((((long int) (a)) & 0x80000000) == 0)
+#define IN_CLASSA_NET 0xff000000
+#define IN_CLASSA_NSHIFT 24
+#define IN_CLASSA_HOST (0xffffffff & ~IN_CLASSA_NET)
+#define IN_CLASSA_MAX 128
+
+#define IN_CLASSB(a) ((((long int) (a)) & 0xc0000000) == 0x80000000)
+#define IN_CLASSB_NET 0xffff0000
+#define IN_CLASSB_NSHIFT 16
+#define IN_CLASSB_HOST (0xffffffff & ~IN_CLASSB_NET)
+#define IN_CLASSB_MAX 65536
+
+#define IN_CLASSC(a) ((((long int) (a)) & 0xe0000000) == 0xc0000000)
+#define IN_CLASSC_NET 0xffffff00
+#define IN_CLASSC_NSHIFT 8
+#define IN_CLASSC_HOST (0xffffffff & ~IN_CLASSC_NET)
+
+#define IN_CLASSD(a) ((((long int) (a)) & 0xf0000000) == 0xe0000000)
+#define IN_MULTICAST(a) IN_CLASSD(a)
+#define IN_MULTICAST_NET 0xF0000000
+
+#define IN_EXPERIMENTAL(a) ((((long int) (a)) & 0xf0000000) == 0xf0000000)
+#define IN_BADCLASS(a) IN_EXPERIMENTAL((a))
+
+/* Address to accept any incoming messages. */
+#define INADDR_ANY ((unsigned long int) 0x00000000)
+
+/* Address to send to all hosts. */
+#define INADDR_BROADCAST ((unsigned long int) 0xffffffff)
+
+/* Address indicating an error return. */
+#define INADDR_NONE ((unsigned long int) 0xffffffff)
+
+/* Network number for local host loopback. */
+#define IN_LOOPBACKNET 127
+
+/* Address to loopback in software to local host. */
+#define INADDR_LOOPBACK 0x7f000001 /* 127.0.0.1 */
+#define IN_LOOPBACK(a) ((((long int) (a)) & 0xff000000) == 0x7f000000)
+
+/* Defines for Multicast INADDR */
+#define INADDR_UNSPEC_GROUP 0xe0000000U /* 224.0.0.0 */
+#define INADDR_ALLHOSTS_GROUP 0xe0000001U /* 224.0.0.1 */
+#define INADDR_ALLRTRS_GROUP 0xe0000002U /* 224.0.0.2 */
+#define INADDR_MAX_LOCAL_GROUP 0xe00000ffU /* 224.0.0.255 */
+#endif
+
+/* <asm/byteorder.h> contains the htonl type stuff.. */
+#include <asm/byteorder.h>
+
+
+#endif /* _LINUX_IN_H */
diff --git a/include/linux/in6.h b/include/linux/in6.h
index 994f4c22..aa5b66df 100644
--- a/include/linux/in6.h
+++ b/include/linux/in6.h
@@ -196,6 +196,7 @@ struct in6_flowlabel_req {
#define IPV6_IPSEC_POLICY 34
#define IPV6_XFRM_POLICY 35
+#define IPV6_HDRINCL 36
#endif
/*
diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h
index 7438dad7..1db41168 100644
--- a/include/linux/inet_diag.h
+++ b/include/linux/inet_diag.h
@@ -111,9 +111,11 @@ enum {
INET_DIAG_SKMEMINFO,
INET_DIAG_SHUTDOWN,
INET_DIAG_DCTCPINFO,
+ INET_DIAG_PROTOCOL, /* response attribute only */
+ INET_DIAG_SKV6ONLY,
};
-#define INET_DIAG_MAX INET_DIAG_DCTCPINFO
+#define INET_DIAG_MAX INET_DIAG_SKV6ONLY
/* INET_DIAG_MEM */
@@ -143,4 +145,8 @@ struct tcp_dctcp_info {
__u32 dctcp_ab_tot;
};
+union tcp_cc_info {
+ struct tcpvegas_info vegas;
+ struct tcp_dctcp_info dctcp;
+};
#endif /* _INET_DIAG_H_ */
diff --git a/include/linux/libc-compat.h b/include/linux/libc-compat.h
index 990332e0..9bed5b6a 100644
--- a/include/linux/libc-compat.h
+++ b/include/linux/libc-compat.h
@@ -56,6 +56,13 @@
/* GLIBC headers included first so don't define anything
* that would already be defined. */
+#define __UAPI_DEF_IN_ADDR 0
+#define __UAPI_DEF_IN_IPPROTO 0
+#define __UAPI_DEF_IN_PKTINFO 0
+#define __UAPI_DEF_IP_MREQ 0
+#define __UAPI_DEF_SOCKADDR_IN 0
+#define __UAPI_DEF_IN_CLASS 0
+
#define __UAPI_DEF_IN6_ADDR 0
/* The exception is the in6_addr macros which must be defined
* if the glibc code didn't define them. This guard matches
@@ -78,6 +85,13 @@
/* Linux headers included first, and we must define everything
* we need. The expectation is that glibc will check the
* __UAPI_DEF_* defines and adjust appropriately. */
+#define __UAPI_DEF_IN_ADDR 1
+#define __UAPI_DEF_IN_IPPROTO 1
+#define __UAPI_DEF_IN_PKTINFO 1
+#define __UAPI_DEF_IP_MREQ 1
+#define __UAPI_DEF_SOCKADDR_IN 1
+#define __UAPI_DEF_IN_CLASS 1
+
#define __UAPI_DEF_IN6_ADDR 1
/* We unconditionally define the in6_addr macros and glibc must
* coordinate. */
@@ -103,6 +117,14 @@
* that we need. */
#else /* !defined(__GLIBC__) */
+/* Definitions for in.h */
+#define __UAPI_DEF_IN_ADDR 1
+#define __UAPI_DEF_IN_IPPROTO 1
+#define __UAPI_DEF_IN_PKTINFO 1
+#define __UAPI_DEF_IP_MREQ 1
+#define __UAPI_DEF_SOCKADDR_IN 1
+#define __UAPI_DEF_IN_CLASS 1
+
/* Definitions for in6.h */
#define __UAPI_DEF_IN6_ADDR 1
#define __UAPI_DEF_IN6_ADDR_ALT 1
diff --git a/include/linux/lwtunnel.h b/include/linux/lwtunnel.h
new file mode 100644
index 00000000..1d2f4f6c
--- /dev/null
+++ b/include/linux/lwtunnel.h
@@ -0,0 +1,43 @@
+#ifndef _LWTUNNEL_H_
+#define _LWTUNNEL_H_
+
+#include <linux/types.h>
+
+enum lwtunnel_encap_types {
+ LWTUNNEL_ENCAP_NONE,
+ LWTUNNEL_ENCAP_MPLS,
+ LWTUNNEL_ENCAP_IP,
+ LWTUNNEL_ENCAP_ILA,
+ LWTUNNEL_ENCAP_IP6,
+ __LWTUNNEL_ENCAP_MAX,
+};
+
+#define LWTUNNEL_ENCAP_MAX (__LWTUNNEL_ENCAP_MAX - 1)
+
+enum lwtunnel_ip_t {
+ LWTUNNEL_IP_UNSPEC,
+ LWTUNNEL_IP_ID,
+ LWTUNNEL_IP_DST,
+ LWTUNNEL_IP_SRC,
+ LWTUNNEL_IP_TTL,
+ LWTUNNEL_IP_TOS,
+ LWTUNNEL_IP_FLAGS,
+ __LWTUNNEL_IP_MAX,
+};
+
+#define LWTUNNEL_IP_MAX (__LWTUNNEL_IP_MAX - 1)
+
+enum lwtunnel_ip6_t {
+ LWTUNNEL_IP6_UNSPEC,
+ LWTUNNEL_IP6_ID,
+ LWTUNNEL_IP6_DST,
+ LWTUNNEL_IP6_SRC,
+ LWTUNNEL_IP6_HOPLIMIT,
+ LWTUNNEL_IP6_TC,
+ LWTUNNEL_IP6_FLAGS,
+ __LWTUNNEL_IP6_MAX,
+};
+
+#define LWTUNNEL_IP6_MAX (__LWTUNNEL_IP6_MAX - 1)
+
+#endif /* _LWTUNNEL_H_ */
diff --git a/include/linux/mpls.h b/include/linux/mpls.h
new file mode 100644
index 00000000..a14b54b5
--- /dev/null
+++ b/include/linux/mpls.h
@@ -0,0 +1,46 @@
+#ifndef _MPLS_H
+#define _MPLS_H
+
+#include <linux/types.h>
+#include <asm/byteorder.h>
+
+/* Reference: RFC 5462, RFC 3032
+ *
+ * 0 1 2 3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Label | TC |S| TTL |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * Label: Label Value, 20 bits
+ * TC: Traffic Class field, 3 bits
+ * S: Bottom of Stack, 1 bit
+ * TTL: Time to Live, 8 bits
+ */
+
+struct mpls_label {
+ __be32 entry;
+};
+
+#define MPLS_LS_LABEL_MASK 0xFFFFF000
+#define MPLS_LS_LABEL_SHIFT 12
+#define MPLS_LS_TC_MASK 0x00000E00
+#define MPLS_LS_TC_SHIFT 9
+#define MPLS_LS_S_MASK 0x00000100
+#define MPLS_LS_S_SHIFT 8
+#define MPLS_LS_TTL_MASK 0x000000FF
+#define MPLS_LS_TTL_SHIFT 0
+
+/* Reserved labels */
+#define MPLS_LABEL_IPV4NULL 0 /* RFC3032 */
+#define MPLS_LABEL_RTALERT 1 /* RFC3032 */
+#define MPLS_LABEL_IPV6NULL 2 /* RFC3032 */
+#define MPLS_LABEL_IMPLNULL 3 /* RFC3032 */
+#define MPLS_LABEL_ENTROPY 7 /* RFC6790 */
+#define MPLS_LABEL_GAL 13 /* RFC5586 */
+#define MPLS_LABEL_OAMALERT 14 /* RFC3429 */
+#define MPLS_LABEL_EXTENSION 15 /* RFC7274 */
+
+#define MPLS_LABEL_FIRST_UNRESERVED 16 /* RFC3032 */
+
+#endif /* _MPLS_H */
diff --git a/include/linux/mpls_iptunnel.h b/include/linux/mpls_iptunnel.h
new file mode 100644
index 00000000..4132c3c5
--- /dev/null
+++ b/include/linux/mpls_iptunnel.h
@@ -0,0 +1,28 @@
+/*
+ * mpls tunnel api
+ *
+ * Authors:
+ * Roopa Prabhu <roopa@cumulusnetworks.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _LINUX_MPLS_IPTUNNEL_H
+#define _LINUX_MPLS_IPTUNNEL_H
+
+/* MPLS tunnel attributes
+ * [RTA_ENCAP] = {
+ * [MPLS_IPTUNNEL_DST]
+ * }
+ */
+enum {
+ MPLS_IPTUNNEL_UNSPEC,
+ MPLS_IPTUNNEL_DST,
+ __MPLS_IPTUNNEL_MAX,
+};
+#define MPLS_IPTUNNEL_MAX (__MPLS_IPTUNNEL_MAX - 1)
+
+#endif /* _LINUX_MPLS_IPTUNNEL_H */
diff --git a/include/linux/neighbour.h b/include/linux/neighbour.h
index 3873a355..788655bf 100644
--- a/include/linux/neighbour.h
+++ b/include/linux/neighbour.h
@@ -106,6 +106,7 @@ struct ndt_stats {
__u64 ndts_rcv_probes_ucast;
__u64 ndts_periodic_gc_runs;
__u64 ndts_forced_gc_runs;
+ __u64 ndts_table_fulls;
};
enum {
@@ -126,6 +127,7 @@ enum {
NDTPA_PROXY_QLEN, /* u32 */
NDTPA_LOCKTIME, /* u64, msecs */
NDTPA_QUEUE_LENBYTES, /* u32 */
+ NDTPA_MCAST_REPROBES, /* u32 */
__NDTPA_MAX
};
#define NDTPA_MAX (__NDTPA_MAX - 1)
diff --git a/include/linux/netconf.h b/include/linux/netconf.h
index 6ceb170d..7210fe4d 100644
--- a/include/linux/netconf.h
+++ b/include/linux/netconf.h
@@ -15,6 +15,7 @@ enum {
NETCONFA_RP_FILTER,
NETCONFA_MC_FORWARDING,
NETCONFA_PROXY_NEIGH,
+ NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
__NETCONFA_MAX
};
#define NETCONFA_MAX (__NETCONFA_MAX - 1)
diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index be0bc182..b71b4c97 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -4,7 +4,8 @@
#include <linux/types.h>
#include <linux/sysctl.h>
-
+#include <linux/in.h>
+#include <linux/in6.h>
/* Responses from hook functions. */
#define NF_DROP 0
@@ -49,11 +50,17 @@ enum nf_inet_hooks {
NF_INET_NUMHOOKS
};
+enum nf_dev_hooks {
+ NF_NETDEV_INGRESS,
+ NF_NETDEV_NUMHOOKS
+};
+
enum {
NFPROTO_UNSPEC = 0,
NFPROTO_INET = 1,
NFPROTO_IPV4 = 2,
NFPROTO_ARP = 3,
+ NFPROTO_NETDEV = 5,
NFPROTO_BRIDGE = 7,
NFPROTO_IPV6 = 10,
NFPROTO_DECNET = 12,
diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index e0a09df1..8a7ca5c6 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -54,6 +54,7 @@ struct nlmsghdr {
#define NLM_F_ACK 4 /* Reply with ack, with zero or error code */
#define NLM_F_ECHO 8 /* Echo this request */
#define NLM_F_DUMP_INTR 16 /* Dump was inconsistent due to sequence change */
+#define NLM_F_DUMP_FILTERED 32 /* Dump was filtered as requested */
/* Modifiers to GET request */
#define NLM_F_ROOT 0x100 /* specify tree root */
@@ -101,13 +102,16 @@ struct nlmsgerr {
struct nlmsghdr msg;
};
-#define NETLINK_ADD_MEMBERSHIP 1
-#define NETLINK_DROP_MEMBERSHIP 2
-#define NETLINK_PKTINFO 3
-#define NETLINK_BROADCAST_ERROR 4
-#define NETLINK_NO_ENOBUFS 5
-#define NETLINK_RX_RING 6
-#define NETLINK_TX_RING 7
+#define NETLINK_ADD_MEMBERSHIP 1
+#define NETLINK_DROP_MEMBERSHIP 2
+#define NETLINK_PKTINFO 3
+#define NETLINK_BROADCAST_ERROR 4
+#define NETLINK_NO_ENOBUFS 5
+#define NETLINK_RX_RING 6
+#define NETLINK_TX_RING 7
+#define NETLINK_LISTEN_ALL_NSID 8
+#define NETLINK_LIST_MEMBERSHIPS 9
+#define NETLINK_CAP_ACK 10
struct nl_pktinfo {
__u32 group;
diff --git a/include/linux/pkt_cls.h b/include/linux/pkt_cls.h
index 25731dfb..a323146e 100644
--- a/include/linux/pkt_cls.h
+++ b/include/linux/pkt_cls.h
@@ -4,75 +4,6 @@
#include <linux/types.h>
#include <linux/pkt_sched.h>
-/* I think i could have done better macros ; for now this is stolen from
- * some arch/mips code - jhs
-*/
-#define _TC_MAKE32(x) ((x))
-
-#define _TC_MAKEMASK1(n) (_TC_MAKE32(1) << _TC_MAKE32(n))
-#define _TC_MAKEMASK(v,n) (_TC_MAKE32((_TC_MAKE32(1)<<(v))-1) << _TC_MAKE32(n))
-#define _TC_MAKEVALUE(v,n) (_TC_MAKE32(v) << _TC_MAKE32(n))
-#define _TC_GETVALUE(v,n,m) ((_TC_MAKE32(v) & _TC_MAKE32(m)) >> _TC_MAKE32(n))
-
-/* verdict bit breakdown
- *
-bit 0: when set -> this packet has been munged already
-
-bit 1: when set -> It is ok to munge this packet
-
-bit 2,3,4,5: Reclassify counter - sort of reverse TTL - if exceeded
-assume loop
-
-bit 6,7: Where this packet was last seen
-0: Above the transmit example at the socket level
-1: on the Ingress
-2: on the Egress
-
-bit 8: when set --> Request not to classify on ingress.
-
-bits 9,10,11: redirect counter - redirect TTL. Loop avoidance
-
- *
- * */
-
-#define TC_MUNGED _TC_MAKEMASK1(0)
-#define SET_TC_MUNGED(v) ( TC_MUNGED | (v & ~TC_MUNGED))
-#define CLR_TC_MUNGED(v) ( v & ~TC_MUNGED)
-
-#define TC_OK2MUNGE _TC_MAKEMASK1(1)
-#define SET_TC_OK2MUNGE(v) ( TC_OK2MUNGE | (v & ~TC_OK2MUNGE))
-#define CLR_TC_OK2MUNGE(v) ( v & ~TC_OK2MUNGE)
-
-#define S_TC_VERD _TC_MAKE32(2)
-#define M_TC_VERD _TC_MAKEMASK(4,S_TC_VERD)
-#define G_TC_VERD(x) _TC_GETVALUE(x,S_TC_VERD,M_TC_VERD)
-#define V_TC_VERD(x) _TC_MAKEVALUE(x,S_TC_VERD)
-#define SET_TC_VERD(v,n) ((V_TC_VERD(n)) | (v & ~M_TC_VERD))
-
-#define S_TC_FROM _TC_MAKE32(6)
-#define M_TC_FROM _TC_MAKEMASK(2,S_TC_FROM)
-#define G_TC_FROM(x) _TC_GETVALUE(x,S_TC_FROM,M_TC_FROM)
-#define V_TC_FROM(x) _TC_MAKEVALUE(x,S_TC_FROM)
-#define SET_TC_FROM(v,n) ((V_TC_FROM(n)) | (v & ~M_TC_FROM))
-#define AT_STACK 0x0
-#define AT_INGRESS 0x1
-#define AT_EGRESS 0x2
-
-#define TC_NCLS _TC_MAKEMASK1(8)
-#define SET_TC_NCLS(v) ( TC_NCLS | (v & ~TC_NCLS))
-#define CLR_TC_NCLS(v) ( v & ~TC_NCLS)
-
-#define S_TC_RTTL _TC_MAKE32(9)
-#define M_TC_RTTL _TC_MAKEMASK(3,S_TC_RTTL)
-#define G_TC_RTTL(x) _TC_GETVALUE(x,S_TC_RTTL,M_TC_RTTL)
-#define V_TC_RTTL(x) _TC_MAKEVALUE(x,S_TC_RTTL)
-#define SET_TC_RTTL(v,n) ((V_TC_RTTL(n)) | (v & ~M_TC_RTTL))
-
-#define S_TC_AT _TC_MAKE32(12)
-#define M_TC_AT _TC_MAKEMASK(2,S_TC_AT)
-#define G_TC_AT(x) _TC_GETVALUE(x,S_TC_AT,M_TC_AT)
-#define V_TC_AT(x) _TC_MAKEVALUE(x,S_TC_AT)
-#define SET_TC_AT(v,n) ((V_TC_AT(n)) | (v & ~M_TC_AT))
/* Action attributes */
enum {
@@ -93,8 +24,6 @@ enum {
#define TCA_ACT_NOUNBIND 0
#define TCA_ACT_REPLACE 1
#define TCA_ACT_NOREPLACE 0
-#define MAX_REC_LOOP 4
-#define MAX_RED_LOOP 4
#define TC_ACT_UNSPEC (-1)
#define TC_ACT_OK 0
@@ -104,6 +33,7 @@ enum {
#define TC_ACT_STOLEN 4
#define TC_ACT_QUEUED 5
#define TC_ACT_REPEAT 6
+#define TC_ACT_REDIRECT 7
#define TC_ACT_JUMP 0x10000000
/* Action type identifiers*/
@@ -390,6 +320,8 @@ enum {
/* BPF classifier */
+#define TCA_BPF_FLAG_ACT_DIRECT (1 << 0)
+
enum {
TCA_BPF_UNSPEC,
TCA_BPF_ACT,
@@ -397,11 +329,44 @@ enum {
TCA_BPF_CLASSID,
TCA_BPF_OPS_LEN,
TCA_BPF_OPS,
+ TCA_BPF_FD,
+ TCA_BPF_NAME,
+ TCA_BPF_FLAGS,
__TCA_BPF_MAX,
};
#define TCA_BPF_MAX (__TCA_BPF_MAX - 1)
+/* Flower classifier */
+
+enum {
+ TCA_FLOWER_UNSPEC,
+ TCA_FLOWER_CLASSID,
+ TCA_FLOWER_INDEV,
+ TCA_FLOWER_ACT,
+ TCA_FLOWER_KEY_ETH_DST, /* ETH_ALEN */
+ TCA_FLOWER_KEY_ETH_DST_MASK, /* ETH_ALEN */
+ TCA_FLOWER_KEY_ETH_SRC, /* ETH_ALEN */
+ TCA_FLOWER_KEY_ETH_SRC_MASK, /* ETH_ALEN */
+ TCA_FLOWER_KEY_ETH_TYPE, /* be16 */
+ TCA_FLOWER_KEY_IP_PROTO, /* u8 */
+ TCA_FLOWER_KEY_IPV4_SRC, /* be32 */
+ TCA_FLOWER_KEY_IPV4_SRC_MASK, /* be32 */
+ TCA_FLOWER_KEY_IPV4_DST, /* be32 */
+ TCA_FLOWER_KEY_IPV4_DST_MASK, /* be32 */
+ TCA_FLOWER_KEY_IPV6_SRC, /* struct in6_addr */
+ TCA_FLOWER_KEY_IPV6_SRC_MASK, /* struct in6_addr */
+ TCA_FLOWER_KEY_IPV6_DST, /* struct in6_addr */
+ TCA_FLOWER_KEY_IPV6_DST_MASK, /* struct in6_addr */
+ TCA_FLOWER_KEY_TCP_SRC, /* be16 */
+ TCA_FLOWER_KEY_TCP_DST, /* be16 */
+ TCA_FLOWER_KEY_UDP_SRC, /* be16 */
+ TCA_FLOWER_KEY_UDP_DST, /* be16 */
+ __TCA_FLOWER_MAX,
+};
+
+#define TCA_FLOWER_MAX (__TCA_FLOWER_MAX - 1)
+
/* Extended Matches */
struct tcf_ematch_tree_hdr {
diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
index 534b8471..8cb18b44 100644
--- a/include/linux/pkt_sched.h
+++ b/include/linux/pkt_sched.h
@@ -72,6 +72,10 @@ struct tc_estimator {
#define TC_H_UNSPEC (0U)
#define TC_H_ROOT (0xFFFFFFFFU)
#define TC_H_INGRESS (0xFFFFFFF1U)
+#define TC_H_CLSACT TC_H_INGRESS
+
+#define TC_H_MIN_INGRESS 0xFFF2U
+#define TC_H_MIN_EGRESS 0xFFF3U
/* Need to corrospond to iproute2 tc/tc_core.h "enum link_layer" */
enum tc_link_layer {
@@ -268,7 +272,8 @@ enum {
TCA_GRED_STAB,
TCA_GRED_DPS,
TCA_GRED_MAX_P,
- __TCA_GRED_MAX,
+ TCA_GRED_LIMIT,
+ __TCA_GRED_MAX,
};
#define TCA_GRED_MAX (__TCA_GRED_MAX - 1)
@@ -679,6 +684,7 @@ enum {
TCA_CODEL_LIMIT,
TCA_CODEL_INTERVAL,
TCA_CODEL_ECN,
+ TCA_CODEL_CE_THRESHOLD,
__TCA_CODEL_MAX
};
@@ -695,6 +701,7 @@ struct tc_codel_xstats {
__u32 drop_overlimit; /* number of time max qdisc packet limit was hit */
__u32 ecn_mark; /* number of packets we ECN marked instead of dropped */
__u32 dropping; /* are we in dropping state ? */
+ __u32 ce_mark; /* number of CE marked packets because of ce_threshold */
};
/* FQ_CODEL */
@@ -707,6 +714,7 @@ enum {
TCA_FQ_CODEL_ECN,
TCA_FQ_CODEL_FLOWS,
TCA_FQ_CODEL_QUANTUM,
+ TCA_FQ_CODEL_CE_THRESHOLD,
__TCA_FQ_CODEL_MAX
};
@@ -730,6 +738,7 @@ struct tc_fq_codel_qd_stats {
*/
__u32 new_flows_len; /* count of flows in new list */
__u32 old_flows_len; /* count of flows in old list */
+ __u32 ce_mark; /* packets above ce_threshold */
};
struct tc_fq_codel_cl_stats {
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 5710d489..dad2e8e0 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -134,6 +134,8 @@ enum {
RTM_NEWNSID = 88,
#define RTM_NEWNSID RTM_NEWNSID
+ RTM_DELNSID = 89,
+#define RTM_DELNSID RTM_DELNSID
RTM_GETNSID = 90,
#define RTM_GETNSID RTM_GETNSID
@@ -158,7 +160,7 @@ struct rtattr {
/* Macros to handle rtattributes */
-#define RTA_ALIGNTO 4
+#define RTA_ALIGNTO 4U
#define RTA_ALIGN(len) ( ((len)+RTA_ALIGNTO-1) & ~(RTA_ALIGNTO-1) )
#define RTA_OK(rta,len) ((len) >= (int)sizeof(struct rtattr) && \
(rta)->rta_len >= sizeof(struct rtattr) && \
@@ -268,6 +270,7 @@ enum rt_scope_t {
#define RTM_F_CLONED 0x200 /* This route is cloned */
#define RTM_F_EQUALIZE 0x400 /* Multipath equalizer: NI */
#define RTM_F_PREFIX 0x800 /* Prefix addresses */
+#define RTM_F_LOOKUP_TABLE 0x1000 /* set rtm_table to FIB lookup result */
/* Reserved table identifiers */
@@ -304,6 +307,12 @@ enum rtattr_type_t {
RTA_MARK,
RTA_MFC_STATS, /* not used - backported from the future */
RTA_UID,
+ RTA_VIA,
+ RTA_NEWDST,
+ RTA_PREF,
+ RTA_ENCAP_TYPE,
+ RTA_ENCAP,
+ RTA_EXPIRES,
__RTA_MAX
};
@@ -333,6 +342,10 @@ struct rtnexthop {
#define RTNH_F_DEAD 1 /* Nexthop is dead (used by multipath) */
#define RTNH_F_PERVASIVE 2 /* Do recursive gateway lookup */
#define RTNH_F_ONLINK 4 /* Gateway is forced on link */
+#define RTNH_F_OFFLOAD 8 /* offloaded route */
+#define RTNH_F_LINKDOWN 16 /* carrier-down on nexthop */
+
+#define RTNH_COMPARE_MASK (RTNH_F_DEAD | RTNH_F_LINKDOWN)
/* Macros to handle hexthops */
@@ -345,6 +358,12 @@ struct rtnexthop {
#define RTNH_SPACE(len) RTNH_ALIGN(RTNH_LENGTH(len))
#define RTNH_DATA(rtnh) ((struct rtattr*)(((char*)(rtnh)) + RTNH_LENGTH(0)))
+/* RTA_VIA */
+struct rtvia {
+ __kernel_sa_family_t rtvia_family;
+ __u8 rtvia_addr[0];
+};
+
/* RTM_CACHEINFO */
struct rta_cacheinfo {
@@ -402,10 +421,13 @@ enum {
#define RTAX_MAX (__RTAX_MAX - 1)
-#define RTAX_FEATURE_ECN 0x00000001
-#define RTAX_FEATURE_SACK 0x00000002
-#define RTAX_FEATURE_TIMESTAMP 0x00000004
-#define RTAX_FEATURE_ALLFRAG 0x00000008
+#define RTAX_FEATURE_ECN (1 << 0)
+#define RTAX_FEATURE_SACK (1 << 1)
+#define RTAX_FEATURE_TIMESTAMP (1 << 2)
+#define RTAX_FEATURE_ALLFRAG (1 << 3)
+
+#define RTAX_FEATURE_MASK (RTAX_FEATURE_ECN | RTAX_FEATURE_SACK | \
+ RTAX_FEATURE_TIMESTAMP | RTAX_FEATURE_ALLFRAG)
struct rta_session {
__u8 proto;
@@ -622,6 +644,10 @@ enum rtnetlink_groups {
#define RTNLGRP_IPV6_NETCONF RTNLGRP_IPV6_NETCONF
RTNLGRP_MDB,
#define RTNLGRP_MDB RTNLGRP_MDB
+ RTNLGRP_MPLS_ROUTE,
+#define RTNLGRP_MPLS_ROUTE RTNLGRP_MPLS_ROUTE
+ RTNLGRP_NSID,
+#define RTNLGRP_NSID RTNLGRP_NSID
__RTNLGRP_MAX
};
#define RTNLGRP_MAX (__RTNLGRP_MAX - 1)
@@ -641,6 +667,7 @@ struct tcamsg {
#define RTEXT_FILTER_VF (1 << 0)
#define RTEXT_FILTER_BRVLAN (1 << 1)
#define RTEXT_FILTER_BRVLAN_COMPRESSED (1 << 2)
+#define RTEXT_FILTER_SKIP_STATS (1 << 3)
/* End of information exported to user level */
diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h
index 78996e23..dafcb891 100644
--- a/include/linux/sock_diag.h
+++ b/include/linux/sock_diag.h
@@ -4,6 +4,7 @@
#include <linux/types.h>
#define SOCK_DIAG_BY_FAMILY 20
+#define SOCK_DESTROY 21
struct sock_diag_req {
__u8 sdiag_family;
@@ -23,4 +24,14 @@ enum {
SK_MEMINFO_VARS,
};
+enum sknetlink_groups {
+ SKNLGRP_NONE,
+ SKNLGRP_INET_TCP_DESTROY,
+ SKNLGRP_INET_UDP_DESTROY,
+ SKNLGRP_INET6_TCP_DESTROY,
+ SKNLGRP_INET6_UDP_DESTROY,
+ __SKNLGRP_MAX,
+};
+#define SKNLGRP_MAX (__SKNLGRP_MAX - 1)
+
#endif /* __SOCK_DIAG_H__ */
diff --git a/include/linux/tc_act/tc_bpf.h b/include/linux/tc_act/tc_bpf.h
index 5288bd77..07f17cc7 100644
--- a/include/linux/tc_act/tc_bpf.h
+++ b/include/linux/tc_act/tc_bpf.h
@@ -24,6 +24,8 @@ enum {
TCA_ACT_BPF_PARMS,
TCA_ACT_BPF_OPS_LEN,
TCA_ACT_BPF_OPS,
+ TCA_ACT_BPF_FD,
+ TCA_ACT_BPF_NAME,
__TCA_ACT_BPF_MAX,
};
#define TCA_ACT_BPF_MAX (__TCA_ACT_BPF_MAX - 1)
diff --git a/include/linux/tc_act/tc_connmark.h b/include/linux/tc_act/tc_connmark.h
new file mode 100644
index 00000000..994b0971
--- /dev/null
+++ b/include/linux/tc_act/tc_connmark.h
@@ -0,0 +1,22 @@
+#ifndef __UAPI_TC_CONNMARK_H
+#define __UAPI_TC_CONNMARK_H
+
+#include <linux/types.h>
+#include <linux/pkt_cls.h>
+
+#define TCA_ACT_CONNMARK 14
+
+struct tc_connmark {
+ tc_gen;
+ __u16 zone;
+};
+
+enum {
+ TCA_CONNMARK_UNSPEC,
+ TCA_CONNMARK_PARMS,
+ TCA_CONNMARK_TM,
+ __TCA_CONNMARK_MAX
+};
+#define TCA_CONNMARK_MAX (__TCA_CONNMARK_MAX - 1)
+
+#endif
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index f96e0158..1e9b4a62 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -112,6 +112,9 @@ enum {
#define TCP_FASTOPEN 23 /* Enable FastOpen on listeners */
#define TCP_TIMESTAMP 24
#define TCP_NOTSENT_LOWAT 25 /* limit number of unsent bytes in write queue */
+#define TCP_CC_INFO 26 /* Get Congestion Control (optional) info */
+#define TCP_SAVE_SYN 27 /* Record SYN headers for new connections */
+#define TCP_SAVED_SYN 28 /* Get SYN headers recorded for connection */
struct tcp_repair_opt {
__u32 opt_code;
@@ -189,6 +192,10 @@ struct tcp_info {
__u64 tcpi_pacing_rate;
__u64 tcpi_max_pacing_rate;
+ __u64 tcpi_bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked */
+ __u64 tcpi_bytes_received; /* RFC4898 tcpEStatsAppHCThruOctetsReceived */
+ __u32 tcpi_segs_out; /* RFC4898 tcpEStatsPerfSegsOut */
+ __u32 tcpi_segs_in; /* RFC4898 tcpEStatsPerfSegsIn */
};
/* for TCP_MD5SIG socket option */
diff --git a/include/linux/tipc.h b/include/linux/tipc.h
new file mode 100644
index 00000000..ebd3b63c
--- /dev/null
+++ b/include/linux/tipc.h
@@ -0,0 +1,232 @@
+/*
+ * include/uapi/linux/tipc.h: Header for TIPC socket interface
+ *
+ * Copyright (c) 2003-2006, Ericsson AB
+ * Copyright (c) 2005, 2010-2011, Wind River Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _LINUX_TIPC_H_
+#define _LINUX_TIPC_H_
+
+#include <linux/types.h>
+#include <linux/sockios.h>
+
+/*
+ * TIPC addressing primitives
+ */
+
+struct tipc_portid {
+ __u32 ref;
+ __u32 node;
+};
+
+struct tipc_name {
+ __u32 type;
+ __u32 instance;
+};
+
+struct tipc_name_seq {
+ __u32 type;
+ __u32 lower;
+ __u32 upper;
+};
+
+static __inline__ __u32 tipc_addr(unsigned int zone,
+ unsigned int cluster,
+ unsigned int node)
+{
+ return (zone << 24) | (cluster << 12) | node;
+}
+
+static __inline__ unsigned int tipc_zone(__u32 addr)
+{
+ return addr >> 24;
+}
+
+static __inline__ unsigned int tipc_cluster(__u32 addr)
+{
+ return (addr >> 12) & 0xfff;
+}
+
+static __inline__ unsigned int tipc_node(__u32 addr)
+{
+ return addr & 0xfff;
+}
+
+/*
+ * Application-accessible port name types
+ */
+
+#define TIPC_CFG_SRV 0 /* configuration service name type */
+#define TIPC_TOP_SRV 1 /* topology service name type */
+#define TIPC_LINK_STATE 2 /* link state name type */
+#define TIPC_RESERVED_TYPES 64 /* lowest user-publishable name type */
+
+/*
+ * Publication scopes when binding port names and port name sequences
+ */
+
+#define TIPC_ZONE_SCOPE 1
+#define TIPC_CLUSTER_SCOPE 2
+#define TIPC_NODE_SCOPE 3
+
+/*
+ * Limiting values for messages
+ */
+
+#define TIPC_MAX_USER_MSG_SIZE 66000U
+
+/*
+ * Message importance levels
+ */
+
+#define TIPC_LOW_IMPORTANCE 0
+#define TIPC_MEDIUM_IMPORTANCE 1
+#define TIPC_HIGH_IMPORTANCE 2
+#define TIPC_CRITICAL_IMPORTANCE 3
+
+/*
+ * Msg rejection/connection shutdown reasons
+ */
+
+#define TIPC_OK 0
+#define TIPC_ERR_NO_NAME 1
+#define TIPC_ERR_NO_PORT 2
+#define TIPC_ERR_NO_NODE 3
+#define TIPC_ERR_OVERLOAD 4
+#define TIPC_CONN_SHUTDOWN 5
+
+/*
+ * TIPC topology subscription service definitions
+ */
+
+#define TIPC_SUB_PORTS 0x01 /* filter for port availability */
+#define TIPC_SUB_SERVICE 0x02 /* filter for service availability */
+#define TIPC_SUB_CANCEL 0x04 /* cancel a subscription */
+
+#define TIPC_WAIT_FOREVER (~0) /* timeout for permanent subscription */
+
+struct tipc_subscr {
+ struct tipc_name_seq seq; /* name sequence of interest */
+ __u32 timeout; /* subscription duration (in ms) */
+ __u32 filter; /* bitmask of filter options */
+ char usr_handle[8]; /* available for subscriber use */
+};
+
+#define TIPC_PUBLISHED 1 /* publication event */
+#define TIPC_WITHDRAWN 2 /* withdraw event */
+#define TIPC_SUBSCR_TIMEOUT 3 /* subscription timeout event */
+
+struct tipc_event {
+ __u32 event; /* event type */
+ __u32 found_lower; /* matching name seq instances */
+ __u32 found_upper; /* " " " " */
+ struct tipc_portid port; /* associated port */
+ struct tipc_subscr s; /* associated subscription */
+};
+
+/*
+ * Socket API
+ */
+
+#ifndef AF_TIPC
+#define AF_TIPC 30
+#endif
+
+#ifndef PF_TIPC
+#define PF_TIPC AF_TIPC
+#endif
+
+#ifndef SOL_TIPC
+#define SOL_TIPC 271
+#endif
+
+#define TIPC_ADDR_NAMESEQ 1
+#define TIPC_ADDR_MCAST 1
+#define TIPC_ADDR_NAME 2
+#define TIPC_ADDR_ID 3
+
+struct sockaddr_tipc {
+ unsigned short family;
+ unsigned char addrtype;
+ signed char scope;
+ union {
+ struct tipc_portid id;
+ struct tipc_name_seq nameseq;
+ struct {
+ struct tipc_name name;
+ __u32 domain;
+ } name;
+ } addr;
+};
+
+/*
+ * Ancillary data objects supported by recvmsg()
+ */
+
+#define TIPC_ERRINFO 1 /* error info */
+#define TIPC_RETDATA 2 /* returned data */
+#define TIPC_DESTNAME 3 /* destination name */
+
+/*
+ * TIPC-specific socket option values
+ */
+
+#define TIPC_IMPORTANCE 127 /* Default: TIPC_LOW_IMPORTANCE */
+#define TIPC_SRC_DROPPABLE 128 /* Default: based on socket type */
+#define TIPC_DEST_DROPPABLE 129 /* Default: based on socket type */
+#define TIPC_CONN_TIMEOUT 130 /* Default: 8000 (ms) */
+#define TIPC_NODE_RECVQ_DEPTH 131 /* Default: none (read only) */
+#define TIPC_SOCK_RECVQ_DEPTH 132 /* Default: none (read only) */
+
+/*
+ * Maximum sizes of TIPC bearer-related names (including terminating NUL)
+ * The string formatting for each name element is:
+ * media: media
+ * interface: media:interface name
+ * link: Z.C.N:interface-Z.C.N:interface
+ *
+ */
+
+#define TIPC_MAX_MEDIA_NAME 16
+#define TIPC_MAX_IF_NAME 16
+#define TIPC_MAX_BEARER_NAME 32
+#define TIPC_MAX_LINK_NAME 60
+
+#define SIOCGETLINKNAME SIOCPROTOPRIVATE
+
+struct tipc_sioc_ln_req {
+ __u32 peer;
+ __u32 bearer_id;
+ char linkname[TIPC_MAX_LINK_NAME];
+};
+#endif
diff --git a/include/linux/tipc_netlink.h b/include/linux/tipc_netlink.h
new file mode 100644
index 00000000..25eb645e
--- /dev/null
+++ b/include/linux/tipc_netlink.h
@@ -0,0 +1,254 @@
+/*
+ * Copyright (c) 2014, Ericsson AB
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _LINUX_TIPC_NETLINK_H_
+#define _LINUX_TIPC_NETLINK_H_
+
+#define TIPC_GENL_V2_NAME "TIPCv2"
+#define TIPC_GENL_V2_VERSION 0x1
+
+/* Netlink commands */
+enum {
+ TIPC_NL_UNSPEC,
+ TIPC_NL_LEGACY,
+ TIPC_NL_BEARER_DISABLE,
+ TIPC_NL_BEARER_ENABLE,
+ TIPC_NL_BEARER_GET,
+ TIPC_NL_BEARER_SET,
+ TIPC_NL_SOCK_GET,
+ TIPC_NL_PUBL_GET,
+ TIPC_NL_LINK_GET,
+ TIPC_NL_LINK_SET,
+ TIPC_NL_LINK_RESET_STATS,
+ TIPC_NL_MEDIA_GET,
+ TIPC_NL_MEDIA_SET,
+ TIPC_NL_NODE_GET,
+ TIPC_NL_NET_GET,
+ TIPC_NL_NET_SET,
+ TIPC_NL_NAME_TABLE_GET,
+ TIPC_NL_PEER_REMOVE,
+
+ __TIPC_NL_CMD_MAX,
+ TIPC_NL_CMD_MAX = __TIPC_NL_CMD_MAX - 1
+};
+
+/* Top level netlink attributes */
+enum {
+ TIPC_NLA_UNSPEC,
+ TIPC_NLA_BEARER, /* nest */
+ TIPC_NLA_SOCK, /* nest */
+ TIPC_NLA_PUBL, /* nest */
+ TIPC_NLA_LINK, /* nest */
+ TIPC_NLA_MEDIA, /* nest */
+ TIPC_NLA_NODE, /* nest */
+ TIPC_NLA_NET, /* nest */
+ TIPC_NLA_NAME_TABLE, /* nest */
+
+ __TIPC_NLA_MAX,
+ TIPC_NLA_MAX = __TIPC_NLA_MAX - 1
+};
+
+/* Bearer info */
+enum {
+ TIPC_NLA_BEARER_UNSPEC,
+ TIPC_NLA_BEARER_NAME, /* string */
+ TIPC_NLA_BEARER_PROP, /* nest */
+ TIPC_NLA_BEARER_DOMAIN, /* u32 */
+ TIPC_NLA_BEARER_UDP_OPTS, /* nest */
+
+ __TIPC_NLA_BEARER_MAX,
+ TIPC_NLA_BEARER_MAX = __TIPC_NLA_BEARER_MAX - 1
+};
+
+enum {
+ TIPC_NLA_UDP_UNSPEC,
+ TIPC_NLA_UDP_LOCAL, /* sockaddr_storage */
+ TIPC_NLA_UDP_REMOTE, /* sockaddr_storage */
+
+ __TIPC_NLA_UDP_MAX,
+ TIPC_NLA_UDP_MAX = __TIPC_NLA_UDP_MAX - 1
+};
+/* Socket info */
+enum {
+ TIPC_NLA_SOCK_UNSPEC,
+ TIPC_NLA_SOCK_ADDR, /* u32 */
+ TIPC_NLA_SOCK_REF, /* u32 */
+ TIPC_NLA_SOCK_CON, /* nest */
+ TIPC_NLA_SOCK_HAS_PUBL, /* flag */
+
+ __TIPC_NLA_SOCK_MAX,
+ TIPC_NLA_SOCK_MAX = __TIPC_NLA_SOCK_MAX - 1
+};
+
+/* Link info */
+enum {
+ TIPC_NLA_LINK_UNSPEC,
+ TIPC_NLA_LINK_NAME, /* string */
+ TIPC_NLA_LINK_DEST, /* u32 */
+ TIPC_NLA_LINK_MTU, /* u32 */
+ TIPC_NLA_LINK_BROADCAST, /* flag */
+ TIPC_NLA_LINK_UP, /* flag */
+ TIPC_NLA_LINK_ACTIVE, /* flag */
+ TIPC_NLA_LINK_PROP, /* nest */
+ TIPC_NLA_LINK_STATS, /* nest */
+ TIPC_NLA_LINK_RX, /* u32 */
+ TIPC_NLA_LINK_TX, /* u32 */
+
+ __TIPC_NLA_LINK_MAX,
+ TIPC_NLA_LINK_MAX = __TIPC_NLA_LINK_MAX - 1
+};
+
+/* Media info */
+enum {
+ TIPC_NLA_MEDIA_UNSPEC,
+ TIPC_NLA_MEDIA_NAME, /* string */
+ TIPC_NLA_MEDIA_PROP, /* nest */
+
+ __TIPC_NLA_MEDIA_MAX,
+ TIPC_NLA_MEDIA_MAX = __TIPC_NLA_MEDIA_MAX - 1
+};
+
+/* Node info */
+enum {
+ TIPC_NLA_NODE_UNSPEC,
+ TIPC_NLA_NODE_ADDR, /* u32 */
+ TIPC_NLA_NODE_UP, /* flag */
+
+ __TIPC_NLA_NODE_MAX,
+ TIPC_NLA_NODE_MAX = __TIPC_NLA_NODE_MAX - 1
+};
+
+/* Net info */
+enum {
+ TIPC_NLA_NET_UNSPEC,
+ TIPC_NLA_NET_ID, /* u32 */
+ TIPC_NLA_NET_ADDR, /* u32 */
+
+ __TIPC_NLA_NET_MAX,
+ TIPC_NLA_NET_MAX = __TIPC_NLA_NET_MAX - 1
+};
+
+/* Name table info */
+enum {
+ TIPC_NLA_NAME_TABLE_UNSPEC,
+ TIPC_NLA_NAME_TABLE_PUBL, /* nest */
+
+ __TIPC_NLA_NAME_TABLE_MAX,
+ TIPC_NLA_NAME_TABLE_MAX = __TIPC_NLA_NAME_TABLE_MAX - 1
+};
+
+/* Publication info */
+enum {
+ TIPC_NLA_PUBL_UNSPEC,
+
+ TIPC_NLA_PUBL_TYPE, /* u32 */
+ TIPC_NLA_PUBL_LOWER, /* u32 */
+ TIPC_NLA_PUBL_UPPER, /* u32 */
+ TIPC_NLA_PUBL_SCOPE, /* u32 */
+ TIPC_NLA_PUBL_NODE, /* u32 */
+ TIPC_NLA_PUBL_REF, /* u32 */
+ TIPC_NLA_PUBL_KEY, /* u32 */
+
+ __TIPC_NLA_PUBL_MAX,
+ TIPC_NLA_PUBL_MAX = __TIPC_NLA_PUBL_MAX - 1
+};
+
+/* Nest, connection info */
+enum {
+ TIPC_NLA_CON_UNSPEC,
+
+ TIPC_NLA_CON_FLAG, /* flag */
+ TIPC_NLA_CON_NODE, /* u32 */
+ TIPC_NLA_CON_SOCK, /* u32 */
+ TIPC_NLA_CON_TYPE, /* u32 */
+ TIPC_NLA_CON_INST, /* u32 */
+
+ __TIPC_NLA_CON_MAX,
+ TIPC_NLA_CON_MAX = __TIPC_NLA_CON_MAX - 1
+};
+
+/* Nest, link properties. Valid for link, media and bearer */
+enum {
+ TIPC_NLA_PROP_UNSPEC,
+
+ TIPC_NLA_PROP_PRIO, /* u32 */
+ TIPC_NLA_PROP_TOL, /* u32 */
+ TIPC_NLA_PROP_WIN, /* u32 */
+
+ __TIPC_NLA_PROP_MAX,
+ TIPC_NLA_PROP_MAX = __TIPC_NLA_PROP_MAX - 1
+};
+
+/* Nest, statistics info */
+enum {
+ TIPC_NLA_STATS_UNSPEC,
+
+ TIPC_NLA_STATS_RX_INFO, /* u32 */
+ TIPC_NLA_STATS_RX_FRAGMENTS, /* u32 */
+ TIPC_NLA_STATS_RX_FRAGMENTED, /* u32 */
+ TIPC_NLA_STATS_RX_BUNDLES, /* u32 */
+ TIPC_NLA_STATS_RX_BUNDLED, /* u32 */
+ TIPC_NLA_STATS_TX_INFO, /* u32 */
+ TIPC_NLA_STATS_TX_FRAGMENTS, /* u32 */
+ TIPC_NLA_STATS_TX_FRAGMENTED, /* u32 */
+ TIPC_NLA_STATS_TX_BUNDLES, /* u32 */
+ TIPC_NLA_STATS_TX_BUNDLED, /* u32 */
+ TIPC_NLA_STATS_MSG_PROF_TOT, /* u32 */
+ TIPC_NLA_STATS_MSG_LEN_CNT, /* u32 */
+ TIPC_NLA_STATS_MSG_LEN_TOT, /* u32 */
+ TIPC_NLA_STATS_MSG_LEN_P0, /* u32 */
+ TIPC_NLA_STATS_MSG_LEN_P1, /* u32 */
+ TIPC_NLA_STATS_MSG_LEN_P2, /* u32 */
+ TIPC_NLA_STATS_MSG_LEN_P3, /* u32 */
+ TIPC_NLA_STATS_MSG_LEN_P4, /* u32 */
+ TIPC_NLA_STATS_MSG_LEN_P5, /* u32 */
+ TIPC_NLA_STATS_MSG_LEN_P6, /* u32 */
+ TIPC_NLA_STATS_RX_STATES, /* u32 */
+ TIPC_NLA_STATS_RX_PROBES, /* u32 */
+ TIPC_NLA_STATS_RX_NACKS, /* u32 */
+ TIPC_NLA_STATS_RX_DEFERRED, /* u32 */
+ TIPC_NLA_STATS_TX_STATES, /* u32 */
+ TIPC_NLA_STATS_TX_PROBES, /* u32 */
+ TIPC_NLA_STATS_TX_NACKS, /* u32 */
+ TIPC_NLA_STATS_TX_ACKS, /* u32 */
+ TIPC_NLA_STATS_RETRANSMITTED, /* u32 */
+ TIPC_NLA_STATS_DUPLICATES, /* u32 */
+ TIPC_NLA_STATS_LINK_CONGS, /* u32 */
+ TIPC_NLA_STATS_MAX_QUEUE, /* u32 */
+ TIPC_NLA_STATS_AVG_QUEUE, /* u32 */
+
+ __TIPC_NLA_STATS_MAX,
+ TIPC_NLA_STATS_MAX = __TIPC_NLA_STATS_MAX - 1
+};
+
+#endif
diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h
index 3a1fd329..b8f54510 100644
--- a/include/linux/xfrm.h
+++ b/include/linux/xfrm.h
@@ -1,6 +1,7 @@
#ifndef _LINUX_XFRM_H
#define _LINUX_XFRM_H
+#include <linux/in6.h>
#include <linux/types.h>
/* All of the structures in this file may not change size as they are
@@ -13,6 +14,7 @@
typedef union {
__be32 a4;
__be32 a6[4];
+ struct in6_addr in6;
} xfrm_address_t;
/* Ident of a specific xfrm_state. It is used on input to lookup
diff --git a/include/ll_map.h b/include/ll_map.h
index 4c78498e..949bfc3e 100644
--- a/include/ll_map.h
+++ b/include/ll_map.h
@@ -1,14 +1,15 @@
#ifndef __LL_MAP_H__
#define __LL_MAP_H__ 1
-extern int ll_remember_index(const struct sockaddr_nl *who,
- struct nlmsghdr *n, void *arg);
+int ll_remember_index(const struct sockaddr_nl *who,
+ struct nlmsghdr *n, void *arg);
-extern void ll_init_map(struct rtnl_handle *rth);
-extern unsigned ll_name_to_index(const char *name);
-extern const char *ll_index_to_name(unsigned idx);
-extern const char *ll_idx_n2a(unsigned idx, char *buf);
-extern int ll_index_to_type(unsigned idx);
-extern int ll_index_to_flags(unsigned idx);
+void ll_init_map(struct rtnl_handle *rth);
+unsigned ll_name_to_index(const char *name);
+const char *ll_index_to_name(unsigned idx);
+const char *ll_idx_n2a(unsigned idx, char *buf);
+int ll_index_to_type(unsigned idx);
+int ll_index_to_flags(unsigned idx);
+unsigned namehash(const char *str);
#endif /* __LL_MAP_H__ */
diff --git a/include/namespace.h b/include/namespace.h
index a2ac7dcc..51324b21 100644
--- a/include/namespace.h
+++ b/include/namespace.h
@@ -3,6 +3,7 @@
#include <sched.h>
#include <sys/mount.h>
+#include <unistd.h>
#include <sys/syscall.h>
#include <errno.h>
@@ -42,9 +43,9 @@ static inline int setns(int fd, int nstype)
}
#endif /* HAVE_SETNS */
-extern int netns_switch(char *netns);
-extern int netns_get_fd(const char *netns);
-extern int netns_foreach(int (*func)(char *nsname, void *arg), void *arg);
+int netns_switch(char *netns);
+int netns_get_fd(const char *netns);
+int netns_foreach(int (*func)(char *nsname, void *arg), void *arg);
struct netns_func {
int (*func)(char *nsname, void *arg);
diff --git a/include/rt_names.h b/include/rt_names.h
index c0ea4f98..921be060 100644
--- a/include/rt_names.h
+++ b/include/rt_names.h
@@ -22,7 +22,7 @@ int inet_proto_a2n(const char *buf);
const char * ll_type_n2a(int type, char *buf, int len);
-const char *ll_addr_n2a(unsigned char *addr, int alen,
+const char *ll_addr_n2a(const unsigned char *addr, int alen,
int type, char *buf, int blen);
int ll_addr_a2n(char *lladdr, int len, const char *arg);
diff --git a/include/rtm_map.h b/include/rtm_map.h
index 70bda7d0..d6e5885c 100644
--- a/include/rtm_map.h
+++ b/include/rtm_map.h
@@ -4,7 +4,6 @@
char *rtnl_rtntype_n2a(int id, char *buf, int len);
int rtnl_rtntype_a2n(int *id, char *arg);
-int get_rt_realms(__u32 *realms, char *arg);
-
+int get_rt_realms_or_raw(__u32 *realms, char *arg);
#endif /* __RTM_MAP_H__ */
diff --git a/include/utils.h b/include/utils.h
index 9151c4f1..7310f4e0 100644
--- a/include/utils.h
+++ b/include/utils.h
@@ -19,9 +19,10 @@ extern int show_details;
extern int show_raw;
extern int resolve_hosts;
extern int oneline;
+extern int brief;
extern int timestamp;
extern int timestamp_short;
-extern char * _SL_;
+extern const char * _SL_;
extern int max_flush_loops;
extern int batch_mode;
extern bool do_all;
@@ -39,21 +40,27 @@ extern bool do_all;
#define IPSEC_PROTO_ANY 255
#endif
+#ifndef CONFDIR
+#define CONFDIR "/etc/iproute2"
+#endif
+
#define SPRINT_BSIZE 64
#define SPRINT_BUF(x) char x[SPRINT_BSIZE]
-extern void incomplete_command(void) __attribute__((noreturn));
+void incomplete_command(void) __attribute__((noreturn));
#define NEXT_ARG() do { argv++; if (--argc <= 0) incomplete_command(); } while(0)
#define NEXT_ARG_OK() (argc - 1 > 0)
+#define NEXT_ARG_FWD() do { argv++; argc--; } while(0)
#define PREV_ARG() do { argv--; argc++; } while(0)
typedef struct
{
- __u8 family;
- __u8 bytelen;
+ __u16 flags;
+ __u16 bytelen;
__s16 bitlen;
- __u32 flags;
+ /* These next two fields match rtvia */
+ __u16 family;
__u32 data[8];
} inet_prefix;
@@ -77,44 +84,58 @@ struct ipx_addr {
u_int8_t ipx_node[IPX_NODE_LEN];
};
-extern __u32 get_addr32(const char *name);
-extern int get_addr_1(inet_prefix *dst, const char *arg, int family);
-extern int get_prefix_1(inet_prefix *dst, char *arg, int family);
-extern int get_addr(inet_prefix *dst, const char *arg, int family);
-extern int get_prefix(inet_prefix *dst, char *arg, int family);
-extern int mask2bits(__u32 netmask);
+#ifndef AF_MPLS
+# define AF_MPLS 28
+#endif
+
+/* Maximum number of labels the mpls helpers support */
+#define MPLS_MAX_LABELS 8
+
+__u32 get_addr32(const char *name);
+int get_addr_1(inet_prefix *dst, const char *arg, int family);
+int get_prefix_1(inet_prefix *dst, char *arg, int family);
+int get_addr(inet_prefix *dst, const char *arg, int family);
+int get_prefix(inet_prefix *dst, char *arg, int family);
+int mask2bits(__u32 netmask);
+int get_addr_ila(__u64 *val, const char *arg);
-extern int get_integer(int *val, const char *arg, int base);
-extern int get_unsigned(unsigned *val, const char *arg, int base);
-extern int get_time_rtt(unsigned *val, const char *arg, int *raw);
+int get_integer(int *val, const char *arg, int base);
+int get_unsigned(unsigned *val, const char *arg, int base);
+int get_time_rtt(unsigned *val, const char *arg, int *raw);
#define get_byte get_u8
#define get_ushort get_u16
#define get_short get_s16
-extern int get_u64(__u64 *val, const char *arg, int base);
-extern int get_u32(__u32 *val, const char *arg, int base);
-extern int get_s32(__s32 *val, const char *arg, int base);
-extern int get_u16(__u16 *val, const char *arg, int base);
-extern int get_s16(__s16 *val, const char *arg, int base);
-extern int get_u8(__u8 *val, const char *arg, int base);
-extern int get_s8(__s8 *val, const char *arg, int base);
-
-extern char* hexstring_n2a(const __u8 *str, int len, char *buf, int blen);
-extern __u8* hexstring_a2n(const char *str, __u8 *buf, int blen);
-
-extern int af_bit_len(int af);
-extern int af_byte_len(int af);
-
-extern const char *format_host(int af, int len, const void *addr,
+int get_u64(__u64 *val, const char *arg, int base);
+int get_u32(__u32 *val, const char *arg, int base);
+int get_s32(__s32 *val, const char *arg, int base);
+int get_u16(__u16 *val, const char *arg, int base);
+int get_s16(__s16 *val, const char *arg, int base);
+int get_u8(__u8 *val, const char *arg, int base);
+int get_s8(__s8 *val, const char *arg, int base);
+int get_addr64(__u64 *ap, const char *cp);
+
+char* hexstring_n2a(const __u8 *str, int len, char *buf, int blen);
+__u8* hexstring_a2n(const char *str, __u8 *buf, int blen);
+#define ADDR64_BUF_SIZE sizeof("xxxx:xxxx:xxxx:xxxx")
+int addr64_n2a(__u64 addr, char *buff, size_t len);
+
+int af_bit_len(int af);
+int af_byte_len(int af);
+
+const char *format_host(int af, int len, const void *addr,
char *buf, int buflen);
-extern const char *rt_addr_n2a(int af, const void *addr,
+const char *rt_addr_n2a(int af, int len, const void *addr,
char *buf, int buflen);
+int read_family(const char *name);
+const char *family_name(int family);
+
void missarg(const char *) __attribute__((noreturn));
void invarg(const char *, const char *) __attribute__((noreturn));
void duparg(const char *, const char *) __attribute__((noreturn));
void duparg2(const char *, const char *) __attribute__((noreturn));
int matches(const char *arg, const char *pattern);
-extern int inet_addr_match(const inet_prefix *a, const inet_prefix *b, int bits);
+int inet_addr_match(const inet_prefix *a, const inet_prefix *b, int bits);
const char *dnet_ntop(int af, const void *addr, char *str, size_t len);
int dnet_pton(int af, const char *src, void *addr);
@@ -122,8 +143,11 @@ int dnet_pton(int af, const char *src, void *addr);
const char *ipx_ntop(int af, const void *addr, char *str, size_t len);
int ipx_pton(int af, const char *src, void *addr);
+const char *mpls_ntop(int af, const void *addr, char *str, size_t len);
+int mpls_pton(int af, const char *src, void *addr);
+
extern int __iproute2_hz_internal;
-extern int __get_hz(void);
+int __get_hz(void);
static __inline__ int get_hz(void)
{
@@ -133,7 +157,7 @@ static __inline__ int get_hz(void)
}
extern int __iproute2_user_hz_internal;
-extern int __get_user_hz(void);
+int __get_user_hz(void);
static __inline__ int get_user_hz(void)
{
@@ -157,17 +181,42 @@ void print_nlmsg_timestamp(FILE *fp, const struct nlmsghdr *n);
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#define BUILD_BUG_ON(cond) ((void)sizeof(char[1 - 2 * !!(cond)]))
+
+#ifndef offsetof
+# define offsetof(type, member) ((size_t) &((type *)0)->member)
+#endif
+
+#ifndef min
+# define min(x, y) ({ \
+ typeof(x) _min1 = (x); \
+ typeof(y) _min2 = (y); \
+ (void) (&_min1 == &_min2); \
+ _min1 < _min2 ? _min1 : _min2; })
+#endif
+
+#ifndef __check_format_string
+# define __check_format_string(pos_str, pos_args) \
+ __attribute__ ((format (printf, (pos_str), (pos_args))))
+#endif
+
+#define _textify(x) #x
+#define textify(x) _textify(x)
+
+#define htonll(x) ((1==htonl(1)) ? (x) : ((uint64_t)htonl((x) & 0xFFFFFFFF) << 32) | htonl((x) >> 32))
+#define ntohll(x) ((1==ntohl(1)) ? (x) : ((uint64_t)ntohl((x) & 0xFFFFFFFF) << 32) | ntohl((x) >> 32))
+
extern int cmdlineno;
-extern ssize_t getcmdline(char **line, size_t *len, FILE *in);
-extern int makeargs(char *line, char *argv[], int maxargs);
-extern int inet_get_addr(const char *src, __u32 *dst, struct in6_addr *dst6);
+ssize_t getcmdline(char **line, size_t *len, FILE *in);
+int makeargs(char *line, char *argv[], int maxargs);
+int inet_get_addr(const char *src, __u32 *dst, struct in6_addr *dst6);
struct iplink_req;
int iplink_parse(int argc, char **argv, struct iplink_req *req,
char **name, char **type, char **link, char **dev,
int *group, int *index);
-extern int do_each_netns(int (*func)(char *nsname, void *arg), void *arg,
+int do_each_netns(int (*func)(char *nsname, void *arg), void *arg,
bool show_label);
char *int_to_str(int val, char *buf);
diff --git a/include/xtables.h b/include/xtables.h
new file mode 100644
index 00000000..978ae0d1
--- /dev/null
+++ b/include/xtables.h
@@ -0,0 +1,567 @@
+#ifndef _XTABLES_H
+#define _XTABLES_H
+
+/*
+ * Changing any structs/functions may incur a needed change
+ * in libxtables_vcurrent/vage too.
+ */
+
+#include <sys/socket.h> /* PF_* */
+#include <sys/types.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <netinet/in.h>
+#include <net/if.h>
+#include <linux/types.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/x_tables.h>
+
+#ifndef IPPROTO_SCTP
+#define IPPROTO_SCTP 132
+#endif
+#ifndef IPPROTO_DCCP
+#define IPPROTO_DCCP 33
+#endif
+#ifndef IPPROTO_MH
+# define IPPROTO_MH 135
+#endif
+#ifndef IPPROTO_UDPLITE
+#define IPPROTO_UDPLITE 136
+#endif
+
+#include <xtables-version.h>
+
+struct in_addr;
+
+/*
+ * .size is here so that there is a somewhat reasonable check
+ * against the chosen .type.
+ */
+#define XTOPT_POINTER(stype, member) \
+ .ptroff = offsetof(stype, member), \
+ .size = sizeof(((stype *)NULL)->member)
+#define XTOPT_TABLEEND {.name = NULL}
+
+/**
+ * Select the format the input has to conform to, as well as the target type
+ * (area pointed to with XTOPT_POINTER). Note that the storing is not always
+ * uniform. @cb->val will be populated with as much as there is space, i.e.
+ * exactly 2 items for ranges, but the target area can receive more values
+ * (e.g. in case of ranges), or fewer values (e.g. %XTTYPE_HOSTMASK).
+ *
+ * %XTTYPE_NONE: option takes no argument
+ * %XTTYPE_UINT*: standard integer
+ * %XTTYPE_UINT*RC: colon-separated range of standard integers
+ * %XTTYPE_DOUBLE: double-precision floating point number
+ * %XTTYPE_STRING: arbitrary string
+ * %XTTYPE_TOSMASK: 8-bit TOS value with optional mask
+ * %XTTYPE_MARKMASK32: 32-bit mark with optional mask
+ * %XTTYPE_SYSLOGLEVEL: syslog level by name or number
+ * %XTTYPE_HOST: one host or address (ptr: union nf_inet_addr)
+ * %XTTYPE_HOSTMASK: one host or address, with an optional prefix length
+ * (ptr: union nf_inet_addr; only host portion is stored)
+ * %XTTYPE_PROTOCOL: protocol number/name from /etc/protocols (ptr: uint8_t)
+ * %XTTYPE_PORT: 16-bit port name or number (supports %XTOPT_NBO)
+ * %XTTYPE_PORTRC: colon-separated port range (names acceptable),
+ * (supports %XTOPT_NBO)
+ * %XTTYPE_PLEN: prefix length
+ * %XTTYPE_PLENMASK: prefix length (ptr: union nf_inet_addr)
+ * %XTTYPE_ETHERMAC: Ethernet MAC address in hex form
+ */
+enum xt_option_type {
+ XTTYPE_NONE,
+ XTTYPE_UINT8,
+ XTTYPE_UINT16,
+ XTTYPE_UINT32,
+ XTTYPE_UINT64,
+ XTTYPE_UINT8RC,
+ XTTYPE_UINT16RC,
+ XTTYPE_UINT32RC,
+ XTTYPE_UINT64RC,
+ XTTYPE_DOUBLE,
+ XTTYPE_STRING,
+ XTTYPE_TOSMASK,
+ XTTYPE_MARKMASK32,
+ XTTYPE_SYSLOGLEVEL,
+ XTTYPE_HOST,
+ XTTYPE_HOSTMASK,
+ XTTYPE_PROTOCOL,
+ XTTYPE_PORT,
+ XTTYPE_PORTRC,
+ XTTYPE_PLEN,
+ XTTYPE_PLENMASK,
+ XTTYPE_ETHERMAC,
+};
+
+/**
+ * %XTOPT_INVERT: option is invertible (usable with !)
+ * %XTOPT_MAND: option is mandatory
+ * %XTOPT_MULTI: option may be specified multiple times
+ * %XTOPT_PUT: store value into memory at @ptroff
+ * %XTOPT_NBO: store value in network-byte order
+ * (only certain XTTYPEs recognize this)
+ */
+enum xt_option_flags {
+ XTOPT_INVERT = 1 << 0,
+ XTOPT_MAND = 1 << 1,
+ XTOPT_MULTI = 1 << 2,
+ XTOPT_PUT = 1 << 3,
+ XTOPT_NBO = 1 << 4,
+};
+
+/**
+ * @name: name of option
+ * @type: type of input and validation method, see %XTTYPE_*
+ * @id: unique number (within extension) for option, 0-31
+ * @excl: bitmask of flags that cannot be used with this option
+ * @also: bitmask of flags that must be used with this option
+ * @flags: bitmask of option flags, see %XTOPT_*
+ * @ptroff: offset into private structure for member
+ * @size: size of the item pointed to by @ptroff; this is a safeguard
+ * @min: lowest allowed value (for singular integral types)
+ * @max: highest allowed value (for singular integral types)
+ */
+struct xt_option_entry {
+ const char *name;
+ enum xt_option_type type;
+ unsigned int id, excl, also, flags;
+ unsigned int ptroff;
+ size_t size;
+ unsigned int min, max;
+};
+
+/**
+ * @arg: input from command line
+ * @ext_name: name of extension currently being processed
+ * @entry: current option being processed
+ * @data: per-extension kernel data block
+ * @xflags: options of the extension that have been used
+ * @invert: whether option was used with !
+ * @nvals: number of results in uXX_multi
+ * @val: parsed result
+ * @udata: per-extension private scratch area
+ * (cf. xtables_{match,target}->udata_size)
+ */
+struct xt_option_call {
+ const char *arg, *ext_name;
+ const struct xt_option_entry *entry;
+ void *data;
+ unsigned int xflags;
+ bool invert;
+ uint8_t nvals;
+ union {
+ uint8_t u8, u8_range[2], syslog_level, protocol;
+ uint16_t u16, u16_range[2], port, port_range[2];
+ uint32_t u32, u32_range[2];
+ uint64_t u64, u64_range[2];
+ double dbl;
+ struct {
+ union nf_inet_addr haddr, hmask;
+ uint8_t hlen;
+ };
+ struct {
+ uint8_t tos_value, tos_mask;
+ };
+ struct {
+ uint32_t mark, mask;
+ };
+ uint8_t ethermac[6];
+ } val;
+ /* Wished for a world where the ones below were gone: */
+ union {
+ struct xt_entry_match **match;
+ struct xt_entry_target **target;
+ };
+ void *xt_entry;
+ void *udata;
+};
+
+/**
+ * @ext_name: name of extension currently being processed
+ * @data: per-extension (kernel) data block
+ * @udata: per-extension private scratch area
+ * (cf. xtables_{match,target}->udata_size)
+ * @xflags: options of the extension that have been used
+ */
+struct xt_fcheck_call {
+ const char *ext_name;
+ void *data, *udata;
+ unsigned int xflags;
+};
+
+/**
+ * A "linear"/linked-list based name<->id map, for files similar to
+ * /etc/iproute2/.
+ */
+struct xtables_lmap {
+ char *name;
+ int id;
+ struct xtables_lmap *next;
+};
+
+enum xtables_ext_flags {
+ XTABLES_EXT_ALIAS = 1 << 0,
+};
+
+/* Include file for additions: new matches and targets. */
+struct xtables_match
+{
+ /*
+ * ABI/API version this module requires. Must be first member,
+ * as the rest of this struct may be subject to ABI changes.
+ */
+ const char *version;
+
+ struct xtables_match *next;
+
+ const char *name;
+ const char *real_name;
+
+ /* Revision of match (0 by default). */
+ uint8_t revision;
+
+ /* Extension flags */
+ uint8_t ext_flags;
+
+ uint16_t family;
+
+ /* Size of match data. */
+ size_t size;
+
+ /* Size of match data relevant for userspace comparison purposes */
+ size_t userspacesize;
+
+ /* Function which prints out usage message. */
+ void (*help)(void);
+
+ /* Initialize the match. */
+ void (*init)(struct xt_entry_match *m);
+
+ /* Function which parses command options; returns true if it
+ ate an option */
+ /* entry is struct ipt_entry for example */
+ int (*parse)(int c, char **argv, int invert, unsigned int *flags,
+ const void *entry,
+ struct xt_entry_match **match);
+
+ /* Final check; exit if not ok. */
+ void (*final_check)(unsigned int flags);
+
+ /* Prints out the match iff non-NULL: put space at end */
+ /* ip is struct ipt_ip * for example */
+ void (*print)(const void *ip,
+ const struct xt_entry_match *match, int numeric);
+
+ /* Saves the match info in parsable form to stdout. */
+ /* ip is struct ipt_ip * for example */
+ void (*save)(const void *ip, const struct xt_entry_match *match);
+
+ /* Print match name or alias */
+ const char *(*alias)(const struct xt_entry_match *match);
+
+ /* Pointer to list of extra command-line options */
+ const struct option *extra_opts;
+
+ /* New parser */
+ void (*x6_parse)(struct xt_option_call *);
+ void (*x6_fcheck)(struct xt_fcheck_call *);
+ const struct xt_option_entry *x6_options;
+
+ /* Size of per-extension instance extra "global" scratch space */
+ size_t udata_size;
+
+ /* Ignore these men behind the curtain: */
+ void *udata;
+ unsigned int option_offset;
+ struct xt_entry_match *m;
+ unsigned int mflags;
+ unsigned int loaded; /* simulate loading so options are merged properly */
+};
+
+struct xtables_target /* describes one target extension; handed to xtables_register_target() */
+{
+ /*
+ * ABI/API version this module requires. Must be first member,
+ * as the rest of this struct may be subject to ABI changes.
+ */
+ const char *version;
+
+ struct xtables_target *next; /* NOTE(review): presumably chains the xtables_targets list -- confirm */
+
+
+ const char *name; /* NOTE(review): presumably the name xtables_find_target() looks up -- confirm */
+
+ /* Real target behind this, if any. */
+ const char *real_name;
+
+ /* Revision of target (0 by default). */
+ uint8_t revision;
+
+ /* Extension flags */
+ uint8_t ext_flags;
+
+ uint16_t family; /* NOTE(review): presumably an nfproto value, cf. xtables_set_nfproto() -- confirm */
+
+
+ /* Size of target data. */
+ size_t size;
+
+ /* Size of target data relevant for userspace comparison purposes */
+ size_t userspacesize;
+
+ /* Function which prints out usage message. */
+ void (*help)(void);
+
+ /* Initialize the target. */
+ void (*init)(struct xt_entry_target *t);
+
+ /* Function which parses command options; returns true if it
+ ate an option */
+ /* entry is struct ipt_entry for example */
+ int (*parse)(int c, char **argv, int invert, unsigned int *flags,
+ const void *entry,
+ struct xt_entry_target **targetinfo);
+
+ /* Final check; exit if not ok. */
+ void (*final_check)(unsigned int flags);
+
+ /* Prints out the target iff non-NULL: put space at end */
+ void (*print)(const void *ip,
+ const struct xt_entry_target *target, int numeric);
+
+ /* Saves the targinfo in parsable form to stdout. */
+ void (*save)(const void *ip,
+ const struct xt_entry_target *target);
+
+ /* Print target name or alias */
+ const char *(*alias)(const struct xt_entry_target *target);
+
+ /* Pointer to list of extra command-line options */
+ const struct option *extra_opts;
+
+ /* New parser: option-table driven alternative to parse/final_check above
+ (the xtoptions.c prototypes later in this header take the same types) */
+ void (*x6_parse)(struct xt_option_call *);
+ void (*x6_fcheck)(struct xt_fcheck_call *);
+ const struct xt_option_entry *x6_options;
+
+ size_t udata_size; /* per-instance scratch space size, as documented on xtables_match::udata_size */
+
+ /* Ignore these men behind the curtain: */
+ void *udata;
+ unsigned int option_offset;
+ struct xt_entry_target *t;
+ unsigned int tflags;
+ unsigned int used;
+ unsigned int loaded; /* simulate loading so options are merged properly */
+};
+
+struct xtables_rule_match {
+ struct xtables_rule_match *next;
+ struct xtables_match *match;
+ /* Multiple matches of the same type: the ones before
+ the current one are completed from parsing point of view */
+ bool completed;
+};
+
+/**
+ * struct xtables_pprot -
+ *
+ * A few hardcoded protocols for 'all' and in case the user has no
+ * /etc/protocols.
+ */
+struct xtables_pprot {
+ const char *name;
+ uint8_t num;
+};
+
+enum xtables_tryload {
+ XTF_DONT_LOAD,
+ XTF_DURING_LOAD,
+ XTF_TRY_LOAD,
+ XTF_LOAD_MUST_SUCCEED,
+};
+
+enum xtables_exittype {
+ OTHER_PROBLEM = 1,
+ PARAMETER_PROBLEM,
+ VERSION_PROBLEM,
+ RESOURCE_PROBLEM,
+ XTF_ONLY_ONCE,
+ XTF_NO_INVERT,
+ XTF_BAD_VALUE,
+ XTF_ONE_ACTION,
+};
+
+struct xtables_globals /* per-program settings; passed to xtables_set_params()/xtables_init_all(), reachable through xt_params */
+{
+ unsigned int option_offset;
+ const char *program_name, *program_version;
+ struct option *orig_opts; /* NOTE(review): presumably the program's built-in option table -- confirm */
+ struct option *opts; /* NOTE(review): presumably the table after extension options were merged -- confirm */
+ void (*exit_err)(enum xtables_exittype status, const char *msg, ...) __attribute__((noreturn, format(printf,2,3))); /* printf-style fatal-error hook that never returns; the xtables_error macro expands to this pointer */
+ int (*compat_rev)(const char *name, uint8_t rev, int opt); /* same signature as xtables_compatible_revision() */
+};
+
+#define XT_GETOPT_TABLEEND {.name = NULL, .has_arg = false}
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern const char *xtables_modprobe_program;
+extern struct xtables_match *xtables_matches;
+extern struct xtables_target *xtables_targets;
+
+extern void xtables_init(void);
+extern void xtables_set_nfproto(uint8_t);
+extern void *xtables_calloc(size_t, size_t);
+extern void *xtables_malloc(size_t);
+extern void *xtables_realloc(void *, size_t);
+
+extern int xtables_insmod(const char *, const char *, bool);
+extern int xtables_load_ko(const char *, bool);
+extern int xtables_set_params(struct xtables_globals *xtp);
+extern void xtables_free_opts(int reset_offset);
+extern struct option *xtables_merge_options(struct option *origopts,
+ struct option *oldopts, const struct option *newopts,
+ unsigned int *option_offset);
+
+extern int xtables_init_all(struct xtables_globals *xtp, uint8_t nfproto);
+extern struct xtables_match *xtables_find_match(const char *name,
+ enum xtables_tryload, struct xtables_rule_match **match);
+extern struct xtables_target *xtables_find_target(const char *name,
+ enum xtables_tryload);
+extern int xtables_compatible_revision(const char *name, uint8_t revision,
+ int opt);
+
+extern void xtables_rule_matches_free(struct xtables_rule_match **matches);
+
+/* Your shared library should call one of these. */
+extern void xtables_register_match(struct xtables_match *me);
+extern void xtables_register_matches(struct xtables_match *, unsigned int);
+extern void xtables_register_target(struct xtables_target *me);
+extern void xtables_register_targets(struct xtables_target *, unsigned int);
+
+extern bool xtables_strtoul(const char *, char **, uintmax_t *,
+ uintmax_t, uintmax_t);
+extern bool xtables_strtoui(const char *, char **, unsigned int *,
+ unsigned int, unsigned int);
+extern int xtables_service_to_port(const char *name, const char *proto);
+extern uint16_t xtables_parse_port(const char *port, const char *proto);
+extern void
+xtables_parse_interface(const char *arg, char *vianame, unsigned char *mask);
+
+/* this is a special 64bit data type that is 8-byte aligned */
+#define aligned_u64 uint64_t __attribute__((aligned(8)))
+
+extern struct xtables_globals *xt_params;
+#define xtables_error (xt_params->exit_err)
+
+extern void xtables_param_act(unsigned int, const char *, ...);
+
+extern const char *xtables_ipaddr_to_numeric(const struct in_addr *);
+extern const char *xtables_ipaddr_to_anyname(const struct in_addr *);
+extern const char *xtables_ipmask_to_numeric(const struct in_addr *);
+extern struct in_addr *xtables_numeric_to_ipaddr(const char *);
+extern struct in_addr *xtables_numeric_to_ipmask(const char *);
+extern int xtables_ipmask_to_cidr(const struct in_addr *);
+extern void xtables_ipparse_any(const char *, struct in_addr **,
+ struct in_addr *, unsigned int *);
+extern void xtables_ipparse_multiple(const char *, struct in_addr **,
+ struct in_addr **, unsigned int *);
+
+extern struct in6_addr *xtables_numeric_to_ip6addr(const char *);
+extern const char *xtables_ip6addr_to_numeric(const struct in6_addr *);
+extern const char *xtables_ip6addr_to_anyname(const struct in6_addr *);
+extern const char *xtables_ip6mask_to_numeric(const struct in6_addr *);
+extern int xtables_ip6mask_to_cidr(const struct in6_addr *);
+extern void xtables_ip6parse_any(const char *, struct in6_addr **,
+ struct in6_addr *, unsigned int *);
+extern void xtables_ip6parse_multiple(const char *, struct in6_addr **,
+ struct in6_addr **, unsigned int *);
+
+/**
+ * Print the specified value to standard output, quoting dangerous
+ * characters if required.
+ */
+extern void xtables_save_string(const char *value);
+
+#define FMT_NUMERIC 0x0001
+#define FMT_NOCOUNTS 0x0002
+#define FMT_KILOMEGAGIGA 0x0004
+#define FMT_OPTIONS 0x0008
+#define FMT_NOTABLE 0x0010
+#define FMT_NOTARGET 0x0020
+#define FMT_VIA 0x0040
+#define FMT_NONEWLINE 0x0080
+#define FMT_LINENUMBERS 0x0100
+
+#define FMT_PRINT_RULE (FMT_NOCOUNTS | FMT_OPTIONS | FMT_VIA \
+ | FMT_NUMERIC | FMT_NOTABLE)
+#define FMT(tab,notab) ((format) & FMT_NOTABLE ? (notab) : (tab))
+
+extern void xtables_print_num(uint64_t number, unsigned int format);
+
+#if defined(ALL_INCLUSIVE) || defined(NO_SHARED_LIBS)
+# ifdef _INIT
+# undef _init
+# define _init _INIT
+# endif
+ extern void init_extensions(void);
+ extern void init_extensions4(void);
+ extern void init_extensions6(void);
+#else
+# define _init __attribute__((constructor)) _INIT
+#endif
+
+extern const struct xtables_pprot xtables_chain_protos[];
+extern uint16_t xtables_parse_protocol(const char *s);
+
+/*
+ * kernel revision handling
+ *
+ * A version is packed into one int as 0xMMmmpp (major, minor, patch; one
+ * byte each). LINUX_VERSION() builds such a value and the
+ * LINUX_VERSION_{MAJOR,MINOR,PATCH}() accessors take it apart again.
+ * Every macro argument is parenthesized so that expressions such as
+ * LINUX_VERSION(2, 6, a << b) expand with the intended precedence.
+ */
+extern int kernel_version;
+extern void get_kernel_version(void);
+#define LINUX_VERSION(x,y,z) (0x10000*(x) + 0x100*(y) + (z))
+#define LINUX_VERSION_MAJOR(x) (((x)>>16) & 0xFF)
+#define LINUX_VERSION_MINOR(x) (((x)>> 8) & 0xFF)
+#define LINUX_VERSION_PATCH(x) ( (x) & 0xFF)
+
+/* xtoptions.c */
+extern void xtables_option_metavalidate(const char *,
+ const struct xt_option_entry *);
+extern struct option *xtables_options_xfrm(struct option *, struct option *,
+ const struct xt_option_entry *,
+ unsigned int *);
+extern void xtables_option_parse(struct xt_option_call *);
+extern void xtables_option_tpcall(unsigned int, char **, bool,
+ struct xtables_target *, void *);
+extern void xtables_option_mpcall(unsigned int, char **, bool,
+ struct xtables_match *, void *);
+extern void xtables_option_tfcall(struct xtables_target *);
+extern void xtables_option_mfcall(struct xtables_match *);
+extern void xtables_options_fcheck(const char *, unsigned int,
+ const struct xt_option_entry *);
+
+extern struct xtables_lmap *xtables_lmap_init(const char *);
+extern void xtables_lmap_free(struct xtables_lmap *);
+extern int xtables_lmap_name2id(const struct xtables_lmap *, const char *);
+extern const char *xtables_lmap_id2name(const struct xtables_lmap *, int);
+
+#ifdef XTABLES_INTERNAL
+
+/* Shipped modules rely on this... */
+
+# ifndef ARRAY_SIZE
+# define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
+# endif
+
+extern void _init(void);
+
+#endif
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* _XTABLES_H */
diff --git a/ip/Android.mk b/ip/Android.mk
index a6c72da7..5c45bc76 100644
--- a/ip/Android.mk
+++ b/ip/Android.mk
@@ -6,8 +6,9 @@ LOCAL_SRC_FILES := ip.c ipaddress.c ipaddrlabel.c iproute.c iprule.c ipnetns.c \
ipmaddr.c ipmonitor.c ipmroute.c ipprefix.c iptuntap.c \
ipxfrm.c xfrm_state.c xfrm_policy.c xfrm_monitor.c \
iplink_vlan.c link_veth.c link_gre.c iplink_can.c \
- iplink_macvlan.c iplink_macvtap.c ipl2tp.c \
- ipfou.c iptoken.c tcp_metrics.c ipnetconf.c
+ iplink_macvlan.c ipl2tp.c \
+ ipfou.c iptoken.c tcp_metrics.c ipnetconf.c \
+ iproute_lwtunnel.c
LOCAL_MODULE := ip
diff --git a/ip/Makefile b/ip/Makefile
index 2c742f30..f3d29873 100644
--- a/ip/Makefile
+++ b/ip/Makefile
@@ -3,10 +3,11 @@ IPOBJ=ip.o ipaddress.o ipaddrlabel.o iproute.o iprule.o ipnetns.o \
ipmaddr.o ipmonitor.o ipmroute.o ipprefix.o iptuntap.o iptoken.o \
ipxfrm.o xfrm_state.o xfrm_policy.o xfrm_monitor.o \
iplink_vlan.o link_veth.o link_gre.o iplink_can.o \
- iplink_macvlan.o iplink_macvtap.o ipl2tp.o link_vti.o link_vti6.o \
+ iplink_macvlan.o ipl2tp.o link_vti.o link_vti6.o \
iplink_vxlan.o tcp_metrics.o iplink_ipoib.o ipnetconf.o link_ip6tnl.o \
link_iptnl.o link_gre6.o iplink_bond.o iplink_bond_slave.o iplink_hsr.o \
- iplink_bridge.o iplink_bridge_slave.o ipfou.o iplink_ipvlan.o
+ iplink_bridge.o iplink_bridge_slave.o ipfou.o iplink_ipvlan.o \
+ iplink_geneve.o iplink_vrf.o iproute_lwtunnel.o
RTMONOBJ=rtmon.o
diff --git a/ip/ip.c b/ip/ip.c
index 6222a338..51621008 100644
--- a/ip/ip.c
+++ b/ip/ip.c
@@ -23,20 +23,22 @@
#include "utils.h"
#include "ip_common.h"
#include "namespace.h"
+#include "color.h"
int preferred_family = AF_UNSPEC;
-int human_readable = 0;
-int use_iec = 0;
-int show_stats = 0;
-int show_details = 0;
-int resolve_hosts = 0;
-int oneline = 0;
-int timestamp = 0;
-char * _SL_ = NULL;
-int force = 0;
+int human_readable;
+int use_iec;
+int show_stats;
+int show_details;
+int resolve_hosts;
+int oneline;
+int brief;
+int timestamp;
+const char *_SL_;
+int force;
int max_flush_loops = 10;
-int batch_mode = 0;
-bool do_all = false;
+int batch_mode;
+bool do_all;
struct rtnl_handle rth = { .fd = -1 };
@@ -47,23 +49,23 @@ static void usage(void)
fprintf(stderr,
"Usage: ip [ OPTIONS ] OBJECT { COMMAND | help }\n"
" ip [ -force ] -batch filename\n"
-"where OBJECT := { link | addr | addrlabel | route | rule | neigh | ntable |\n"
-" tunnel | tuntap | maddr | mroute | mrule | monitor | xfrm |\n"
+"where OBJECT := { link | address | addrlabel | route | rule | neighbor | ntable |\n"
+" tunnel | tuntap | maddress | mroute | mrule | monitor | xfrm |\n"
" netns | l2tp | fou | tcp_metrics | token | netconf }\n"
" OPTIONS := { -V[ersion] | -s[tatistics] | -d[etails] | -r[esolve] |\n"
" -h[uman-readable] | -iec |\n"
-" -f[amily] { inet | inet6 | ipx | dnet | bridge | link } |\n"
+" -f[amily] { inet | inet6 | ipx | dnet | mpls | bridge | link } |\n"
" -4 | -6 | -I | -D | -B | -0 |\n"
-" -l[oops] { maximum-addr-flush-attempts } |\n"
+" -l[oops] { maximum-addr-flush-attempts } | -br[ief] |\n"
" -o[neline] | -t[imestamp] | -ts[hort] | -b[atch] [filename] |\n"
-" -rc[vbuf] [size] | -n[etns] name | -a[ll] }\n");
+" -rc[vbuf] [size] | -n[etns] name | -a[ll] | -c[olor]}\n");
exit(-1);
}
static int do_help(int argc, char **argv)
{
usage();
- return 0;
+ return 0;
}
static const struct cmd {
@@ -88,7 +90,7 @@ static const struct cmd {
{ "tap", do_iptuntap },
{ "token", do_iptoken },
{ "tcpmetrics", do_tcp_metrics },
- { "tcp_metrics",do_tcp_metrics },
+ { "tcp_metrics", do_tcp_metrics },
{ "monitor", do_ipmonitor },
{ "xfrm", do_xfrm },
{ "mroute", do_multiroute },
@@ -104,9 +106,8 @@ static int do_cmd(const char *argv0, int argc, char **argv)
const struct cmd *c;
for (c = cmds; c->cmd; ++c) {
- if (matches(argv0, c->cmd) == 0) {
+ if (matches(argv0, c->cmd) == 0)
return -(c->func(argc-1, argv+1));
- }
}
fprintf(stderr, "Object \"%s\" is unknown, try \"ip help\".\n", argv0);
@@ -119,12 +120,14 @@ static int batch(const char *name)
char *line = NULL;
size_t len = 0;
int ret = EXIT_SUCCESS;
+ int orig_family = preferred_family;
batch_mode = 1;
if (name && strcmp(name, "-") != 0) {
if (freopen(name, "r", stdin) == NULL) {
- fprintf(stderr, "Cannot open file \"%s\" for reading: %s\n",
+ fprintf(stderr,
+ "Cannot open file \"%s\" for reading: %s\n",
name, strerror(errno));
return EXIT_FAILURE;
}
@@ -140,12 +143,15 @@ static int batch(const char *name)
char *largv[100];
int largc;
+ preferred_family = orig_family;
+
largc = makeargs(line, largv, 100);
if (largc == 0)
continue; /* blank line */
if (do_cmd(largv[0], largc, largv)) {
- fprintf(stderr, "Command failed %s:%d\n", name, cmdlineno);
+ fprintf(stderr, "Command failed %s:%d\n",
+ name, cmdlineno);
ret = EXIT_FAILURE;
if (!force)
break;
@@ -175,7 +181,8 @@ int main(int argc, char **argv)
while (argc > 1) {
char *opt = argv[1];
- if (strcmp(opt,"--") == 0) {
+
+ if (strcmp(opt, "--") == 0) {
argc--; argv++;
break;
}
@@ -188,27 +195,17 @@ int main(int argc, char **argv)
argv++;
if (argc <= 1)
usage();
- max_flush_loops = atoi(argv[1]);
- } else if (matches(opt, "-family") == 0) {
+ max_flush_loops = atoi(argv[1]);
+ } else if (matches(opt, "-family") == 0) {
argc--;
argv++;
if (argc <= 1)
usage();
- if (strcmp(argv[1], "inet") == 0)
- preferred_family = AF_INET;
- else if (strcmp(argv[1], "inet6") == 0)
- preferred_family = AF_INET6;
- else if (strcmp(argv[1], "dnet") == 0)
- preferred_family = AF_DECnet;
- else if (strcmp(argv[1], "link") == 0)
- preferred_family = AF_PACKET;
- else if (strcmp(argv[1], "ipx") == 0)
- preferred_family = AF_IPX;
- else if (strcmp(argv[1], "bridge") == 0)
- preferred_family = AF_BRIDGE;
- else if (strcmp(argv[1], "help") == 0)
+ if (strcmp(argv[1], "help") == 0)
usage();
else
+ preferred_family = read_family(argv[1]);
+ if (preferred_family == AF_UNSPEC)
invarg("invalid protocol family", argv[1]);
} else if (strcmp(opt, "-4") == 0) {
preferred_family = AF_INET;
@@ -220,6 +217,8 @@ int main(int argc, char **argv)
preferred_family = AF_IPX;
} else if (strcmp(opt, "-D") == 0) {
preferred_family = AF_DECnet;
+ } else if (strcmp(opt, "-M") == 0) {
+ preferred_family = AF_MPLS;
} else if (strcmp(opt, "-B") == 0) {
preferred_family = AF_BRIDGE;
} else if (matches(opt, "-human") == 0 ||
@@ -258,6 +257,8 @@ int main(int argc, char **argv)
usage();
batch_file = argv[1];
#endif
+ } else if (matches(opt, "-brief") == 0) {
+ ++brief;
} else if (matches(opt, "-rcvbuf") == 0) {
unsigned int size;
@@ -271,6 +272,8 @@ int main(int argc, char **argv)
exit(-1);
}
rcvbuf = size;
+ } else if (matches(opt, "-color") == 0) {
+ enable_color();
} else if (matches(opt, "-help") == 0) {
usage();
} else if (matches(opt, "-netns") == 0) {
@@ -280,13 +283,15 @@ int main(int argc, char **argv)
} else if (matches(opt, "-all") == 0) {
do_all = true;
} else {
- fprintf(stderr, "Option \"%s\" is unknown, try \"ip -help\".\n", opt);
+ fprintf(stderr,
+ "Option \"%s\" is unknown, try \"ip -help\".\n",
+ opt);
exit(-1);
}
argc--; argv++;
}
- _SL_ = oneline ? "\\" : "\n" ;
+ _SL_ = oneline ? "\\" : "\n";
#ifndef ANDROID
if (batch_file)
diff --git a/ip/ip6tunnel.c b/ip/ip6tunnel.c
index 62a8240f..7a3cd046 100644
--- a/ip/ip6tunnel.c
+++ b/ip/ip6tunnel.c
@@ -68,14 +68,17 @@ static void usage(void)
static void print_tunnel(struct ip6_tnl_parm2 *p)
{
- char remote[64];
- char local[64];
-
- inet_ntop(AF_INET6, &p->raddr, remote, sizeof(remote));
- inet_ntop(AF_INET6, &p->laddr, local, sizeof(local));
+ char s1[1024];
+ char s2[1024];
+ /* Do not use format_host() for local addr,
+ * symbolic name will not be useful.
+ */
printf("%s: %s/ipv6 remote %s local %s",
- p->name, tnl_strproto(p->proto), remote, local);
+ p->name,
+ tnl_strproto(p->proto),
+ format_host(AF_INET6, 16, &p->raddr, s1, sizeof(s1)),
+ rt_addr_n2a(AF_INET6, 16, &p->laddr, s2, sizeof(s2)));
if (p->link) {
const char *n = ll_index_to_name(p->link);
if (n)
@@ -107,22 +110,22 @@ static void print_tunnel(struct ip6_tnl_parm2 *p)
printf(" dscp inherit");
if (p->proto == IPPROTO_GRE) {
- if ((p->i_flags&GRE_KEY) && (p->o_flags&GRE_KEY) && p->o_key == p->i_key)
+ if ((p->i_flags & GRE_KEY) && (p->o_flags & GRE_KEY) && p->o_key == p->i_key)
printf(" key %u", ntohl(p->i_key));
- else if ((p->i_flags|p->o_flags)&GRE_KEY) {
- if (p->i_flags&GRE_KEY)
- printf(" ikey %u ", ntohl(p->i_key));
- if (p->o_flags&GRE_KEY)
- printf(" okey %u ", ntohl(p->o_key));
+ else if ((p->i_flags | p->o_flags) & GRE_KEY) {
+ if (p->i_flags & GRE_KEY)
+ printf(" ikey %u", ntohl(p->i_key));
+ if (p->o_flags & GRE_KEY)
+ printf(" okey %u", ntohl(p->o_key));
}
- if (p->i_flags&GRE_SEQ)
+ if (p->i_flags & GRE_SEQ)
printf("%s Drop packets out of sequence.", _SL_);
- if (p->i_flags&GRE_CSUM)
+ if (p->i_flags & GRE_CSUM)
printf("%s Checksum in received packet is required.", _SL_);
- if (p->o_flags&GRE_SEQ)
+ if (p->o_flags & GRE_SEQ)
printf("%s Sequence packets on output.", _SL_);
- if (p->o_flags&GRE_CSUM)
+ if (p->o_flags & GRE_CSUM)
printf("%s Checksum output packets.", _SL_);
}
}
@@ -230,45 +233,18 @@ static int parse_args(int argc, char **argv, int cmd, struct ip6_tnl_parm2 *p)
invarg("not inherit", *argv);
p->flags |= IP6_TNL_F_RCV_DSCP_COPY;
} else if (strcmp(*argv, "key") == 0) {
- unsigned uval;
NEXT_ARG();
p->i_flags |= GRE_KEY;
p->o_flags |= GRE_KEY;
- if (strchr(*argv, '.'))
- p->i_key = p->o_key = get_addr32(*argv);
- else {
- if (get_unsigned(&uval, *argv, 0) < 0) {
- fprintf(stderr, "invalid value of \"key\"\n");
- exit(-1);
- }
- p->i_key = p->o_key = htonl(uval);
- }
+ p->i_key = p->o_key = tnl_parse_key("key", *argv);
} else if (strcmp(*argv, "ikey") == 0) {
- unsigned uval;
NEXT_ARG();
p->i_flags |= GRE_KEY;
- if (strchr(*argv, '.'))
- p->i_key = get_addr32(*argv);
- else {
- if (get_unsigned(&uval, *argv, 0)<0) {
- fprintf(stderr, "invalid value of \"ikey\"\n");
- exit(-1);
- }
- p->i_key = htonl(uval);
- }
+ p->i_key = tnl_parse_key("ikey", *argv);
} else if (strcmp(*argv, "okey") == 0) {
- unsigned uval;
NEXT_ARG();
p->o_flags |= GRE_KEY;
- if (strchr(*argv, '.'))
- p->o_key = get_addr32(*argv);
- else {
- if (get_unsigned(&uval, *argv, 0)<0) {
- fprintf(stderr, "invalid value of \"okey\"\n");
- exit(-1);
- }
- p->o_key = htonl(uval);
- }
+ p->o_key = tnl_parse_key("okey", *argv);
} else if (strcmp(*argv, "seq") == 0) {
p->i_flags |= GRE_SEQ;
p->o_flags |= GRE_SEQ;
@@ -286,8 +262,7 @@ static int parse_args(int argc, char **argv, int cmd, struct ip6_tnl_parm2 *p)
} else {
if (strcmp(*argv, "name") == 0) {
NEXT_ARG();
- }
- if (matches(*argv, "help") == 0)
+ } else if (matches(*argv, "help") == 0)
usage();
if (p->name[0])
duparg2("name", *argv);
@@ -305,8 +280,10 @@ static int parse_args(int argc, char **argv, int cmd, struct ip6_tnl_parm2 *p)
}
if (medium[0]) {
p->link = ll_name_to_index(medium);
- if (p->link == 0)
+ if (p->link == 0) {
+ fprintf(stderr, "Cannot find device \"%s\"\n", medium);
return -1;
+ }
}
return 0;
}
@@ -351,23 +328,19 @@ static int do_tunnels_list(struct ip6_tnl_parm2 *p)
FILE *fp = fopen("/proc/net/dev", "r");
if (fp == NULL) {
perror("fopen");
- goto end;
+ return -1;
}
/* skip two lines at the begenning of the file */
if (!fgets(buf, sizeof(buf), fp) ||
!fgets(buf, sizeof(buf), fp)) {
fprintf(stderr, "/proc/net/dev read error\n");
- return -1;
+ goto end;
}
while (fgets(buf, sizeof(buf), fp) != NULL) {
char name[IFNAMSIZ];
int index, type;
- unsigned long rx_bytes, rx_packets, rx_errs, rx_drops,
- rx_fifo, rx_frame,
- tx_bytes, tx_packets, tx_errs, tx_drops,
- tx_fifo, tx_colls, tx_carrier, rx_multi;
struct ip6_tnl_parm2 p1;
char *ptr;
@@ -377,12 +350,6 @@ static int do_tunnels_list(struct ip6_tnl_parm2 *p)
fprintf(stderr, "Wrong format for /proc/net/dev. Giving up.\n");
goto end;
}
- if (sscanf(ptr, "%ld%ld%ld%ld%ld%ld%ld%*d%ld%ld%ld%ld%ld%ld%ld",
- &rx_bytes, &rx_packets, &rx_errs, &rx_drops,
- &rx_fifo, &rx_frame, &rx_multi,
- &tx_bytes, &tx_packets, &tx_errs, &tx_drops,
- &tx_fifo, &tx_colls, &tx_carrier) != 14)
- continue;
if (p->name[0] && strcmp(p->name, name))
continue;
index = ll_name_to_index(name);
@@ -408,22 +375,13 @@ static int do_tunnels_list(struct ip6_tnl_parm2 *p)
if (!ip6_tnl_parm_match(p, &p1))
continue;
print_tunnel(&p1);
- if (show_stats) {
- printf("%s", _SL_);
- printf("RX: Packets Bytes Errors CsumErrs OutOfSeq Mcasts%s", _SL_);
- printf(" %-10ld %-12ld %-6ld %-8ld %-8ld %-8ld%s",
- rx_packets, rx_bytes, rx_errs, rx_frame, rx_fifo, rx_multi, _SL_);
- printf("TX: Packets Bytes Errors DeadLoop NoRoute NoBufs%s", _SL_);
- printf(" %-10ld %-12ld %-6ld %-8ld %-8ld %-6ld",
- tx_packets, tx_bytes, tx_errs, tx_colls, tx_carrier, tx_drops);
- }
+ if (show_stats)
+ tnl_print_stats(ptr);
printf("\n");
}
err = 0;
-
end:
- if (fp)
- fclose(fp);
+ fclose(fp);
return err;
}
diff --git a/ip/ip_common.h b/ip/ip_common.h
index 89a495ea..9a846df3 100644
--- a/ip/ip_common.h
+++ b/ip/ip_common.h
@@ -1,61 +1,60 @@
-extern int get_operstate(const char *name);
-extern int print_linkinfo(const struct sockaddr_nl *who,
- struct nlmsghdr *n,
- void *arg);
-extern int print_addrinfo(const struct sockaddr_nl *who,
- struct nlmsghdr *n,
- void *arg);
-extern int print_addrlabel(const struct sockaddr_nl *who,
- struct nlmsghdr *n, void *arg);
-extern int print_neigh(const struct sockaddr_nl *who,
- struct nlmsghdr *n, void *arg);
-extern int print_ntable(const struct sockaddr_nl *who,
- struct nlmsghdr *n, void *arg);
-extern int ipaddr_list(int argc, char **argv);
-extern int ipaddr_list_link(int argc, char **argv);
+int get_operstate(const char *name);
+int print_linkinfo(const struct sockaddr_nl *who,
+ struct nlmsghdr *n, void *arg);
+int print_linkinfo_brief(const struct sockaddr_nl *who,
+ struct nlmsghdr *n, void *arg);
+int print_addrinfo(const struct sockaddr_nl *who,
+ struct nlmsghdr *n, void *arg);
+int print_addrlabel(const struct sockaddr_nl *who,
+ struct nlmsghdr *n, void *arg);
+int print_neigh(const struct sockaddr_nl *who,
+ struct nlmsghdr *n, void *arg);
+int ipaddr_list_link(int argc, char **argv);
void ipaddr_get_vf_rate(int, int *, int *, int);
-extern int iproute_monitor(int argc, char **argv);
-extern void iplink_usage(void) __attribute__((noreturn));
+void iplink_usage(void) __attribute__((noreturn));
-extern void iproute_reset_filter(int ifindex);
-extern void ipmroute_reset_filter(int ifindex);
-extern void ipaddr_reset_filter(int oneline, int ifindex);
-extern void ipneigh_reset_filter(int ifindex);
-extern void ipntable_reset_filter(void);
-extern void ipnetconf_reset_filter(int ifindex);
+void iproute_reset_filter(int ifindex);
+void ipmroute_reset_filter(int ifindex);
+void ipaddr_reset_filter(int oneline, int ifindex);
+void ipneigh_reset_filter(int ifindex);
+void ipnetconf_reset_filter(int ifindex);
-extern int print_route(const struct sockaddr_nl *who,
- struct nlmsghdr *n, void *arg);
-extern int print_mroute(const struct sockaddr_nl *who,
- struct nlmsghdr *n, void *arg);
-extern int print_prefix(const struct sockaddr_nl *who,
- struct nlmsghdr *n, void *arg);
-extern int print_rule(const struct sockaddr_nl *who,
- struct nlmsghdr *n, void *arg);
-extern int print_netconf(const struct sockaddr_nl *who,
- struct nlmsghdr *n, void *arg);
-extern int do_ipaddr(int argc, char **argv);
-extern int do_ipaddrlabel(int argc, char **argv);
-extern int do_iproute(int argc, char **argv);
-extern int do_iprule(int argc, char **argv);
-extern int do_ipneigh(int argc, char **argv);
-extern int do_ipntable(int argc, char **argv);
-extern int do_iptunnel(int argc, char **argv);
-extern int do_ip6tunnel(int argc, char **argv);
-extern int do_iptuntap(int argc, char **argv);
-extern int do_iplink(int argc, char **argv);
-extern int do_ipmonitor(int argc, char **argv);
-extern int do_multiaddr(int argc, char **argv);
-extern int do_multiroute(int argc, char **argv);
-extern int do_multirule(int argc, char **argv);
-extern int do_netns(int argc, char **argv);
-extern int do_xfrm(int argc, char **argv);
-extern int do_ipl2tp(int argc, char **argv);
-extern int do_ipfou(int argc, char **argv);
-extern int do_tcp_metrics(int argc, char **argv);
-extern int do_ipnetconf(int argc, char **argv);
-extern int do_iptoken(int argc, char **argv);
-extern int iplink_get(unsigned int flags, char *name, __u32 filt_mask);
+int print_route(const struct sockaddr_nl *who,
+ struct nlmsghdr *n, void *arg);
+int print_mroute(const struct sockaddr_nl *who,
+ struct nlmsghdr *n, void *arg);
+int print_prefix(const struct sockaddr_nl *who,
+ struct nlmsghdr *n, void *arg);
+int print_rule(const struct sockaddr_nl *who,
+ struct nlmsghdr *n, void *arg);
+int print_netconf(const struct sockaddr_nl *who,
+ struct rtnl_ctrl_data *ctrl,
+ struct nlmsghdr *n, void *arg);
+void netns_map_init(void);
+int print_nsid(const struct sockaddr_nl *who,
+ struct nlmsghdr *n, void *arg);
+int do_ipaddr(int argc, char **argv);
+int do_ipaddrlabel(int argc, char **argv);
+int do_iproute(int argc, char **argv);
+int do_iprule(int argc, char **argv);
+int do_ipneigh(int argc, char **argv);
+int do_ipntable(int argc, char **argv);
+int do_iptunnel(int argc, char **argv);
+int do_ip6tunnel(int argc, char **argv);
+int do_iptuntap(int argc, char **argv);
+int do_iplink(int argc, char **argv);
+int do_ipmonitor(int argc, char **argv);
+int do_multiaddr(int argc, char **argv);
+int do_multiroute(int argc, char **argv);
+int do_multirule(int argc, char **argv);
+int do_netns(int argc, char **argv);
+int do_xfrm(int argc, char **argv);
+int do_ipl2tp(int argc, char **argv);
+int do_ipfou(int argc, char **argv);
+int do_tcp_metrics(int argc, char **argv);
+int do_ipnetconf(int argc, char **argv);
+int do_iptoken(int argc, char **argv);
+int iplink_get(unsigned int flags, char *name, __u32 filt_mask);
static inline int rtm_get_table(struct rtmsg *r, struct rtattr **tb)
{
diff --git a/ip/ipaddress.c b/ip/ipaddress.c
index 99a6ab59..9d254d27 100644
--- a/ip/ipaddress.c
+++ b/ip/ipaddress.c
@@ -34,6 +34,7 @@
#include "utils.h"
#include "ll_map.h"
#include "ip_common.h"
+#include "color.h"
enum {
IPADD_LIST,
@@ -70,22 +71,22 @@ static void usage(void)
if (do_link) {
iplink_usage();
}
- fprintf(stderr, "Usage: ip addr {add|change|replace} IFADDR dev STRING [ LIFETIME ]\n");
+ fprintf(stderr, "Usage: ip address {add|change|replace} IFADDR dev IFNAME [ LIFETIME ]\n");
fprintf(stderr, " [ CONFFLAG-LIST ]\n");
- fprintf(stderr, " ip addr del IFADDR dev STRING [mngtmpaddr]\n");
- fprintf(stderr, " ip addr {show|save|flush} [ dev STRING ] [ scope SCOPE-ID ]\n");
- fprintf(stderr, " [ to PREFIX ] [ FLAG-LIST ] [ label PATTERN ] [up]\n");
- fprintf(stderr, " ip addr {showdump|restore}\n");
+ fprintf(stderr, " ip address del IFADDR dev IFNAME [mngtmpaddr]\n");
+ fprintf(stderr, " ip address {show|save|flush} [ dev IFNAME ] [ scope SCOPE-ID ]\n");
+ fprintf(stderr, " [ to PREFIX ] [ FLAG-LIST ] [ label LABEL ] [up]\n");
+ fprintf(stderr, " ip address {showdump|restore}\n");
fprintf(stderr, "IFADDR := PREFIX | ADDR peer PREFIX\n");
fprintf(stderr, " [ broadcast ADDR ] [ anycast ADDR ]\n");
- fprintf(stderr, " [ label STRING ] [ scope SCOPE-ID ]\n");
+ fprintf(stderr, " [ label IFNAME ] [ scope SCOPE-ID ]\n");
fprintf(stderr, "SCOPE-ID := [ host | link | global | NUMBER ]\n");
fprintf(stderr, "FLAG-LIST := [ FLAG-LIST ] FLAG\n");
fprintf(stderr, "FLAG := [ permanent | dynamic | secondary | primary |\n");
fprintf(stderr, " [-]tentative | [-]deprecated | [-]dadfailed | temporary |\n");
fprintf(stderr, " CONFFLAG-LIST ]\n");
fprintf(stderr, "CONFFLAG-LIST := [ CONFFLAG-LIST ] CONFFLAG\n");
- fprintf(stderr, "CONFFLAG := [ home | nodad | mngtmpaddr | noprefixroute ]\n");
+ fprintf(stderr, "CONFFLAG := [ home | nodad | mngtmpaddr | noprefixroute | autojoin ]\n");
fprintf(stderr, "LIFETIME := [ valid_lft LFT ] [ preferred_lft LFT ]\n");
fprintf(stderr, "LFT := forever | SECONDS\n");
@@ -136,8 +137,24 @@ static void print_operstate(FILE *f, __u8 state)
{
if (state >= sizeof(oper_states)/sizeof(oper_states[0]))
fprintf(f, "state %#x ", state);
- else
- fprintf(f, "state %s ", oper_states[state]);
+ else {
+ if (brief) {
+ if (strcmp(oper_states[state], "UP") == 0)
+ color_fprintf(f, COLOR_OPERSTATE_UP, "%-14s ", oper_states[state]);
+ else if (strcmp(oper_states[state], "DOWN") == 0)
+ color_fprintf(f, COLOR_OPERSTATE_DOWN, "%-14s ", oper_states[state]);
+ else
+ fprintf(f, "%-14s ", oper_states[state]);
+ } else {
+ fprintf(f, "state ");
+ if (strcmp(oper_states[state], "UP") == 0)
+ color_fprintf(f, COLOR_OPERSTATE_UP, "%s ", oper_states[state]);
+ else if (strcmp(oper_states[state], "DOWN") == 0)
+ color_fprintf(f, COLOR_OPERSTATE_DOWN, "%s ", oper_states[state]);
+ else
+ fprintf(f, "%s ", oper_states[state]);
+ }
+ }
}
int get_operstate(const char *name)
@@ -268,17 +285,29 @@ static void print_af_spec(FILE *fp, struct rtattr *af_spec_attr)
parse_rtattr_nested(tb, IFLA_INET6_MAX, inet6_attr);
if (tb[IFLA_INET6_ADDR_GEN_MODE]) {
- switch (rta_getattr_u8(tb[IFLA_INET6_ADDR_GEN_MODE])) {
+ __u8 mode = rta_getattr_u8(tb[IFLA_INET6_ADDR_GEN_MODE]);
+ switch (mode) {
case IN6_ADDR_GEN_MODE_EUI64:
fprintf(fp, "addrgenmode eui64 ");
break;
case IN6_ADDR_GEN_MODE_NONE:
fprintf(fp, "addrgenmode none ");
break;
+ case IN6_ADDR_GEN_MODE_STABLE_PRIVACY:
+ fprintf(fp, "addrgenmode stable_secret ");
+ break;
+ case IN6_ADDR_GEN_MODE_RANDOM:
+ fprintf(fp, "addrgenmode random ");
+ break;
+ default:
+ fprintf(fp, "addrgenmode %#.2hhx ", mode);
+ break;
}
}
}
+static void print_vf_stats64(FILE *fp, struct rtattr *vfstats);
+
static void print_vfinfo(FILE *fp, struct rtattr *vfinfo)
{
struct ifla_vf_mac *vf_mac;
@@ -326,7 +355,7 @@ static void print_vfinfo(FILE *fp, struct rtattr *vfinfo)
} else
vf_linkstate = NULL;
- fprintf(fp, "\n vf %d MAC %s", vf_mac->vf,
+ fprintf(fp, "%s vf %d MAC %s", _SL_, vf_mac->vf,
ll_addr_n2a((unsigned char *)&vf_mac->mac,
ETH_ALEN, 0, b1, sizeof(b1)));
if (vf_vlan->vlan)
@@ -359,6 +388,8 @@ static void print_vfinfo(FILE *fp, struct rtattr *vfinfo)
else
fprintf(fp, ", link-state disable");
}
+ if (vf[IFLA_VF_STATS] && show_stats)
+ print_vf_stats64(fp, vf[IFLA_VF_STATS]);
}
static void print_num(FILE *fp, unsigned width, uint64_t count)
@@ -400,6 +431,36 @@ static void print_num(FILE *fp, unsigned width, uint64_t count)
fprintf(fp, "%-*s ", width, buf);
}
+/*
+ * Fetch the 64-bit counter carried by one VF statistics attribute.
+ *
+ * Returns 0 when the attribute is missing or its payload is too short to
+ * hold a __u64, so an absent counter is printed as 0 instead of being
+ * dereferenced through a NULL pointer.
+ */
+static __u64 vf_stat_u64(const struct rtattr *attr)
+{
+	__u64 val = 0;
+
+	/* memcpy rather than a cast: avoids assuming the payload is 8-byte aligned */
+	if (attr && RTA_PAYLOAD(attr) >= (int)sizeof(val))
+		memcpy(&val, RTA_DATA(attr), sizeof(val));
+	return val;
+}
+
+/*
+ * Print the per-VF RX/TX counters nested inside an IFLA_VF_STATS attribute.
+ *
+ * fp:      stream the table is written to
+ * vfstats: the IFLA_VF_STATS attribute; any other type is reported on
+ *          stderr as a bug and nothing is printed
+ *
+ * Lines are separated with _SL_ so the output follows the -oneline setting.
+ */
+static void print_vf_stats64(FILE *fp, struct rtattr *vfstats)
+{
+	struct rtattr *vf[IFLA_VF_STATS_MAX + 1] = {};
+
+	if (vfstats->rta_type != IFLA_VF_STATS) {
+		fprintf(stderr, "BUG: rta type is %d\n", vfstats->rta_type);
+		return;
+	}
+
+	/*
+	 * The bound must be the one vf[] was sized with: parsing with
+	 * IFLA_VF_MAX (a different attribute space) would let the parser
+	 * write past the end of the array.
+	 */
+	parse_rtattr_nested(vf, IFLA_VF_STATS_MAX, vfstats);
+
+	/* RX stats */
+	fprintf(fp, "%s", _SL_);
+	fprintf(fp, " RX: bytes packets mcast bcast %s", _SL_);
+	fprintf(fp, " ");
+
+	print_num(fp, 10, vf_stat_u64(vf[IFLA_VF_STATS_RX_BYTES]));
+	print_num(fp, 8, vf_stat_u64(vf[IFLA_VF_STATS_RX_PACKETS]));
+	print_num(fp, 7, vf_stat_u64(vf[IFLA_VF_STATS_MULTICAST]));
+	print_num(fp, 7, vf_stat_u64(vf[IFLA_VF_STATS_BROADCAST]));
+
+	/* TX stats */
+	fprintf(fp, "%s", _SL_);
+	fprintf(fp, " TX: bytes packets %s", _SL_);
+	fprintf(fp, " ");
+
+	print_num(fp, 10, vf_stat_u64(vf[IFLA_VF_STATS_TX_BYTES]));
+	print_num(fp, 8, vf_stat_u64(vf[IFLA_VF_STATS_TX_PACKETS]));
+}
+
static void print_link_stats64(FILE *fp, const struct rtnl_link_stats64 *s,
const struct rtattr *carrier_changes)
{
@@ -548,6 +609,107 @@ static void print_link_stats(FILE *fp, struct nlmsghdr *n)
fprintf(fp, "%s", _SL_);
}
+/*
+ * Print one link in the brief format: the device name (as "name@peer" or
+ * "name@NONE" when IFLA_LINK is present) padded to 16 columns, then the
+ * operational state when IFLA_OPERSTATE is present.  Only when
+ * filter.family is AF_PACKET does it also print the link-layer address,
+ * the interface flags and a terminating newline; for other families no
+ * newline is written here.
+ *
+ * who: not used
+ * n:   netlink message; types other than RTM_NEWLINK/RTM_DELLINK are
+ *      rejected
+ * arg: the FILE * to print to
+ *
+ * Returns 0 when the link was printed, -1 when the message is too short,
+ * of the wrong type, or excluded by the global filter.
+ */
+int print_linkinfo_brief(const struct sockaddr_nl *who,
+			 struct nlmsghdr *n, void *arg)
+{
+	FILE *fp = (FILE *)arg;
+	struct ifinfomsg *ifi = NLMSG_DATA(n);
+	struct rtattr *tb[IFLA_MAX+1];
+	int len = n->nlmsg_len;
+	const char *name;
+	char buf[32] = { 0, };
+	unsigned m_flag = 0;
+
+	if (n->nlmsg_type != RTM_NEWLINK && n->nlmsg_type != RTM_DELLINK)
+		return -1;
+
+	len -= NLMSG_LENGTH(sizeof(*ifi));
+	if (len < 0)
+		return -1;
+
+	if (filter.ifindex && ifi->ifi_index != filter.ifindex)
+		return -1;
+	if (filter.up && !(ifi->ifi_flags&IFF_UP))
+		return -1;
+
+	parse_rtattr(tb, IFLA_MAX, IFLA_RTA(ifi), len);
+	if (tb[IFLA_IFNAME] == NULL) {
+		fprintf(stderr, "BUG: device with ifindex %d has nil ifname\n", ifi->ifi_index);
+	}
+
+	/*
+	 * A missing IFLA_IFNAME is reported above but is not fatal, so the
+	 * label match must not touch the attribute payload in that case;
+	 * fall back to the same "<nil>" placeholder that is printed below.
+	 */
+	if (filter.label &&
+	    (!filter.family || filter.family == AF_PACKET) &&
+	    fnmatch(filter.label,
+		    tb[IFLA_IFNAME] ? (const char *)RTA_DATA(tb[IFLA_IFNAME])
+				    : "<nil>", 0))
+		return -1;
+
+	if (tb[IFLA_GROUP]) {
+		int group = *(int *)RTA_DATA(tb[IFLA_GROUP]);
+
+		if (filter.group != -1 && group != filter.group)
+			return -1;
+	}
+
+	if (tb[IFLA_MASTER]) {
+		int master = *(int *)RTA_DATA(tb[IFLA_MASTER]);
+
+		if (filter.master > 0 && master != filter.master)
+			return -1;
+	} else if (filter.master > 0)
+		return -1;
+
+	if (filter.kind) {
+		if (tb[IFLA_LINKINFO]) {
+			char *kind = parse_link_kind(tb[IFLA_LINKINFO]);
+
+			if (strcmp(kind, filter.kind))
+				return -1;
+		} else {
+			return -1;
+		}
+	}
+
+	if (n->nlmsg_type == RTM_DELLINK)
+		fprintf(fp, "Deleted ");
+
+	name = tb[IFLA_IFNAME] ? rta_getattr_str(tb[IFLA_IFNAME]) : "<nil>";
+
+	if (tb[IFLA_LINK]) {
+		SPRINT_BUF(b1);
+		int iflink = *(int *)RTA_DATA(tb[IFLA_LINK]);
+
+		if (iflink == 0)
+			snprintf(buf, sizeof(buf), "%s@NONE", name);
+		else {
+			snprintf(buf, sizeof(buf),
+				 "%s@%s", name, ll_idx_n2a(iflink, b1));
+			/* m_flag ends up 1 when the linked device is not IFF_UP */
+			m_flag = ll_index_to_flags(iflink);
+			m_flag = !(m_flag & IFF_UP);
+		}
+	} else
+		snprintf(buf, sizeof(buf), "%s", name);
+
+	fprintf(fp, "%-16s ", buf);
+
+	if (tb[IFLA_OPERSTATE])
+		print_operstate(fp, rta_getattr_u8(tb[IFLA_OPERSTATE]));
+
+	/* address, flags and the newline are only emitted for AF_PACKET */
+	if (filter.family == AF_PACKET) {
+		SPRINT_BUF(b1);
+
+		if (tb[IFLA_ADDRESS]) {
+			color_fprintf(fp, COLOR_MAC, "%s ",
+				      ll_addr_n2a(RTA_DATA(tb[IFLA_ADDRESS]),
+						  RTA_PAYLOAD(tb[IFLA_ADDRESS]),
+						  ifi->ifi_type,
+						  b1, sizeof(b1)));
+		}
+
+		print_link_flags(fp, ifi->ifi_flags, m_flag);
+		fprintf(fp, "\n");
+	}
+	fflush(fp);
+	return 0;
+}
+
int print_linkinfo(const struct sockaddr_nl *who,
struct nlmsghdr *n, void *arg)
{
@@ -606,7 +768,8 @@ int print_linkinfo(const struct sockaddr_nl *who,
if (n->nlmsg_type == RTM_DELLINK)
fprintf(fp, "Deleted ");
- fprintf(fp, "%d: %s", ifi->ifi_index,
+ fprintf(fp, "%d: ", ifi->ifi_index);
+ color_fprintf(fp, COLOR_IFNAME, "%s",
tb[IFLA_IFNAME] ? rta_getattr_str(tb[IFLA_IFNAME]) : "<nil>");
if (tb[IFLA_LINK]) {
@@ -645,6 +808,14 @@ int print_linkinfo(const struct sockaddr_nl *who,
b1, sizeof(b1)));
}
+ if (tb[IFLA_PHYS_SWITCH_ID]) {
+ SPRINT_BUF(b1);
+ fprintf(fp, "switchid %s ",
+ hexstring_n2a(RTA_DATA(tb[IFLA_PHYS_SWITCH_ID]),
+ RTA_PAYLOAD(tb[IFLA_PHYS_SWITCH_ID]),
+ b1, sizeof(b1)));
+ }
+
if (tb[IFLA_OPERSTATE])
print_operstate(fp, rta_getattr_u8(tb[IFLA_OPERSTATE]));
@@ -666,10 +837,11 @@ int print_linkinfo(const struct sockaddr_nl *who,
fprintf(fp, " link/%s ", ll_type_n2a(ifi->ifi_type, b1, sizeof(b1)));
if (tb[IFLA_ADDRESS]) {
- fprintf(fp, "%s", ll_addr_n2a(RTA_DATA(tb[IFLA_ADDRESS]),
- RTA_PAYLOAD(tb[IFLA_ADDRESS]),
- ifi->ifi_type,
- b1, sizeof(b1)));
+ color_fprintf(fp, COLOR_MAC, "%s",
+ ll_addr_n2a(RTA_DATA(tb[IFLA_ADDRESS]),
+ RTA_PAYLOAD(tb[IFLA_ADDRESS]),
+ ifi->ifi_type,
+ b1, sizeof(b1)));
}
if (tb[IFLA_BROADCAST]) {
if (ifi->ifi_flags&IFF_POINTOPOINT)
@@ -692,6 +864,11 @@ int print_linkinfo(const struct sockaddr_nl *who,
fprintf(fp, " link-netnsid unknown");
}
+ if (tb[IFLA_PROTO_DOWN]) {
+ if (rta_getattr_u8(tb[IFLA_PROTO_DOWN]))
+ fprintf(fp, " protodown on ");
+ }
+
if (tb[IFLA_PROMISCUITY] && show_details)
fprintf(fp, " promiscuity %u ",
*(int*)RTA_DATA(tb[IFLA_PROMISCUITY]));
@@ -726,7 +903,17 @@ int print_linkinfo(const struct sockaddr_nl *who,
static int flush_update(void)
{
- if (rtnl_send_check(&rth, filter.flushb, filter.flushp) < 0) {
+
+ /*
+ * Note that the kernel may delete multiple addresses for one
+ * delete request (e.g. if ipv4 address promotion is disabled).
+ * Since a flush operation is really a series of delete requests,
+ * it's possible that we may request an address delete that has
+ * already been done by the kernel. Therefore, ignore EADDRNOTAVAIL
+ * errors returned from a flush request.
+ */
+ if ((rtnl_send_check(&rth, filter.flushb, filter.flushp) < 0) &&
+ (errno != EADDRNOTAVAIL)) {
perror("Failed to send flush request");
return -1;
}
@@ -835,24 +1022,37 @@ int print_addrinfo(const struct sockaddr_nl *who, struct nlmsghdr *n,
if (n->nlmsg_type == RTM_DELADDR)
fprintf(fp, "Deleted ");
- if (filter.oneline || filter.flushb)
- fprintf(fp, "%u: %s", ifa->ifa_index, ll_index_to_name(ifa->ifa_index));
- if (ifa->ifa_family == AF_INET)
- fprintf(fp, " inet ");
- else if (ifa->ifa_family == AF_INET6)
- fprintf(fp, " inet6 ");
- else if (ifa->ifa_family == AF_DECnet)
- fprintf(fp, " dnet ");
- else if (ifa->ifa_family == AF_IPX)
- fprintf(fp, " ipx ");
- else
- fprintf(fp, " family %d ", ifa->ifa_family);
+ if (!brief) {
+ if (filter.oneline || filter.flushb)
+ fprintf(fp, "%u: %s", ifa->ifa_index, ll_index_to_name(ifa->ifa_index));
+ if (ifa->ifa_family == AF_INET)
+ fprintf(fp, " inet ");
+ else if (ifa->ifa_family == AF_INET6)
+ fprintf(fp, " inet6 ");
+ else if (ifa->ifa_family == AF_DECnet)
+ fprintf(fp, " dnet ");
+ else if (ifa->ifa_family == AF_IPX)
+ fprintf(fp, " ipx ");
+ else
+ fprintf(fp, " family %d ", ifa->ifa_family);
+ }
if (rta_tb[IFA_LOCAL]) {
- fprintf(fp, "%s", format_host(ifa->ifa_family,
- RTA_PAYLOAD(rta_tb[IFA_LOCAL]),
- RTA_DATA(rta_tb[IFA_LOCAL]),
- abuf, sizeof(abuf)));
+ if (ifa->ifa_family == AF_INET)
+ color_fprintf(fp, COLOR_INET, "%s", format_host(ifa->ifa_family,
+ RTA_PAYLOAD(rta_tb[IFA_LOCAL]),
+ RTA_DATA(rta_tb[IFA_LOCAL]),
+ abuf, sizeof(abuf)));
+ else if (ifa->ifa_family == AF_INET6)
+ color_fprintf(fp, COLOR_INET6, "%s", format_host(ifa->ifa_family,
+ RTA_PAYLOAD(rta_tb[IFA_LOCAL]),
+ RTA_DATA(rta_tb[IFA_LOCAL]),
+ abuf, sizeof(abuf)));
+ else
+ fprintf(fp, "%s", format_host(ifa->ifa_family,
+ RTA_PAYLOAD(rta_tb[IFA_LOCAL]),
+ RTA_DATA(rta_tb[IFA_LOCAL]),
+ abuf, sizeof(abuf)));
if (rta_tb[IFA_ADDRESS] == NULL ||
memcmp(RTA_DATA(rta_tb[IFA_ADDRESS]), RTA_DATA(rta_tb[IFA_LOCAL]),
@@ -868,6 +1068,9 @@ int print_addrinfo(const struct sockaddr_nl *who, struct nlmsghdr *n,
}
}
+ if (brief)
+ goto brief_exit;
+
if (rta_tb[IFA_BROADCAST]) {
fprintf(fp, "brd %s ",
format_host(ifa->ifa_family,
@@ -915,6 +1118,10 @@ int print_addrinfo(const struct sockaddr_nl *who, struct nlmsghdr *n,
ifa_flags &= ~IFA_F_NOPREFIXROUTE;
fprintf(fp, "noprefixroute ");
}
+ if (ifa_flags & IFA_F_MCAUTOJOIN) {
+ ifa_flags &= ~IFA_F_MCAUTOJOIN;
+ fprintf(fp, "autojoin ");
+ }
if (!(ifa_flags & IFA_F_PERMANENT)) {
fprintf(fp, "dynamic ");
} else
@@ -946,32 +1153,11 @@ int print_addrinfo(const struct sockaddr_nl *who, struct nlmsghdr *n,
}
}
fprintf(fp, "\n");
+brief_exit:
fflush(fp);
return 0;
}
-static int print_addrinfo_primary(const struct sockaddr_nl *who,
- struct nlmsghdr *n, void *arg)
-{
- struct ifaddrmsg *ifa = NLMSG_DATA(n);
-
- if (ifa->ifa_flags & IFA_F_SECONDARY)
- return 0;
-
- return print_addrinfo(who, n, arg);
-}
-
-static int print_addrinfo_secondary(const struct sockaddr_nl *who,
- struct nlmsghdr *n, void *arg)
-{
- struct ifaddrmsg *ifa = NLMSG_DATA(n);
-
- if (!(ifa->ifa_flags & IFA_F_SECONDARY))
- return 0;
-
- return print_addrinfo(who, n, arg);
-}
-
struct nlmsg_list
{
struct nlmsg_list *next;
@@ -1006,6 +1192,10 @@ static int print_selected_addrinfo(struct ifinfomsg *ifi,
print_addrinfo(NULL, n, fp);
}
+ if (brief) {
+ fprintf(fp, "\n");
+ fflush(fp);
+ }
return 0;
}
@@ -1059,7 +1249,7 @@ static int ipadd_dump_check_magic(void)
__u32 magic = 0;
if (isatty(STDIN_FILENO)) {
- fprintf(stderr, "Can't restore addr dump from a terminal\n");
+ fprintf(stderr, "Can't restore address dump from a terminal\n");
return -1;
}
@@ -1086,7 +1276,9 @@ static int save_nlmsg(const struct sockaddr_nl *who, struct nlmsghdr *n,
return ret == n->nlmsg_len ? 0 : ret;
}
-static int show_handler(const struct sockaddr_nl *nl, struct nlmsghdr *n, void *arg)
+static int show_handler(const struct sockaddr_nl *nl,
+ struct rtnl_ctrl_data *ctrl,
+ struct nlmsghdr *n, void *arg)
{
struct ifaddrmsg *ifa = NLMSG_DATA(n);
@@ -1103,7 +1295,9 @@ static int ipaddr_showdump(void)
exit(rtnl_from_file(stdin, &show_handler, NULL));
}
-static int restore_handler(const struct sockaddr_nl *nl, struct nlmsghdr *n, void *arg)
+static int restore_handler(const struct sockaddr_nl *nl,
+ struct rtnl_ctrl_data *ctrl,
+ struct nlmsghdr *n, void *arg)
{
int ret;
@@ -1111,7 +1305,7 @@ static int restore_handler(const struct sockaddr_nl *nl, struct nlmsghdr *n, voi
ll_init_map(&rth);
- ret = rtnl_talk(&rth, n, 0, 0, n);
+ ret = rtnl_talk(&rth, n, n, sizeof(*n));
if ((ret < 0) && (errno == EEXIST))
ret = 0;
@@ -1214,26 +1408,13 @@ static int ipaddr_flush(void)
filter.flushe = sizeof(flushb);
while ((max_flush_loops == 0) || (round < max_flush_loops)) {
- const struct rtnl_dump_filter_arg a[3] = {
- {
- .filter = print_addrinfo_secondary,
- .arg1 = stdout,
- },
- {
- .filter = print_addrinfo_primary,
- .arg1 = stdout,
- },
- {
- .filter = NULL,
- .arg1 = NULL,
- },
- };
if (rtnl_wilddump_request(&rth, filter.family, RTM_GETADDR) < 0) {
perror("Cannot send dump request");
exit(1);
}
filter.flushed = 0;
- if (rtnl_dump_filter_l(&rth, a) < 0) {
+ if (rtnl_dump_filter_nc(&rth, print_addrinfo,
+ stdout, NLM_F_DUMP_INTR) < 0) {
fprintf(stderr, "Flush terminated\n");
exit(1);
}
@@ -1280,10 +1461,7 @@ static int ipaddr_list_flush_or_save(int argc, char **argv, int action)
ipaddr_reset_filter(oneline, 0);
filter.showqueue = 1;
-
- if (filter.family == AF_UNSPEC)
- filter.family = preferred_family;
-
+ filter.family = preferred_family;
filter.group = -1;
if (action == IPADD_FLUSH) {
@@ -1354,6 +1532,9 @@ static int ipaddr_list_flush_or_save(int argc, char **argv, int action)
} else if (strcmp(*argv, "noprefixroute") == 0) {
filter.flags |= IFA_F_NOPREFIXROUTE;
filter.flagmask |= IFA_F_NOPREFIXROUTE;
+ } else if (strcmp(*argv, "autojoin") == 0) {
+ filter.flags |= IFA_F_MCAUTOJOIN;
+ filter.flagmask |= IFA_F_MCAUTOJOIN;
} else if (strcmp(*argv, "dadfailed") == 0) {
filter.flags |= IFA_F_DADFAILED;
filter.flagmask |= IFA_F_DADFAILED;
@@ -1381,7 +1562,7 @@ static int ipaddr_list_flush_or_save(int argc, char **argv, int action)
if (strcmp(*argv, "dev") == 0) {
NEXT_ARG();
}
- if (matches(*argv, "help") == 0)
+ else if (matches(*argv, "help") == 0)
usage();
if (filter_dev)
duparg2("dev", *argv);
@@ -1460,9 +1641,16 @@ static int ipaddr_list_flush_or_save(int argc, char **argv, int action)
for (l = linfo.head; l; l = l->next) {
int res = 0;
+ struct ifinfomsg *ifi = NLMSG_DATA(&l->h);
- if (no_link || (res = print_linkinfo(NULL, &l->h, stdout)) >= 0) {
- struct ifinfomsg *ifi = NLMSG_DATA(&l->h);
+ if (brief) {
+ if (print_linkinfo_brief(NULL, &l->h, stdout) == 0)
+ if (filter.family != AF_PACKET)
+ print_selected_addrinfo(ifi,
+ ainfo.head,
+ stdout);
+ } else if (no_link ||
+ (res = print_linkinfo(NULL, &l->h, stdout)) >= 0) {
if (filter.family != AF_PACKET)
print_selected_addrinfo(ifi,
ainfo.head, stdout);
@@ -1558,6 +1746,16 @@ static int default_scope(inet_prefix *lcl)
return 0;
}
+/*
+ * Report whether prefix a holds a multicast address: IN_MULTICAST() on the
+ * first data word converted with ntohl() for AF_INET,
+ * IN6_IS_ADDR_MULTICAST() on the data array for AF_INET6, and false for
+ * every other family.
+ */
+static bool ipaddr_is_multicast(inet_prefix *a)
+{
+	switch (a->family) {
+	case AF_INET:
+		return IN_MULTICAST(ntohl(a->data[0]));
+	case AF_INET6:
+		return IN6_IS_ADDR_MULTICAST(a->data);
+	default:
+		return false;
+	}
+}
+
static int ipaddr_modify(int cmd, int flags, int argc, char **argv)
{
struct {
@@ -1665,6 +1863,8 @@ static int ipaddr_modify(int cmd, int flags, int argc, char **argv)
ifa_flags |= IFA_F_MANAGETEMPADDR;
} else if (strcmp(*argv, "noprefixroute") == 0) {
ifa_flags |= IFA_F_NOPREFIXROUTE;
+ } else if (strcmp(*argv, "autojoin") == 0) {
+ ifa_flags |= IFA_F_MCAUTOJOIN;
} else {
if (strcmp(*argv, "local") == 0) {
NEXT_ARG();
@@ -1755,7 +1955,12 @@ static int ipaddr_modify(int cmd, int flags, int argc, char **argv)
sizeof(cinfo));
}
- if (rtnl_talk(&rth, &req.n, 0, 0, NULL) < 0)
+ if ((ifa_flags & IFA_F_MCAUTOJOIN) && !ipaddr_is_multicast(&lcl)) {
+ fprintf(stderr, "autojoin needs multicast address\n");
+ return -1;
+ }
+
+ if (rtnl_talk(&rth, &req.n, NULL, 0) < 0)
return -2;
return 0;
@@ -1787,6 +1992,6 @@ int do_ipaddr(int argc, char **argv)
return ipaddr_restore();
if (matches(*argv, "help") == 0)
usage();
- fprintf(stderr, "Command \"%s\" is unknown, try \"ip addr help\".\n", *argv);
+ fprintf(stderr, "Command \"%s\" is unknown, try \"ip address help\".\n", *argv);
exit(-1);
}
diff --git a/ip/ipaddrlabel.c b/ip/ipaddrlabel.c
index f6a638b5..f01bc269 100644
--- a/ip/ipaddrlabel.c
+++ b/ip/ipaddrlabel.c
@@ -182,8 +182,8 @@ static int ipaddrlabel_modify(int cmd, int argc, char **argv)
if (req.ifal.ifal_family == AF_UNSPEC)
req.ifal.ifal_family = AF_INET6;
- if (rtnl_talk(&rth, &req.n, 0, 0, NULL) < 0)
- return 2;
+ if (rtnl_talk(&rth, &req.n, NULL, 0) < 0)
+ return -2;
return 0;
}
@@ -209,7 +209,7 @@ static int flush_addrlabel(const struct sockaddr_nl *who, struct nlmsghdr *n, vo
if (rtnl_open(&rth2, 0) < 0)
return -1;
- if (rtnl_talk(&rth2, n, 0, 0, NULL) < 0)
+ if (rtnl_talk(&rth2, n, NULL, 0) < 0)
return -2;
rtnl_close(&rth2);
@@ -232,12 +232,12 @@ static int ipaddrlabel_flush(int argc, char **argv)
if (rtnl_wilddump_request(&rth, af, RTM_GETADDRLABEL) < 0) {
perror("Cannot send dump request");
- return 1;
+ return -1;
}
if (rtnl_dump_filter(&rth, flush_addrlabel, NULL) < 0) {
fprintf(stderr, "Flush terminated\n");
- return 1;
+ return -1;
}
return 0;
@@ -263,4 +263,3 @@ int do_ipaddrlabel(int argc, char **argv)
fprintf(stderr, "Command \"%s\" is unknown, try \"ip addrlabel help\".\n", *argv);
exit(-1);
}
-
diff --git a/ip/ipfou.c b/ip/ipfou.c
index 26760453..8a86b18f 100644
--- a/ip/ipfou.c
+++ b/ip/ipfou.c
@@ -112,7 +112,7 @@ static int do_add(int argc, char **argv)
fou_parse_opt(argc, argv, &req.n, true);
- if (rtnl_talk(&genl_rth, &req.n, 0, 0, NULL) < 0)
+ if (rtnl_talk(&genl_rth, &req.n, NULL, 0) < 0)
return -2;
return 0;
@@ -124,7 +124,7 @@ static int do_del(int argc, char **argv)
fou_parse_opt(argc, argv, &req.n, false);
- if (rtnl_talk(&genl_rth, &req.n, 0, 0, NULL) < 0)
+ if (rtnl_talk(&genl_rth, &req.n, NULL, 0) < 0)
return -2;
return 0;
@@ -156,4 +156,3 @@ int do_ipfou(int argc, char **argv)
fprintf(stderr, "Command \"%s\" is unknown, try \"ip fou help\".\n", *argv);
exit(-1);
}
-
diff --git a/ip/ipl2tp.c b/ip/ipl2tp.c
index 5cd86322..f050880e 100644
--- a/ip/ipl2tp.c
+++ b/ip/ipl2tp.c
@@ -119,7 +119,7 @@ static int create_tunnel(struct l2tp_parm *p)
addattr16(&req.n, 1024, L2TP_ATTR_UDP_DPORT, p->peer_udp_port);
}
- if (rtnl_talk(&genl_rth, &req.n, 0, 0, NULL) < 0)
+ if (rtnl_talk(&genl_rth, &req.n, NULL, 0) < 0)
return -2;
return 0;
@@ -132,7 +132,7 @@ static int delete_tunnel(struct l2tp_parm *p)
addattr32(&req.n, 128, L2TP_ATTR_CONN_ID, p->tunnel_id);
- if (rtnl_talk(&genl_rth, &req.n, 0, 0, NULL) < 0)
+ if (rtnl_talk(&genl_rth, &req.n, NULL, 0) < 0)
return -2;
return 0;
@@ -166,7 +166,7 @@ static int create_session(struct l2tp_parm *p)
if (p->ifname && p->ifname[0])
addattrstrz(&req.n, 1024, L2TP_ATTR_IFNAME, p->ifname);
- if (rtnl_talk(&genl_rth, &req.n, 0, 0, NULL) < 0)
+ if (rtnl_talk(&genl_rth, &req.n, NULL, 0) < 0)
return -2;
return 0;
@@ -179,7 +179,7 @@ static int delete_session(struct l2tp_parm *p)
addattr32(&req.n, 1024, L2TP_ATTR_CONN_ID, p->tunnel_id);
addattr32(&req.n, 1024, L2TP_ATTR_SESSION_ID, p->session_id);
- if (rtnl_talk(&genl_rth, &req.n, 0, 0, NULL) < 0)
+ if (rtnl_talk(&genl_rth, &req.n, NULL, 0) < 0)
return -2;
return 0;
@@ -234,9 +234,10 @@ static void print_session(struct l2tp_data *data)
if (p->peer_cookie_len > 0)
print_cookie("peer cookie", p->peer_cookie, p->peer_cookie_len);
- if (p->reorder_timeout != 0) {
+ if (p->reorder_timeout != 0)
printf(" reorder timeout: %u\n", p->reorder_timeout);
- }
+ else
+ printf("\n");
}
static int get_response(struct nlmsghdr *n, void *arg)
diff --git a/ip/iplink.c b/ip/iplink.c
index e6f30e99..5ab9d613 100644
--- a/ip/iplink.c
+++ b/ip/iplink.c
@@ -53,9 +53,9 @@ void iplink_usage(void)
fprintf(stderr, " [ numtxqueues QUEUE_COUNT ]\n");
fprintf(stderr, " [ numrxqueues QUEUE_COUNT ]\n");
fprintf(stderr, " type TYPE [ ARGS ]\n");
- fprintf(stderr, " ip link delete DEV type TYPE [ ARGS ]\n");
+ fprintf(stderr, " ip link delete { DEVICE | dev DEVICE | group DEVGROUP } type TYPE [ ARGS ]\n");
fprintf(stderr, "\n");
- fprintf(stderr, " ip link set { dev DEVICE | group DEVGROUP } [ { up | down } ]\n");
+ fprintf(stderr, " ip link set { DEVICE | dev DEVICE | group DEVGROUP } [ { up | down } ]\n");
} else
fprintf(stderr, "Usage: ip link set DEVICE [ { up | down } ]\n");
@@ -77,13 +77,15 @@ void iplink_usage(void)
fprintf(stderr, " [ vf NUM [ mac LLADDR ]\n");
fprintf(stderr, " [ vlan VLANID [ qos VLAN-QOS ] ]\n");
- fprintf(stderr, " [ rate TXRATE ] ] \n");
+ fprintf(stderr, " [ rate TXRATE ] ]\n");
- fprintf(stderr, " [ spoofchk { on | off} ] ] \n");
+ fprintf(stderr, " [ spoofchk { on | off} ] ]\n");
+ fprintf(stderr, " [ query_rss { on | off} ] ]\n");
fprintf(stderr, " [ state { auto | enable | disable} ] ]\n");
fprintf(stderr, " [ master DEVICE ]\n");
fprintf(stderr, " [ nomaster ]\n");
- fprintf(stderr, " [ addrgenmode { eui64 | none } ]\n");
+ fprintf(stderr, " [ addrgenmode { eui64 | none | stable_secret | random } ]\n");
+ fprintf(stderr, " [ protodown { on | off } ]\n");
fprintf(stderr, " ip link show [ DEVICE | group GROUP ] [up] [master DEV] [type TYPE]\n");
if (iplink_have_newlink()) {
@@ -92,7 +94,7 @@ void iplink_usage(void)
fprintf(stderr, "TYPE := { vlan | veth | vcan | dummy | ifb | macvlan | macvtap |\n");
fprintf(stderr, " bridge | bond | ipoib | ip6tnl | ipip | sit | vxlan |\n");
fprintf(stderr, " gre | gretap | ip6gre | ip6gretap | vti | nlmon |\n");
- fprintf(stderr, " bond_slave | ipvlan }\n");
+ fprintf(stderr, " bond_slave | ipvlan | geneve | bridge_slave | vrf }\n");
}
exit(-1);
}
@@ -104,7 +106,9 @@ static void usage(void)
static int on_off(const char *msg, const char *realval)
{
- fprintf(stderr, "Error: argument of \"%s\" must be \"on\" or \"off\", not \"%s\"\n", msg, realval);
+ fprintf(stderr,
+ "Error: argument of \"%s\" must be \"on\" or \"off\", not \"%s\"\n",
+ msg, realval);
return -1;
}
@@ -172,6 +176,10 @@ static int get_addr_gen_mode(const char *mode)
return IN6_ADDR_GEN_MODE_EUI64;
if (strcasecmp(mode, "none") == 0)
return IN6_ADDR_GEN_MODE_NONE;
+ if (strcasecmp(mode, "stable_secret") == 0)
+ return IN6_ADDR_GEN_MODE_STABLE_PRIVACY;
+ if (strcasecmp(mode, "random") == 0)
+ return IN6_ADDR_GEN_MODE_RANDOM;
return -1;
}
@@ -179,6 +187,7 @@ static int get_addr_gen_mode(const char *mode)
static int have_rtnl_newlink = -1;
static int accept_msg(const struct sockaddr_nl *who,
+ struct rtnl_ctrl_data *ctrl,
struct nlmsghdr *n, void *arg)
{
struct nlmsgerr *err = (struct nlmsgerr *)NLMSG_DATA(n);
@@ -265,6 +274,7 @@ static int iplink_parse_vf(int vf, int *argcp, char ***argvp,
NEXT_ARG();
if (matches(*argv, "mac") == 0) {
struct ifla_vf_mac ivm;
+
NEXT_ARG();
ivm.vf = vf;
len = ll_addr_a2n((char *)ivm.mac, 32, *argv);
@@ -273,19 +283,19 @@ static int iplink_parse_vf(int vf, int *argcp, char ***argvp,
addattr_l(&req->n, sizeof(*req), IFLA_VF_MAC, &ivm, sizeof(ivm));
} else if (matches(*argv, "vlan") == 0) {
struct ifla_vf_vlan ivv;
+
NEXT_ARG();
- if (get_unsigned(&ivv.vlan, *argv, 0)) {
+ if (get_unsigned(&ivv.vlan, *argv, 0))
invarg("Invalid \"vlan\" value\n", *argv);
- }
+
ivv.vf = vf;
ivv.qos = 0;
if (NEXT_ARG_OK()) {
NEXT_ARG();
if (matches(*argv, "qos") == 0) {
NEXT_ARG();
- if (get_unsigned(&ivv.qos, *argv, 0)) {
+ if (get_unsigned(&ivv.qos, *argv, 0))
invarg("Invalid \"qos\" value\n", *argv);
- }
} else {
/* rewind arg */
PREV_ARG();
@@ -294,10 +304,11 @@ static int iplink_parse_vf(int vf, int *argcp, char ***argvp,
addattr_l(&req->n, sizeof(*req), IFLA_VF_VLAN, &ivv, sizeof(ivv));
} else if (matches(*argv, "rate") == 0) {
struct ifla_vf_tx_rate ivt;
+
NEXT_ARG();
- if (get_unsigned(&ivt.rate, *argv, 0)) {
+ if (get_unsigned(&ivt.rate, *argv, 0))
invarg("Invalid \"rate\" value\n", *argv);
- }
+
ivt.vf = vf;
if (!new_rate_api)
addattr_l(&req->n, sizeof(*req),
@@ -321,18 +332,33 @@ static int iplink_parse_vf(int vf, int *argcp, char ***argvp,
} else if (matches(*argv, "spoofchk") == 0) {
struct ifla_vf_spoofchk ivs;
+
NEXT_ARG();
if (matches(*argv, "on") == 0)
ivs.setting = 1;
else if (matches(*argv, "off") == 0)
ivs.setting = 0;
else
- invarg("Invalid \"spoofchk\" value\n", *argv);
+ return on_off("spoofchk", *argv);
ivs.vf = vf;
addattr_l(&req->n, sizeof(*req), IFLA_VF_SPOOFCHK, &ivs, sizeof(ivs));
+ } else if (matches(*argv, "query_rss") == 0) {
+ struct ifla_vf_rss_query_en ivs;
+
+ NEXT_ARG();
+ if (matches(*argv, "on") == 0)
+ ivs.setting = 1;
+ else if (matches(*argv, "off") == 0)
+ ivs.setting = 0;
+ else
+ return on_off("query_rss", *argv);
+ ivs.vf = vf;
+ addattr_l(&req->n, sizeof(*req), IFLA_VF_RSS_QUERY_EN, &ivs, sizeof(ivs));
+
} else if (matches(*argv, "state") == 0) {
struct ifla_vf_link_state ivl;
+
NEXT_ARG();
if (matches(*argv, "auto") == 0)
ivl.link_state = IFLA_VF_LINK_STATE_AUTO;
@@ -376,7 +402,8 @@ static int iplink_parse_vf(int vf, int *argcp, char ***argvp,
}
int iplink_parse(int argc, char **argv, struct iplink_req *req,
- char **name, char **type, char **link, char **dev, int *group, int *index)
+ char **name, char **type, char **link, char **dev,
+ int *group, int *index)
{
int ret, len;
char abuf[32];
@@ -417,15 +444,15 @@ int iplink_parse(int argc, char **argv, struct iplink_req *req,
return -1;
addattr_l(&req->n, sizeof(*req), IFLA_ADDRESS, abuf, len);
} else if (matches(*argv, "broadcast") == 0 ||
- strcmp(*argv, "brd") == 0) {
+ strcmp(*argv, "brd") == 0) {
NEXT_ARG();
len = ll_addr_a2n(abuf, sizeof(abuf), *argv);
if (len < 0)
return -1;
addattr_l(&req->n, sizeof(*req), IFLA_BROADCAST, abuf, len);
} else if (matches(*argv, "txqueuelen") == 0 ||
- strcmp(*argv, "qlen") == 0 ||
- matches(*argv, "txqlen") == 0) {
+ strcmp(*argv, "qlen") == 0 ||
+ matches(*argv, "txqlen") == 0) {
NEXT_ARG();
if (qlen != -1)
duparg("txqueuelen", *argv);
@@ -443,7 +470,8 @@ int iplink_parse(int argc, char **argv, struct iplink_req *req,
NEXT_ARG();
if (netns != -1)
duparg("netns", *argv);
- if ((netns = netns_get_fd(*argv)) >= 0)
+ netns = netns_get_fd(*argv);
+ if (netns >= 0)
addattr_l(&req->n, sizeof(*req), IFLA_NET_NS_FD, &netns, 4);
else if (get_integer(&netns, *argv, 0) == 0)
addattr_l(&req->n, sizeof(*req), IFLA_NET_NS_PID, &netns, 4);
@@ -452,54 +480,60 @@ int iplink_parse(int argc, char **argv, struct iplink_req *req,
} else if (strcmp(*argv, "multicast") == 0) {
NEXT_ARG();
req->i.ifi_change |= IFF_MULTICAST;
- if (strcmp(*argv, "on") == 0) {
+
+ if (strcmp(*argv, "on") == 0)
req->i.ifi_flags |= IFF_MULTICAST;
- } else if (strcmp(*argv, "off") == 0) {
+ else if (strcmp(*argv, "off") == 0)
req->i.ifi_flags &= ~IFF_MULTICAST;
- } else
+ else
return on_off("multicast", *argv);
} else if (strcmp(*argv, "allmulticast") == 0) {
NEXT_ARG();
req->i.ifi_change |= IFF_ALLMULTI;
- if (strcmp(*argv, "on") == 0) {
+
+ if (strcmp(*argv, "on") == 0)
req->i.ifi_flags |= IFF_ALLMULTI;
- } else if (strcmp(*argv, "off") == 0) {
+ else if (strcmp(*argv, "off") == 0)
req->i.ifi_flags &= ~IFF_ALLMULTI;
- } else
+ else
return on_off("allmulticast", *argv);
} else if (strcmp(*argv, "promisc") == 0) {
NEXT_ARG();
req->i.ifi_change |= IFF_PROMISC;
- if (strcmp(*argv, "on") == 0) {
+
+ if (strcmp(*argv, "on") == 0)
req->i.ifi_flags |= IFF_PROMISC;
- } else if (strcmp(*argv, "off") == 0) {
+ else if (strcmp(*argv, "off") == 0)
req->i.ifi_flags &= ~IFF_PROMISC;
- } else
+ else
return on_off("promisc", *argv);
} else if (strcmp(*argv, "trailers") == 0) {
NEXT_ARG();
req->i.ifi_change |= IFF_NOTRAILERS;
- if (strcmp(*argv, "off") == 0) {
+
+ if (strcmp(*argv, "off") == 0)
req->i.ifi_flags |= IFF_NOTRAILERS;
- } else if (strcmp(*argv, "on") == 0) {
+ else if (strcmp(*argv, "on") == 0)
req->i.ifi_flags &= ~IFF_NOTRAILERS;
- } else
+ else
return on_off("trailers", *argv);
} else if (strcmp(*argv, "arp") == 0) {
NEXT_ARG();
req->i.ifi_change |= IFF_NOARP;
- if (strcmp(*argv, "on") == 0) {
+
+ if (strcmp(*argv, "on") == 0)
req->i.ifi_flags &= ~IFF_NOARP;
- } else if (strcmp(*argv, "off") == 0) {
+ else if (strcmp(*argv, "off") == 0)
req->i.ifi_flags |= IFF_NOARP;
- } else
- return on_off("noarp", *argv);
+ else
+ return on_off("arp", *argv);
} else if (strcmp(*argv, "vf") == 0) {
struct rtattr *vflist;
+
NEXT_ARG();
- if (get_integer(&vf, *argv, 0)) {
+ if (get_integer(&vf, *argv, 0))
invarg("Invalid \"vf\" value\n", *argv);
- }
+
vflist = addattr_nest(&req->n, sizeof(*req),
IFLA_VFINFO_LIST);
if (dev_index == 0)
@@ -511,6 +545,7 @@ int iplink_parse(int argc, char **argv, struct iplink_req *req,
addattr_nest_end(&req->n, vflist);
} else if (matches(*argv, "master") == 0) {
int ifindex;
+
NEXT_ARG();
ifindex = ll_name_to_index(*argv);
if (!ifindex)
@@ -519,16 +554,18 @@ int iplink_parse(int argc, char **argv, struct iplink_req *req,
&ifindex, 4);
} else if (matches(*argv, "nomaster") == 0) {
int ifindex = 0;
+
addattr_l(&req->n, sizeof(*req), IFLA_MASTER,
&ifindex, 4);
} else if (matches(*argv, "dynamic") == 0) {
NEXT_ARG();
req->i.ifi_change |= IFF_DYNAMIC;
- if (strcmp(*argv, "on") == 0) {
+
+ if (strcmp(*argv, "on") == 0)
req->i.ifi_flags |= IFF_DYNAMIC;
- } else if (strcmp(*argv, "off") == 0) {
+ else if (strcmp(*argv, "off") == 0)
req->i.ifi_flags &= ~IFF_DYNAMIC;
- } else
+ else
return on_off("dynamic", *argv);
} else if (matches(*argv, "type") == 0) {
NEXT_ARG();
@@ -549,6 +586,7 @@ int iplink_parse(int argc, char **argv, struct iplink_req *req,
invarg("Invalid \"group\" value\n", *argv);
} else if (strcmp(*argv, "mode") == 0) {
int mode;
+
NEXT_ARG();
mode = get_link_mode(*argv);
if (mode < 0)
@@ -556,6 +594,7 @@ int iplink_parse(int argc, char **argv, struct iplink_req *req,
addattr8(&req->n, sizeof(*req), IFLA_LINKMODE, mode);
} else if (strcmp(*argv, "state") == 0) {
int state;
+
NEXT_ARG();
state = get_operstate(*argv);
if (state < 0)
@@ -581,6 +620,7 @@ int iplink_parse(int argc, char **argv, struct iplink_req *req,
} else if (matches(*argv, "addrgenmode") == 0) {
struct rtattr *afs, *afs6;
int mode;
+
NEXT_ARG();
mode = get_addr_gen_mode(*argv);
if (mode < 0)
@@ -598,12 +638,24 @@ int iplink_parse(int argc, char **argv, struct iplink_req *req,
invarg("Invalid \"link-netnsid\" value\n", *argv);
addattr32(&req->n, sizeof(*req), IFLA_LINK_NETNSID,
link_netnsid);
+ } else if (strcmp(*argv, "protodown") == 0) {
+ unsigned int proto_down;
+
+ NEXT_ARG();
+ if (strcmp(*argv, "on") == 0)
+ proto_down = 1;
+ else if (strcmp(*argv, "off") == 0)
+ proto_down = 0;
+ else
+ return on_off("protodown", *argv);
+ addattr8(&req->n, sizeof(*req), IFLA_PROTO_DOWN,
+ proto_down);
} else {
- if (strcmp(*argv, "dev") == 0) {
- NEXT_ARG();
- }
if (matches(*argv, "help") == 0)
usage();
+
+ if (strcmp(*argv, "dev") == 0)
+ NEXT_ARG();
if (*dev)
duparg2("dev", *argv);
*dev = *argv;
@@ -661,8 +713,8 @@ static int iplink_modify(int cmd, unsigned int flags, int argc, char **argv)
req.i.ifi_index = 0;
addattr32(&req.n, sizeof(req), IFLA_GROUP, group);
- if (rtnl_talk(&rth, &req.n, 0, 0, NULL) < 0)
- exit(2);
+ if (rtnl_talk(&rth, &req.n, NULL, 0) < 0)
+ return -2;
return 0;
}
}
@@ -760,8 +812,8 @@ static int iplink_modify(int cmd, unsigned int flags, int argc, char **argv)
return -1;
}
- if (rtnl_talk(&rth, &req.n, 0, 0, NULL) < 0)
- exit(2);
+ if (rtnl_talk(&rth, &req.n, NULL, 0) < 0)
+ return -2;
return 0;
}
@@ -770,7 +822,10 @@ int iplink_get(unsigned int flags, char *name, __u32 filt_mask)
{
int len;
struct iplink_req req;
- char answer[16384];
+ struct {
+ struct nlmsghdr n;
+ char buf[16384];
+ } answer;
memset(&req, 0, sizeof(req));
@@ -790,10 +845,13 @@ int iplink_get(unsigned int flags, char *name, __u32 filt_mask)
}
addattr32(&req.n, sizeof(req), IFLA_EXT_MASK, filt_mask);
- if (rtnl_talk(&rth, &req.n, 0, 0, (struct nlmsghdr *)answer) < 0)
+ if (rtnl_talk(&rth, &req.n, &answer.n, sizeof(answer)) < 0)
return -2;
- print_linkinfo(NULL, (struct nlmsghdr *)answer, stdout);
+ if (brief)
+ print_linkinfo_brief(NULL, &answer.n, stdout);
+ else
+ print_linkinfo(NULL, &answer.n, stdout);
return 0;
}
@@ -936,14 +994,14 @@ static int get_address(const char *dev, int *htype)
me.sll_family = AF_PACKET;
me.sll_ifindex = ifr.ifr_ifindex;
me.sll_protocol = htons(ETH_P_LOOP);
- if (bind(s, (struct sockaddr*)&me, sizeof(me)) == -1) {
+ if (bind(s, (struct sockaddr *)&me, sizeof(me)) == -1) {
perror("bind");
close(s);
return -1;
}
alen = sizeof(me);
- if (getsockname(s, (struct sockaddr*)&me, &alen) == -1) {
+ if (getsockname(s, (struct sockaddr *)&me, &alen) == -1) {
perror("getsockname");
close(s);
return -1;
@@ -1034,63 +1092,69 @@ static int do_set(int argc, char **argv)
} else if (strcmp(*argv, "multicast") == 0) {
NEXT_ARG();
mask |= IFF_MULTICAST;
- if (strcmp(*argv, "on") == 0) {
+
+ if (strcmp(*argv, "on") == 0)
flags |= IFF_MULTICAST;
- } else if (strcmp(*argv, "off") == 0) {
+ else if (strcmp(*argv, "off") == 0)
flags &= ~IFF_MULTICAST;
- } else
+ else
return on_off("multicast", *argv);
} else if (strcmp(*argv, "allmulticast") == 0) {
NEXT_ARG();
mask |= IFF_ALLMULTI;
- if (strcmp(*argv, "on") == 0) {
+
+ if (strcmp(*argv, "on") == 0)
flags |= IFF_ALLMULTI;
- } else if (strcmp(*argv, "off") == 0) {
+ else if (strcmp(*argv, "off") == 0)
flags &= ~IFF_ALLMULTI;
- } else
+ else
return on_off("allmulticast", *argv);
} else if (strcmp(*argv, "promisc") == 0) {
NEXT_ARG();
mask |= IFF_PROMISC;
- if (strcmp(*argv, "on") == 0) {
+
+ if (strcmp(*argv, "on") == 0)
flags |= IFF_PROMISC;
- } else if (strcmp(*argv, "off") == 0) {
+ else if (strcmp(*argv, "off") == 0)
flags &= ~IFF_PROMISC;
- } else
+ else
return on_off("promisc", *argv);
} else if (strcmp(*argv, "trailers") == 0) {
NEXT_ARG();
mask |= IFF_NOTRAILERS;
- if (strcmp(*argv, "off") == 0) {
+
+ if (strcmp(*argv, "off") == 0)
flags |= IFF_NOTRAILERS;
- } else if (strcmp(*argv, "on") == 0) {
+ else if (strcmp(*argv, "on") == 0)
flags &= ~IFF_NOTRAILERS;
- } else
+ else
return on_off("trailers", *argv);
} else if (strcmp(*argv, "arp") == 0) {
NEXT_ARG();
mask |= IFF_NOARP;
- if (strcmp(*argv, "on") == 0) {
+
+ if (strcmp(*argv, "on") == 0)
flags &= ~IFF_NOARP;
- } else if (strcmp(*argv, "off") == 0) {
+ else if (strcmp(*argv, "off") == 0)
flags |= IFF_NOARP;
- } else
- return on_off("noarp", *argv);
+ else
+ return on_off("arp", *argv);
} else if (matches(*argv, "dynamic") == 0) {
NEXT_ARG();
mask |= IFF_DYNAMIC;
- if (strcmp(*argv, "on") == 0) {
+
+ if (strcmp(*argv, "on") == 0)
flags |= IFF_DYNAMIC;
- } else if (strcmp(*argv, "off") == 0) {
+ else if (strcmp(*argv, "off") == 0)
flags &= ~IFF_DYNAMIC;
- } else
+ else
return on_off("dynamic", *argv);
} else {
- if (strcmp(*argv, "dev") == 0) {
+ if (strcmp(*argv, "dev") == 0)
NEXT_ARG();
- }
- if (matches(*argv, "help") == 0)
+ else if (matches(*argv, "help") == 0)
usage();
+
if (dev)
duparg2("dev", *argv);
dev = *argv;
@@ -1154,11 +1218,10 @@ static void do_help(int argc, char **argv)
if (argc <= 0) {
usage();
- return ;
+ return;
}
lu = get_link_kind(*argv);
-
if (lu && lu->print_help)
lu->print_help(lu, argc-1, argv+1, stdout);
else
@@ -1167,40 +1230,43 @@ static void do_help(int argc, char **argv)
int do_iplink(int argc, char **argv)
{
- if (argc > 0) {
- if (iplink_have_newlink()) {
- if (matches(*argv, "add") == 0)
- return iplink_modify(RTM_NEWLINK,
- NLM_F_CREATE|NLM_F_EXCL,
- argc-1, argv+1);
- if (matches(*argv, "set") == 0 ||
- matches(*argv, "change") == 0)
- return iplink_modify(RTM_NEWLINK, 0,
- argc-1, argv+1);
- if (matches(*argv, "replace") == 0)
- return iplink_modify(RTM_NEWLINK,
- NLM_F_CREATE|NLM_F_REPLACE,
- argc-1, argv+1);
- if (matches(*argv, "delete") == 0)
- return iplink_modify(RTM_DELLINK, 0,
- argc-1, argv+1);
- } else {
+ if (argc < 1)
+ return ipaddr_list_link(0, NULL);
+
+ if (iplink_have_newlink()) {
+ if (matches(*argv, "add") == 0)
+ return iplink_modify(RTM_NEWLINK,
+ NLM_F_CREATE|NLM_F_EXCL,
+ argc-1, argv+1);
+ if (matches(*argv, "set") == 0 ||
+ matches(*argv, "change") == 0)
+ return iplink_modify(RTM_NEWLINK, 0,
+ argc-1, argv+1);
+ if (matches(*argv, "replace") == 0)
+ return iplink_modify(RTM_NEWLINK,
+ NLM_F_CREATE|NLM_F_REPLACE,
+ argc-1, argv+1);
+ if (matches(*argv, "delete") == 0)
+ return iplink_modify(RTM_DELLINK, 0,
+ argc-1, argv+1);
+ } else {
#if IPLINK_IOCTL_COMPAT
- if (matches(*argv, "set") == 0)
- return do_set(argc-1, argv+1);
+ if (matches(*argv, "set") == 0)
+ return do_set(argc-1, argv+1);
#endif
- }
- if (matches(*argv, "show") == 0 ||
- matches(*argv, "lst") == 0 ||
- matches(*argv, "list") == 0)
- return ipaddr_list_link(argc-1, argv+1);
- if (matches(*argv, "help") == 0) {
- do_help(argc-1, argv+1);
- return 0;
- }
- } else
- return ipaddr_list_link(0, NULL);
+ }
+
+ if (matches(*argv, "show") == 0 ||
+ matches(*argv, "lst") == 0 ||
+ matches(*argv, "list") == 0)
+ return ipaddr_list_link(argc-1, argv+1);
+
+ if (matches(*argv, "help") == 0) {
+ do_help(argc-1, argv+1);
+ return 0;
+ }
- fprintf(stderr, "Command \"%s\" is unknown, try \"ip link help\".\n", *argv);
+ fprintf(stderr, "Command \"%s\" is unknown, try \"ip link help\".\n",
+ *argv);
exit(-1);
}
diff --git a/ip/iplink_bond.c b/ip/iplink_bond.c
index 3009ec91..cb2f045a 100644
--- a/ip/iplink_bond.c
+++ b/ip/iplink_bond.c
@@ -133,15 +133,19 @@ static void print_explain(FILE *f)
" [ min_links MIN_LINKS ]\n"
" [ lp_interval LP_INTERVAL ]\n"
" [ packets_per_slave PACKETS_PER_SLAVE ]\n"
+ " [ tlb_dynamic_lb TLB_DYNAMIC_LB ]\n"
" [ lacp_rate LACP_RATE ]\n"
" [ ad_select AD_SELECT ]\n"
+ " [ ad_user_port_key PORTKEY ]\n"
+ " [ ad_actor_sys_prio SYSPRIO ]\n"
+ " [ ad_actor_system LLADDR ]\n"
"\n"
"BONDMODE := balance-rr|active-backup|balance-xor|broadcast|802.3ad|balance-tlb|balance-alb\n"
"ARP_VALIDATE := none|active|backup|all\n"
"ARP_ALL_TARGETS := any|all\n"
"PRIMARY_RESELECT := always|better|failure\n"
"FAIL_OVER_MAC := none|active|follow\n"
- "XMIT_HASH_POLICY := layer2|layer2+3|layer3+4\n"
+ "XMIT_HASH_POLICY := layer2|layer2+3|layer3+4|encap2+3|encap3+4\n"
"LACP_RATE := slow|fast\n"
"AD_SELECT := stable|bandwidth|count\n"
);
@@ -157,7 +161,8 @@ static int bond_parse_opt(struct link_util *lu, int argc, char **argv,
{
__u8 mode, use_carrier, primary_reselect, fail_over_mac;
__u8 xmit_hash_policy, num_peer_notif, all_slaves_active;
- __u8 lacp_rate, ad_select;
+ __u8 lacp_rate, ad_select, tlb_dynamic_lb;
+ __u16 ad_user_port_key, ad_actor_sys_prio;
__u32 miimon, updelay, downdelay, arp_interval, arp_validate;
__u32 arp_all_targets, resend_igmp, min_links, lp_interval;
__u32 packets_per_slave;
@@ -166,10 +171,8 @@ static int bond_parse_opt(struct link_util *lu, int argc, char **argv,
while (argc > 0) {
if (matches(*argv, "mode") == 0) {
NEXT_ARG();
- if (get_index(mode_tbl, *argv) < 0) {
+ if (get_index(mode_tbl, *argv) < 0)
invarg("invalid mode", *argv);
- return -1;
- }
mode = get_index(mode_tbl, *argv);
addattr8(n, 1024, IFLA_BOND_MODE, mode);
} else if (matches(*argv, "active_slave") == 0) {
@@ -182,38 +185,28 @@ static int bond_parse_opt(struct link_util *lu, int argc, char **argv,
addattr32(n, 1024, IFLA_BOND_ACTIVE_SLAVE, 0);
} else if (matches(*argv, "miimon") == 0) {
NEXT_ARG();
- if (get_u32(&miimon, *argv, 0)) {
+ if (get_u32(&miimon, *argv, 0))
invarg("invalid miimon", *argv);
- return -1;
- }
addattr32(n, 1024, IFLA_BOND_MIIMON, miimon);
} else if (matches(*argv, "updelay") == 0) {
NEXT_ARG();
- if (get_u32(&updelay, *argv, 0)) {
+ if (get_u32(&updelay, *argv, 0))
invarg("invalid updelay", *argv);
- return -1;
- }
addattr32(n, 1024, IFLA_BOND_UPDELAY, updelay);
} else if (matches(*argv, "downdelay") == 0) {
NEXT_ARG();
- if (get_u32(&downdelay, *argv, 0)) {
+ if (get_u32(&downdelay, *argv, 0))
invarg("invalid downdelay", *argv);
- return -1;
- }
addattr32(n, 1024, IFLA_BOND_DOWNDELAY, downdelay);
} else if (matches(*argv, "use_carrier") == 0) {
NEXT_ARG();
- if (get_u8(&use_carrier, *argv, 0)) {
+ if (get_u8(&use_carrier, *argv, 0))
invarg("invalid use_carrier", *argv);
- return -1;
- }
addattr8(n, 1024, IFLA_BOND_USE_CARRIER, use_carrier);
} else if (matches(*argv, "arp_interval") == 0) {
NEXT_ARG();
- if (get_u32(&arp_interval, *argv, 0)) {
+ if (get_u32(&arp_interval, *argv, 0))
invarg("invalid arp_interval", *argv);
- return -1;
- }
addattr32(n, 1024, IFLA_BOND_ARP_INTERVAL, arp_interval);
} else if (matches(*argv, "arp_ip_target") == 0) {
struct rtattr * nest = addattr_nest(n, 1024,
@@ -234,18 +227,14 @@ static int bond_parse_opt(struct link_util *lu, int argc, char **argv,
addattr_nest_end(n, nest);
} else if (matches(*argv, "arp_validate") == 0) {
NEXT_ARG();
- if (get_index(arp_validate_tbl, *argv) < 0) {
+ if (get_index(arp_validate_tbl, *argv) < 0)
invarg("invalid arp_validate", *argv);
- return -1;
- }
arp_validate = get_index(arp_validate_tbl, *argv);
addattr32(n, 1024, IFLA_BOND_ARP_VALIDATE, arp_validate);
} else if (matches(*argv, "arp_all_targets") == 0) {
NEXT_ARG();
- if (get_index(arp_all_targets_tbl, *argv) < 0) {
+ if (get_index(arp_all_targets_tbl, *argv) < 0)
invarg("invalid arp_all_targets", *argv);
- return -1;
- }
arp_all_targets = get_index(arp_all_targets_tbl, *argv);
addattr32(n, 1024, IFLA_BOND_ARP_ALL_TARGETS, arp_all_targets);
} else if (matches(*argv, "primary") == 0) {
@@ -256,94 +245,113 @@ static int bond_parse_opt(struct link_util *lu, int argc, char **argv,
addattr32(n, 1024, IFLA_BOND_PRIMARY, ifindex);
} else if (matches(*argv, "primary_reselect") == 0) {
NEXT_ARG();
- if (get_index(primary_reselect_tbl, *argv) < 0) {
+ if (get_index(primary_reselect_tbl, *argv) < 0)
invarg("invalid primary_reselect", *argv);
- return -1;
- }
primary_reselect = get_index(primary_reselect_tbl, *argv);
addattr8(n, 1024, IFLA_BOND_PRIMARY_RESELECT,
primary_reselect);
} else if (matches(*argv, "fail_over_mac") == 0) {
NEXT_ARG();
- if (get_index(fail_over_mac_tbl, *argv) < 0) {
+ if (get_index(fail_over_mac_tbl, *argv) < 0)
invarg("invalid fail_over_mac", *argv);
- return -1;
- }
fail_over_mac = get_index(fail_over_mac_tbl, *argv);
addattr8(n, 1024, IFLA_BOND_FAIL_OVER_MAC,
fail_over_mac);
} else if (matches(*argv, "xmit_hash_policy") == 0) {
NEXT_ARG();
- if (get_index(xmit_hash_policy_tbl, *argv) < 0) {
+ if (get_index(xmit_hash_policy_tbl, *argv) < 0)
invarg("invalid xmit_hash_policy", *argv);
- return -1;
- }
+
xmit_hash_policy = get_index(xmit_hash_policy_tbl, *argv);
addattr8(n, 1024, IFLA_BOND_XMIT_HASH_POLICY,
xmit_hash_policy);
} else if (matches(*argv, "resend_igmp") == 0) {
NEXT_ARG();
- if (get_u32(&resend_igmp, *argv, 0)) {
+ if (get_u32(&resend_igmp, *argv, 0))
invarg("invalid resend_igmp", *argv);
- return -1;
- }
+
addattr32(n, 1024, IFLA_BOND_RESEND_IGMP, resend_igmp);
} else if (matches(*argv, "num_grat_arp") == 0 ||
matches(*argv, "num_unsol_na") == 0) {
NEXT_ARG();
- if (get_u8(&num_peer_notif, *argv, 0)) {
+ if (get_u8(&num_peer_notif, *argv, 0))
invarg("invalid num_grat_arp|num_unsol_na",
*argv);
- return -1;
- }
+
addattr8(n, 1024, IFLA_BOND_NUM_PEER_NOTIF,
num_peer_notif);
} else if (matches(*argv, "all_slaves_active") == 0) {
NEXT_ARG();
- if (get_u8(&all_slaves_active, *argv, 0)) {
+ if (get_u8(&all_slaves_active, *argv, 0))
invarg("invalid all_slaves_active", *argv);
- return -1;
- }
+
addattr8(n, 1024, IFLA_BOND_ALL_SLAVES_ACTIVE,
all_slaves_active);
} else if (matches(*argv, "min_links") == 0) {
NEXT_ARG();
- if (get_u32(&min_links, *argv, 0)) {
+ if (get_u32(&min_links, *argv, 0))
invarg("invalid min_links", *argv);
- return -1;
- }
+
addattr32(n, 1024, IFLA_BOND_MIN_LINKS, min_links);
} else if (matches(*argv, "lp_interval") == 0) {
NEXT_ARG();
- if (get_u32(&lp_interval, *argv, 0)) {
+ if (get_u32(&lp_interval, *argv, 0))
invarg("invalid lp_interval", *argv);
- return -1;
- }
+
addattr32(n, 1024, IFLA_BOND_LP_INTERVAL, lp_interval);
} else if (matches(*argv, "packets_per_slave") == 0) {
NEXT_ARG();
- if (get_u32(&packets_per_slave, *argv, 0)) {
+ if (get_u32(&packets_per_slave, *argv, 0))
invarg("invalid packets_per_slave", *argv);
- return -1;
- }
+
addattr32(n, 1024, IFLA_BOND_PACKETS_PER_SLAVE,
packets_per_slave);
} else if (matches(*argv, "lacp_rate") == 0) {
NEXT_ARG();
- if (get_index(lacp_rate_tbl, *argv) < 0) {
+ if (get_index(lacp_rate_tbl, *argv) < 0)
invarg("invalid lacp_rate", *argv);
- return -1;
- }
+
lacp_rate = get_index(lacp_rate_tbl, *argv);
addattr8(n, 1024, IFLA_BOND_AD_LACP_RATE, lacp_rate);
} else if (matches(*argv, "ad_select") == 0) {
NEXT_ARG();
- if (get_index(ad_select_tbl, *argv) < 0) {
+ if (get_index(ad_select_tbl, *argv) < 0)
invarg("invalid ad_select", *argv);
- return -1;
- }
+
ad_select = get_index(ad_select_tbl, *argv);
addattr8(n, 1024, IFLA_BOND_AD_SELECT, ad_select);
+ } else if (matches(*argv, "ad_user_port_key") == 0) {
+ NEXT_ARG();
+ if (get_u16(&ad_user_port_key, *argv, 0))
+ invarg("invalid ad_user_port_key", *argv);
+
+ addattr16(n, 1024, IFLA_BOND_AD_USER_PORT_KEY,
+ ad_user_port_key);
+ } else if (matches(*argv, "ad_actor_sys_prio") == 0) {
+ NEXT_ARG();
+ if (get_u16(&ad_actor_sys_prio, *argv, 0))
+ invarg("invalid ad_actor_sys_prio", *argv);
+
+ addattr16(n, 1024, IFLA_BOND_AD_ACTOR_SYS_PRIO,
+ ad_actor_sys_prio);
+ } else if (matches(*argv, "ad_actor_system") == 0) {
+ int len;
+ char abuf[32];
+
+ NEXT_ARG();
+ len = ll_addr_a2n(abuf, sizeof(abuf), *argv);
+ if (len < 0)
+ return -1;
+ addattr_l(n, 1024, IFLA_BOND_AD_ACTOR_SYSTEM,
+ abuf, len);
+ } else if (matches(*argv, "tlb_dynamic_lb") == 0) {
+ NEXT_ARG();
+ if (get_u8(&tlb_dynamic_lb, *argv, 0)) {
+ invarg("invalid tlb_dynamic_lb", *argv);
+ return -1;
+ }
+ addattr8(n, 1024, IFLA_BOND_TLB_DYNAMIC_LB,
+ tlb_dynamic_lb);
} else if (matches(*argv, "help") == 0) {
explain();
return -1;
@@ -415,6 +423,7 @@ static void bond_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[])
if (iptb[i])
fprintf(f, "%s",
rt_addr_n2a(AF_INET,
+ RTA_PAYLOAD(iptb[i]),
RTA_DATA(iptb[i]),
buf,
INET_ADDRSTRLEN));
@@ -533,6 +542,30 @@ static void bond_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[])
ll_addr_n2a(p, ETH_ALEN, 0, b, sizeof(b)));
}
}
+
+ if (tb[IFLA_BOND_AD_ACTOR_SYS_PRIO]) {
+ fprintf(f, "ad_actor_sys_prio %u ",
+ rta_getattr_u16(tb[IFLA_BOND_AD_ACTOR_SYS_PRIO]));
+ }
+
+ if (tb[IFLA_BOND_AD_USER_PORT_KEY]) {
+ fprintf(f, "ad_user_port_key %u ",
+ rta_getattr_u16(tb[IFLA_BOND_AD_USER_PORT_KEY]));
+ }
+
+ if (tb[IFLA_BOND_AD_ACTOR_SYSTEM]) {
+ /* We assume the l2 address is an Ethernet MAC address */
+ SPRINT_BUF(b1);
+ fprintf(f, "ad_actor_system %s ",
+ ll_addr_n2a(RTA_DATA(tb[IFLA_BOND_AD_ACTOR_SYSTEM]),
+ RTA_PAYLOAD(tb[IFLA_BOND_AD_ACTOR_SYSTEM]),
+ 1 /*ARPHDR_ETHER*/, b1, sizeof(b1)));
+ }
+
+ if (tb[IFLA_BOND_TLB_DYNAMIC_LB]) {
+ fprintf(f, "tlb_dynamic_lb %u ",
+ rta_getattr_u8(tb[IFLA_BOND_TLB_DYNAMIC_LB]));
+ }
}
static void bond_print_help(struct link_util *lu, int argc, char **argv,
diff --git a/ip/iplink_bond_slave.c b/ip/iplink_bond_slave.c
index aacba14a..9b569b1d 100644
--- a/ip/iplink_bond_slave.c
+++ b/ip/iplink_bond_slave.c
@@ -78,6 +78,14 @@ static void bond_slave_print_opt(struct link_util *lu, FILE *f, struct rtattr *t
if (tb[IFLA_BOND_SLAVE_AD_AGGREGATOR_ID])
fprintf(f, "ad_aggregator_id %d ",
rta_getattr_u16(tb[IFLA_BOND_SLAVE_AD_AGGREGATOR_ID]));
+
+ if (tb[IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE])
+ fprintf(f, "ad_actor_oper_port_state %d\n",
+ rta_getattr_u8(tb[IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE]));
+
+ if (tb[IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE])
+ fprintf(f, "ad_partner_oper_port_state %d\n",
+ rta_getattr_u16(tb[IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE]));
}
static int bond_slave_parse_opt(struct link_util *lu, int argc, char **argv,
@@ -105,4 +113,3 @@ struct link_util bond_slave_link_util = {
.parse_opt = bond_slave_parse_opt,
.slave = true,
};
-
diff --git a/ip/iplink_bridge.c b/ip/iplink_bridge.c
index 0cea7d1f..00804093 100644
--- a/ip/iplink_bridge.c
+++ b/ip/iplink_bridge.c
@@ -14,18 +14,31 @@
#include <string.h>
#include <linux/if_link.h>
+#include "rt_names.h"
#include "utils.h"
#include "ip_common.h"
-static void explain(void)
+static void print_explain(FILE *f)
{
- fprintf(stderr,
+ fprintf(f,
"Usage: ... bridge [ forward_delay FORWARD_DELAY ]\n"
" [ hello_time HELLO_TIME ]\n"
" [ max_age MAX_AGE ]\n"
+ " [ ageing_time AGEING_TIME ]\n"
+ " [ stp_state STP_STATE ]\n"
+ " [ priority PRIORITY ]\n"
+ " [ vlan_filtering VLAN_FILTERING ]\n"
+ " [ vlan_protocol VLAN_PROTOCOL ]\n"
+ "\n"
+ "Where: VLAN_PROTOCOL := { 802.1Q | 802.1ad }\n"
);
}
+/* Error-path usage text: the bridge option summary, always on stderr. */
+static void explain(void)
+{
+	FILE *errout = stderr;
+
+	print_explain(errout);
+}
+
static int bridge_parse_opt(struct link_util *lu, int argc, char **argv,
struct nlmsghdr *n)
{
@@ -34,25 +47,60 @@ static int bridge_parse_opt(struct link_util *lu, int argc, char **argv,
while (argc > 0) {
if (matches(*argv, "forward_delay") == 0) {
NEXT_ARG();
- if (get_u32(&val, *argv, 0)) {
+ if (get_u32(&val, *argv, 0))
invarg("invalid forward_delay", *argv);
- return -1;
- }
+
addattr32(n, 1024, IFLA_BR_FORWARD_DELAY, val);
} else if (matches(*argv, "hello_time") == 0) {
NEXT_ARG();
- if (get_u32(&val, *argv, 0)) {
+ if (get_u32(&val, *argv, 0))
invarg("invalid hello_time", *argv);
- return -1;
- }
+
addattr32(n, 1024, IFLA_BR_HELLO_TIME, val);
} else if (matches(*argv, "max_age") == 0) {
NEXT_ARG();
- if (get_u32(&val, *argv, 0)) {
+ if (get_u32(&val, *argv, 0))
invarg("invalid max_age", *argv);
+
+ addattr32(n, 1024, IFLA_BR_MAX_AGE, val);
+ } else if (matches(*argv, "ageing_time") == 0) {
+ NEXT_ARG();
+ if (get_u32(&val, *argv, 0))
+ invarg("invalid ageing_time", *argv);
+
+ addattr32(n, 1024, IFLA_BR_AGEING_TIME, val);
+ } else if (matches(*argv, "stp_state") == 0) {
+ NEXT_ARG();
+ if (get_u32(&val, *argv, 0))
+ invarg("invalid stp_state", *argv);
+
+ addattr32(n, 1024, IFLA_BR_STP_STATE, val);
+ } else if (matches(*argv, "priority") == 0) {
+ __u16 prio;
+
+ NEXT_ARG();
+ if (get_u16(&prio, *argv, 0))
+ invarg("invalid priority", *argv);
+
+ addattr16(n, 1024, IFLA_BR_PRIORITY, prio);
+ } else if (matches(*argv, "vlan_filtering") == 0) {
+ __u8 vlan_filter;
+
+ NEXT_ARG();
+ if (get_u8(&vlan_filter, *argv, 0)) {
+ invarg("invalid vlan_filtering", *argv);
return -1;
}
- addattr32(n, 1024, IFLA_BR_MAX_AGE, val);
+ addattr8(n, 1024, IFLA_BR_VLAN_FILTERING, vlan_filter);
+ } else if (matches(*argv, "vlan_protocol") == 0) {
+ __u16 vlan_proto;
+
+ NEXT_ARG();
+ if (ll_proto_a2n(&vlan_proto, *argv)) {
+ invarg("invalid vlan_protocol", *argv);
+ return -1;
+ }
+ addattr16(n, 1024, IFLA_BR_VLAN_PROTOCOL, vlan_proto);
} else if (matches(*argv, "help") == 0) {
explain();
return -1;
@@ -83,6 +131,36 @@ static void bridge_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[])
if (tb[IFLA_BR_MAX_AGE])
fprintf(f, "max_age %u ",
rta_getattr_u32(tb[IFLA_BR_MAX_AGE]));
+
+ if (tb[IFLA_BR_AGEING_TIME])
+ fprintf(f, "ageing_time %u ",
+ rta_getattr_u32(tb[IFLA_BR_AGEING_TIME]));
+
+ if (tb[IFLA_BR_STP_STATE])
+ fprintf(f, "stp_state %u ",
+ rta_getattr_u32(tb[IFLA_BR_STP_STATE]));
+
+ if (tb[IFLA_BR_PRIORITY])
+ fprintf(f, "priority %u ",
+ rta_getattr_u16(tb[IFLA_BR_PRIORITY]));
+
+ if (tb[IFLA_BR_VLAN_FILTERING])
+ fprintf(f, "vlan_filtering %u ",
+ rta_getattr_u8(tb[IFLA_BR_VLAN_FILTERING]));
+
+ if (tb[IFLA_BR_VLAN_PROTOCOL]) {
+ SPRINT_BUF(b1);
+
+ fprintf(f, "vlan_protocol %s ",
+ ll_proto_n2a(rta_getattr_u16(tb[IFLA_BR_VLAN_PROTOCOL]),
+ b1, sizeof(b1)));
+ }
+}
+
+/*
+ * print_help hook for the bridge link kind: writes the option summary
+ * to @f. @lu, @argc and @argv are accepted but not consulted.
+ */
+static void bridge_print_help(struct link_util *lu, int argc, char **argv,
+			      FILE *f)
+{
+	(void)lu;
+	(void)argc;
+	(void)argv;
+
+	print_explain(f);
+}
struct link_util bridge_link_util = {
@@ -90,4 +168,5 @@ struct link_util bridge_link_util = {
.maxattr = IFLA_BR_MAX,
.parse_opt = bridge_parse_opt,
.print_opt = bridge_print_opt,
+ .print_help = bridge_print_help,
};
diff --git a/ip/iplink_bridge_slave.c b/ip/iplink_bridge_slave.c
index a2851855..4593872e 100644
--- a/ip/iplink_bridge_slave.c
+++ b/ip/iplink_bridge_slave.c
@@ -19,9 +19,9 @@
#include "utils.h"
#include "ip_common.h"
-static void explain(void)
+static void print_explain(FILE *f)
{
- fprintf(stderr,
+ fprintf(f,
"Usage: ... bridge_slave [ state STATE ] [ priority PRIO ] [cost COST ]\n"
" [ guard {on | off} ]\n"
" [ hairpin {on | off} ] \n"
@@ -32,6 +32,11 @@ static void explain(void)
);
}
+/* Error-path usage text: the bridge_slave option summary, always on stderr. */
+static void explain(void)
+{
+	FILE *errout = stderr;
+
+	print_explain(errout);
+}
+
static const char *port_states[] = {
[BR_STATE_DISABLED] = "disabled",
[BR_STATE_LISTENING] = "listening",
@@ -172,10 +177,17 @@ static int bridge_slave_parse_opt(struct link_util *lu, int argc, char **argv,
return 0;
}
+/*
+ * print_help hook for bridge slave ports: writes the option summary
+ * to @f. @lu, @argc and @argv are accepted but not consulted.
+ */
+static void bridge_slave_print_help(struct link_util *lu, int argc, char **argv,
+				    FILE *f)
+{
+	(void)lu;
+	(void)argc;
+	(void)argv;
+
+	print_explain(f);
+}
+
struct link_util bridge_slave_link_util = {
.id = "bridge",
.maxattr = IFLA_BRPORT_MAX,
.print_opt = bridge_slave_print_opt,
.parse_opt = bridge_slave_parse_opt,
+ .print_help = bridge_slave_print_help,
.slave = true,
};
diff --git a/ip/iplink_geneve.c b/ip/iplink_geneve.c
new file mode 100644
index 00000000..13454795
--- /dev/null
+++ b/ip/iplink_geneve.c
@@ -0,0 +1,173 @@
+/*
+ * iplink_geneve.c GENEVE device support
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: John W. Linville <linville@tuxdriver.com>
+ */
+
+#include <stdio.h>
+
+#include "rt_names.h"
+#include "utils.h"
+#include "ip_common.h"
+
+/* Write the geneve option summary (usage line plus value grammar) to @f. */
+static void print_explain(FILE *f)
+{
+	static const char *const usage_lines[] = {
+		"Usage: ... geneve id VNI remote ADDR\n",
+		" [ ttl TTL ] [ tos TOS ]\n",
+		"\n",
+		"Where: VNI := 0-16777215\n",
+		" ADDR := IP_ADDRESS\n",
+		" TOS := { NUMBER | inherit }\n",
+		" TTL := { 1..255 | inherit }\n",
+	};
+	size_t i;
+
+	/* The lines contain no conversion specifiers, so fputs() emits them verbatim. */
+	for (i = 0; i < sizeof(usage_lines) / sizeof(usage_lines[0]); i++)
+		fputs(usage_lines[i], f);
+}
+
+/* Error-path usage text: the geneve option summary, always on stderr. */
+static void explain(void)
+{
+	FILE *errout = stderr;
+
+	print_explain(errout);
+}
+
+/*
+ * Parse the geneve-specific options in @argv and append the matching
+ * IFLA_GENEVE_* attributes to the netlink message @n.
+ *
+ * Accepted keywords: "id"/"vni", "remote", "ttl"/"hoplimit",
+ * "tos"/"dsfield" and "help". Returns 0 on success and -1 for "help",
+ * an unknown keyword, an unparsable remote address, or when the VNI or
+ * the remote address was never supplied.
+ *
+ * NOTE(review): NEXT_ARG() and invarg() are project helpers defined
+ * outside this file; no return statement follows the invarg() calls, so
+ * invarg() presumably does not return - confirm.
+ */
+static int geneve_parse_opt(struct link_util *lu, int argc, char **argv,
+ struct nlmsghdr *n)
+{
+ __u32 vni = 0;
+ int vni_set = 0;
+ __u32 daddr = 0;
+ struct in6_addr daddr6 = IN6ADDR_ANY_INIT;
+ __u8 ttl = 0;
+ __u8 tos = 0;
+
+ while (argc > 0) {
+ if (!matches(*argv, "id") ||
+ !matches(*argv, "vni")) {
+ NEXT_ARG();
+ /* The VNI must fit in 24 bits. */
+ if (get_u32(&vni, *argv, 0) ||
+ vni >= 1u << 24)
+ invarg("invalid id", *argv);
+ vni_set = 1;
+ } else if (!matches(*argv, "remote")) {
+ NEXT_ARG();
+ if (!inet_get_addr(*argv, &daddr, &daddr6)) {
+ fprintf(stderr, "Invalid address \"%s\"\n", *argv);
+ return -1;
+ }
+ /* Multicast remotes are rejected for both address families. */
+ if (IN6_IS_ADDR_MULTICAST(&daddr6) || IN_MULTICAST(ntohl(daddr)))
+ invarg("invalid remote address", *argv);
+ } else if (!matches(*argv, "ttl") ||
+ !matches(*argv, "hoplimit")) {
+ unsigned uval;
+
+ NEXT_ARG();
+ /* "inherit" leaves ttl at its initial value of 0. */
+ if (strcmp(*argv, "inherit") != 0) {
+ if (get_unsigned(&uval, *argv, 0))
+ invarg("invalid TTL", *argv);
+ if (uval > 255)
+ invarg("TTL must be <= 255", *argv);
+ ttl = uval;
+ }
+ } else if (!matches(*argv, "tos") ||
+ !matches(*argv, "dsfield")) {
+ __u32 uval;
+
+ NEXT_ARG();
+ /* "inherit" is encoded as tos == 1; geneve_print_opt() decodes it the same way. */
+ if (strcmp(*argv, "inherit") != 0) {
+ if (rtnl_dsfield_a2n(&uval, *argv))
+ invarg("bad TOS value", *argv);
+ tos = uval;
+ } else
+ tos = 1;
+ } else if (matches(*argv, "help") == 0) {
+ explain();
+ return -1;
+ } else {
+ fprintf(stderr, "geneve: unknown command \"%s\"?\n", *argv);
+ explain();
+ return -1;
+ }
+ argc--, argv++;
+ }
+
+ /* Both the VNI and a remote (IPv4 or IPv6) are mandatory. */
+ if (!vni_set) {
+ fprintf(stderr, "geneve: missing virtual network identifier\n");
+ return -1;
+ }
+
+ if (!daddr && memcmp(&daddr6, &in6addr_any, sizeof(daddr6)) == 0) {
+ fprintf(stderr, "geneve: remote link partner not specified\n");
+ return -1;
+ }
+
+ addattr32(n, 1024, IFLA_GENEVE_ID, vni);
+ /* Only the address family that was actually set is sent. */
+ if (daddr)
+ addattr_l(n, 1024, IFLA_GENEVE_REMOTE, &daddr, 4);
+ if (memcmp(&daddr6, &in6addr_any, sizeof(daddr6)) != 0)
+ addattr_l(n, 1024, IFLA_GENEVE_REMOTE6, &daddr6, sizeof(struct in6_addr));
+ /* TTL and TOS are always sent, even when left at 0. */
+ addattr8(n, 1024, IFLA_GENEVE_TTL, ttl);
+ addattr8(n, 1024, IFLA_GENEVE_TOS, tos);
+
+ return 0;
+}
+
+/*
+ * Print the geneve attributes found in @tb to @f, using the same keywords
+ * that geneve_parse_opt() accepts ("id", "remote", "ttl", "tos").
+ *
+ * Nothing is printed when @tb is NULL or when the VNI attribute is
+ * missing or shorter than a __u32. A zero remote, a zero TTL and a zero
+ * TOS are omitted from the output. @lu is unused.
+ */
+static void geneve_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[])
+{
+	__u32 vni;
+	char s1[1024];
+	__u8 tos;
+
+	if (!tb)
+		return;
+
+	if (!tb[IFLA_GENEVE_ID] ||
+	    RTA_PAYLOAD(tb[IFLA_GENEVE_ID]) < sizeof(__u32))
+		return;
+
+	vni = rta_getattr_u32(tb[IFLA_GENEVE_ID]);
+	fprintf(f, "id %u ", vni);
+
+	if (tb[IFLA_GENEVE_REMOTE]) {
+		__be32 addr = rta_getattr_u32(tb[IFLA_GENEVE_REMOTE]);
+
+		if (addr)
+			fprintf(f, "remote %s ",
+				format_host(AF_INET, 4, &addr, s1, sizeof(s1)));
+	} else if (tb[IFLA_GENEVE_REMOTE6]) {
+		struct in6_addr addr;
+
+		memcpy(&addr, RTA_DATA(tb[IFLA_GENEVE_REMOTE6]), sizeof(struct in6_addr));
+		/*
+		 * geneve_parse_opt() rejects multicast remotes, so the
+		 * address to report is the unicast one. The previous test
+		 * was inverted and printed the remote only when it was
+		 * multicast, hiding every configurable IPv6 remote.
+		 */
+		if (memcmp(&addr, &in6addr_any, sizeof(addr)) != 0 &&
+		    !IN6_IS_ADDR_MULTICAST(&addr))
+			fprintf(f, "remote %s ",
+				format_host(AF_INET6, sizeof(struct in6_addr), &addr, s1, sizeof(s1)));
+	}
+
+	if (tb[IFLA_GENEVE_TTL]) {
+		__u8 ttl = rta_getattr_u8(tb[IFLA_GENEVE_TTL]);
+
+		if (ttl)
+			fprintf(f, "ttl %d ", ttl);
+	}
+
+	if (tb[IFLA_GENEVE_TOS] &&
+	    (tos = rta_getattr_u8(tb[IFLA_GENEVE_TOS]))) {
+		/* A TOS of 1 is how geneve_parse_opt() encodes "inherit". */
+		if (tos == 1)
+			fprintf(f, "tos inherit ");
+		else
+			fprintf(f, "tos %#x ", tos);
+	}
+}
+
+/*
+ * print_help hook for the geneve link kind: writes the option summary
+ * to @f. @lu, @argc and @argv are accepted but not consulted.
+ */
+static void geneve_print_help(struct link_util *lu, int argc, char **argv,
+			      FILE *f)
+{
+	(void)lu;
+	(void)argc;
+	(void)argv;
+
+	print_explain(f);
+}
+
+/* link_util descriptor for the "geneve" kind: option parser, printer and help hook. */
+struct link_util geneve_link_util = {
+	.id		= "geneve",
+	.maxattr	= IFLA_GENEVE_MAX,
+	.parse_opt	= geneve_parse_opt,
+	.print_opt	= geneve_print_opt,
+	.print_help	= geneve_print_help,
+};
diff --git a/ip/iplink_macvlan.c b/ip/iplink_macvlan.c
index 826b6591..f195e81d 100644
--- a/ip/iplink_macvlan.c
+++ b/ip/iplink_macvlan.c
@@ -1,5 +1,5 @@
/*
- * iplink_vlan.c VLAN device support
+ * iplink_macvlan.c macvlan/macvtap device support
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -20,31 +20,40 @@
#include "utils.h"
#include "ip_common.h"
-static void print_explain(FILE *f)
+/*
+ * Print "<kind>: <formatted message>\n" on stderr, where <kind> is
+ * lu->id. The remaining arguments are a printf-style format and its
+ * values. Wrapped in do { } while (0) so the macro behaves as a single
+ * statement (safe before an "else"), and @lu is parenthesized so any
+ * pointer expression may be passed.
+ */
+#define pfx_err(lu, ...) do { \
+	fprintf(stderr, "%s: ", (lu)->id); \
+	fprintf(stderr, __VA_ARGS__); \
+	fprintf(stderr, "\n"); \
+} while (0)
+
+static void print_explain(struct link_util *lu, FILE *f)
{
fprintf(f,
- "Usage: ... macvlan mode { private | vepa | bridge | passthru }\n"
+ "Usage: ... %s mode { private | vepa | bridge | passthru [nopromisc] }\n",
+ lu->id
);
}
-static void explain(void)
+static void explain(struct link_util *lu)
{
- print_explain(stderr);
+ print_explain(lu, stderr);
}
-static int mode_arg(void)
+static int mode_arg(const char *arg)
{
fprintf(stderr, "Error: argument of \"mode\" must be \"private\", "
- "\"vepa\", \"bridge\" or \"passthru\" \n");
+ "\"vepa\", \"bridge\" or \"passthru\", not \"%s\"\n", arg);
return -1;
}
static int macvlan_parse_opt(struct link_util *lu, int argc, char **argv,
struct nlmsghdr *n)
{
+ __u32 mode = 0;
+ __u16 flags = 0;
+
while (argc > 0) {
if (matches(*argv, "mode") == 0) {
- __u32 mode = 0;
NEXT_ARG();
if (strcmp(*argv, "private") == 0)
@@ -56,26 +65,39 @@ static int macvlan_parse_opt(struct link_util *lu, int argc, char **argv,
else if (strcmp(*argv, "passthru") == 0)
mode = MACVLAN_MODE_PASSTHRU;
else
- return mode_arg();
-
- addattr32(n, 1024, IFLA_MACVLAN_MODE, mode);
+ return mode_arg(*argv);
+ } else if (matches(*argv, "nopromisc") == 0) {
+ flags |= MACVLAN_FLAG_NOPROMISC;
} else if (matches(*argv, "help") == 0) {
- explain();
+ explain(lu);
return -1;
} else {
- fprintf(stderr, "macvlan: unknown option \"%s\"?\n", *argv);
- explain();
+ pfx_err(lu, "unknown option \"%s\"?", *argv);
+ explain(lu);
return -1;
}
argc--, argv++;
}
+ if (mode)
+ addattr32(n, 1024, IFLA_MACVLAN_MODE, mode);
+
+ if (flags) {
+ if (flags & MACVLAN_FLAG_NOPROMISC &&
+ mode != MACVLAN_MODE_PASSTHRU) {
+ pfx_err(lu, "nopromisc flag only valid in passthru mode");
+ explain(lu);
+ return -1;
+ }
+ addattr16(n, 1024, IFLA_MACVLAN_FLAGS, flags);
+ }
return 0;
}
static void macvlan_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[])
{
__u32 mode;
+ __u16 flags;
if (!tb)
return;
@@ -91,12 +113,20 @@ static void macvlan_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[]
: mode == MACVLAN_MODE_BRIDGE ? "bridge"
: mode == MACVLAN_MODE_PASSTHRU ? "passthru"
: "unknown");
+
+ if (!tb[IFLA_MACVLAN_FLAGS] ||
+ RTA_PAYLOAD(tb[IFLA_MACVLAN_FLAGS]) < sizeof(__u16))
+ return;
+
+ flags = rta_getattr_u16(tb[IFLA_MACVLAN_FLAGS]);
+ if (flags & MACVLAN_FLAG_NOPROMISC)
+ fprintf(f, "nopromisc ");
}
static void macvlan_print_help(struct link_util *lu, int argc, char **argv,
FILE *f)
{
- print_explain(f);
+ print_explain(lu, f);
}
struct link_util macvlan_link_util = {
@@ -106,3 +136,11 @@ struct link_util macvlan_link_util = {
.print_opt = macvlan_print_opt,
.print_help = macvlan_print_help,
};
+
+/*
+ * link_util descriptor for the "macvtap" kind. It uses the macvlan
+ * parser, printer and help hook; only the .id string is macvtap-specific.
+ */
+struct link_util macvtap_link_util = {
+	.id		= "macvtap",
+	.maxattr	= IFLA_MACVLAN_MAX,
+	.parse_opt	= macvlan_parse_opt,
+	.print_opt	= macvlan_print_opt,
+	.print_help	= macvlan_print_help,
+};
diff --git a/ip/iplink_macvtap.c b/ip/iplink_macvtap.c
deleted file mode 100644
index 9c2cd74d..00000000
--- a/ip/iplink_macvtap.c
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- * iplink_macvtap.c macvtap device support
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/socket.h>
-#include <linux/if_link.h>
-
-#include "rt_names.h"
-#include "utils.h"
-#include "ip_common.h"
-
-static void print_explain(FILE *f)
-{
- fprintf(stderr,
- "Usage: ... macvtap mode { private | vepa | bridge | passthru }\n"
- );
-}
-
-static void explain(void)
-{
- print_explain(stderr);
-}
-
-static int mode_arg(const char *arg)
-{
- fprintf(stderr, "Error: argument of \"mode\" must be \"private\", "
- "\"vepa\", \"bridge\" or \"passthru\", not \"%s\"\n", arg);
- return -1;
-}
-
-static int macvtap_parse_opt(struct link_util *lu, int argc, char **argv,
- struct nlmsghdr *n)
-{
- while (argc > 0) {
- if (matches(*argv, "mode") == 0) {
- __u32 mode = 0;
- NEXT_ARG();
-
- if (strcmp(*argv, "private") == 0)
- mode = MACVLAN_MODE_PRIVATE;
- else if (strcmp(*argv, "vepa") == 0)
- mode = MACVLAN_MODE_VEPA;
- else if (strcmp(*argv, "bridge") == 0)
- mode = MACVLAN_MODE_BRIDGE;
- else if (strcmp(*argv, "passthru") == 0)
- mode = MACVLAN_MODE_PASSTHRU;
- else
- return mode_arg(*argv);
-
- addattr32(n, 1024, IFLA_MACVLAN_MODE, mode);
- } else if (matches(*argv, "help") == 0) {
- explain();
- return -1;
- } else {
- fprintf(stderr, "macvtap: unknown command \"%s\"?\n", *argv);
- explain();
- return -1;
- }
- argc--, argv++;
- }
-
- return 0;
-}
-
-static void macvtap_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[])
-{
- __u32 mode;
-
- if (!tb)
- return;
-
- if (!tb[IFLA_MACVLAN_MODE] ||
- RTA_PAYLOAD(tb[IFLA_MACVLAN_MODE]) < sizeof(__u32))
- return;
-
- mode = rta_getattr_u32(tb[IFLA_VLAN_ID]);
- fprintf(f, " mode %s ",
- mode == MACVLAN_MODE_PRIVATE ? "private"
- : mode == MACVLAN_MODE_VEPA ? "vepa"
- : mode == MACVLAN_MODE_BRIDGE ? "bridge"
- : mode == MACVLAN_MODE_PASSTHRU ? "passthru"
- : "unknown");
-}
-
-static void macvtap_print_help(struct link_util *lu, int argc, char **argv,
- FILE *f)
-{
- print_explain(f);
-}
-
-struct link_util macvtap_link_util = {
- .id = "macvtap",
- .maxattr = IFLA_MACVLAN_MAX,
- .parse_opt = macvtap_parse_opt,
- .print_opt = macvtap_print_opt,
- .print_help = macvtap_print_help,
-};
diff --git a/ip/iplink_vrf.c b/ip/iplink_vrf.c
new file mode 100644
index 00000000..9b4b7728
--- /dev/null
+++ b/ip/iplink_vrf.c
@@ -0,0 +1,79 @@
+/* iplink_vrf.c VRF device support
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Shrijeet Mukherjee <shm@cumulusnetworks.com>
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <linux/if_link.h>
+
+#include "rt_names.h"
+#include "utils.h"
+#include "ip_common.h"
+
+/* Write the one-line vrf usage summary to @f. */
+static void vrf_explain(FILE *f)
+{
+	/* No conversion specifiers in the text, so fputs() emits it verbatim. */
+	fputs("Usage: ... vrf table TABLEID \n", f);
+}
+
+/* Error-path usage text: the vrf usage summary, always on stderr. */
+static void explain(void)
+{
+	FILE *errout = stderr;
+
+	vrf_explain(errout);
+}
+
+/*
+ * Parse the vrf-specific options in @argv and append them to the netlink
+ * message @n. The only option is "table TABLEID", which is stored as
+ * IFLA_VRF_TABLE.
+ *
+ * Returns 0 on success and -1 after "help" or an unknown option; both
+ * print the usage text on stderr. @lu is unused.
+ */
+static int vrf_parse_opt(struct link_util *lu, int argc, char **argv,
+			 struct nlmsghdr *n)
+{
+	while (argc > 0) {
+		if (matches(*argv, "table") == 0) {
+			__u32 table;
+
+			NEXT_ARG();
+
+			/*
+			 * The message carries no trailing newline, in line
+			 * with the other invarg() callers.
+			 */
+			if (rtnl_rttable_a2n(&table, *argv))
+				invarg("invalid table ID", *argv);
+			addattr32(n, 1024, IFLA_VRF_TABLE, table);
+		} else if (matches(*argv, "help") == 0) {
+			explain();
+			return -1;
+		} else {
+			fprintf(stderr, "vrf: unknown option \"%s\"?\n",
+				*argv);
+			explain();
+			return -1;
+		}
+		argc--, argv++;
+	}
+
+	return 0;
+}
+
+/*
+ * Print the vrf routing table id from @tb to @f as "table N ".
+ * Prints nothing when @tb is NULL or IFLA_VRF_TABLE is absent.
+ * @lu is unused.
+ */
+static void vrf_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[])
+{
+	struct rtattr *table_attr = tb ? tb[IFLA_VRF_TABLE] : NULL;
+
+	if (!table_attr)
+		return;
+
+	fprintf(f, "table %u ", rta_getattr_u32(table_attr));
+}
+
+/*
+ * print_help hook for the vrf link kind: writes the usage summary
+ * to @f. @lu, @argc and @argv are accepted but not consulted.
+ */
+static void vrf_print_help(struct link_util *lu, int argc, char **argv,
+			   FILE *f)
+{
+	(void)lu;
+	(void)argc;
+	(void)argv;
+
+	vrf_explain(f);
+}
+
+/* link_util descriptor for the "vrf" kind: option parser, printer and help hook. */
+struct link_util vrf_link_util = {
+	.id		= "vrf",
+	.maxattr	= IFLA_VRF_MAX,
+	.parse_opt	= vrf_parse_opt,
+	.print_opt	= vrf_print_opt,
+	.print_help	= vrf_print_help,
+};
diff --git a/ip/iplink_vxlan.c b/ip/iplink_vxlan.c
index 473ff97a..ede84824 100644
--- a/ip/iplink_vxlan.c
+++ b/ip/iplink_vxlan.c
@@ -23,14 +23,15 @@
static void print_explain(FILE *f)
{
- fprintf(f, "Usage: ... vxlan id VNI [ { group | remote } ADDR ] [ local ADDR ]\n");
+ fprintf(f, "Usage: ... vxlan id VNI [ { group | remote } IP_ADDRESS ] [ local ADDR ]\n");
fprintf(f, " [ ttl TTL ] [ tos TOS ] [ dev PHYS_DEV ]\n");
fprintf(f, " [ dstport PORT ] [ srcport MIN MAX ]\n");
fprintf(f, " [ [no]learning ] [ [no]proxy ] [ [no]rsc ]\n");
fprintf(f, " [ [no]l2miss ] [ [no]l3miss ]\n");
fprintf(f, " [ ageing SECONDS ] [ maxaddress NUMBER ]\n");
fprintf(f, " [ [no]udpcsum ] [ [no]udp6zerocsumtx ] [ [no]udp6zerocsumrx ]\n");
- fprintf(f, " [ gbp ]\n");
+ fprintf(f, " [ [no]remcsumtx ] [ [no]remcsumrx ]\n");
+ fprintf(f, " [ [no]external ] [ gbp ]\n");
fprintf(f, "\n");
fprintf(f, "Where: VNI := 0-16777215\n");
fprintf(f, " ADDR := { IP_ADDRESS | any }\n");
@@ -69,6 +70,9 @@ static int vxlan_parse_opt(struct link_util *lu, int argc, char **argv,
__u8 udpcsum = 0;
__u8 udp6zerocsumtx = 0;
__u8 udp6zerocsumrx = 0;
+ __u8 remcsumtx = 0;
+ __u8 remcsumrx = 0;
+ __u8 metadata = 0;
__u8 gbp = 0;
int dst_port_set = 0;
struct ifla_vxlan_port_range range = { 0, 0 };
@@ -199,6 +203,18 @@ static int vxlan_parse_opt(struct link_util *lu, int argc, char **argv,
udp6zerocsumrx = 1;
} else if (!matches(*argv, "noudp6zerocsumrx")) {
udp6zerocsumrx = 0;
+ } else if (!matches(*argv, "remcsumtx")) {
+ remcsumtx = 1;
+ } else if (!matches(*argv, "noremcsumtx")) {
+ remcsumtx = 0;
+ } else if (!matches(*argv, "remcsumrx")) {
+ remcsumrx = 1;
+ } else if (!matches(*argv, "noremcsumrx")) {
+ remcsumrx = 0;
+ } else if (!matches(*argv, "external")) {
+ metadata = 1;
+ } else if (!matches(*argv, "noexternal")) {
+ metadata = 0;
} else if (!matches(*argv, "gbp")) {
gbp = 1;
} else if (matches(*argv, "help") == 0) {
@@ -212,7 +228,12 @@ static int vxlan_parse_opt(struct link_util *lu, int argc, char **argv,
argc--, argv++;
}
- if (!vni_set) {
+ if (metadata && vni_set) {
+ fprintf(stderr, "vxlan: both 'external' and vni cannot be specified\n");
+ return -1;
+ }
+
+ if (!metadata && !vni_set) {
fprintf(stderr, "vxlan: missing virtual network identifier\n");
return -1;
}
@@ -259,6 +280,9 @@ static int vxlan_parse_opt(struct link_util *lu, int argc, char **argv,
addattr8(n, 1024, IFLA_VXLAN_UDP_CSUM, udpcsum);
addattr8(n, 1024, IFLA_VXLAN_UDP_ZERO_CSUM6_TX, udp6zerocsumtx);
addattr8(n, 1024, IFLA_VXLAN_UDP_ZERO_CSUM6_RX, udp6zerocsumrx);
+ addattr8(n, 1024, IFLA_VXLAN_REMCSUM_TX, remcsumtx);
+ addattr8(n, 1024, IFLA_VXLAN_REMCSUM_RX, remcsumrx);
+ addattr8(n, 1024, IFLA_VXLAN_COLLECT_METADATA, metadata);
if (noage)
addattr32(n, 1024, IFLA_VXLAN_AGEING, 0);
@@ -407,6 +431,18 @@ static void vxlan_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[])
rta_getattr_u8(tb[IFLA_VXLAN_UDP_ZERO_CSUM6_RX]))
fputs("udp6zerocsumrx ", f);
+ if (tb[IFLA_VXLAN_REMCSUM_TX] &&
+ rta_getattr_u8(tb[IFLA_VXLAN_REMCSUM_TX]))
+ fputs("remcsumtx ", f);
+
+ if (tb[IFLA_VXLAN_REMCSUM_RX] &&
+ rta_getattr_u8(tb[IFLA_VXLAN_REMCSUM_RX]))
+ fputs("remcsumrx ", f);
+
+ if (tb[IFLA_VXLAN_COLLECT_METADATA] &&
+ rta_getattr_u8(tb[IFLA_VXLAN_COLLECT_METADATA]))
+ fputs("external ", f);
+
if (tb[IFLA_VXLAN_GBP])
fputs("gbp ", f);
}
diff --git a/ip/ipmaddr.c b/ip/ipmaddr.c
index a77a18fb..cbd6d115 100644
--- a/ip/ipmaddr.c
+++ b/ip/ipmaddr.c
@@ -257,7 +257,7 @@ static int multiaddr_list(int argc, char **argv)
if (strcmp(*argv, "dev") == 0) {
NEXT_ARG();
}
- if (matches(*argv, "help") == 0)
+ else if (matches(*argv, "help") == 0)
usage();
if (filter.dev)
duparg2("dev", *argv);
diff --git a/ip/ipmonitor.c b/ip/ipmonitor.c
index 6b5e6653..99a237f4 100644
--- a/ip/ipmonitor.c
+++ b/ip/ipmonitor.c
@@ -26,18 +26,36 @@
static void usage(void) __attribute__((noreturn));
int prefix_banner;
+int listen_all_nsid;
static void usage(void)
{
- fprintf(stderr, "Usage: ip monitor [ all | LISTofOBJECTS ] [ FILE ]"
- "[ label ] [dev DEVICE]\n");
+ fprintf(stderr, "Usage: ip monitor [ all | LISTofOBJECTS ] [ FILE ] "
+ "[ label ] [all-nsid] [dev DEVICE]\n");
fprintf(stderr, "LISTofOBJECTS := link | address | route | mroute | prefix |\n");
- fprintf(stderr, " neigh | netconf | rule\n");
+ fprintf(stderr, " neigh | netconf | rule | nsid\n");
fprintf(stderr, "FILE := file FILENAME\n");
exit(-1);
}
+/*
+ * Write the prefix that precedes a monitored message: the timestamp when
+ * 'timestamp' is set, the "[nsid ...]" tag when listening on all nsids, and
+ * the caller's label when 'prefix_banner' is set.
+ */
+static void print_headers(FILE *fp, char *label, struct rtnl_ctrl_data *ctrl)
+{
+	if (timestamp)
+		print_timestamp(fp);
+
+	if (listen_all_nsid) {
+		/* no control data, or a negative nsid, is reported as "current" */
+		int has_nsid = ctrl != NULL && ctrl->nsid >= 0;
+
+		if (has_nsid)
+			fprintf(fp, "[nsid %d]", ctrl->nsid);
+		else
+			fprintf(fp, "[nsid current]");
+	}
+
+	if (!prefix_banner)
+		return;
+
+	fprintf(fp, "%s", label);
+}
+
static int accept_msg(const struct sockaddr_nl *who,
+ struct rtnl_ctrl_data *ctrl,
struct nlmsghdr *n, void *arg)
{
FILE *fp = (FILE*)arg;
@@ -54,42 +72,31 @@ static int accept_msg(const struct sockaddr_nl *who,
if (r->rtm_flags & RTM_F_CLONED)
return 0;
- if (timestamp)
- print_timestamp(fp);
-
if (r->rtm_family == RTNL_FAMILY_IPMR ||
r->rtm_family == RTNL_FAMILY_IP6MR) {
- if (prefix_banner)
- fprintf(fp, "[MROUTE]");
+ print_headers(fp, "[MROUTE]", ctrl);
print_mroute(who, n, arg);
return 0;
} else {
- if (prefix_banner)
- fprintf(fp, "[ROUTE]");
+ print_headers(fp, "[ROUTE]", ctrl);
print_route(who, n, arg);
return 0;
}
}
- if (timestamp)
- print_timestamp(fp);
-
if (n->nlmsg_type == RTM_NEWLINK || n->nlmsg_type == RTM_DELLINK) {
ll_remember_index(who, n, NULL);
- if (prefix_banner)
- fprintf(fp, "[LINK]");
+ print_headers(fp, "[LINK]", ctrl);
print_linkinfo(who, n, arg);
return 0;
}
if (n->nlmsg_type == RTM_NEWADDR || n->nlmsg_type == RTM_DELADDR) {
- if (prefix_banner)
- fprintf(fp, "[ADDR]");
+ print_headers(fp, "[ADDR]", ctrl);
print_addrinfo(who, n, arg);
return 0;
}
if (n->nlmsg_type == RTM_NEWADDRLABEL || n->nlmsg_type == RTM_DELADDRLABEL) {
- if (prefix_banner)
- fprintf(fp, "[ADDRLABEL]");
+ print_headers(fp, "[ADDRLABEL]", ctrl);
print_addrlabel(who, n, arg);
return 0;
}
@@ -102,33 +109,34 @@ static int accept_msg(const struct sockaddr_nl *who,
return 0;
}
- if (prefix_banner)
- fprintf(fp, "[NEIGH]");
+ print_headers(fp, "[NEIGH]", ctrl);
print_neigh(who, n, arg);
return 0;
}
if (n->nlmsg_type == RTM_NEWPREFIX) {
- if (prefix_banner)
- fprintf(fp, "[PREFIX]");
+ print_headers(fp, "[PREFIX]", ctrl);
print_prefix(who, n, arg);
return 0;
}
if (n->nlmsg_type == RTM_NEWRULE || n->nlmsg_type == RTM_DELRULE) {
- if (prefix_banner)
- fprintf(fp, "[RULE]");
+ print_headers(fp, "[RULE]", ctrl);
print_rule(who, n, arg);
return 0;
}
if (n->nlmsg_type == RTM_NEWNETCONF) {
- if (prefix_banner)
- fprintf(fp, "[NETCONF]");
- print_netconf(who, n, arg);
+ print_headers(fp, "[NETCONF]", ctrl);
+ print_netconf(who, ctrl, n, arg);
return 0;
}
if (n->nlmsg_type == NLMSG_TSTAMP) {
print_nlmsg_timestamp(fp, n);
return 0;
}
+ if (n->nlmsg_type == RTM_NEWNSID || n->nlmsg_type == RTM_DELNSID) {
+ print_headers(fp, "[NSID]", ctrl);
+ print_nsid(who, n, arg);
+ return 0;
+ }
if (n->nlmsg_type != NLMSG_ERROR && n->nlmsg_type != NLMSG_NOOP &&
n->nlmsg_type != NLMSG_DONE) {
fprintf(fp, "Unknown message: type=0x%08x(%d) flags=0x%08x(%d)"
@@ -151,6 +159,7 @@ int do_ipmonitor(int argc, char **argv)
int lneigh=0;
int lnetconf=0;
int lrule=0;
+ int lnsid=0;
int ifindex=0;
groups |= nl_mgrp(RTNLGRP_LINK);
@@ -158,6 +167,7 @@ int do_ipmonitor(int argc, char **argv)
groups |= nl_mgrp(RTNLGRP_IPV6_IFADDR);
groups |= nl_mgrp(RTNLGRP_IPV4_ROUTE);
groups |= nl_mgrp(RTNLGRP_IPV6_ROUTE);
+ groups |= nl_mgrp(RTNLGRP_MPLS_ROUTE);
groups |= nl_mgrp(RTNLGRP_IPV4_MROUTE);
groups |= nl_mgrp(RTNLGRP_IPV6_MROUTE);
groups |= nl_mgrp(RTNLGRP_IPV6_PREFIX);
@@ -166,6 +176,7 @@ int do_ipmonitor(int argc, char **argv)
groups |= nl_mgrp(RTNLGRP_IPV6_NETCONF);
groups |= nl_mgrp(RTNLGRP_IPV4_RULE);
groups |= nl_mgrp(RTNLGRP_IPV6_RULE);
+ groups |= nl_mgrp(RTNLGRP_NSID);
rtnl_close(&rth);
@@ -175,6 +186,8 @@ int do_ipmonitor(int argc, char **argv)
file = *argv;
} else if (matches(*argv, "label") == 0) {
prefix_banner = 1;
+ } else if (matches(*argv, "all-nsid") == 0) {
+ listen_all_nsid = 1;
} else if (matches(*argv, "link") == 0) {
llink=1;
groups = 0;
@@ -199,6 +212,9 @@ int do_ipmonitor(int argc, char **argv)
} else if (matches(*argv, "rule") == 0) {
lrule = 1;
groups = 0;
+ } else if (matches(*argv, "nsid") == 0) {
+ lnsid = 1;
+ groups = 0;
} else if (strcmp(*argv, "all") == 0) {
prefix_banner=1;
} else if (matches(*argv, "help") == 0) {
@@ -235,6 +251,8 @@ int do_ipmonitor(int argc, char **argv)
groups |= nl_mgrp(RTNLGRP_IPV4_ROUTE);
if (!preferred_family || preferred_family == AF_INET6)
groups |= nl_mgrp(RTNLGRP_IPV6_ROUTE);
+ if (!preferred_family || preferred_family == AF_MPLS)
+ groups |= nl_mgrp(RTNLGRP_MPLS_ROUTE);
}
if (lmroute) {
if (!preferred_family || preferred_family == AF_INET)
@@ -261,19 +279,30 @@ int do_ipmonitor(int argc, char **argv)
if (!preferred_family || preferred_family == AF_INET6)
groups |= nl_mgrp(RTNLGRP_IPV6_RULE);
}
+ if (lnsid) {
+ groups |= nl_mgrp(RTNLGRP_NSID);
+ }
if (file) {
FILE *fp;
+ int err;
+
fp = fopen(file, "r");
if (fp == NULL) {
perror("Cannot fopen");
exit(-1);
}
- return rtnl_from_file(fp, accept_msg, stdout);
+ err = rtnl_from_file(fp, accept_msg, stdout);
+ fclose(fp);
+ return err;
}
if (rtnl_open(&rth, groups) < 0)
exit(1);
+ if (listen_all_nsid && rtnl_listen_all_nsid(&rth) < 0)
+ exit(1);
+
ll_init_map(&rth);
+ netns_map_init();
if (rtnl_listen(&rth, accept_msg, stdout) < 0)
exit(2);
diff --git a/ip/ipmroute.c b/ip/ipmroute.c
index b4ed9f15..fffa9e2c 100644
--- a/ip/ipmroute.c
+++ b/ip/ipmroute.c
@@ -67,8 +67,7 @@ int print_mroute(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
int family;
if ((n->nlmsg_type != RTM_NEWROUTE &&
- n->nlmsg_type != RTM_DELROUTE) ||
- !(n->nlmsg_flags & NLM_F_MULTI)) {
+ n->nlmsg_type != RTM_DELROUTE)) {
fprintf(stderr, "Not a multicast route: %08x %08x %08x\n",
n->nlmsg_len, n->nlmsg_type, n->nlmsg_flags);
return 0;
@@ -98,15 +97,25 @@ int print_mroute(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
if (filter.af && filter.af != r->rtm_family)
return 0;
- if (tb[RTA_DST] &&
- filter.mdst.bitlen > 0 &&
- inet_addr_match(RTA_DATA(tb[RTA_DST]), &filter.mdst, filter.mdst.bitlen))
- return 0;
+ if (tb[RTA_DST] && filter.mdst.bitlen > 0) {
+ inet_prefix dst;
- if (tb[RTA_SRC] &&
- filter.msrc.bitlen > 0 &&
- inet_addr_match(RTA_DATA(tb[RTA_SRC]), &filter.msrc, filter.msrc.bitlen))
- return 0;
+ memset(&dst, 0, sizeof(dst));
+ dst.family = r->rtm_family;
+ memcpy(&dst.data, RTA_DATA(tb[RTA_DST]), RTA_PAYLOAD(tb[RTA_DST]));
+ if (inet_addr_match(&dst, &filter.mdst, filter.mdst.bitlen))
+ return 0;
+ }
+
+ if (tb[RTA_SRC] && filter.msrc.bitlen > 0) {
+ inet_prefix src;
+
+ memset(&src, 0, sizeof(src));
+ src.family = r->rtm_family;
+ memcpy(&src.data, RTA_DATA(tb[RTA_SRC]), RTA_PAYLOAD(tb[RTA_SRC]));
+ if (inet_addr_match(&src, &filter.msrc, filter.msrc.bitlen))
+ return 0;
+ }
family = r->rtm_family == RTNL_FAMILY_IPMR ? AF_INET : AF_INET6;
@@ -116,6 +125,7 @@ int print_mroute(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
if (tb[RTA_SRC])
len = snprintf(obuf, sizeof(obuf),
"(%s, ", rt_addr_n2a(family,
+ RTA_PAYLOAD(tb[RTA_SRC]),
RTA_DATA(tb[RTA_SRC]),
abuf, sizeof(abuf)));
else
@@ -123,6 +133,7 @@ int print_mroute(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
if (tb[RTA_DST])
snprintf(obuf + len, sizeof(obuf) - len,
"%s)", rt_addr_n2a(family,
+ RTA_PAYLOAD(tb[RTA_DST]),
RTA_DATA(tb[RTA_DST]),
abuf, sizeof(abuf)));
else
diff --git a/ip/ipneigh.c b/ip/ipneigh.c
index eeec7bdd..92b7cd6f 100644
--- a/ip/ipneigh.c
+++ b/ip/ipneigh.c
@@ -39,6 +39,7 @@ static struct
char *flushb;
int flushp;
int flushe;
+ int master;
} filter;
static void usage(void) __attribute__((noreturn));
@@ -99,8 +100,9 @@ static int ipneigh_modify(int cmd, int flags, int argc, char **argv)
struct ndmsg ndm;
char buf[256];
} req;
- char *d = NULL;
+ char *dev = NULL;
int dst_ok = 0;
+ int dev_ok = 0;
int lladdr_ok = 0;
char * lla = NULL;
inet_prefix dst;
@@ -134,10 +136,12 @@ static int ipneigh_modify(int cmd, int flags, int argc, char **argv)
duparg("address", *argv);
get_addr(&dst, *argv, preferred_family);
dst_ok = 1;
+ dev_ok = 1;
req.ndm.ndm_flags |= NTF_PROXY;
} else if (strcmp(*argv, "dev") == 0) {
NEXT_ARG();
- d = *argv;
+ dev = *argv;
+ dev_ok = 1;
} else {
if (strcmp(*argv, "to") == 0) {
NEXT_ARG();
@@ -152,7 +156,7 @@ static int ipneigh_modify(int cmd, int flags, int argc, char **argv)
}
argc--; argv++;
}
- if (d == NULL || !dst_ok || dst.family == AF_UNSPEC) {
+ if (!dev_ok || !dst_ok || dst.family == AF_UNSPEC) {
fprintf(stderr, "Device and destination are required arguments.\n");
exit(-1);
}
@@ -174,12 +178,12 @@ static int ipneigh_modify(int cmd, int flags, int argc, char **argv)
ll_init_map(&rth);
- if ((req.ndm.ndm_ifindex = ll_name_to_index(d)) == 0) {
- fprintf(stderr, "Cannot find device \"%s\"\n", d);
+ if (dev && (req.ndm.ndm_ifindex = ll_name_to_index(dev)) == 0) {
+ fprintf(stderr, "Cannot find device \"%s\"\n", dev);
return -1;
}
- if (rtnl_talk(&rth, &req.n, 0, 0, NULL) < 0)
+ if (rtnl_talk(&rth, &req.n, NULL, 0) < 0)
exit(2);
return 0;
@@ -193,6 +197,7 @@ int print_neigh(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
int len = n->nlmsg_len;
struct rtattr * tb[NDA_MAX+1];
char abuf[256];
+ static int logit = 1;
if (n->nlmsg_type != RTM_NEWNEIGH && n->nlmsg_type != RTM_DELNEIGH &&
n->nlmsg_type != RTM_GETNEIGH) {
@@ -220,6 +225,14 @@ int print_neigh(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
(r->ndm_family != AF_DECnet))
return 0;
+ if (filter.master && !(n->nlmsg_flags & NLM_F_DUMP_FILTERED)) {
+ if (logit) {
+ logit = 0;
+ fprintf(fp,
+ "\nWARNING: Kernel does not support filtering by master device\n\n");
+ }
+ }
+
parse_rtattr(tb, NDA_MAX, NDA_RTA(r), n->nlmsg_len - NLMSG_LENGTH(sizeof(*r)));
if (tb[NDA_DST]) {
@@ -256,7 +269,7 @@ int print_neigh(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
}
if (n->nlmsg_type == RTM_DELNEIGH)
- fprintf(fp, "delete ");
+ fprintf(fp, "Deleted ");
else if (n->nlmsg_type == RTM_GETNEIGH)
fprintf(fp, "miss ");
if (tb[NDA_DST]) {
@@ -327,9 +340,18 @@ void ipneigh_reset_filter(int ifindex)
static int do_show_or_flush(int argc, char **argv, int flush)
{
+ struct {
+ struct nlmsghdr n;
+ struct ndmsg ndm;
+ char buf[256];
+ } req;
char *filter_dev = NULL;
int state_given = 0;
- struct ndmsg ndm = { 0 };
+
+ memset(&req, 0, sizeof(req));
+
+ req.n.nlmsg_type = RTM_GETNEIGH;
+ req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
ipneigh_reset_filter(0);
@@ -351,6 +373,14 @@ static int do_show_or_flush(int argc, char **argv, int flush)
if (filter_dev)
duparg("dev", *argv);
filter_dev = *argv;
+ } else if (strcmp(*argv, "master") == 0) {
+ int ifindex;
+ NEXT_ARG();
+ ifindex = ll_name_to_index(*argv);
+ if (!ifindex)
+ invarg("Device does not exist\n", *argv);
+ addattr32(&req.n, sizeof(req), NDA_MASTER, ifindex);
+ filter.master = ifindex;
} else if (strcmp(*argv, "unused") == 0) {
filter.unused_only = 1;
} else if (strcmp(*argv, "nud") == 0) {
@@ -371,7 +401,7 @@ static int do_show_or_flush(int argc, char **argv, int flush)
state = 0x100;
filter.state |= state;
} else if (strcmp(*argv, "proxy") == 0)
- ndm.ndm_flags = NTF_PROXY;
+ req.ndm.ndm_flags = NTF_PROXY;
else {
if (strcmp(*argv, "to") == 0) {
NEXT_ARG();
@@ -392,6 +422,7 @@ static int do_show_or_flush(int argc, char **argv, int flush)
fprintf(stderr, "Cannot find device \"%s\"\n", filter_dev);
return -1;
}
+ addattr32(&req.n, sizeof(req), NDA_IFINDEX, filter.index);
}
if (flush) {
@@ -436,9 +467,9 @@ static int do_show_or_flush(int argc, char **argv, int flush)
return 1;
}
- ndm.ndm_family = filter.family;
+ req.ndm.ndm_family = filter.family;
- if (rtnl_dump_request(&rth, RTM_GETNEIGH, &ndm, sizeof(struct ndmsg)) < 0) {
+ if (rtnl_dump_request_n(&rth, &req.n) < 0) {
perror("Cannot send dump request");
exit(1);
}
diff --git a/ip/ipnetconf.c b/ip/ipnetconf.c
index aa31ead0..eca6eeee 100644
--- a/ip/ipnetconf.c
+++ b/ip/ipnetconf.c
@@ -40,7 +40,8 @@ static void usage(void)
#define NETCONF_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct netconfmsg))))
-int print_netconf(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
+int print_netconf(const struct sockaddr_nl *who, struct rtnl_ctrl_data *ctrl,
+ struct nlmsghdr *n, void *arg)
{
FILE *fp = (FILE*)arg;
struct netconfmsg *ncm = NLMSG_DATA(n);
@@ -123,6 +124,12 @@ int print_netconf(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
return 0;
}
+/*
+ * Adapter with the three-argument callback signature handed to
+ * rtnl_dump_filter(): forwards to print_netconf() with no control data.
+ */
+static int print_netconf2(const struct sockaddr_nl *who,
+			  struct nlmsghdr *n, void *arg)
+{
+	int ret;
+
+	ret = print_netconf(who, NULL, n, arg);
+	return ret;
+}
+
void ipnetconf_reset_filter(int ifindex)
{
memset(&filter, 0, sizeof(filter));
@@ -177,7 +184,7 @@ dump:
perror("Cannot send dump request");
exit(1);
}
- if (rtnl_dump_filter(&rth, print_netconf, stdout) < 0) {
+ if (rtnl_dump_filter(&rth, print_netconf2, stdout) < 0) {
fprintf(stderr, "Dump terminated\n");
exit(1);
}
diff --git a/ip/ipnetns.c b/ip/ipnetns.c
index 24df167e..088096f6 100644
--- a/ip/ipnetns.c
+++ b/ip/ipnetns.c
@@ -4,7 +4,6 @@
#include <sys/wait.h>
#include <sys/inotify.h>
#include <sys/mount.h>
-#include <sys/param.h>
#include <sys/syscall.h>
#include <stdio.h>
#include <string.h>
@@ -14,10 +13,12 @@
#include <errno.h>
#include <unistd.h>
#include <ctype.h>
+#include <linux/limits.h>
#include <linux/net_namespace.h>
#include "utils.h"
+#include "hlist.h"
#include "ip_common.h"
#include "namespace.h"
@@ -31,12 +32,17 @@ static int usage(void)
fprintf(stderr, " ip netns pids NAME\n");
fprintf(stderr, " ip [-all] netns exec [NAME] cmd ...\n");
fprintf(stderr, " ip netns monitor\n");
+ fprintf(stderr, " ip netns list-id\n");
exit(-1);
}
+/* This socket is used to get nsid */
+static struct rtnl_handle rtnsh = { .fd = -1 };
+
static int have_rtnl_getnsid = -1;
static int ipnetns_accept_msg(const struct sockaddr_nl *who,
+ struct rtnl_ctrl_data *ctrl,
struct nlmsghdr *n, void *arg)
{
struct nlmsgerr *err = (struct nlmsgerr *)NLMSG_DATA(n);
@@ -106,7 +112,7 @@ static int get_netnsid_from_name(const char *name)
return fd;
addattr32(&req.n, 1024, NETNSA_FD, fd);
- if (rtnl_talk(&rth, &req.n, 0, 0, &answer.n) < 0) {
+ if (rtnl_talk(&rtnsh, &req.n, &answer.n, sizeof(answer)) < 0) {
close(fd);
return -2;
}
@@ -129,6 +135,196 @@ static int get_netnsid_from_name(const char *name)
return -1;
}
+/*
+ * One cached network namespace.  An entry is linked into nsid_head[] through
+ * nsid_hash and into name_head[] through name_hash.
+ */
+struct nsid_cache {
+	struct hlist_node	nsid_hash;
+	struct hlist_node	name_hash;
+	int			nsid;
+	/* NUL-terminated name, allocated in the same chunk as the struct */
+	char			name[];
+};
+
+/* Number of buckets per table; must stay a power of two for the masks below. */
+#define NSIDMAP_SIZE		128
+/* Bucket index for an nsid / for a netns name. */
+#define NSID_HASH_NSID(nsid)	((nsid) & (NSIDMAP_SIZE - 1))
+#define NSID_HASH_NAME(name)	(namehash(name) & (NSIDMAP_SIZE - 1))
+
+/* Bucket arrays for the nsid cache: entries hashed by nsid and by name. */
+static struct hlist_head nsid_head[NSIDMAP_SIZE];
+static struct hlist_head name_head[NSIDMAP_SIZE];
+
+/*
+ * Find the cache entry for the given nsid.
+ * Returns the entry, or NULL when the nsid is not cached.
+ */
+static struct nsid_cache *netns_map_get_by_nsid(int nsid)
+{
+	uint32_t bucket = NSID_HASH_NSID(nsid);
+	struct hlist_node *pos;
+
+	hlist_for_each(pos, &nsid_head[bucket]) {
+		struct nsid_cache *entry;
+
+		entry = container_of(pos, struct nsid_cache, nsid_hash);
+		if (entry->nsid != nsid)
+			continue;
+
+		return entry;
+	}
+
+	return NULL;
+}
+
+/*
+ * Cache a (nsid, name) pair, linking the new entry into both hash tables.
+ * Returns 0 on success, -EEXIST when the nsid is already cached and
+ * -ENOMEM when the entry cannot be allocated.
+ */
+static int netns_map_add(int nsid, const char *name)
+{
+	struct nsid_cache *c;
+	uint32_t h;
+
+	if (netns_map_get_by_nsid(nsid) != NULL)
+		return -EEXIST;
+
+	/*
+	 * The name is stored right behind the struct; strcpy() below also
+	 * writes the terminating NUL, so it needs strlen(name) + 1 bytes.
+	 */
+	c = malloc(sizeof(*c) + strlen(name) + 1);
+	if (c == NULL) {
+		perror("malloc");
+		return -ENOMEM;
+	}
+	c->nsid = nsid;
+	strcpy(c->name, name);
+
+	h = NSID_HASH_NSID(nsid);
+	hlist_add_head(&c->nsid_hash, &nsid_head[h]);
+
+	h = NSID_HASH_NAME(name);
+	hlist_add_head(&c->name_hash, &name_head[h]);
+
+	return 0;
+}
+
+/* Unlink a cache entry from both hash tables and release it. */
+static void netns_map_del(struct nsid_cache *c)
+{
+	struct hlist_node *by_nsid = &c->nsid_hash;
+	struct hlist_node *by_name = &c->name_hash;
+
+	hlist_del(by_name);
+	hlist_del(by_nsid);
+	free(c);
+}
+
+/*
+ * Fill the nsid cache once: make sure the dedicated rtnetlink socket is open
+ * and record the nsid of every NETNS_RUN_DIR entry that has one.  Does
+ * nothing when already initialized or when ipnetns_have_nsid() is false.
+ * Exits the program when the socket cannot be opened.
+ */
+void netns_map_init(void)
+{
+	static int initialized;
+	struct dirent *entry;
+	DIR *dir;
+	int nsid;
+
+	if (initialized || !ipnetns_have_nsid())
+		return;
+
+	/*
+	 * 'initialized' stays 0 when opendir() fails below, so this function
+	 * may run again; only open the socket while rtnsh.fd still holds its
+	 * initial -1 instead of opening a second one over the same handle.
+	 */
+	if (rtnsh.fd < 0 && rtnl_open(&rtnsh, 0) < 0) {
+		fprintf(stderr, "Cannot open rtnetlink\n");
+		exit(1);
+	}
+
+	dir = opendir(NETNS_RUN_DIR);
+	if (!dir)
+		return;
+
+	while ((entry = readdir(dir)) != NULL) {
+		if (strcmp(entry->d_name, ".") == 0)
+			continue;
+		if (strcmp(entry->d_name, "..") == 0)
+			continue;
+		nsid = get_netnsid_from_name(entry->d_name);
+
+		/* a negative result means no nsid could be obtained */
+		if (nsid >= 0)
+			netns_map_add(nsid, entry->d_name);
+	}
+	closedir(dir);
+	initialized = 1;
+}
+
+/*
+ * Scan NETNS_RUN_DIR for the entry whose nsid equals 'nsid' and copy its
+ * file name into 'name' (the caller provides the storage).
+ * Returns 0 when found, -ENOENT when the directory cannot be opened or no
+ * entry matches.
+ */
+static int netns_get_name(int nsid, char *name)
+{
+	struct dirent *entry;
+	int ret = -ENOENT;
+	DIR *dir;
+
+	dir = opendir(NETNS_RUN_DIR);
+	if (!dir)
+		return ret;
+
+	while ((entry = readdir(dir)) != NULL) {
+		const char *fname = entry->d_name;
+
+		if (!strcmp(fname, ".") || !strcmp(fname, ".."))
+			continue;
+		if (get_netnsid_from_name(fname) != nsid)
+			continue;
+
+		strcpy(name, fname);
+		ret = 0;
+		break;
+	}
+
+	closedir(dir);
+	return ret;
+}
+
+/*
+ * Print one RTM_NEWNSID/RTM_DELNSID message to the FILE passed in 'arg'.
+ * A cached entry for the nsid is printed and then dropped from the cache;
+ * for an uncached RTM_NEWNSID the name is looked up with netns_get_name()
+ * and cached.  Other message types are ignored.
+ * Returns 0 normally, -1 when the message is too short or carries no
+ * NETNSA_NSID attribute.
+ */
+int print_nsid(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
+{
+	struct rtgenmsg *rthdr = NLMSG_DATA(n);
+	struct rtattr *tb[NETNSA_MAX+1];
+	int len = n->nlmsg_len;
+	FILE *fp = (FILE *)arg;
+	struct nsid_cache *c;
+	/* netns_get_name() strcpy()s a d_name here: up to NAME_MAX bytes + NUL */
+	char name[NAME_MAX + 1];
+	int nsid;
+
+	if (n->nlmsg_type != RTM_NEWNSID && n->nlmsg_type != RTM_DELNSID)
+		return 0;
+
+	len -= NLMSG_SPACE(sizeof(*rthdr));
+	if (len < 0) {
+		fprintf(stderr, "BUG: wrong nlmsg len %d in %s\n", len,
+			__func__);
+		return -1;
+	}
+
+	parse_rtattr(tb, NETNSA_MAX, NETNS_RTA(rthdr), len);
+	if (tb[NETNSA_NSID] == NULL) {
+		fprintf(stderr, "BUG: NETNSA_NSID is missing %s\n", __func__);
+		return -1;
+	}
+
+	if (n->nlmsg_type == RTM_DELNSID)
+		fprintf(fp, "Deleted ");
+
+	nsid = rta_getattr_u32(tb[NETNSA_NSID]);
+	fprintf(fp, "nsid %u ", nsid);
+
+	c = netns_map_get_by_nsid(nsid);
+	if (c != NULL) {
+		fprintf(fp, "(iproute2 netns name: %s)", c->name);
+		netns_map_del(c);
+	}
+
+	/* During 'ip monitor nsid', no chance to have new nsid in cache. */
+	if (c == NULL && n->nlmsg_type == RTM_NEWNSID) {
+		if (netns_get_name(nsid, name) == 0) {
+			fprintf(fp, "(iproute2 netns name: %s)", name);
+			netns_map_add(nsid, name);
+		}
+	}
+
+	fprintf(fp, "\n");
+	fflush(fp);
+	return 0;
+}
+
+/*
+ * 'ip netns list-id': request an RTM_GETNSID dump and print every answer
+ * with print_nsid().  Returns -ENOTSUP when ipnetns_have_nsid() is false,
+ * 0 on success; exits the program when the request or the dump fails.
+ */
+static int netns_list_id(int argc, char **argv)
+{
+	int rc;
+
+	if (!ipnetns_have_nsid()) {
+		fprintf(stderr, "RTM_GETNSID is not supported by the kernel.\n");
+		return -ENOTSUP;
+	}
+
+	rc = rtnl_wilddump_request(&rth, AF_UNSPEC, RTM_GETNSID);
+	if (rc < 0) {
+		perror("Cannot send dump request");
+		exit(1);
+	}
+
+	rc = rtnl_dump_filter(&rth, print_nsid, stdout);
+	if (rc < 0) {
+		fprintf(stderr, "Dump terminated\n");
+		exit(1);
+	}
+
+	return 0;
+}
+
static int netns_list(int argc, char **argv)
{
struct dirent *entry;
@@ -240,7 +436,7 @@ static int is_pid(const char *str)
static int netns_pids(int argc, char **argv)
{
const char *name;
- char net_path[MAXPATHLEN];
+ char net_path[PATH_MAX];
int netns;
struct stat netst;
DIR *dir;
@@ -275,7 +471,7 @@ static int netns_pids(int argc, char **argv)
return -1;
}
while((entry = readdir(dir))) {
- char pid_net_path[MAXPATHLEN];
+ char pid_net_path[PATH_MAX];
struct stat st;
if (!is_pid(entry->d_name))
continue;
@@ -296,7 +492,7 @@ static int netns_pids(int argc, char **argv)
static int netns_identify(int argc, char **argv)
{
const char *pidstr;
- char net_path[MAXPATHLEN];
+ char net_path[PATH_MAX];
int netns;
struct stat netst;
DIR *dir;
@@ -340,7 +536,7 @@ static int netns_identify(int argc, char **argv)
}
while((entry = readdir(dir))) {
- char name_path[MAXPATHLEN];
+ char name_path[PATH_MAX];
struct stat st;
if (strcmp(entry->d_name, ".") == 0)
@@ -366,7 +562,7 @@ static int netns_identify(int argc, char **argv)
static int on_netns_del(char *nsname, void *arg)
{
- char netns_path[MAXPATHLEN];
+ char netns_path[PATH_MAX];
snprintf(netns_path, sizeof(netns_path), "%s/%s", NETNS_RUN_DIR, nsname);
umount2(netns_path, MNT_DETACH);
@@ -415,7 +611,7 @@ static int netns_add(int argc, char **argv)
* userspace tweaks like remounting /sys, or bind mounting
* a new /etc/resolv.conf can be shared between uers.
*/
- char netns_path[MAXPATHLEN];
+ char netns_path[PATH_MAX];
const char *name;
int fd;
int made_netns_run_dir_mount = 0;
@@ -501,7 +697,7 @@ static int set_netnsid_from_name(const char *name, int nsid)
addattr32(&req.n, 1024, NETNSA_FD, fd);
addattr32(&req.n, 1024, NETNSA_NSID, nsid);
- if (rtnl_talk(&rth, &req.n, 0, 0, NULL) < 0)
+ if (rtnl_talk(&rth, &req.n, NULL, 0) < 0)
err = -2;
close(fd);
@@ -510,7 +706,7 @@ static int set_netnsid_from_name(const char *name, int nsid)
static int netns_set(int argc, char **argv)
{
- char netns_path[MAXPATHLEN];
+ char netns_path[PATH_MAX];
const char *name;
int netns, nsid;
@@ -577,6 +773,8 @@ static int netns_monitor(int argc, char **argv)
int do_netns(int argc, char **argv)
{
+ netns_map_init();
+
if (argc < 1)
return netns_list(0, NULL);
@@ -584,6 +782,9 @@ int do_netns(int argc, char **argv)
(matches(*argv, "lst") == 0))
return netns_list(argc-1, argv+1);
+ if ((matches(*argv, "list-id") == 0))
+ return netns_list_id(argc-1, argv+1);
+
if (matches(*argv, "help") == 0)
return usage();
diff --git a/ip/ipntable.c b/ip/ipntable.c
index ea7ca2d2..6eb84e79 100644
--- a/ip/ipntable.c
+++ b/ip/ipntable.c
@@ -313,7 +313,7 @@ static int ipntable_modify(int cmd, int flags, int argc, char **argv)
RTA_PAYLOAD(parms_rta));
}
- if (rtnl_talk(&rth, &req.n, 0, 0, NULL) < 0)
+ if (rtnl_talk(&rth, &req.n, NULL, 0) < 0)
exit(2);
return 0;
@@ -349,7 +349,7 @@ static const char *ntable_strtime_delta(__u32 msec)
return str;
}
-int print_ntable(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
+static int print_ntable(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
{
FILE *fp = (FILE*)arg;
struct ndtmsg *ndtm = NLMSG_DATA(n);
@@ -601,7 +601,7 @@ int print_ntable(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
return 0;
}
-void ipntable_reset_filter(void)
+static void ipntable_reset_filter(void)
{
memset(&filter, 0, sizeof(filter));
}
diff --git a/ip/ipprefix.c b/ip/ipprefix.c
index 02c0efce..ee51f04d 100644
--- a/ip/ipprefix.c
+++ b/ip/ipprefix.c
@@ -80,7 +80,9 @@ int print_prefix(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
pfx = (struct in6_addr *)RTA_DATA(tb[PREFIX_ADDRESS]);
memset(abuf, '\0', sizeof(abuf));
- fprintf(fp, "%s", rt_addr_n2a(family, pfx,
+ fprintf(fp, "%s", rt_addr_n2a(family,
+ RTA_PAYLOAD(tb[PREFIX_ADDRESS]),
+ pfx,
abuf, sizeof(abuf)));
}
fprintf(fp, "/%u ", prefix->prefix_len);
@@ -105,4 +107,3 @@ int print_prefix(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
return 0;
}
-
diff --git a/ip/iproute.c b/ip/iproute.c
index 25635ebc..381c3ddc 100644
--- a/ip/iproute.c
+++ b/ip/iproute.c
@@ -23,11 +23,13 @@
#include <netinet/ip.h>
#include <arpa/inet.h>
#include <linux/in_route.h>
+#include <linux/icmpv6.h>
#include <errno.h>
#include "rt_names.h"
#include "utils.h"
#include "ip_common.h"
+#include "iproute_lwtunnel.h"
#ifndef RTAX_RTTVAR
#define RTAX_RTTVAR RTAX_HOPS
@@ -75,22 +77,28 @@ static void usage(void)
fprintf(stderr, " [ table TABLE_ID ] [ proto RTPROTO ]\n");
fprintf(stderr, " [ scope SCOPE ] [ metric METRIC ]\n");
fprintf(stderr, "INFO_SPEC := NH OPTIONS FLAGS [ nexthop NH ]...\n");
- fprintf(stderr, "NH := [ via ADDRESS ] [ dev STRING ] [ weight NUMBER ] NHFLAGS\n");
- fprintf(stderr, "OPTIONS := FLAGS [ mtu NUMBER ] [ advmss NUMBER ]\n");
+ fprintf(stderr, "NH := [ encap ENCAPTYPE ENCAPHDR ] [ via [ FAMILY ] ADDRESS ]\n");
+ fprintf(stderr, " [ dev STRING ] [ weight NUMBER ] NHFLAGS\n");
+ fprintf(stderr, "FAMILY := [ inet | inet6 | ipx | dnet | mpls | bridge | link ]\n");
+ fprintf(stderr, "OPTIONS := FLAGS [ mtu NUMBER ] [ advmss NUMBER ] [ as [ to ] ADDRESS ]\n");
fprintf(stderr, " [ rtt TIME ] [ rttvar TIME ] [ reordering NUMBER ]\n");
fprintf(stderr, " [ window NUMBER] [ cwnd NUMBER ] [ initcwnd NUMBER ]\n");
fprintf(stderr, " [ ssthresh NUMBER ] [ realms REALM ] [ src ADDRESS ]\n");
fprintf(stderr, " [ rto_min TIME ] [ hoplimit NUMBER ] [ initrwnd NUMBER ]\n");
fprintf(stderr, " [ features FEATURES ] [ quickack BOOL ] [ congctl NAME ]\n");
+ fprintf(stderr, " [ pref PREF ] [ expires TIME ]\n");
fprintf(stderr, "TYPE := [ unicast | local | broadcast | multicast | throw |\n");
fprintf(stderr, " unreachable | prohibit | blackhole | nat ]\n");
fprintf(stderr, "TABLE_ID := [ local | main | default | all | NUMBER ]\n");
fprintf(stderr, "SCOPE := [ host | link | global | NUMBER ]\n");
fprintf(stderr, "NHFLAGS := [ onlink | pervasive ]\n");
fprintf(stderr, "RTPROTO := [ kernel | boot | static | NUMBER ]\n");
+ fprintf(stderr, "PREF := [ low | medium | high ]\n");
fprintf(stderr, "TIME := NUMBER[s|ms]\n");
fprintf(stderr, "BOOL := [1|0]\n");
fprintf(stderr, "FEATURES := ecn\n");
+ fprintf(stderr, "ENCAPTYPE := [ mpls | ip | ip6 ]\n");
+ fprintf(stderr, "ENCAPHDR := [ MPLSLABEL ]\n");
exit(-1);
}
@@ -141,6 +149,9 @@ static int filter_nlmsg(struct nlmsghdr *n, struct rtattr **tb, int host_len)
table = rtm_get_table(r, tb);
+ if (preferred_family != AF_UNSPEC && r->rtm_family != preferred_family)
+ return 0;
+
if (r->rtm_family == AF_INET6 && table != RT_TABLE_MAIN)
ip6_multiple_tables = 1;
@@ -185,8 +196,15 @@ static int filter_nlmsg(struct nlmsghdr *n, struct rtattr **tb, int host_len)
(r->rtm_family != filter.msrc.family ||
(filter.msrc.bitlen >= 0 && filter.msrc.bitlen < r->rtm_src_len)))
return 0;
- if (filter.rvia.family && r->rtm_family != filter.rvia.family)
- return 0;
+ if (filter.rvia.family) {
+ int family = r->rtm_family;
+ if (tb[RTA_VIA]) {
+ struct rtvia *via = RTA_DATA(tb[RTA_VIA]);
+ family = via->rtvia_family;
+ }
+ if (family != filter.rvia.family)
+ return 0;
+ }
if (filter.rprefsrc.family && r->rtm_family != filter.rprefsrc.family)
return 0;
@@ -205,6 +223,12 @@ static int filter_nlmsg(struct nlmsghdr *n, struct rtattr **tb, int host_len)
via.family = r->rtm_family;
if (tb[RTA_GATEWAY])
memcpy(&via.data, RTA_DATA(tb[RTA_GATEWAY]), host_len/8);
+ if (tb[RTA_VIA]) {
+ size_t len = RTA_PAYLOAD(tb[RTA_VIA]) - 2;
+ struct rtvia *rtvia = RTA_DATA(tb[RTA_VIA]);
+ via.family = rtvia->rtvia_family;
+ memcpy(&via.data, rtvia->rtvia_addr, len);
+ }
}
if (filter.rprefsrc.bitlen>0) {
memset(&prefsrc, 0, sizeof(prefsrc));
@@ -339,8 +363,9 @@ int print_route(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
if (tb[RTA_DST]) {
if (r->rtm_dst_len != host_len) {
fprintf(fp, "%s/%u ", rt_addr_n2a(r->rtm_family,
- RTA_DATA(tb[RTA_DST]),
- abuf, sizeof(abuf)),
+ RTA_PAYLOAD(tb[RTA_DST]),
+ RTA_DATA(tb[RTA_DST]),
+ abuf, sizeof(abuf)),
r->rtm_dst_len
);
} else {
@@ -358,8 +383,9 @@ int print_route(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
if (tb[RTA_SRC]) {
if (r->rtm_src_len != host_len) {
fprintf(fp, "from %s/%u ", rt_addr_n2a(r->rtm_family,
- RTA_DATA(tb[RTA_SRC]),
- abuf, sizeof(abuf)),
+ RTA_PAYLOAD(tb[RTA_SRC]),
+ RTA_DATA(tb[RTA_SRC]),
+ abuf, sizeof(abuf)),
r->rtm_src_len
);
} else {
@@ -372,6 +398,17 @@ int print_route(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
} else if (r->rtm_src_len) {
fprintf(fp, "from 0/%u ", r->rtm_src_len);
}
+ if (tb[RTA_NEWDST]) {
+ fprintf(fp, "as to %s ", format_host(r->rtm_family,
+ RTA_PAYLOAD(tb[RTA_NEWDST]),
+ RTA_DATA(tb[RTA_NEWDST]),
+ abuf, sizeof(abuf))
+ );
+ }
+
+ if (tb[RTA_ENCAP])
+ lwt_print_encap(fp, tb[RTA_ENCAP_TYPE], tb[RTA_ENCAP]);
+
if (r->rtm_tos && filter.tosmask != -1) {
SPRINT_BUF(b1);
fprintf(fp, "tos %s ", rtnl_dsfield_n2a(r->rtm_tos, b1, sizeof(b1)));
@@ -384,12 +421,20 @@ int print_route(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
RTA_DATA(tb[RTA_GATEWAY]),
abuf, sizeof(abuf)));
}
+ if (tb[RTA_VIA]) {
+ size_t len = RTA_PAYLOAD(tb[RTA_VIA]) - 2;
+ struct rtvia *via = RTA_DATA(tb[RTA_VIA]);
+ fprintf(fp, "via %s %s ",
+ family_name(via->rtvia_family),
+ format_host(via->rtvia_family, len, via->rtvia_addr,
+ abuf, sizeof(abuf)));
+ }
if (tb[RTA_OIF] && filter.oifmask != -1)
fprintf(fp, "dev %s ", ll_index_to_name(*(int*)RTA_DATA(tb[RTA_OIF])));
+ if (table && (table != RT_TABLE_MAIN || show_details > 0) && !filter.tb)
+ fprintf(fp, " table %s ", rtnl_rttable_n2a(table, b1, sizeof(b1)));
if (!(r->rtm_flags&RTM_F_CLONED)) {
- if ((table != RT_TABLE_MAIN || show_details > 0) && !filter.tb)
- fprintf(fp, " table %s ", rtnl_rttable_n2a(table, b1, sizeof(b1)));
if ((r->rtm_protocol != RTPROT_BOOT || show_details > 0) && filter.protocolmask != -1)
fprintf(fp, " proto %s ", rtnl_rtprot_n2a(r->rtm_protocol, b1, sizeof(b1)));
if ((r->rtm_scope != RT_SCOPE_UNIVERSE || show_details > 0) && filter.scopemask != -1)
@@ -401,6 +446,7 @@ int print_route(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
*/
fprintf(fp, " src %s ",
rt_addr_n2a(r->rtm_family,
+ RTA_PAYLOAD(tb[RTA_PREFSRC]),
RTA_DATA(tb[RTA_PREFSRC]),
abuf, sizeof(abuf)));
}
@@ -412,8 +458,12 @@ int print_route(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
fprintf(fp, "onlink ");
if (r->rtm_flags & RTNH_F_PERVASIVE)
fprintf(fp, "pervasive ");
+ if (r->rtm_flags & RTNH_F_OFFLOAD)
+ fprintf(fp, "offload ");
if (r->rtm_flags & RTM_F_NOTIFY)
fprintf(fp, "notify ");
+ if (r->rtm_flags & RTNH_F_LINKDOWN)
+ fprintf(fp, "linkdown ");
if (tb[RTA_MARK]) {
unsigned int mark = *(unsigned int*)RTA_DATA(tb[RTA_MARK]);
if (mark) {
@@ -530,6 +580,12 @@ int print_route(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
if (mxrta[i] == NULL)
continue;
+ if (i != RTAX_CC_ALGO)
+ val = rta_getattr_u32(mxrta[i]);
+
+ if (i == RTAX_HOPLIMIT && (int)val == -1)
+ continue;
+
if (i < sizeof(mx_names)/sizeof(char*) && mx_names[i])
fprintf(fp, " %s", mx_names[i]);
else
@@ -537,17 +593,11 @@ int print_route(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
if (mxlock & (1<<i))
fprintf(fp, " lock");
- if (i != RTAX_CC_ALGO)
- val = rta_getattr_u32(mxrta[i]);
switch (i) {
case RTAX_FEATURES:
print_rtax_features(fp, val);
break;
- case RTAX_HOPLIMIT:
- if ((int)val == -1)
- val = 0;
- /* fall through */
default:
fprintf(fp, " %u", val);
break;
@@ -594,6 +644,12 @@ int print_route(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
fprintf(fp, "%s\tnexthop", _SL_);
if (nh->rtnh_len > sizeof(*nh)) {
parse_rtattr(tb, RTA_MAX, RTNH_DATA(nh), nh->rtnh_len - sizeof(*nh));
+
+ if (tb[RTA_ENCAP])
+ lwt_print_encap(fp,
+ tb[RTA_ENCAP_TYPE],
+ tb[RTA_ENCAP]);
+
if (tb[RTA_GATEWAY]) {
fprintf(fp, " via %s ",
format_host(r->rtm_family,
@@ -601,6 +657,14 @@ int print_route(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
RTA_DATA(tb[RTA_GATEWAY]),
abuf, sizeof(abuf)));
}
+ if (tb[RTA_VIA]) {
+ size_t len = RTA_PAYLOAD(tb[RTA_VIA]) - 2;
+ struct rtvia *via = RTA_DATA(tb[RTA_VIA]);
+ fprintf(fp, "via %s %s ",
+ family_name(via->rtvia_family),
+ format_host(via->rtvia_family, len, via->rtvia_addr,
+ abuf, sizeof(abuf)));
+ }
if (tb[RTA_FLOW]) {
__u32 to = rta_getattr_u32(tb[RTA_FLOW]);
__u32 from = to>>16;
@@ -628,18 +692,37 @@ int print_route(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
fprintf(fp, " onlink");
if (nh->rtnh_flags & RTNH_F_PERVASIVE)
fprintf(fp, " pervasive");
+ if (nh->rtnh_flags & RTNH_F_LINKDOWN)
+ fprintf(fp, " linkdown");
len -= NLMSG_ALIGN(nh->rtnh_len);
nh = RTNH_NEXT(nh);
}
}
+ if (tb[RTA_PREF]) {
+ unsigned int pref = rta_getattr_u8(tb[RTA_PREF]);
+ fprintf(fp, " pref ");
+
+ switch (pref) {
+ case ICMPV6_ROUTER_PREF_LOW:
+ fprintf(fp, "low");
+ break;
+ case ICMPV6_ROUTER_PREF_MEDIUM:
+ fprintf(fp, "medium");
+ break;
+ case ICMPV6_ROUTER_PREF_HIGH:
+ fprintf(fp, "high");
+ break;
+ default:
+ fprintf(fp, "%u", pref);
+ }
+ }
fprintf(fp, "\n");
fflush(fp);
return 0;
}
-
-static int parse_one_nh(struct rtmsg *r, struct rtattr *rta,
- struct rtnexthop *rtnh,
+static int parse_one_nh(struct nlmsghdr *n, struct rtmsg *r,
+ struct rtattr *rta, struct rtnexthop *rtnh,
int *argcp, char ***argvp)
{
int argc = *argcp;
@@ -648,12 +731,23 @@ static int parse_one_nh(struct rtmsg *r, struct rtattr *rta,
while (++argv, --argc > 0) {
if (strcmp(*argv, "via") == 0) {
inet_prefix addr;
+ int family;
NEXT_ARG();
- get_addr(&addr, *argv, r->rtm_family);
+ family = read_family(*argv);
+ if (family == AF_UNSPEC)
+ family = r->rtm_family;
+ else
+ NEXT_ARG();
+ get_addr(&addr, *argv, family);
if (r->rtm_family == AF_UNSPEC)
r->rtm_family = addr.family;
- rta_addattr_l(rta, 4096, RTA_GATEWAY, &addr.data, addr.bytelen);
- rtnh->rtnh_len += sizeof(struct rtattr) + addr.bytelen;
+ if (addr.family == r->rtm_family) {
+ rta_addattr_l(rta, 4096, RTA_GATEWAY, &addr.data, addr.bytelen);
+ rtnh->rtnh_len += sizeof(struct rtattr) + addr.bytelen;
+ } else {
+ rta_addattr_l(rta, 4096, RTA_VIA, &addr.family, addr.bytelen+2);
+ rtnh->rtnh_len += sizeof(struct rtattr) + addr.bytelen+2;
+ }
} else if (strcmp(*argv, "dev") == 0) {
NEXT_ARG();
if ((rtnh->rtnh_ifindex = ll_name_to_index(*argv)) == 0) {
@@ -671,10 +765,15 @@ static int parse_one_nh(struct rtmsg *r, struct rtattr *rta,
} else if (matches(*argv, "realms") == 0) {
__u32 realm;
NEXT_ARG();
- if (get_rt_realms(&realm, *argv))
+ if (get_rt_realms_or_raw(&realm, *argv))
invarg("\"realm\" value is invalid\n", *argv);
rta_addattr32(rta, 4096, RTA_FLOW, realm);
rtnh->rtnh_len += sizeof(struct rtattr) + 4;
+ } else if (strcmp(*argv, "encap") == 0) {
+ int len = rta->rta_len;
+
+ lwt_parse_encap(rta, 4096, &argc, &argv);
+ rtnh->rtnh_len += rta->rta_len - len;
} else
break;
}
@@ -706,7 +805,7 @@ static int parse_nexthops(struct nlmsghdr *n, struct rtmsg *r,
memset(rtnh, 0, sizeof(*rtnh));
rtnh->rtnh_len = sizeof(*rtnh);
rta->rta_len += rtnh->rtnh_len;
- parse_one_nh(r, rta, rtnh, &argc, &argv);
+ parse_one_nh(n, r, rta, rtnh, &argc, &argv);
rtnh = RTNH_NEXT(rtnh);
}
@@ -732,6 +831,8 @@ static int iproute_modify(int cmd, unsigned flags, int argc, char **argv)
int scope_ok = 0;
int table_ok = 0;
int raw = 0;
+ int type_ok = 0;
+ static int hz;
memset(&req, 0, sizeof(req));
@@ -759,14 +860,33 @@ static int iproute_modify(int cmd, unsigned flags, int argc, char **argv)
if (req.r.rtm_family == AF_UNSPEC)
req.r.rtm_family = addr.family;
addattr_l(&req.n, sizeof(req), RTA_PREFSRC, &addr.data, addr.bytelen);
+ } else if (strcmp(*argv, "as") == 0) {
+ inet_prefix addr;
+ NEXT_ARG();
+ if (strcmp(*argv, "to") == 0) {
+ NEXT_ARG();
+ }
+ get_addr(&addr, *argv, req.r.rtm_family);
+ if (req.r.rtm_family == AF_UNSPEC)
+ req.r.rtm_family = addr.family;
+ addattr_l(&req.n, sizeof(req), RTA_NEWDST, &addr.data, addr.bytelen);
} else if (strcmp(*argv, "via") == 0) {
inet_prefix addr;
+ int family;
gw_ok = 1;
NEXT_ARG();
- get_addr(&addr, *argv, req.r.rtm_family);
+ family = read_family(*argv);
+ if (family == AF_UNSPEC)
+ family = req.r.rtm_family;
+ else
+ NEXT_ARG();
+ get_addr(&addr, *argv, family);
if (req.r.rtm_family == AF_UNSPEC)
req.r.rtm_family = addr.family;
- addattr_l(&req.n, sizeof(req), RTA_GATEWAY, &addr.data, addr.bytelen);
+ if (addr.family == req.r.rtm_family)
+ addattr_l(&req.n, sizeof(req), RTA_GATEWAY, &addr.data, addr.bytelen);
+ else
+ addattr_l(&req.n, sizeof(req), RTA_VIA, &addr.family, addr.bytelen+2);
} else if (strcmp(*argv, "from") == 0) {
inet_prefix addr;
NEXT_ARG();
@@ -783,9 +903,17 @@ static int iproute_modify(int cmd, unsigned flags, int argc, char **argv)
if (rtnl_dsfield_a2n(&tos, *argv))
invarg("\"tos\" value is invalid\n", *argv);
req.r.rtm_tos = tos;
+ } else if (strcmp(*argv, "expires") == 0 ) {
+ __u32 expires;
+ NEXT_ARG();
+ if (get_u32(&expires, *argv, 0))
+ invarg("\"expires\" value is invalid\n", *argv);
+ if (!hz)
+ hz = get_user_hz();
+ addattr32(&req.n, sizeof(req), RTA_EXPIRES, expires*hz);
} else if (matches(*argv, "metric") == 0 ||
matches(*argv, "priority") == 0 ||
- matches(*argv, "preference") == 0) {
+ strcmp(*argv, "preference") == 0) {
__u32 metric;
NEXT_ARG();
if (get_u32(&metric, *argv, 0))
@@ -815,7 +943,7 @@ static int iproute_modify(int cmd, unsigned flags, int argc, char **argv)
mxlock |= (1<<RTAX_HOPLIMIT);
NEXT_ARG();
}
- if (get_unsigned(&hoplimit, *argv, 0))
+ if (get_unsigned(&hoplimit, *argv, 0) || hoplimit > 255)
invarg("\"hoplimit\" value is invalid\n", *argv);
rta_addattr32(mxrta, sizeof(mxbuf), RTAX_HOPLIMIT, hoplimit);
} else if (strcmp(*argv, "advmss") == 0) {
@@ -952,7 +1080,7 @@ static int iproute_modify(int cmd, unsigned flags, int argc, char **argv)
} else if (matches(*argv, "realms") == 0) {
__u32 realm;
NEXT_ARG();
- if (get_rt_realms(&realm, *argv))
+ if (get_rt_realms_or_raw(&realm, *argv))
invarg("\"realm\" value is invalid\n", *argv);
addattr32(&req.n, sizeof(req), RTA_FLOW, realm);
} else if (strcmp(*argv, "onlink") == 0) {
@@ -982,6 +1110,29 @@ static int iproute_modify(int cmd, unsigned flags, int argc, char **argv)
strcmp(*argv, "oif") == 0) {
NEXT_ARG();
d = *argv;
+ } else if (matches(*argv, "pref") == 0) {
+ __u8 pref;
+ NEXT_ARG();
+ if (strcmp(*argv, "low") == 0)
+ pref = ICMPV6_ROUTER_PREF_LOW;
+ else if (strcmp(*argv, "medium") == 0)
+ pref = ICMPV6_ROUTER_PREF_MEDIUM;
+ else if (strcmp(*argv, "high") == 0)
+ pref = ICMPV6_ROUTER_PREF_HIGH;
+ else if (get_u8(&pref, *argv, 0))
+ invarg("\"pref\" value is invalid\n", *argv);
+ addattr8(&req.n, sizeof(req), RTA_PREF, pref);
+ } else if (strcmp(*argv, "encap") == 0) {
+ char buf[1024];
+ struct rtattr *rta = (void*)buf;
+
+ rta->rta_type = RTA_ENCAP;
+ rta->rta_len = RTA_LENGTH(0);
+
+ lwt_parse_encap(rta, sizeof(buf), &argc, &argv);
+
+ if (rta->rta_len > RTA_LENGTH(0))
+ addraw_l(&req.n, 1024, RTA_DATA(rta), RTA_PAYLOAD(rta));
} else {
int type;
inet_prefix dst;
@@ -993,6 +1144,7 @@ static int iproute_modify(int cmd, unsigned flags, int argc, char **argv)
rtnl_rtntype_a2n(&type, *argv) == 0) {
NEXT_ARG();
req.r.rtm_type = type;
+ type_ok = 1;
}
if (matches(*argv, "help") == 0)
@@ -1034,6 +1186,9 @@ static int iproute_modify(int cmd, unsigned flags, int argc, char **argv)
if (nhs_ok)
parse_nexthops(&req.n, &req.r, argc, argv);
+ if (req.r.rtm_family == AF_UNSPEC)
+ req.r.rtm_family = AF_INET;
+
if (!table_ok) {
if (req.r.rtm_type == RTN_LOCAL ||
req.r.rtm_type == RTN_BROADCAST ||
@@ -1042,8 +1197,11 @@ static int iproute_modify(int cmd, unsigned flags, int argc, char **argv)
req.r.rtm_table = RT_TABLE_LOCAL;
}
if (!scope_ok) {
- if (req.r.rtm_type == RTN_LOCAL ||
- req.r.rtm_type == RTN_NAT)
+ if (req.r.rtm_family == AF_INET6 ||
+ req.r.rtm_family == AF_MPLS)
+ req.r.rtm_scope = RT_SCOPE_UNIVERSE;
+ else if (req.r.rtm_type == RTN_LOCAL ||
+ req.r.rtm_type == RTN_NAT)
req.r.rtm_scope = RT_SCOPE_HOST;
else if (req.r.rtm_type == RTN_BROADCAST ||
req.r.rtm_type == RTN_MULTICAST ||
@@ -1058,11 +1216,11 @@ static int iproute_modify(int cmd, unsigned flags, int argc, char **argv)
}
}
- if (req.r.rtm_family == AF_UNSPEC)
- req.r.rtm_family = AF_INET;
+ if (!type_ok && req.r.rtm_family == AF_MPLS)
+ req.r.rtm_type = RTN_UNICAST;
- if (rtnl_talk(&rth, &req.n, 0, 0, NULL) < 0)
- return -1;
+ if (rtnl_talk(&rth, &req.n, NULL, 0) < 0)
+ return -2;
return 0;
}
@@ -1099,7 +1257,8 @@ static int iproute_flush_cache(void)
char *buffer = "-1";
if (flush_fd < 0) {
- fprintf (stderr, "Cannot open \"%s\"\n", ROUTE_FLUSH_PATH);
+ fprintf (stderr, "Cannot open \"%s\": %s\n",
+ ROUTE_FLUSH_PATH, strerror(errno));
return -1;
}
@@ -1251,15 +1410,21 @@ static int iproute_list_flush_or_save(int argc, char **argv, int action)
get_unsigned(&mark, *argv, 0);
filter.markmask = -1;
} else if (strcmp(*argv, "via") == 0) {
+ int family;
NEXT_ARG();
- get_prefix(&filter.rvia, *argv, do_ipv6);
+ family = read_family(*argv);
+ if (family == AF_UNSPEC)
+ family = do_ipv6;
+ else
+ NEXT_ARG();
+ get_prefix(&filter.rvia, *argv, family);
} else if (strcmp(*argv, "src") == 0) {
NEXT_ARG();
get_prefix(&filter.rprefsrc, *argv, do_ipv6);
} else if (matches(*argv, "realms") == 0) {
__u32 realm;
NEXT_ARG();
- if (get_rt_realms(&realm, *argv))
+ if (get_rt_realms_or_raw(&realm, *argv))
invarg("invalid realms\n", *argv);
filter.realm = realm;
filter.realmmask = ~0U;
@@ -1523,8 +1688,10 @@ static int iproute_get(int argc, char **argv)
if (req.r.rtm_family == AF_UNSPEC)
req.r.rtm_family = AF_INET;
- if (rtnl_talk(&rth, &req.n, 0, 0, &req.n) < 0)
- exit(2);
+ req.r.rtm_flags |= RTM_F_LOOKUP_TABLE;
+
+ if (rtnl_talk(&rth, &req.n, &req.n, sizeof(req)) < 0)
+ return -2;
if (connected && !from_ok) {
struct rtmsg *r = NLMSG_DATA(&req.n);
@@ -1559,25 +1726,28 @@ static int iproute_get(int argc, char **argv)
tb[RTA_OIF]->rta_type = 0;
if (tb[RTA_GATEWAY])
tb[RTA_GATEWAY]->rta_type = 0;
+ if (tb[RTA_VIA])
+ tb[RTA_VIA]->rta_type = 0;
if (!idev && tb[RTA_IIF])
tb[RTA_IIF]->rta_type = 0;
req.n.nlmsg_flags = NLM_F_REQUEST;
req.n.nlmsg_type = RTM_GETROUTE;
- if (rtnl_talk(&rth, &req.n, 0, 0, &req.n) < 0)
- exit(2);
+ if (rtnl_talk(&rth, &req.n, &req.n, sizeof(req)) < 0)
+ return -2;
}
if (print_route(NULL, &req.n, (void*)stdout) < 0) {
fprintf(stderr, "An error :-)\n");
- exit(1);
+ return -1;
}
- exit(0);
+ return 0;
}
-static int restore_handler(const struct sockaddr_nl *nl, struct nlmsghdr *n,
- void *arg)
+static int restore_handler(const struct sockaddr_nl *nl,
+ struct rtnl_ctrl_data *ctrl,
+ struct nlmsghdr *n, void *arg)
{
int ret;
@@ -1585,7 +1755,7 @@ static int restore_handler(const struct sockaddr_nl *nl, struct nlmsghdr *n,
ll_init_map(&rth);
- ret = rtnl_talk(&rth, n, 0, 0, n);
+ ret = rtnl_talk(&rth, n, n, sizeof(*n));
if ((ret < 0) && (errno == EEXIST))
ret = 0;
@@ -1619,7 +1789,9 @@ static int iproute_restore(void)
exit(rtnl_from_file(stdin, &restore_handler, NULL));
}
-static int show_handler(const struct sockaddr_nl *nl, struct nlmsghdr *n, void *arg)
+static int show_handler(const struct sockaddr_nl *nl,
+ struct rtnl_ctrl_data *ctrl,
+ struct nlmsghdr *n, void *arg)
{
print_route(nl, n, stdout);
return 0;
@@ -1687,4 +1859,3 @@ int do_iproute(int argc, char **argv)
fprintf(stderr, "Command \"%s\" is unknown, try \"ip route help\".\n", *argv);
exit(-1);
}
-
diff --git a/ip/iproute_lwtunnel.c b/ip/iproute_lwtunnel.c
new file mode 100644
index 00000000..70749063
--- /dev/null
+++ b/ip/iproute_lwtunnel.c
@@ -0,0 +1,367 @@
+/*
+ * iproute_lwtunnel.c
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Roopa Prabhu, <roopa@cumulusnetworks.com>
+ * Thomas Graf <tgraf@suug.ch>
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <string.h>
+#include <linux/ila.h>
+#include <linux/lwtunnel.h>
+#include <linux/mpls_iptunnel.h>
+#include <errno.h>
+
+#include "rt_names.h"
+#include "utils.h"
+#include "iproute_lwtunnel.h"
+
+/*
+ * Translate an encapsulation keyword ("mpls", "ip", "ip6" or "ila") into
+ * the matching LWTUNNEL_ENCAP_* value.  Any other string yields
+ * LWTUNNEL_ENCAP_NONE.
+ */
+static int read_encap_type(const char *name)
+{
+	static const struct {
+		const char *keyword;
+		int encap;
+	} known[] = {
+		{ "mpls", LWTUNNEL_ENCAP_MPLS },
+		{ "ip",   LWTUNNEL_ENCAP_IP },
+		{ "ip6",  LWTUNNEL_ENCAP_IP6 },
+		{ "ila",  LWTUNNEL_ENCAP_ILA },
+	};
+	size_t i;
+
+	for (i = 0; i < sizeof(known) / sizeof(known[0]); i++) {
+		if (strcmp(name, known[i].keyword) == 0)
+			return known[i].encap;
+	}
+
+	return LWTUNNEL_ENCAP_NONE;
+}
+
+/*
+ * Return the keyword printed for an LWTUNNEL_ENCAP_* value; values with
+ * no keyword of their own are reported as "unknown".
+ */
+static const char *format_encap_type(int type)
+{
+	if (type == LWTUNNEL_ENCAP_MPLS)
+		return "mpls";
+	if (type == LWTUNNEL_ENCAP_IP)
+		return "ip";
+	if (type == LWTUNNEL_ENCAP_IP6)
+		return "ip6";
+	if (type == LWTUNNEL_ENCAP_ILA)
+		return "ila";
+
+	return "unknown";
+}
+
+/*
+ * Print the MPLS_IPTUNNEL_DST attribute nested inside @encap, formatted
+ * as an AF_MPLS address.  Nothing is printed when it is absent.
+ */
+static void print_encap_mpls(FILE *fp, struct rtattr *encap)
+{
+	struct rtattr *tb[MPLS_IPTUNNEL_MAX+1];
+	struct rtattr *dst;
+	char abuf[256];
+
+	parse_rtattr_nested(tb, MPLS_IPTUNNEL_MAX, encap);
+
+	dst = tb[MPLS_IPTUNNEL_DST];
+	if (!dst)
+		return;
+
+	fprintf(fp, " %s ", format_host(AF_MPLS, RTA_PAYLOAD(dst),
+					RTA_DATA(dst), abuf, sizeof(abuf)));
+}
+
+/*
+ * Print the attributes of an "ip" encapsulation nested inside @encap:
+ * id, source and destination address (as AF_INET), ttl and tos.
+ * Each field is printed only when its attribute is present.
+ */
+static void print_encap_ip(FILE *fp, struct rtattr *encap)
+{
+	struct rtattr *tb[LWTUNNEL_IP_MAX+1];
+	struct rtattr *attr;
+	char abuf[256];
+
+	parse_rtattr_nested(tb, LWTUNNEL_IP_MAX, encap);
+
+	attr = tb[LWTUNNEL_IP_ID];
+	if (attr) {
+		/* the stored id goes through ntohll() before printing */
+		fprintf(fp, "id %llu ", ntohll(rta_getattr_u64(attr)));
+	}
+
+	attr = tb[LWTUNNEL_IP_SRC];
+	if (attr) {
+		fprintf(fp, "src %s ",
+			rt_addr_n2a(AF_INET, RTA_PAYLOAD(attr),
+				    RTA_DATA(attr), abuf, sizeof(abuf)));
+	}
+
+	attr = tb[LWTUNNEL_IP_DST];
+	if (attr) {
+		fprintf(fp, "dst %s ",
+			rt_addr_n2a(AF_INET, RTA_PAYLOAD(attr),
+				    RTA_DATA(attr), abuf, sizeof(abuf)));
+	}
+
+	attr = tb[LWTUNNEL_IP_TTL];
+	if (attr)
+		fprintf(fp, "ttl %d ", rta_getattr_u8(attr));
+
+	attr = tb[LWTUNNEL_IP_TOS];
+	if (attr)
+		fprintf(fp, "tos %d ", rta_getattr_u8(attr));
+}
+
+/*
+ * Print the ILA locator nested inside @encap, formatted by addr64_n2a().
+ * Nothing is printed when ILA_ATTR_LOCATOR is absent.
+ */
+static void print_encap_ila(FILE *fp, struct rtattr *encap)
+{
+	struct rtattr *tb[ILA_ATTR_MAX+1];
+
+	parse_rtattr_nested(tb, ILA_ATTR_MAX, encap);
+
+	if (tb[ILA_ATTR_LOCATOR]) {
+		char abuf[ADDR64_BUF_SIZE];
+
+		/*
+		 * Netlink attribute payloads are only guaranteed 4-byte
+		 * alignment, so fetch the 64-bit value through the
+		 * accessor (as the other printers in this file do)
+		 * rather than dereferencing a cast __u64 pointer.
+		 */
+		addr64_n2a(rta_getattr_u64(tb[ILA_ATTR_LOCATOR]),
+			   abuf, sizeof(abuf));
+		fprintf(fp, " %s ", abuf);
+	}
+}
+
+/*
+ * Print the attributes of an "ip6" encapsulation nested inside @encap:
+ * id, source and destination address (as AF_INET6), hoplimit and tc.
+ * Each field is printed only when its attribute is present.
+ */
+static void print_encap_ip6(FILE *fp, struct rtattr *encap)
+{
+	struct rtattr *tb[LWTUNNEL_IP6_MAX+1];
+	struct rtattr *attr;
+	char abuf[256];
+
+	parse_rtattr_nested(tb, LWTUNNEL_IP6_MAX, encap);
+
+	attr = tb[LWTUNNEL_IP6_ID];
+	if (attr) {
+		/* the stored id goes through ntohll() before printing */
+		fprintf(fp, "id %llu ", ntohll(rta_getattr_u64(attr)));
+	}
+
+	attr = tb[LWTUNNEL_IP6_SRC];
+	if (attr) {
+		fprintf(fp, "src %s ",
+			rt_addr_n2a(AF_INET6, RTA_PAYLOAD(attr),
+				    RTA_DATA(attr), abuf, sizeof(abuf)));
+	}
+
+	attr = tb[LWTUNNEL_IP6_DST];
+	if (attr) {
+		fprintf(fp, "dst %s ",
+			rt_addr_n2a(AF_INET6, RTA_PAYLOAD(attr),
+				    RTA_DATA(attr), abuf, sizeof(abuf)));
+	}
+
+	attr = tb[LWTUNNEL_IP6_HOPLIMIT];
+	if (attr)
+		fprintf(fp, "hoplimit %d ", rta_getattr_u8(attr));
+
+	attr = tb[LWTUNNEL_IP6_TC];
+	if (attr)
+		fprintf(fp, "tc %d ", rta_getattr_u8(attr));
+}
+
+/*
+ * Print " encap TYPE " followed by the type-specific attributes.
+ *
+ * @encap_type: attribute holding the 16-bit encapsulation type; when it
+ *              is NULL nothing at all is printed.
+ * @encap:      nested attribute handed to the per-type printer.
+ *
+ * Types without a dedicated printer only get the " encap TYPE " prefix.
+ */
+void lwt_print_encap(FILE *fp, struct rtattr *encap_type,
+		     struct rtattr *encap)
+{
+	int et;
+
+	if (!encap_type)
+		return;
+
+	et = rta_getattr_u16(encap_type);
+
+	fprintf(fp, " encap %s ", format_encap_type(et));
+
+	if (et == LWTUNNEL_ENCAP_MPLS)
+		print_encap_mpls(fp, encap);
+	else if (et == LWTUNNEL_ENCAP_IP)
+		print_encap_ip(fp, encap);
+	else if (et == LWTUNNEL_ENCAP_ILA)
+		print_encap_ila(fp, encap);
+	else if (et == LWTUNNEL_ENCAP_IP6)
+		print_encap_ip6(fp, encap);
+}
+
+/*
+ * Parse the current argument as an AF_MPLS address and append it to @rta
+ * as MPLS_IPTUNNEL_DST.  Exits with status 1 if the argument does not
+ * parse.  The argument cursor (*argcp / *argvp) is left unchanged, i.e.
+ * still on the argument that was just parsed.  Always returns 0.
+ */
+static int parse_encap_mpls(struct rtattr *rta, size_t len, int *argcp, char ***argvp)
+{
+	char *label = **argvp;
+	inet_prefix addr;
+
+	if (get_addr(&addr, label, AF_MPLS) != 0) {
+		fprintf(stderr, "Error: an inet address is expected rather than \"%s\".\n", label);
+		exit(1);
+	}
+
+	rta_addattr_l(rta, len, MPLS_IPTUNNEL_DST, &addr.data,
+		      addr.bytelen);
+
+	return 0;
+}
+
+/*
+ * Parse the options of an "ip" encapsulation (id, dst, tos, ttl, in any
+ * order, each at most once) and append them to @rta.  Parsing stops at
+ * the first word that is not one of these keywords.  Always returns 0;
+ * invalid or duplicated values are reported through invarg()/duparg2().
+ */
+static int parse_encap_ip(struct rtattr *rta, size_t len, int *argcp, char ***argvp)
+{
+	int seen_id = 0, seen_dst = 0, seen_tos = 0, seen_ttl = 0;
+	int argc = *argcp;
+	char **argv = *argvp;
+
+	/* NEXT_ARG() operates on the local argc/argv cursor */
+	for (; argc > 0; argc--, argv++) {
+		if (strcmp(*argv, "id") == 0) {
+			__u64 id;
+
+			NEXT_ARG();
+			if (seen_id++)
+				duparg2("id", *argv);
+			if (get_u64(&id, *argv, 0))
+				invarg("\"id\" value is invalid\n", *argv);
+			rta_addattr64(rta, len, LWTUNNEL_IP_ID, htonll(id));
+			continue;
+		}
+
+		if (strcmp(*argv, "dst") == 0) {
+			inet_prefix addr;
+
+			NEXT_ARG();
+			if (seen_dst++)
+				duparg2("dst", *argv);
+			get_addr(&addr, *argv, AF_INET);
+			rta_addattr_l(rta, len, LWTUNNEL_IP_DST, &addr.data, addr.bytelen);
+			continue;
+		}
+
+		if (strcmp(*argv, "tos") == 0) {
+			__u32 tos;
+
+			NEXT_ARG();
+			if (seen_tos++)
+				duparg2("tos", *argv);
+			if (rtnl_dsfield_a2n(&tos, *argv))
+				invarg("\"tos\" value is invalid\n", *argv);
+			rta_addattr8(rta, len, LWTUNNEL_IP_TOS, tos);
+			continue;
+		}
+
+		if (strcmp(*argv, "ttl") == 0) {
+			__u8 ttl;
+
+			NEXT_ARG();
+			if (seen_ttl++)
+				duparg2("ttl", *argv);
+			if (get_u8(&ttl, *argv, 0))
+				invarg("\"ttl\" value is invalid\n", *argv);
+			rta_addattr8(rta, len, LWTUNNEL_IP_TTL, ttl);
+			continue;
+		}
+
+		/* not an option of this encap type: stop without consuming it */
+		break;
+	}
+
+	/* argv is currently the first unparsed argument,
+	 * but the lwt_parse_encap() caller will move to the next,
+	 * so step back */
+	*argcp = argc + 1;
+	*argvp = argv - 1;
+
+	return 0;
+}
+
+/*
+ * Parse the current argument as a 64-bit ILA locator and append it to
+ * @rta as ILA_ATTR_LOCATOR.
+ *
+ * @rta:   attribute buffer being filled in
+ * @len:   total size of the buffer behind @rta
+ * @argcp: in/out argument count; written back unchanged
+ * @argvp: in/out argument cursor; written back unchanged
+ *
+ * Exits with status 1 if the locator does not parse.  Always returns 0.
+ */
+static int parse_encap_ila(struct rtattr *rta, size_t len,
+			   int *argcp, char ***argvp)
+{
+	__u64 locator;
+	int argc = *argcp;
+	char **argv = *argvp;
+
+	if (get_addr64(&locator, *argv) < 0) {
+		fprintf(stderr, "Bad locator: %s\n", *argv);
+		exit(1);
+	}
+
+	/*
+	 * Bound the append by the caller-supplied buffer size rather than
+	 * a hard-coded 1024, matching the other encap parsers in this file.
+	 */
+	rta_addattr64(rta, len, ILA_ATTR_LOCATOR, locator);
+
+	*argcp = argc;
+	*argvp = argv;
+
+	return 0;
+}
+
+/*
+ * Parse the options of an "ip6" encapsulation (id, dst, tc, hoplimit, in
+ * any order, each at most once) and append them to @rta.  Parsing stops
+ * at the first word that is not one of these keywords.  Always returns 0;
+ * invalid or duplicated values are reported through invarg()/duparg2().
+ */
+static int parse_encap_ip6(struct rtattr *rta, size_t len, int *argcp, char ***argvp)
+{
+	int seen_id = 0, seen_dst = 0, seen_tc = 0, seen_hoplimit = 0;
+	int argc = *argcp;
+	char **argv = *argvp;
+
+	/* NEXT_ARG() operates on the local argc/argv cursor */
+	for (; argc > 0; argc--, argv++) {
+		if (strcmp(*argv, "id") == 0) {
+			__u64 id;
+
+			NEXT_ARG();
+			if (seen_id++)
+				duparg2("id", *argv);
+			if (get_u64(&id, *argv, 0))
+				invarg("\"id\" value is invalid\n", *argv);
+			rta_addattr64(rta, len, LWTUNNEL_IP6_ID, htonll(id));
+			continue;
+		}
+
+		if (strcmp(*argv, "dst") == 0) {
+			inet_prefix addr;
+
+			NEXT_ARG();
+			if (seen_dst++)
+				duparg2("dst", *argv);
+			get_addr(&addr, *argv, AF_INET6);
+			rta_addattr_l(rta, len, LWTUNNEL_IP6_DST, &addr.data, addr.bytelen);
+			continue;
+		}
+
+		if (strcmp(*argv, "tc") == 0) {
+			__u32 tc;
+
+			NEXT_ARG();
+			if (seen_tc++)
+				duparg2("tc", *argv);
+			if (rtnl_dsfield_a2n(&tc, *argv))
+				invarg("\"tc\" value is invalid\n", *argv);
+			rta_addattr8(rta, len, LWTUNNEL_IP6_TC, tc);
+			continue;
+		}
+
+		if (strcmp(*argv, "hoplimit") == 0) {
+			__u8 hoplimit;
+
+			NEXT_ARG();
+			if (seen_hoplimit++)
+				duparg2("hoplimit", *argv);
+			if (get_u8(&hoplimit, *argv, 0))
+				invarg("\"hoplimit\" value is invalid\n", *argv);
+			rta_addattr8(rta, len, LWTUNNEL_IP6_HOPLIMIT, hoplimit);
+			continue;
+		}
+
+		/* not an option of this encap type: stop without consuming it */
+		break;
+	}
+
+	/* argv is currently the first unparsed argument,
+	 * but the lwt_parse_encap() caller will move to the next,
+	 * so step back */
+	*argcp = argc + 1;
+	*argvp = argv - 1;
+
+	return 0;
+}
+
+/*
+ * Parse "encap TYPE ARGS..." from the command line and append the result
+ * to @rta as a nested RTA_ENCAP attribute followed by RTA_ENCAP_TYPE.
+ *
+ * @rta:   attribute buffer being filled in
+ * @len:   total size of the buffer behind @rta; every append is bounded
+ *         by it
+ * @argcp: in/out argument count
+ * @argvp: in/out argument cursor; the callers in this tree invoke this
+ *         with the cursor on the "encap" keyword
+ *
+ * An unrecognised TYPE is rejected through invarg().  Always returns 0.
+ */
+int lwt_parse_encap(struct rtattr *rta, size_t len, int *argcp, char ***argvp)
+{
+	struct rtattr *nest;
+	int argc = *argcp;
+	char **argv = *argvp;
+	__u16 type;
+
+	NEXT_ARG();
+	type = read_encap_type(*argv);
+	/* read_encap_type() reports an unknown keyword as LWTUNNEL_ENCAP_NONE;
+	 * this test presumably relies on that value being 0 - confirm */
+	if (!type)
+		invarg("\"encap type\" value is invalid\n", *argv);
+
+	NEXT_ARG();
+	/* NOTE(review): this appears to reject a command line on which the
+	 * first encap argument is also the last word - confirm intent */
+	if (argc <= 1) {
+		fprintf(stderr, "Error: unexpected end of line after \"encap\"\n");
+		exit(-1);
+	}
+
+	/* use the caller's buffer size, not a hard-coded 1024 */
+	nest = rta_nest(rta, len, RTA_ENCAP);
+	switch (type) {
+	case LWTUNNEL_ENCAP_MPLS:
+		parse_encap_mpls(rta, len, &argc, &argv);
+		break;
+	case LWTUNNEL_ENCAP_IP:
+		parse_encap_ip(rta, len, &argc, &argv);
+		break;
+	case LWTUNNEL_ENCAP_ILA:
+		parse_encap_ila(rta, len, &argc, &argv);
+		break;
+	case LWTUNNEL_ENCAP_IP6:
+		parse_encap_ip6(rta, len, &argc, &argv);
+		break;
+	default:
+		fprintf(stderr, "Error: unsupported encap type\n");
+		break;
+	}
+	rta_nest_end(rta, nest);
+
+	rta_addattr16(rta, len, RTA_ENCAP_TYPE, type);
+
+	*argcp = argc;
+	*argvp = argv;
+
+	return 0;
+}
diff --git a/ip/iproute_lwtunnel.h b/ip/iproute_lwtunnel.h
new file mode 100644
index 00000000..b82b58ad
--- /dev/null
+++ b/ip/iproute_lwtunnel.h
@@ -0,0 +1,8 @@
+#ifndef __LWTUNNEL_H__
+#define __LWTUNNEL_H__ 1
+
+/*
+ * Parse "encap TYPE ARGS..." from the argument cursor (*argcp / *argvp)
+ * and append RTA_ENCAP / RTA_ENCAP_TYPE to @rta, whose buffer is @len
+ * bytes long.
+ */
+int lwt_parse_encap(struct rtattr *rta, size_t len, int *argcp, char ***argvp);
+
+/*
+ * Print " encap TYPE ..." to @fp for the given attribute pair; prints
+ * nothing when @encap_type is NULL.
+ */
+void lwt_print_encap(FILE *fp, struct rtattr *encap_type,
+		     struct rtattr *encap);
+
+#endif
diff --git a/ip/iprule.c b/ip/iprule.c
index 9304cf02..c363cccf 100644
--- a/ip/iprule.c
+++ b/ip/iprule.c
@@ -21,6 +21,7 @@
#include <arpa/inet.h>
#include <string.h>
#include <linux/fib_rules.h>
+#include <errno.h>
#include "rt_names.h"
#include "utils.h"
@@ -32,11 +33,11 @@ static void usage(void) __attribute__((noreturn));
static void usage(void)
{
- fprintf(stderr, "Usage: ip rule [ list | add | del | flush ] SELECTOR ACTION\n");
+ fprintf(stderr, "Usage: ip rule [ list | add | del | flush | save ] SELECTOR ACTION\n");
+ fprintf(stderr, " ip rule restore\n");
fprintf(stderr, "SELECTOR := [ not ] [ from PREFIX ] [ to PREFIX ] [ tos TOS ] [ fwmark FWMARK[/MASK] ]\n");
fprintf(stderr, " [ iif STRING ] [ oif STRING ] [ pref NUMBER ] [ uidrange UID1-UID2 ]\n");
fprintf(stderr, "ACTION := [ table TABLE_ID ]\n");
- fprintf(stderr, " [ prohibit | reject | unreachable ]\n");
fprintf(stderr, " [ realms [SRCREALM/]DSTREALM ]\n");
fprintf(stderr, " [ goto NUMBER ]\n");
fprintf(stderr, " SUPPRESSOR\n");
@@ -82,8 +83,9 @@ int print_rule(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
if (tb[FRA_SRC]) {
if (r->rtm_src_len != host_len) {
fprintf(fp, "from %s/%u ", rt_addr_n2a(r->rtm_family,
- RTA_DATA(tb[FRA_SRC]),
- abuf, sizeof(abuf)),
+ RTA_PAYLOAD(tb[FRA_SRC]),
+ RTA_DATA(tb[FRA_SRC]),
+ abuf, sizeof(abuf)),
r->rtm_src_len
);
} else {
@@ -102,8 +104,9 @@ int print_rule(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
if (tb[FRA_DST]) {
if (r->rtm_dst_len != host_len) {
fprintf(fp, "to %s/%u ", rt_addr_n2a(r->rtm_family,
- RTA_DATA(tb[FRA_DST]),
- abuf, sizeof(abuf)),
+ RTA_PAYLOAD(tb[FRA_DST]),
+ RTA_DATA(tb[FRA_DST]),
+ abuf, sizeof(abuf)),
r->rtm_dst_len
);
} else {
@@ -217,24 +220,65 @@ int print_rule(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
return 0;
}
-static int iprule_list(int argc, char **argv)
+/* Marker written at the start of the stream by save_rule_prep() and
+ * compared against the first word read back by rule_dump_check_magic(). */
+static __u32 rule_dump_magic = 0x71706986;
+
+/*
+ * Prepare stdout for a binary rule dump: refuse to write to a terminal,
+ * then emit rule_dump_magic as the first bytes of the stream.
+ * Returns 0 on success, -1 on failure (after printing a message).
+ */
+static int save_rule_prep(void)
+{
+	if (isatty(STDOUT_FILENO)) {
+		fprintf(stderr, "Not sending a binary stream to stdout\n");
+		return -1;
+	}
+
+	if (write(STDOUT_FILENO, &rule_dump_magic,
+		  sizeof(rule_dump_magic)) != sizeof(rule_dump_magic)) {
+		fprintf(stderr, "Can't write magic to dump file\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Dump filter that writes the raw netlink message @n to stdout.
+ * Returns 0 when the whole message was written, -EIO on a short write,
+ * and otherwise the value returned by write().
+ */
+static int save_rule(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
+{
+	const __u32 want = n->nlmsg_len;
+	int ret = write(STDOUT_FILENO, n, want);
+
+	if (ret > 0 && ret != want) {
+		fprintf(stderr, "Short write while saving nlmsg\n");
+		ret = -EIO;
+	}
+
+	if (ret == want)
+		return 0;
+
+	return ret;
+}
+
+static int iprule_list_or_save(int argc, char **argv, int save)
{
+ rtnl_filter_t filter = print_rule;
int af = preferred_family;
if (af == AF_UNSPEC)
af = AF_INET;
if (argc > 0) {
- fprintf(stderr, "\"ip rule show\" does not take any arguments.\n");
+ fprintf(stderr, "\"ip rule %s\" does not take any arguments.\n",
+ save ? "save" : "show");
return -1;
}
+ if (save) {
+ if (save_rule_prep())
+ return -1;
+ filter = save_rule;
+ }
+
if (rtnl_wilddump_request(&rth, af, RTM_GETRULE) < 0) {
perror("Cannot send dump request");
return 1;
}
- if (rtnl_dump_filter(&rth, print_rule, stdout) < 0) {
+ if (rtnl_dump_filter(&rth, filter, stdout) < 0) {
fprintf(stderr, "Dump terminated\n");
return 1;
}
@@ -242,6 +286,50 @@ static int iprule_list(int argc, char **argv)
return 0;
}
+/*
+ * Check that stdin carries a rule dump: it must not be a terminal and
+ * its first word must equal rule_dump_magic.
+ * Returns 0 on a match, -1 otherwise (after printing a message).
+ */
+static int rule_dump_check_magic(void)
+{
+	__u32 magic = 0;
+	int nread;
+
+	if (isatty(STDIN_FILENO)) {
+		fprintf(stderr, "Can't restore rule dump from a terminal\n");
+		return -1;
+	}
+
+	/* a failed read leaves magic at 0, which is reported as a mismatch */
+	nread = fread(&magic, sizeof(magic), 1, stdin);
+	if (magic == rule_dump_magic)
+		return 0;
+
+	fprintf(stderr, "Magic mismatch (%d elems, %x magic)\n", nread, magic);
+	return -1;
+}
+
+/*
+ * rtnl_from_file() callback: resend the saved message @n as a request
+ * with NLM_F_CREATE and NLM_F_ACK set.  A failure with errno == EEXIST
+ * is treated as success; any other result of rtnl_talk() is returned
+ * unchanged.
+ */
+static int restore_handler(const struct sockaddr_nl *nl,
+			   struct rtnl_ctrl_data *ctrl,
+			   struct nlmsghdr *n, void *arg)
+{
+	int ret;
+
+	n->nlmsg_flags |= NLM_F_REQUEST | NLM_F_CREATE | NLM_F_ACK;
+
+	ll_init_map(&rth);
+
+	ret = rtnl_talk(&rth, n, n, sizeof(*n));
+	if (ret >= 0)
+		return ret;
+
+	return errno == EEXIST ? 0 : ret;
+}
+
+
+/*
+ * "ip rule restore": verify the dump magic on stdin, then replay every
+ * message through restore_handler().  Never returns: the process exits
+ * with -1 on a bad magic, otherwise with the rtnl_from_file() result.
+ */
+static int iprule_restore(void)
+{
+	int rc;
+
+	if (rule_dump_check_magic() != 0)
+		exit(-1);
+
+	rc = rtnl_from_file(stdin, &restore_handler, NULL);
+	exit(rc);
+}
static int iprule_modify(int cmd, int argc, char **argv)
{
@@ -316,7 +404,7 @@ static int iprule_modify(int cmd, int argc, char **argv)
} else if (matches(*argv, "realms") == 0) {
__u32 realm;
NEXT_ARG();
- if (get_rt_realms(&realm, *argv))
+ if (get_rt_realms_or_raw(&realm, *argv))
invarg("invalid realms\n", *argv);
addattr32(&req.n, sizeof(req), FRA_FLOW, realm);
} else if (matches(*argv, "table") == 0 ||
@@ -398,8 +486,8 @@ static int iprule_modify(int cmd, int argc, char **argv)
if (!table_ok && cmd == RTM_NEWRULE)
req.r.rtm_table = RT_TABLE_MAIN;
- if (rtnl_talk(&rth, &req.n, 0, 0, NULL) < 0)
- return 2;
+ if (rtnl_talk(&rth, &req.n, NULL, 0) < 0)
+ return -2;
return 0;
}
@@ -425,7 +513,7 @@ static int flush_rule(const struct sockaddr_nl *who, struct nlmsghdr *n, void *a
if (rtnl_open(&rth2, 0) < 0)
return -1;
- if (rtnl_talk(&rth2, n, 0, 0, NULL) < 0)
+ if (rtnl_talk(&rth2, n, NULL, 0) < 0)
return -2;
rtnl_close(&rth2);
@@ -462,11 +550,15 @@ static int iprule_flush(int argc, char **argv)
int do_iprule(int argc, char **argv)
{
if (argc < 1) {
- return iprule_list(0, NULL);
+ return iprule_list_or_save(0, NULL, 0);
} else if (matches(argv[0], "list") == 0 ||
matches(argv[0], "lst") == 0 ||
matches(argv[0], "show") == 0) {
- return iprule_list(argc-1, argv+1);
+ return iprule_list_or_save(argc-1, argv+1, 0);
+ } else if (matches(argv[0], "save") == 0) {
+ return iprule_list_or_save(argc-1, argv+1, 1);
+ } else if (matches(argv[0], "restore") == 0) {
+ return iprule_restore();
} else if (matches(argv[0], "add") == 0) {
return iprule_modify(RTM_NEWRULE, argc-1, argv+1);
} else if (matches(argv[0], "delete") == 0) {
diff --git a/ip/iptoken.c b/ip/iptoken.c
index 655f1601..428f1332 100644
--- a/ip/iptoken.c
+++ b/ip/iptoken.c
@@ -95,10 +95,6 @@ static int iptoken_list(int argc, char **argv)
{
int af = AF_INET6;
struct rtnl_dump_args da;
- const struct rtnl_dump_filter_arg a[2] = {
- { .filter = print_token, .arg1 = &da, },
- { .filter = NULL, .arg1 = NULL, },
- };
memset(&da, 0, sizeof(da));
da.fp = stdout;
@@ -118,7 +114,7 @@ static int iptoken_list(int argc, char **argv)
return -1;
}
- if (rtnl_dump_filter_l(&rth, a) < 0) {
+ if (rtnl_dump_filter(&rth, print_token, &da) < 0) {
fprintf(stderr, "Dump terminated\n");
return -1;
}
@@ -182,7 +178,7 @@ static int iptoken_set(int argc, char **argv)
return -1;
}
- if (rtnl_talk(&rth, &req.n, 0, 0, NULL) < 0)
+ if (rtnl_talk(&rth, &req.n, NULL, 0) < 0)
return -2;
return 0;
diff --git a/ip/iptunnel.c b/ip/iptunnel.c
index caf8a28e..65a4e6e9 100644
--- a/ip/iptunnel.c
+++ b/ip/iptunnel.c
@@ -47,6 +47,16 @@ static void usage(void)
exit(-1);
}
+/*
+ * Record @proto as the tunnel's IP protocol.  If a different non-zero
+ * protocol is already set, print an error and exit with status -1.
+ */
+static void set_tunnel_proto(struct ip_tunnel_parm *p, int proto)
+{
+	if (p->iph.protocol == 0 || p->iph.protocol == proto) {
+		p->iph.protocol = proto;
+		return;
+	}
+
+	fprintf(stderr,
+		"You managed to ask for more than one tunnel mode.\n");
+	exit(-1);
+}
+
static int parse_args(int argc, char **argv, int cmd, struct ip_tunnel_parm *p)
{
int count = 0;
@@ -68,83 +78,37 @@ static int parse_args(int argc, char **argv, int cmd, struct ip_tunnel_parm *p)
NEXT_ARG();
if (strcmp(*argv, "ipip") == 0 ||
strcmp(*argv, "ip/ip") == 0) {
- if (p->iph.protocol && p->iph.protocol != IPPROTO_IPIP) {
- fprintf(stderr,"You managed to ask for more than one tunnel mode.\n");
- exit(-1);
- }
- p->iph.protocol = IPPROTO_IPIP;
+ set_tunnel_proto(p, IPPROTO_IPIP);
} else if (strcmp(*argv, "gre") == 0 ||
strcmp(*argv, "gre/ip") == 0) {
- if (p->iph.protocol && p->iph.protocol != IPPROTO_GRE) {
- fprintf(stderr,"You managed to ask for more than one tunnel mode.\n");
- exit(-1);
- }
- p->iph.protocol = IPPROTO_GRE;
+ set_tunnel_proto(p, IPPROTO_GRE);
} else if (strcmp(*argv, "sit") == 0 ||
strcmp(*argv, "ipv6/ip") == 0) {
- if (p->iph.protocol && p->iph.protocol != IPPROTO_IPV6) {
- fprintf(stderr,"You managed to ask for more than one tunnel mode.\n");
- exit(-1);
- }
- p->iph.protocol = IPPROTO_IPV6;
+ set_tunnel_proto(p, IPPROTO_IPV6);
} else if (strcmp(*argv, "isatap") == 0) {
- if (p->iph.protocol && p->iph.protocol != IPPROTO_IPV6) {
- fprintf(stderr, "You managed to ask for more than one tunnel mode.\n");
- exit(-1);
- }
- p->iph.protocol = IPPROTO_IPV6;
+ set_tunnel_proto(p, IPPROTO_IPV6);
isatap++;
} else if (strcmp(*argv, "vti") == 0) {
- if (p->iph.protocol && p->iph.protocol != IPPROTO_IPIP) {
- fprintf(stderr, "You managed to ask for more than one tunnel mode.\n");
- exit(-1);
- }
- p->iph.protocol = IPPROTO_IPIP;
+ set_tunnel_proto(p, IPPROTO_IPIP);
p->i_flags |= VTI_ISVTI;
} else {
- fprintf(stderr,"Unknown tunnel mode \"%s\"\n", *argv);
+ fprintf(stderr,
+ "Unknown tunnel mode \"%s\"\n", *argv);
exit(-1);
}
} else if (strcmp(*argv, "key") == 0) {
- unsigned uval;
NEXT_ARG();
p->i_flags |= GRE_KEY;
p->o_flags |= GRE_KEY;
- if (strchr(*argv, '.'))
- p->i_key = p->o_key = get_addr32(*argv);
- else {
- if (get_unsigned(&uval, *argv, 0)<0) {
- fprintf(stderr, "invalid value for \"key\": \"%s\"; it should be an unsigned integer\n", *argv);
- exit(-1);
- }
- p->i_key = p->o_key = htonl(uval);
- }
+ p->i_key = p->o_key = tnl_parse_key("key", *argv);
} else if (strcmp(*argv, "ikey") == 0) {
- unsigned uval;
NEXT_ARG();
p->i_flags |= GRE_KEY;
- if (strchr(*argv, '.'))
- p->i_key = get_addr32(*argv);
- else {
- if (get_unsigned(&uval, *argv, 0)<0) {
- fprintf(stderr, "invalid value for \"ikey\": \"%s\"; it should be an unsigned integer\n", *argv);
- exit(-1);
- }
- p->i_key = htonl(uval);
- }
+ p->i_key = tnl_parse_key("ikey", *argv);
} else if (strcmp(*argv, "okey") == 0) {
- unsigned uval;
NEXT_ARG();
p->o_flags |= GRE_KEY;
- if (strchr(*argv, '.'))
- p->o_key = get_addr32(*argv);
- else {
- if (get_unsigned(&uval, *argv, 0)<0) {
- fprintf(stderr, "invalid value for \"okey\": \"%s\"; it should be an unsigned integer\n", *argv);
- exit(-1);
- }
- p->o_key = htonl(uval);
- }
+ p->o_key = tnl_parse_key("okey", *argv);
} else if (strcmp(*argv, "seq") == 0) {
p->i_flags |= GRE_SEQ;
p->o_flags |= GRE_SEQ;
@@ -167,22 +131,26 @@ static int parse_args(int argc, char **argv, int cmd, struct ip_tunnel_parm *p)
NEXT_ARG();
if (strcmp(*argv, "any"))
p->iph.daddr = get_addr32(*argv);
+ else
+ p->iph.daddr = htonl(INADDR_ANY);
} else if (strcmp(*argv, "local") == 0) {
NEXT_ARG();
if (strcmp(*argv, "any"))
p->iph.saddr = get_addr32(*argv);
+ else
+ p->iph.saddr = htonl(INADDR_ANY);
} else if (strcmp(*argv, "dev") == 0) {
NEXT_ARG();
- strncpy(medium, *argv, IFNAMSIZ-1);
+ strncpy(medium, *argv, IFNAMSIZ - 1);
} else if (strcmp(*argv, "ttl") == 0 ||
- strcmp(*argv, "hoplimit") == 0) {
- unsigned uval;
+ strcmp(*argv, "hoplimit") == 0 ||
+ strcmp(*argv, "hlim") == 0) {
+ __u8 uval;
+
NEXT_ARG();
if (strcmp(*argv, "inherit") != 0) {
- if (get_unsigned(&uval, *argv, 0))
+ if (get_u8(&uval, *argv, 0))
invarg("invalid TTL\n", *argv);
- if (uval > 255)
- invarg("TTL must be <=255\n", *argv);
p->iph.ttl = uval;
}
} else if (strcmp(*argv, "tos") == 0 ||
@@ -190,6 +158,7 @@ static int parse_args(int argc, char **argv, int cmd, struct ip_tunnel_parm *p)
matches(*argv, "dsfield") == 0) {
char *dsfield;
__u32 uval;
+
NEXT_ARG();
dsfield = *argv;
strsep(&dsfield, "/");
@@ -204,15 +173,17 @@ static int parse_args(int argc, char **argv, int cmd, struct ip_tunnel_parm *p)
p->iph.tos |= uval;
}
} else {
- if (strcmp(*argv, "name") == 0) {
+ if (strcmp(*argv, "name") == 0)
NEXT_ARG();
- } else if (matches(*argv, "help") == 0)
+ else if (matches(*argv, "help") == 0)
usage();
+
if (p->name[0])
duparg2("name", *argv);
- strncpy(p->name, *argv, IFNAMSIZ);
+ strncpy(p->name, *argv, IFNAMSIZ - 1);
if (cmd == SIOCCHGTUNNEL && count == 0) {
struct ip_tunnel_parm old_p;
+
memset(&old_p, 0, sizeof(old_p));
if (tnl_get_ioctl(*argv, &old_p))
return -1;
@@ -249,10 +220,9 @@ static int parse_args(int argc, char **argv, int cmd, struct ip_tunnel_parm *p)
}
if (medium[0]) {
- p->link = if_nametoindex(medium);
+ p->link = ll_name_to_index(medium);
if (p->link == 0) {
- fprintf(stderr, "Cannot find device \"%s\"\n",
- medium);
+ fprintf(stderr, "Cannot find device \"%s\"\n", medium);
return -1;
}
}
@@ -275,10 +245,26 @@ static int parse_args(int argc, char **argv, int cmd, struct ip_tunnel_parm *p)
return 0;
}
+/* Map the tunnel's inner protocol to the name of its base device.
+ *
+ * Returns "ip_vti0" or "tunl0" for IPPROTO_IPIP (depending on whether
+ * VTI_ISVTI is set in i_flags), "gre0" for IPPROTO_GRE, "sit0" for
+ * IPPROTO_IPV6, and NULL for any other protocol value.
+ */
+static const char *tnl_defname(const struct ip_tunnel_parm *p)
+{
+	const char *basedev = NULL;
+
+	if (p->iph.protocol == IPPROTO_IPIP)
+		basedev = (p->i_flags & VTI_ISVTI) ? "ip_vti0" : "tunl0";
+	else if (p->iph.protocol == IPPROTO_GRE)
+		basedev = "gre0";
+	else if (p->iph.protocol == IPPROTO_IPV6)
+		basedev = "sit0";
+
+	return basedev;
+}
static int do_add(int cmd, int argc, char **argv)
{
struct ip_tunnel_parm p;
+ const char *basedev;
if (parse_args(argc, argv, cmd, &p) < 0)
return -1;
@@ -288,21 +274,14 @@ static int do_add(int cmd, int argc, char **argv)
return -1;
}
- switch (p.iph.protocol) {
- case IPPROTO_IPIP:
- if (p.i_flags & VTI_ISVTI)
- return tnl_add_ioctl(cmd, "ip_vti0", p.name, &p);
- else
- return tnl_add_ioctl(cmd, "tunl0", p.name, &p);
- case IPPROTO_GRE:
- return tnl_add_ioctl(cmd, "gre0", p.name, &p);
- case IPPROTO_IPV6:
- return tnl_add_ioctl(cmd, "sit0", p.name, &p);
- default:
- fprintf(stderr, "cannot determine tunnel mode (ipip, gre, vti or sit)\n");
+ basedev = tnl_defname(&p);
+ if (!basedev) {
+ fprintf(stderr,
+ "cannot determine tunnel mode (ipip, gre, vti or sit)\n");
return -1;
}
- return -1;
+
+ return tnl_add_ioctl(cmd, basedev, p.name, &p);
}
static int do_del(int argc, char **argv)
@@ -312,20 +291,7 @@ static int do_del(int argc, char **argv)
if (parse_args(argc, argv, SIOCDELTUNNEL, &p) < 0)
return -1;
- switch (p.iph.protocol) {
- case IPPROTO_IPIP:
- if (p.i_flags & VTI_ISVTI)
- return tnl_del_ioctl("ip_vti0", p.name, &p);
- else
- return tnl_del_ioctl("tunl0", p.name, &p);
- case IPPROTO_GRE:
- return tnl_del_ioctl("gre0", p.name, &p);
- case IPPROTO_IPV6:
- return tnl_del_ioctl("sit0", p.name, &p);
- default:
- return tnl_del_ioctl(p.name, p.name, &p);
- }
- return -1;
+ return tnl_del_ioctl(tnl_defname(&p) ? : p.name, p.name, &p);
}
static void print_tunnel(struct ip_tunnel_parm *p)
@@ -339,11 +305,11 @@ static void print_tunnel(struct ip_tunnel_parm *p)
/* Do not use format_host() for local addr,
* symbolic name will not be useful.
*/
- printf("%s: %s/ip remote %s local %s ",
+ printf("%s: %s/ip remote %s local %s",
p->name,
tnl_strproto(p->iph.protocol),
- p->iph.daddr ? format_host(AF_INET, 4, &p->iph.daddr, s1, sizeof(s1)) : "any",
- p->iph.saddr ? rt_addr_n2a(AF_INET, &p->iph.saddr, s2, sizeof(s2)) : "any");
+ p->iph.daddr ? format_host(AF_INET, 4, &p->iph.daddr, s1, sizeof(s1)) : "any",
+ p->iph.saddr ? rt_addr_n2a(AF_INET, 4, &p->iph.saddr, s2, sizeof(s2)) : "any");
if (p->iph.protocol == IPPROTO_IPV6 && (p->i_flags & SIT_ISATAP)) {
struct ip_tunnel_prl prl[16];
@@ -354,81 +320,76 @@ static void print_tunnel(struct ip_tunnel_parm *p)
prl[0].addr = htonl(INADDR_ANY);
if (!tnl_prl_ioctl(SIOCGETPRL, p->name, prl))
- for (i = 1; i < sizeof(prl) / sizeof(prl[0]); i++)
- {
- if (prl[i].addr != htonl(INADDR_ANY)) {
- printf(" %s %s ",
- (prl[i].flags & PRL_DEFAULT) ? "pdr" : "pr",
- format_host(AF_INET, 4, &prl[i].addr, s1, sizeof(s1)));
+ for (i = 1; i < ARRAY_SIZE(prl); i++) {
+ if (prl[i].addr != htonl(INADDR_ANY)) {
+ printf(" %s %s ",
+ (prl[i].flags & PRL_DEFAULT) ? "pdr" : "pr",
+ format_host(AF_INET, 4, &prl[i].addr, s1, sizeof(s1)));
+ }
}
- }
}
if (p->link) {
const char *n = ll_index_to_name(p->link);
+
if (n)
- printf(" dev %s ", n);
+ printf(" dev %s", n);
}
if (p->iph.ttl)
- printf(" ttl %d ", p->iph.ttl);
+ printf(" ttl %d", p->iph.ttl);
else
- printf(" ttl inherit ");
+ printf(" ttl inherit");
if (p->iph.tos) {
SPRINT_BUF(b1);
printf(" tos");
- if (p->iph.tos&1)
+ if (p->iph.tos & 1)
printf(" inherit");
- if (p->iph.tos&~1)
- printf("%c%s ", p->iph.tos&1 ? '/' : ' ',
- rtnl_dsfield_n2a(p->iph.tos&~1, b1, sizeof(b1)));
+ if (p->iph.tos & ~1)
+ printf("%c%s ", p->iph.tos & 1 ? '/' : ' ',
+ rtnl_dsfield_n2a(p->iph.tos & ~1, b1, sizeof(b1)));
}
- if (!(p->iph.frag_off&htons(IP_DF)))
+ if (!(p->iph.frag_off & htons(IP_DF)))
printf(" nopmtudisc");
if (p->iph.protocol == IPPROTO_IPV6 && !tnl_ioctl_get_6rd(p->name, &ip6rd) && ip6rd.prefixlen) {
- printf(" 6rd-prefix %s/%u ",
+ printf(" 6rd-prefix %s/%u",
inet_ntop(AF_INET6, &ip6rd.prefix, s1, sizeof(s1)),
ip6rd.prefixlen);
if (ip6rd.relay_prefix) {
- printf("6rd-relay_prefix %s/%u ",
+ printf(" 6rd-relay_prefix %s/%u",
format_host(AF_INET, 4, &ip6rd.relay_prefix, s1, sizeof(s1)),
ip6rd.relay_prefixlen);
}
}
- if ((p->i_flags&GRE_KEY) && (p->o_flags&GRE_KEY) && p->o_key == p->i_key)
+ if ((p->i_flags & GRE_KEY) && (p->o_flags & GRE_KEY) && p->o_key == p->i_key)
printf(" key %u", ntohl(p->i_key));
- else if ((p->i_flags|p->o_flags)&GRE_KEY) {
- if (p->i_flags&GRE_KEY)
- printf(" ikey %u ", ntohl(p->i_key));
- if (p->o_flags&GRE_KEY)
- printf(" okey %u ", ntohl(p->o_key));
+ else if ((p->i_flags | p->o_flags) & GRE_KEY) {
+ if (p->i_flags & GRE_KEY)
+ printf(" ikey %u", ntohl(p->i_key));
+ if (p->o_flags & GRE_KEY)
+ printf(" okey %u", ntohl(p->o_key));
}
- if (p->i_flags&GRE_SEQ)
+ if (p->i_flags & GRE_SEQ)
printf("%s Drop packets out of sequence.", _SL_);
- if (p->i_flags&GRE_CSUM)
+ if (p->i_flags & GRE_CSUM)
printf("%s Checksum in received packet is required.", _SL_);
- if (p->o_flags&GRE_SEQ)
+ if (p->o_flags & GRE_SEQ)
printf("%s Sequence packets on output.", _SL_);
- if (p->o_flags&GRE_CSUM)
+ if (p->o_flags & GRE_CSUM)
printf("%s Checksum output packets.", _SL_);
}
static int do_tunnels_list(struct ip_tunnel_parm *p)
{
- char name[IFNAMSIZ];
- unsigned long rx_bytes, rx_packets, rx_errs, rx_drops,
- rx_fifo, rx_frame,
- tx_bytes, tx_packets, tx_errs, tx_drops,
- tx_fifo, tx_colls, tx_carrier, rx_multi;
- struct ip_tunnel_parm p1;
-
char buf[512];
+ int err = -1;
FILE *fp = fopen("/proc/net/dev", "r");
+
if (fp == NULL) {
perror("fopen");
return -1;
@@ -438,26 +399,22 @@ static int do_tunnels_list(struct ip_tunnel_parm *p)
if (!fgets(buf, sizeof(buf), fp) ||
!fgets(buf, sizeof(buf), fp)) {
fprintf(stderr, "/proc/net/dev read error\n");
- fclose(fp);
- return -1;
+ goto end;
}
while (fgets(buf, sizeof(buf), fp) != NULL) {
+ char name[IFNAMSIZ];
int index, type;
+ struct ip_tunnel_parm p1;
char *ptr;
+
buf[sizeof(buf) - 1] = 0;
- if ((ptr = strchr(buf, ':')) == NULL ||
+ ptr = strchr(buf, ':');
+ if (ptr == NULL ||
(*ptr++ = 0, sscanf(buf, "%s", name) != 1)) {
fprintf(stderr, "Wrong format for /proc/net/dev. Giving up.\n");
- fclose(fp);
- return -1;
+ goto end;
}
- if (sscanf(ptr, "%ld%ld%ld%ld%ld%ld%ld%*d%ld%ld%ld%ld%ld%ld%ld",
- &rx_bytes, &rx_packets, &rx_errs, &rx_drops,
- &rx_fifo, &rx_frame, &rx_multi,
- &tx_bytes, &tx_packets, &tx_errs, &tx_drops,
- &tx_fifo, &tx_colls, &tx_carrier) != 14)
- continue;
if (p->name[0] && strcmp(p->name, name))
continue;
index = ll_name_to_index(name);
@@ -480,48 +437,30 @@ static int do_tunnels_list(struct ip_tunnel_parm *p)
(p->i_key && p1.i_key != p->i_key))
continue;
print_tunnel(&p1);
- if (show_stats) {
- printf("%s", _SL_);
- printf("RX: Packets Bytes Errors CsumErrs OutOfSeq Mcasts%s", _SL_);
- printf(" %-10ld %-12ld %-6ld %-8ld %-8ld %-8ld%s",
- rx_packets, rx_bytes, rx_errs, rx_frame, rx_fifo, rx_multi, _SL_);
- printf("TX: Packets Bytes Errors DeadLoop NoRoute NoBufs%s", _SL_);
- printf(" %-10ld %-12ld %-6ld %-8ld %-8ld %-6ld",
- tx_packets, tx_bytes, tx_errs, tx_colls, tx_carrier, tx_drops);
- }
+ if (show_stats)
+ tnl_print_stats(ptr);
printf("\n");
}
+ err = 0;
+ end:
fclose(fp);
- return 0;
+ return err;
}
static int do_show(int argc, char **argv)
{
- int err;
struct ip_tunnel_parm p;
+ const char *basedev;
ll_init_map(&rth);
if (parse_args(argc, argv, SIOCGETTUNNEL, &p) < 0)
return -1;
- switch (p.iph.protocol) {
- case IPPROTO_IPIP:
- if (p.i_flags & VTI_ISVTI)
- err = tnl_get_ioctl(p.name[0] ? p.name : "ip_vti0", &p);
- else
- err = tnl_get_ioctl(p.name[0] ? p.name : "tunl0", &p);
- break;
- case IPPROTO_GRE:
- err = tnl_get_ioctl(p.name[0] ? p.name : "gre0", &p);
- break;
- case IPPROTO_IPV6:
- err = tnl_get_ioctl(p.name[0] ? p.name : "sit0", &p);
- break;
- default:
- do_tunnels_list(&p);
- return 0;
- }
- if (err)
+ basedev = tnl_defname(&p);
+ if (!basedev)
+ return do_tunnels_list(&p);
+
+ if (tnl_get_ioctl(p.name[0] ? p.name : basedev, &p))
return -1;
print_tunnel(&p);
@@ -562,11 +501,13 @@ static int do_prl(int argc, char **argv)
strncpy(medium, *argv, IFNAMSIZ-1);
devname++;
} else {
- fprintf(stderr,"Invalid PRL parameter \"%s\"\n", *argv);
+ fprintf(stderr,
+ "Invalid PRL parameter \"%s\"\n", *argv);
exit(-1);
}
if (count > 1) {
- fprintf(stderr,"One PRL entry at a time\n");
+ fprintf(stderr,
+ "One PRL entry at a time\n");
exit(-1);
}
argc--; argv++;
@@ -612,7 +553,8 @@ static int do_6rd(int argc, char **argv)
strncpy(medium, *argv, IFNAMSIZ-1);
devname++;
} else {
- fprintf(stderr,"Invalid 6RD parameter \"%s\"\n", *argv);
+ fprintf(stderr,
+ "Invalid 6RD parameter \"%s\"\n", *argv);
exit(-1);
}
argc--; argv++;
@@ -625,8 +567,35 @@ static int do_6rd(int argc, char **argv)
return tnl_6rd_ioctl(cmd, medium, &ip6rd);
}
+/* Report whether a "mode" argument names an IPv6-based tunnel mode.
+ *
+ * @tunnel_mode - mode string as given on the command line; only read
+ *
+ * Returns 1 when the string exactly matches one of the modes listed
+ * below, 0 otherwise.
+ */
+static int tunnel_mode_is_ipv6(const char *tunnel_mode)
+{
+	static const char * const ipv6_modes[] = {
+		"ipv6/ipv6", "ip6ip6",
+		"vti6",
+		"ip/ipv6", "ipv4/ipv6", "ipip6", "ip4ip6",
+		"ip6gre", "gre/ipv6",
+		"any/ipv6", "any"
+	};
+	/* unsigned index: ARRAY_SIZE() is presumably a sizeof-based count,
+	 * so a signed index would make the comparison mixed-sign
+	 */
+	size_t i;
+
+	for (i = 0; i < ARRAY_SIZE(ipv6_modes); i++) {
+		if (strcmp(ipv6_modes[i], tunnel_mode) == 0)
+			return 1;
+	}
+	return 0;
+}
+
int do_iptunnel(int argc, char **argv)
{
+ int i;
+
+ for (i = 0; i < argc - 1; i++) {
+ if (strcmp(argv[i], "mode") == 0) {
+ if (tunnel_mode_is_ipv6(argv[i + 1]))
+ preferred_family = AF_INET6;
+ break;
+ }
+ }
switch (preferred_family) {
case AF_UNSPEC:
preferred_family = AF_INET;
@@ -647,19 +616,19 @@ int do_iptunnel(int argc, char **argv)
if (argc > 0) {
if (matches(*argv, "add") == 0)
- return do_add(SIOCADDTUNNEL, argc-1, argv+1);
+ return do_add(SIOCADDTUNNEL, argc - 1, argv + 1);
if (matches(*argv, "change") == 0)
- return do_add(SIOCCHGTUNNEL, argc-1, argv+1);
+ return do_add(SIOCCHGTUNNEL, argc - 1, argv + 1);
if (matches(*argv, "delete") == 0)
- return do_del(argc-1, argv+1);
+ return do_del(argc - 1, argv + 1);
if (matches(*argv, "show") == 0 ||
matches(*argv, "lst") == 0 ||
matches(*argv, "list") == 0)
- return do_show(argc-1, argv+1);
+ return do_show(argc - 1, argv + 1);
if (matches(*argv, "prl") == 0)
- return do_prl(argc-1, argv+1);
+ return do_prl(argc - 1, argv + 1);
if (matches(*argv, "6rd") == 0)
- return do_6rd(argc-1, argv+1);
+ return do_6rd(argc - 1, argv + 1);
if (matches(*argv, "help") == 0)
usage();
} else
diff --git a/ip/ipxfrm.c b/ip/ipxfrm.c
index 95f91a53..e583abf7 100644
--- a/ip/ipxfrm.c
+++ b/ip/ipxfrm.c
@@ -34,7 +34,6 @@
#include <netdb.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
-#include <linux/xfrm.h>
#include "utils.h"
#include "xfrm.h"
@@ -288,10 +287,10 @@ void xfrm_id_info_print(xfrm_address_t *saddr, struct xfrm_id *id,
fputs(title, fp);
memset(abuf, '\0', sizeof(abuf));
- fprintf(fp, "src %s ", rt_addr_n2a(family,
+ fprintf(fp, "src %s ", rt_addr_n2a(family, sizeof(*saddr),
saddr, abuf, sizeof(abuf)));
memset(abuf, '\0', sizeof(abuf));
- fprintf(fp, "dst %s", rt_addr_n2a(family,
+ fprintf(fp, "dst %s", rt_addr_n2a(family, sizeof(id->daddr),
&id->daddr, abuf, sizeof(abuf)));
fprintf(fp, "%s", _SL_);
@@ -455,11 +454,15 @@ void xfrm_selector_print(struct xfrm_selector *sel, __u16 family,
fputs(prefix, fp);
memset(abuf, '\0', sizeof(abuf));
- fprintf(fp, "src %s/%u ", rt_addr_n2a(f, &sel->saddr, abuf, sizeof(abuf)),
+ fprintf(fp, "src %s/%u ",
+ rt_addr_n2a(f, sizeof(sel->saddr), &sel->saddr,
+ abuf, sizeof(abuf)),
sel->prefixlen_s);
memset(abuf, '\0', sizeof(abuf));
- fprintf(fp, "dst %s/%u ", rt_addr_n2a(f, &sel->daddr, abuf, sizeof(abuf)),
+ fprintf(fp, "dst %s/%u ",
+ rt_addr_n2a(f, sizeof(sel->daddr), &sel->daddr,
+ abuf, sizeof(abuf)),
sel->prefixlen_d);
if (sel->proto)
@@ -755,7 +758,8 @@ void xfrm_xfrma_print(struct rtattr *tb[], __u16 family,
memset(abuf, '\0', sizeof(abuf));
fprintf(fp, "addr %s",
- rt_addr_n2a(family, &e->encap_oa, abuf, sizeof(abuf)));
+ rt_addr_n2a(family, sizeof(e->encap_oa), &e->encap_oa,
+ abuf, sizeof(abuf)));
fprintf(fp, "%s", _SL_);
}
@@ -783,7 +787,7 @@ void xfrm_xfrma_print(struct rtattr *tb[], __u16 family,
memset(abuf, '\0', sizeof(abuf));
fprintf(fp, "%s",
- rt_addr_n2a(family, coa,
+ rt_addr_n2a(family, sizeof(*coa), coa,
abuf, sizeof(abuf)));
fprintf(fp, "%s", _SL_);
}
@@ -1339,6 +1343,7 @@ static int xfrm_selector_upspec_parse(struct xfrm_selector *sel,
case IPPROTO_UDP:
case IPPROTO_SCTP:
case IPPROTO_DCCP:
+ case IPPROTO_IP: /* to allow shared SA for different protocols */
break;
default:
fprintf(stderr, "\"sport\" and \"dport\" are invalid with PROTO value \"%s\"\n", strxf_proto(sel->proto));
diff --git a/ip/link_gre.c b/ip/link_gre.c
index 1d783876..c85741f5 100644
--- a/ip/link_gre.c
+++ b/ip/link_gre.c
@@ -53,7 +53,7 @@ static int gre_parse_opt(struct link_util *lu, int argc, char **argv,
struct {
struct nlmsghdr n;
struct ifinfomsg i;
- char buf[1024];
+ char buf[16384];
} req;
struct ifinfomsg *ifi = (struct ifinfomsg *)(n + 1);
struct rtattr *tb[IFLA_MAX + 1];
@@ -74,6 +74,7 @@ static int gre_parse_opt(struct link_util *lu, int argc, char **argv,
__u16 encapflags = 0;
__u16 encapsport = 0;
__u16 encapdport = 0;
+ __u8 metadata = 0;
if (!(n->nlmsg_flags & NLM_F_CREATE)) {
memset(&req, 0, sizeof(req));
@@ -84,7 +85,7 @@ static int gre_parse_opt(struct link_util *lu, int argc, char **argv,
req.i.ifi_family = preferred_family;
req.i.ifi_index = ifi->ifi_index;
- if (rtnl_talk(&rth, &req.n, 0, 0, &req.n) < 0) {
+ if (rtnl_talk(&rth, &req.n, &req.n, sizeof(req)) < 0) {
get_failed:
fprintf(stderr,
"Failed to get existing tunnel info.\n");
@@ -148,6 +149,9 @@ get_failed:
encapsport = rta_getattr_u16(greinfo[IFLA_GRE_ENCAP_SPORT]);
if (greinfo[IFLA_GRE_ENCAP_DPORT])
encapdport = rta_getattr_u16(greinfo[IFLA_GRE_ENCAP_DPORT]);
+
+ if (greinfo[IFLA_GRE_COLLECT_METADATA])
+ metadata = 1;
}
while (argc > 0) {
@@ -291,6 +295,8 @@ get_failed:
encapflags |= TUNNEL_ENCAP_FLAG_REMCSUM;
} else if (strcmp(*argv, "noencap-remcsum") == 0) {
encapflags |= ~TUNNEL_ENCAP_FLAG_REMCSUM;
+ } else if (strcmp(*argv, "external") == 0) {
+ metadata = 1;
} else
usage();
argc--; argv++;
@@ -325,6 +331,8 @@ get_failed:
addattr16(n, 1024, IFLA_GRE_ENCAP_FLAGS, encapflags);
addattr16(n, 1024, IFLA_GRE_ENCAP_SPORT, htons(encapsport));
addattr16(n, 1024, IFLA_GRE_ENCAP_DPORT, htons(encapdport));
+ if (metadata)
+ addattr_l(n, 1024, IFLA_GRE_COLLECT_METADATA, NULL, 0);
return 0;
}
@@ -413,6 +421,9 @@ static void gre_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[])
if (oflags & GRE_CSUM)
fputs("ocsum ", f);
+ if (tb[IFLA_GRE_COLLECT_METADATA])
+ fputs("external ", f);
+
if (tb[IFLA_GRE_ENCAP_TYPE] &&
*(__u16 *)RTA_DATA(tb[IFLA_GRE_ENCAP_TYPE]) != TUNNEL_ENCAP_NONE) {
__u16 type = rta_getattr_u16(tb[IFLA_GRE_ENCAP_TYPE]);
diff --git a/ip/link_gre6.c b/ip/link_gre6.c
index f18919cc..e00ea091 100644
--- a/ip/link_gre6.c
+++ b/ip/link_gre6.c
@@ -91,7 +91,7 @@ static int gre_parse_opt(struct link_util *lu, int argc, char **argv,
req.i.ifi_family = preferred_family;
req.i.ifi_index = ifi->ifi_index;
- if (rtnl_talk(&rth, &req.n, 0, 0, &req.n) < 0) {
+ if (rtnl_talk(&rth, &req.n, &req.n, sizeof(req)) < 0) {
get_failed:
fprintf(stderr,
"Failed to get existing tunnel info.\n");
diff --git a/ip/link_ip6tnl.c b/ip/link_ip6tnl.c
index 5ed3d5a2..f771c75d 100644
--- a/ip/link_ip6tnl.c
+++ b/ip/link_ip6tnl.c
@@ -89,7 +89,7 @@ static int ip6tunnel_parse_opt(struct link_util *lu, int argc, char **argv,
req.i.ifi_family = preferred_family;
req.i.ifi_index = ifi->ifi_index;
- if (rtnl_talk(&rth, &req.n, 0, 0, &req.n) < 0) {
+ if (rtnl_talk(&rth, &req.n, &req.n, sizeof(req)) < 0) {
get_failed:
fprintf(stderr,
"Failed to get existing tunnel info.\n");
@@ -285,6 +285,7 @@ static void ip6tunnel_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb
if (tb[IFLA_IPTUN_REMOTE]) {
fprintf(f, "remote %s ",
rt_addr_n2a(AF_INET6,
+ RTA_PAYLOAD(tb[IFLA_IPTUN_REMOTE]),
RTA_DATA(tb[IFLA_IPTUN_REMOTE]),
s1, sizeof(s1)));
}
@@ -292,6 +293,7 @@ static void ip6tunnel_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb
if (tb[IFLA_IPTUN_LOCAL]) {
fprintf(f, "local %s ",
rt_addr_n2a(AF_INET6,
+ RTA_PAYLOAD(tb[IFLA_IPTUN_LOCAL]),
RTA_DATA(tb[IFLA_IPTUN_LOCAL]),
s1, sizeof(s1)));
}
diff --git a/ip/link_iptnl.c b/ip/link_iptnl.c
index cab174f9..9d6bc986 100644
--- a/ip/link_iptnl.c
+++ b/ip/link_iptnl.c
@@ -91,7 +91,7 @@ static int iptunnel_parse_opt(struct link_util *lu, int argc, char **argv,
req.i.ifi_family = preferred_family;
req.i.ifi_index = ifi->ifi_index;
- if (rtnl_talk(&rth, &req.n, 0, 0, &req.n) < 0) {
+ if (rtnl_talk(&rth, &req.n, &req.n, sizeof(req)) < 0) {
get_failed:
fprintf(stderr,
"Failed to get existing tunnel info.\n");
diff --git a/ip/link_vti.c b/ip/link_vti.c
index 59ac4c48..f3fea338 100644
--- a/ip/link_vti.c
+++ b/ip/link_vti.c
@@ -71,7 +71,7 @@ static int vti_parse_opt(struct link_util *lu, int argc, char **argv,
req.i.ifi_family = preferred_family;
req.i.ifi_index = ifi->ifi_index;
- if (rtnl_talk(&rth, &req.n, 0, 0, &req.n) < 0) {
+ if (rtnl_talk(&rth, &req.n, &req.n, sizeof(req)) < 0) {
get_failed:
fprintf(stderr,
"Failed to get existing tunnel info.\n");
diff --git a/ip/link_vti6.c b/ip/link_vti6.c
index 282896df..c146f791 100644
--- a/ip/link_vti6.c
+++ b/ip/link_vti6.c
@@ -67,7 +67,7 @@ static int vti6_parse_opt(struct link_util *lu, int argc, char **argv,
req.i.ifi_family = preferred_family;
req.i.ifi_index = ifi->ifi_index;
- if (rtnl_talk(&rth, &req.n, 0, 0, &req.n) < 0) {
+ if (rtnl_talk(&rth, &req.n, &req.n, sizeof(req)) < 0) {
get_failed:
fprintf(stderr,
"Failed to get existing tunnel info.\n");
diff --git a/ip/rtm_map.c b/ip/rtm_map.c
index 21e818b4..1d7d2c7e 100644
--- a/ip/rtm_map.c
+++ b/ip/rtm_map.c
@@ -93,7 +93,7 @@ int rtnl_rtntype_a2n(int *id, char *arg)
return 0;
}
-int get_rt_realms(__u32 *realms, char *arg)
+static int get_rt_realms(__u32 *realms, char *arg)
{
__u32 realm = 0;
char *p = strchr(arg, '/');
@@ -114,3 +114,11 @@ int get_rt_realms(__u32 *realms, char *arg)
*realms |= realm;
return 0;
}
+
+/* Parse a realms argument, falling back to a plain number.
+ *
+ * get_rt_realms() is tried first; only when it reports failure (non-zero)
+ * is the same string handed to get_unsigned() with base 0.
+ * Returns 0 when get_rt_realms() succeeds, otherwise whatever
+ * get_unsigned() returns.
+ */
+int get_rt_realms_or_raw(__u32 *realms, char *arg)
+{
+	int err = get_rt_realms(realms, arg);
+
+	if (err)
+		err = get_unsigned(realms, arg, 0);
+	return err;
+}
diff --git a/ip/rtmon.c b/ip/rtmon.c
index ff685e53..42b24fb5 100644
--- a/ip/rtmon.c
+++ b/ip/rtmon.c
@@ -45,8 +45,8 @@ static void write_stamp(FILE *fp)
fwrite((void*)n1, 1, NLMSG_ALIGN(n1->nlmsg_len), fp);
}
-static int dump_msg(const struct sockaddr_nl *who, struct nlmsghdr *n,
- void *arg)
+static int dump_msg(const struct sockaddr_nl *who, struct rtnl_ctrl_data *ctrl,
+ struct nlmsghdr *n, void *arg)
{
FILE *fp = (FILE*)arg;
if (!init_phase)
@@ -56,6 +56,12 @@ static int dump_msg(const struct sockaddr_nl *who, struct nlmsghdr *n,
return 0;
}
+/* Three-argument adapter around dump_msg() for callers that supply no
+ * rtnl_ctrl_data: the ctrl argument is passed as NULL.
+ */
+static int dump_msg2(const struct sockaddr_nl *who,
+		     struct nlmsghdr *n, void *arg)
+{
+	struct rtnl_ctrl_data *no_ctrl = NULL;
+
+	return dump_msg(who, no_ctrl, n, arg);
+}
+
static void usage(void)
{
fprintf(stderr, "Usage: rtmon file FILE [ all | LISTofOBJECTS]\n");
@@ -163,7 +169,7 @@ main(int argc, char **argv)
write_stamp(fp);
- if (rtnl_dump_filter(&rth, dump_msg, fp) < 0) {
+ if (rtnl_dump_filter(&rth, dump_msg2, fp) < 0) {
fprintf(stderr, "Dump terminated\n");
return 1;
}
diff --git a/ip/tcp_metrics.c b/ip/tcp_metrics.c
index bbbb4cc0..57b605fd 100644
--- a/ip/tcp_metrics.c
+++ b/ip/tcp_metrics.c
@@ -467,10 +467,10 @@ static int tcpm_do_cmd(int cmd, int argc, char **argv)
}
if (ack) {
- if (rtnl_talk(&grth, &req.n, 0, 0, NULL) < 0)
+ if (rtnl_talk(&grth, &req.n, NULL, 0) < 0)
return -2;
} else if (atype >= 0) {
- if (rtnl_talk(&grth, &req.n, 0, 0, &req.n) < 0)
+ if (rtnl_talk(&grth, &req.n, &req.n, sizeof(req)) < 0)
return -2;
if (process_msg(NULL, &req.n, stdout) < 0) {
fprintf(stderr, "Dump terminated\n");
@@ -508,4 +508,3 @@ int do_tcp_metrics(int argc, char **argv)
"try \"ip tcp_metrics help\".\n", *argv);
exit(-1);
}
-
diff --git a/ip/tunnel.c b/ip/tunnel.c
index 33c78e3f..39f825ba 100644
--- a/ip/tunnel.c
+++ b/ip/tunnel.c
@@ -73,7 +73,13 @@ int tnl_get_ioctl(const char *basedev, void *p)
strncpy(ifr.ifr_name, basedev, IFNAMSIZ);
ifr.ifr_ifru.ifru_data = (void*)p;
+
fd = socket(preferred_family, SOCK_DGRAM, 0);
+ if (fd < 0) {
+ fprintf(stderr, "create socket failed: %s\n", strerror(errno));
+ return -1;
+ }
+
err = ioctl(fd, SIOCGETTUNNEL, &ifr);
if (err)
fprintf(stderr, "get tunnel \"%s\" failed: %s\n", basedev,
@@ -94,7 +100,13 @@ int tnl_add_ioctl(int cmd, const char *basedev, const char *name, void *p)
else
strncpy(ifr.ifr_name, basedev, IFNAMSIZ);
ifr.ifr_ifru.ifru_data = p;
+
fd = socket(preferred_family, SOCK_DGRAM, 0);
+ if (fd < 0) {
+ fprintf(stderr, "create socket failed: %s\n", strerror(errno));
+ return -1;
+ }
+
err = ioctl(fd, cmd, &ifr);
if (err)
fprintf(stderr, "add tunnel \"%s\" failed: %s\n", ifr.ifr_name,
@@ -115,7 +127,13 @@ int tnl_del_ioctl(const char *basedev, const char *name, void *p)
strncpy(ifr.ifr_name, basedev, IFNAMSIZ);
ifr.ifr_ifru.ifru_data = p;
+
fd = socket(preferred_family, SOCK_DGRAM, 0);
+ if (fd < 0) {
+ fprintf(stderr, "create socket failed: %s\n", strerror(errno));
+ return -1;
+ }
+
err = ioctl(fd, SIOCDELTUNNEL, &ifr);
if (err)
fprintf(stderr, "delete tunnel \"%s\" failed: %s\n",
@@ -133,7 +151,13 @@ static int tnl_gen_ioctl(int cmd, const char *name,
strncpy(ifr.ifr_name, name, IFNAMSIZ);
ifr.ifr_ifru.ifru_data = p;
+
fd = socket(preferred_family, SOCK_DGRAM, 0);
+ if (fd < 0) {
+ fprintf(stderr, "create socket failed: %s\n", strerror(errno));
+ return -1;
+ }
+
err = ioctl(fd, cmd, &ifr);
if (err && errno != skiperr)
fprintf(stderr, "%s: ioctl %x failed: %s\n", name,
@@ -156,3 +180,46 @@ int tnl_ioctl_get_6rd(const char *name, void *p)
{
return tnl_gen_ioctl(SIOCGET6RD, name, p, EINVAL);
}
+
+/* Parse a tunnel key given either in dotted form or as a number.
+ *
+ * @name - option name, used only in the error message
+ * @key  - the key string to parse
+ *
+ * A string containing '.' is converted by get_addr32(); anything else goes
+ * through get_unsigned() (base 0) and then htonl().  If get_unsigned()
+ * reports an error, a diagnostic is written to stderr and the process
+ * terminates via exit(-1).
+ */
+__be32 tnl_parse_key(const char *name, const char *key)
+{
+	unsigned parsed;
+
+	if (strchr(key, '.') != NULL)
+		return get_addr32(key);
+
+	if (get_unsigned(&parsed, key, 0) >= 0)
+		return htonl(parsed);
+
+	fprintf(stderr, "invalid value for \"%s\": \"%s\";", name, key);
+	fprintf(stderr, " it should be an unsigned integer\n");
+	exit(-1);
+}
+
+/* tnl_print_stats - print tunnel statistics
+ *
+ * @buf - tunnel interface's line in /proc/net/dev,
+ *        starting past the interface name and following colon
+ *
+ * Prints nothing when the line does not yield all 14 expected counters.
+ */
+void tnl_print_stats(const char *buf)
+{
+	unsigned long rx_bytes, rx_packets, rx_errs, rx_drops,
+		rx_fifo, rx_frame,
+		tx_bytes, tx_packets, tx_errs, tx_drops,
+		tx_fifo, tx_colls, tx_carrier, rx_multi;
+
+	/* the eighth field is skipped via %*d and is not stored */
+	if (sscanf(buf, "%lu%lu%lu%lu%lu%lu%lu%*d%lu%lu%lu%lu%lu%lu%lu",
+		   &rx_bytes, &rx_packets, &rx_errs, &rx_drops,
+		   &rx_fifo, &rx_frame, &rx_multi,
+		   &tx_bytes, &tx_packets, &tx_errs, &tx_drops,
+		   &tx_fifo, &tx_colls, &tx_carrier) != 14)
+		return;
+
+	printf("%s", _SL_);
+	printf("RX: Packets Bytes Errors CsumErrs OutOfSeq Mcasts%s", _SL_);
+	/* counters are unsigned long: print with %lu so the conversion
+	 * matches the argument type and large values do not show up negative
+	 */
+	printf(" %-10lu %-12lu %-6lu %-8lu %-8lu %-8lu%s",
+	       rx_packets, rx_bytes, rx_errs, rx_frame, rx_fifo, rx_multi, _SL_);
+	printf("TX: Packets Bytes Errors DeadLoop NoRoute NoBufs%s", _SL_);
+	printf(" %-10lu %-12lu %-6lu %-8lu %-8lu %-6lu",
+	       tx_packets, tx_bytes, tx_errs, tx_colls, tx_carrier, tx_drops);
+}
diff --git a/ip/tunnel.h b/ip/tunnel.h
index 9c2f5d29..9a03c0d7 100644
--- a/ip/tunnel.h
+++ b/ip/tunnel.h
@@ -31,5 +31,7 @@ int tnl_del_ioctl(const char *basedev, const char *name, void *p);
int tnl_prl_ioctl(int cmd, const char *name, void *p);
int tnl_6rd_ioctl(int cmd, const char *name, void *p);
int tnl_ioctl_get_6rd(const char *name, void *p);
+__be32 tnl_parse_key(const char *name, const char *key);
+void tnl_print_stats(const char *buf);
#endif
diff --git a/ip/xfrm_monitor.c b/ip/xfrm_monitor.c
index 50116a7b..e6e991af 100644
--- a/ip/xfrm_monitor.c
+++ b/ip/xfrm_monitor.c
@@ -27,16 +27,18 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-#include <linux/xfrm.h>
+#include <netinet/in.h>
+
#include "utils.h"
#include "xfrm.h"
#include "ip_common.h"
static void usage(void) __attribute__((noreturn));
+int listen_all_nsid;
static void usage(void)
{
- fprintf(stderr, "Usage: ip xfrm monitor [ all | OBJECTS | help ]\n");
+ fprintf(stderr, "Usage: ip xfrm monitor [all-nsid] [ all | OBJECTS | help ]\n");
fprintf(stderr, "OBJECTS := { acquire | expire | SA | aevent | policy | report }\n");
exit(-1);
}
@@ -227,7 +229,8 @@ static void xfrm_usersa_print(const struct xfrm_usersa_id *sa_id, __u32 reqid, F
buf[0] = 0;
fprintf(fp, "dst %s ",
- rt_addr_n2a(sa_id->family, &sa_id->daddr, buf, sizeof(buf)));
+ rt_addr_n2a(sa_id->family, sizeof(sa_id->daddr), &sa_id->daddr,
+ buf, sizeof(buf)));
fprintf(fp, " reqid 0x%x", reqid);
@@ -246,7 +249,8 @@ static int xfrm_ae_print(const struct sockaddr_nl *who,
xfrm_ae_flags_print(id->flags, arg);
fprintf(fp,"\n\t");
memset(abuf, '\0', sizeof(abuf));
- fprintf(fp, "src %s ", rt_addr_n2a(id->sa_id.family, &id->saddr,
+ fprintf(fp, "src %s ", rt_addr_n2a(id->sa_id.family,
+ sizeof(id->saddr), &id->saddr,
abuf, sizeof(abuf)));
xfrm_usersa_print(&id->sa_id, id->reqid, fp);
@@ -262,7 +266,7 @@ static void xfrm_print_addr(FILE *fp, int family, xfrm_address_t *a)
char buf[256];
buf[0] = 0;
- fprintf(fp, "%s", rt_addr_n2a(family, a, buf, sizeof(buf)));
+ fprintf(fp, "%s", rt_addr_n2a(family, sizeof(*a), a, buf, sizeof(buf)));
}
static int xfrm_mapping_print(const struct sockaddr_nl *who,
@@ -286,6 +290,7 @@ static int xfrm_mapping_print(const struct sockaddr_nl *who,
}
static int xfrm_accept_msg(const struct sockaddr_nl *who,
+ struct rtnl_ctrl_data *ctrl,
struct nlmsghdr *n, void *arg)
{
FILE *fp = (FILE*)arg;
@@ -293,6 +298,13 @@ static int xfrm_accept_msg(const struct sockaddr_nl *who,
if (timestamp)
print_timestamp(fp);
+ if (listen_all_nsid) {
+ if (ctrl == NULL || ctrl->nsid < 0)
+ fprintf(fp, "[nsid current]");
+ else
+ fprintf(fp, "[nsid %d]", ctrl->nsid);
+ }
+
switch (n->nlmsg_type) {
case XFRM_MSG_NEWSA:
case XFRM_MSG_DELSA:
@@ -355,6 +367,8 @@ int do_xfrm_monitor(int argc, char **argv)
if (matches(*argv, "file") == 0) {
NEXT_ARG();
file = *argv;
+ } else if (matches(*argv, "all-nsid") == 0) {
+ listen_all_nsid = 1;
} else if (matches(*argv, "acquire") == 0) {
lacquire=1;
groups = 0;
@@ -397,16 +411,22 @@ int do_xfrm_monitor(int argc, char **argv)
if (file) {
FILE *fp;
+ int err;
+
fp = fopen(file, "r");
if (fp == NULL) {
perror("Cannot fopen");
exit(-1);
}
- return rtnl_from_file(fp, xfrm_accept_msg, (void*)stdout);
+ err = rtnl_from_file(fp, xfrm_accept_msg, stdout);
+ fclose(fp);
+ return err;
}
if (rtnl_open_byproto(&rth, groups, NETLINK_XFRM) < 0)
exit(1);
+ if (listen_all_nsid && rtnl_listen_all_nsid(&rth) < 0)
+ exit(1);
if (rtnl_listen(&rth, xfrm_accept_msg, (void*)stdout) < 0)
exit(2);
diff --git a/ip/xfrm_policy.c b/ip/xfrm_policy.c
index 2337d352..efea1e8d 100644
--- a/ip/xfrm_policy.c
+++ b/ip/xfrm_policy.c
@@ -29,7 +29,6 @@
#include <string.h>
#include <netdb.h>
#include <linux/netlink.h>
-#include <linux/xfrm.h>
#include "utils.h"
#include "xfrm.h"
#include "ip_common.h"
@@ -63,7 +62,8 @@ static void usage(void)
fprintf(stderr, " [ index INDEX ] [ ptype PTYPE ] [ action ACTION ] [ priority PRIORITY ]\n");
fprintf(stderr, " [ flag FLAG-LIST ]\n");
fprintf(stderr, "Usage: ip xfrm policy flush [ ptype PTYPE ]\n");
- fprintf(stderr, "Usage: ip xfrm count\n");
+ fprintf(stderr, "Usage: ip xfrm policy count\n");
+ fprintf(stderr, "Usage: ip xfrm policy set [ hthresh4 LBITS RBITS ] [ hthresh6 LBITS RBITS ]\n");
fprintf(stderr, "SELECTOR := [ src ADDR[/PLEN] ] [ dst ADDR[/PLEN] ] [ dev DEV ] [ UPSPEC ]\n");
fprintf(stderr, "UPSPEC := proto { { ");
fprintf(stderr, "%s | ", strxf_proto(IPPROTO_TCP));
@@ -392,7 +392,7 @@ static int xfrm_policy_modify(int cmd, unsigned flags, int argc, char **argv)
if (req.xpinfo.sel.family == AF_UNSPEC)
req.xpinfo.sel.family = AF_INET;
- if (rtnl_talk(&rth, &req.n, 0, 0, NULL) < 0)
+ if (rtnl_talk(&rth, &req.n, NULL, 0) < 0)
exit(2);
rtnl_close(&rth);
@@ -554,7 +554,7 @@ int xfrm_policy_print(const struct sockaddr_nl *who, struct nlmsghdr *n,
}
static int xfrm_policy_get_or_delete(int argc, char **argv, int delete,
- void *res_nlbuf)
+ void *res_nlbuf, size_t res_size)
{
struct rtnl_handle rth;
struct {
@@ -669,7 +669,7 @@ static int xfrm_policy_get_or_delete(int argc, char **argv, int delete,
(void *)&ctx, ctx.sctx.len);
}
- if (rtnl_talk(&rth, &req.n, 0, 0, res_nlbuf) < 0)
+ if (rtnl_talk(&rth, &req.n, res_nlbuf, res_size) < 0)
exit(2);
rtnl_close(&rth);
@@ -679,7 +679,7 @@ static int xfrm_policy_get_or_delete(int argc, char **argv, int delete,
static int xfrm_policy_delete(int argc, char **argv)
{
- return xfrm_policy_get_or_delete(argc, argv, 1, NULL);
+ return xfrm_policy_get_or_delete(argc, argv, 1, NULL, 0);
}
static int xfrm_policy_get(int argc, char **argv)
@@ -689,7 +689,7 @@ static int xfrm_policy_get(int argc, char **argv)
memset(buf, 0, sizeof(buf));
- xfrm_policy_get_or_delete(argc, argv, 0, n);
+ xfrm_policy_get_or_delete(argc, argv, 0, n, sizeof(buf));
if (xfrm_policy_print(NULL, n, (void*)stdout) < 0) {
fprintf(stderr, "An error :-)\n");
@@ -847,13 +847,23 @@ static int xfrm_policy_list_or_deleteall(int argc, char **argv, int deleteall)
xb.rth = &rth;
for (i = 0; ; i++) {
+ struct {
+ struct nlmsghdr n;
+ char buf[NLMSG_BUF_SIZE];
+ } req = {
+ .n.nlmsg_len = NLMSG_HDRLEN,
+ .n.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
+ .n.nlmsg_type = XFRM_MSG_GETPOLICY,
+ .n.nlmsg_seq = rth.dump = ++rth.seq,
+ };
+
xb.offset = 0;
xb.nlmsg_count = 0;
if (show_stats > 1)
fprintf(stderr, "Delete-all round = %d\n", i);
- if (rtnl_wilddump_request(&rth, preferred_family, XFRM_MSG_GETPOLICY) < 0) {
+ if (rtnl_send(&rth, (void *)&req, req.n.nlmsg_len) < 0) {
perror("Cannot send dump request");
exit(1);
}
@@ -879,7 +889,17 @@ static int xfrm_policy_list_or_deleteall(int argc, char **argv, int deleteall)
xb.nlmsg_count = 0;
}
} else {
- if (rtnl_wilddump_request(&rth, preferred_family, XFRM_MSG_GETPOLICY) < 0) {
+ struct {
+ struct nlmsghdr n;
+ char buf[NLMSG_BUF_SIZE];
+ } req = {
+ .n.nlmsg_len = NLMSG_HDRLEN,
+ .n.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
+ .n.nlmsg_type = XFRM_MSG_GETPOLICY,
+ .n.nlmsg_seq = rth.dump = ++rth.seq,
+ };
+
+ if (rtnl_send(&rth, (void *)&req, req.n.nlmsg_len) < 0) {
perror("Cannot send dump request");
exit(1);
}
@@ -934,7 +954,7 @@ static int print_spdinfo( struct nlmsghdr *n, void *arg)
fprintf(fp,")");
}
- fprintf(fp,"\n");
+ fprintf(fp, "%s", _SL_);
}
if (show_stats > 1) {
struct xfrmu_spdhinfo *sh;
@@ -948,13 +968,109 @@ static int print_spdinfo( struct nlmsghdr *n, void *arg)
fprintf(fp,"\t SPD buckets:");
fprintf(fp," count %d", sh->spdhcnt);
fprintf(fp," Max %d", sh->spdhmcnt);
+ fprintf(fp, "%s", _SL_);
+ }
+ if (tb[XFRMA_SPD_IPV4_HTHRESH]) {
+ struct xfrmu_spdhthresh *th;
+ if (RTA_PAYLOAD(tb[XFRMA_SPD_IPV4_HTHRESH]) < sizeof(*th)) {
+ fprintf(stderr, "SPDinfo: Wrong len %d\n", len);
+ return -1;
+ }
+ th = RTA_DATA(tb[XFRMA_SPD_IPV4_HTHRESH]);
+ fprintf(fp,"\t SPD IPv4 thresholds:");
+ fprintf(fp," local %d", th->lbits);
+ fprintf(fp," remote %d", th->rbits);
+ fprintf(fp, "%s", _SL_);
+
+ }
+ if (tb[XFRMA_SPD_IPV6_HTHRESH]) {
+ struct xfrmu_spdhthresh *th;
+ if (RTA_PAYLOAD(tb[XFRMA_SPD_IPV6_HTHRESH]) < sizeof(*th)) {
+ fprintf(stderr, "SPDinfo: Wrong len %d\n", len);
+ return -1;
+ }
+ th = RTA_DATA(tb[XFRMA_SPD_IPV6_HTHRESH]);
+ fprintf(fp,"\t SPD IPv6 thresholds:");
+ fprintf(fp," local %d", th->lbits);
+ fprintf(fp," remote %d", th->rbits);
+ fprintf(fp, "%s", _SL_);
}
}
- fprintf(fp,"\n");
+
+ if (oneline)
+ fprintf(fp, "\n");
return 0;
}
+/* Handler for "ip xfrm policy set": parses "hthresh4 LBITS RBITS" and/or
+ * "hthresh6 LBITS RBITS" and sends them in a single XFRM_MSG_NEWSPDINFO request.
+ * Calls exit(1) / exit(2) if opening or talking to NETLINK_XFRM fails; else returns 0. */
+static int xfrm_spd_setinfo(int argc, char **argv)
+{
+ struct rtnl_handle rth;
+ struct {
+ struct nlmsghdr n;
+ __u32 flags;
+ char buf[RTA_BUF_SIZE];
+ } req;
+ char *thr4 = NULL;
+ char *thr6 = NULL;
+
+ memset(&req, 0, sizeof(req));
+ /* the initial length covers the netlink header plus the __u32 flags word */
+ req.n.nlmsg_len = NLMSG_LENGTH(sizeof(__u32));
+ req.n.nlmsg_flags = NLM_F_REQUEST;
+ req.n.nlmsg_type = XFRM_MSG_NEWSPDINFO;
+ req.flags = 0XFFFFFFFF;
+
+ while (argc > 0) {
+ if (strcmp(*argv, "hthresh4") == 0) {
+ struct xfrmu_spdhthresh thr;
+ /* thr4 is set once the keyword has been seen; a repeat is reported via duparg() */
+ if (thr4)
+ duparg("hthresh4", *argv);
+ thr4 = *argv;
+ NEXT_ARG();
+ if (get_u8(&thr.lbits, *argv, 0) || thr.lbits > 32)
+ invarg("hthresh4 LBITS value is invalid", *argv);
+ NEXT_ARG();
+ if (get_u8(&thr.rbits, *argv, 0) || thr.rbits > 32)
+ invarg("hthresh4 RBITS value is invalid", *argv);
+ /* both values travel together as one XFRMA_SPD_IPV4_HTHRESH attribute */
+ addattr_l(&req.n, sizeof(req), XFRMA_SPD_IPV4_HTHRESH,
+ (void *)&thr, sizeof(thr));
+ } else if (strcmp(*argv, "hthresh6") == 0) {
+ struct xfrmu_spdhthresh thr;
+
+ if (thr6)
+ duparg("hthresh6", *argv);
+ thr6 = *argv;
+ NEXT_ARG();
+ if (get_u8(&thr.lbits, *argv, 0) || thr.lbits > 128)
+ invarg("hthresh6 LBITS value is invalid", *argv);
+ NEXT_ARG();
+ if (get_u8(&thr.rbits, *argv, 0) || thr.rbits > 128)
+ invarg("hthresh6 RBITS value is invalid", *argv);
+
+ addattr_l(&req.n, sizeof(req), XFRMA_SPD_IPV6_HTHRESH,
+ (void *)&thr, sizeof(thr));
+ } else {
+ invarg("unknown", *argv);
+ }
+
+ argc--; argv++;
+ }
+
+ if (rtnl_open_byproto(&rth, 0, NETLINK_XFRM) < 0)
+ exit(1);
+ /* NULL answer buffer with length 0: no reply is copied back to the caller */
+ if (rtnl_talk(&rth, &req.n, NULL, 0) < 0)
+ exit(2);
+ rtnl_close(&rth);
+ return 0;
+}
+
static int xfrm_spd_getinfo(int argc, char **argv)
{
struct rtnl_handle rth;
@@ -974,7 +1090,7 @@ static int xfrm_spd_getinfo(int argc, char **argv)
if (rtnl_open_byproto(&rth, 0, NETLINK_XFRM) < 0)
exit(1);
- if (rtnl_talk(&rth, &req.n, 0, 0, &req.n) < 0)
+ if (rtnl_talk(&rth, &req.n, &req.n, sizeof(req)) < 0)
exit(2);
print_spdinfo(&req.n, (void*)stdout);
@@ -1026,7 +1142,7 @@ static int xfrm_policy_flush(int argc, char **argv)
if (show_stats > 1)
fprintf(stderr, "Flush policy\n");
- if (rtnl_talk(&rth, &req.n, 0, 0, NULL) < 0)
+ if (rtnl_talk(&rth, &req.n, NULL, 0) < 0)
exit(2);
rtnl_close(&rth);
@@ -1058,6 +1174,8 @@ int do_xfrm_policy(int argc, char **argv)
return xfrm_policy_flush(argc-1, argv+1);
if (matches(*argv, "count") == 0)
return xfrm_spd_getinfo(argc, argv);
+ if (matches(*argv, "set") == 0)
+ return xfrm_spd_setinfo(argc-1, argv+1);
if (matches(*argv, "help") == 0)
usage();
fprintf(stderr, "Command \"%s\" is unknown, try \"ip xfrm policy help\".\n", *argv);
diff --git a/ip/xfrm_state.c b/ip/xfrm_state.c
index 2ad3d8d3..b5734da2 100644
--- a/ip/xfrm_state.c
+++ b/ip/xfrm_state.c
@@ -28,7 +28,6 @@
#include <stdlib.h>
#include <string.h>
#include <netdb.h>
-#include <linux/xfrm.h>
#include "utils.h"
#include "xfrm.h"
#include "ip_common.h"
@@ -688,7 +687,7 @@ static int xfrm_state_modify(int cmd, unsigned flags, int argc, char **argv)
if (req.xsinfo.family == AF_UNSPEC)
req.xsinfo.family = AF_INET;
- if (rtnl_talk(&rth, &req.n, 0, 0, NULL) < 0)
+ if (rtnl_talk(&rth, &req.n, NULL, 0) < 0)
exit(2);
rtnl_close(&rth);
@@ -825,7 +824,7 @@ static int xfrm_state_allocspi(int argc, char **argv)
req.xspi.info.family = AF_INET;
- if (rtnl_talk(&rth, &req.n, 0, 0, res_n) < 0)
+ if (rtnl_talk(&rth, &req.n, res_n, sizeof(res_buf)) < 0)
exit(2);
if (xfrm_state_print(NULL, res_n, (void*)stdout) < 0) {
@@ -1015,7 +1014,7 @@ static int xfrm_state_get_or_delete(int argc, char **argv, int delete)
req.xsid.family = AF_INET;
if (delete) {
- if (rtnl_talk(&rth, &req.n, 0, 0, NULL) < 0)
+ if (rtnl_talk(&rth, &req.n, NULL, 0) < 0)
exit(2);
} else {
char buf[NLMSG_BUF_SIZE];
@@ -1023,7 +1022,7 @@ static int xfrm_state_get_or_delete(int argc, char **argv, int delete)
memset(buf, 0, sizeof(buf));
- if (rtnl_talk(&rth, &req.n, 0, 0, res_n) < 0)
+ if (rtnl_talk(&rth, &req.n, res_n, sizeof(req)) < 0)
exit(2);
if (xfrm_state_print(NULL, res_n, (void*)stdout) < 0) {
@@ -1148,13 +1147,23 @@ static int xfrm_state_list_or_deleteall(int argc, char **argv, int deleteall)
xb.rth = &rth;
for (i = 0; ; i++) {
+ struct {
+ struct nlmsghdr n;
+ char buf[NLMSG_BUF_SIZE];
+ } req = {
+ .n.nlmsg_len = NLMSG_HDRLEN,
+ .n.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
+ .n.nlmsg_type = XFRM_MSG_GETSA,
+ .n.nlmsg_seq = rth.dump = ++rth.seq,
+ };
+
xb.offset = 0;
xb.nlmsg_count = 0;
if (show_stats > 1)
fprintf(stderr, "Delete-all round = %d\n", i);
- if (rtnl_wilddump_request(&rth, preferred_family, XFRM_MSG_GETSA) < 0) {
+ if (rtnl_send(&rth, (void *)&req, req.n.nlmsg_len) < 0) {
perror("Cannot send dump request");
exit(1);
}
@@ -1287,7 +1296,7 @@ static int xfrm_sad_getinfo(int argc, char **argv)
if (rtnl_open_byproto(&rth, 0, NETLINK_XFRM) < 0)
exit(1);
- if (rtnl_talk(&rth, &req.n, 0, 0, &req.n) < 0)
+ if (rtnl_talk(&rth, &req.n, &req.n, sizeof(req)) < 0)
exit(2);
print_sadinfo(&req.n, (void*)stdout);
@@ -1341,7 +1350,7 @@ static int xfrm_state_flush(int argc, char **argv)
fprintf(stderr, "Flush state with XFRM-PROTO value \"%s\"\n",
strxf_xfrmproto(req.xsf.proto));
- if (rtnl_talk(&rth, &req.n, 0, 0, NULL) < 0)
+ if (rtnl_talk(&rth, &req.n, NULL, 0) < 0)
exit(2);
rtnl_close(&rth);
diff --git a/lib/Android.mk b/lib/Android.mk
index 73a9f781..3fa5a5ee 100644
--- a/lib/Android.mk
+++ b/lib/Android.mk
@@ -2,8 +2,8 @@ LOCAL_PATH := $(call my-dir)
include $(CLEAR_VARS)
LOCAL_SRC_FILES := \
- utils.c rt_names.c ll_types.c ll_proto.c ll_addr.c inet_proto.c \
- namespace.c names.c libgenl.c libnetlink.c
+ color.c utils.c rt_names.c ll_types.c ll_proto.c ll_addr.c inet_proto.c \
+ mpls_pton.c namespace.c names.c libgenl.c libnetlink.c
LOCAL_MODULE := libiprouteutil
LOCAL_SYSTEM_SHARED_LIBRARIES := libc
LOCAL_C_INCLUDES := $(LOCAL_PATH)/../include
diff --git a/lib/Makefile b/lib/Makefile
index 4c7cbc25..9d1307dd 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -6,8 +6,9 @@ endif
CFLAGS += -fPIC
-UTILOBJ=utils.o rt_names.o ll_types.o ll_proto.o ll_addr.o inet_proto.o namespace.o \
- names.o
+UTILOBJ = utils.o rt_names.o ll_types.o ll_proto.o ll_addr.o \
+ inet_proto.o namespace.o json_writer.o \
+ names.o color.o
NLOBJ=libgenl.o ll_map.o libnetlink.o
diff --git a/lib/color.c b/lib/color.c
new file mode 100644
index 00000000..8c9a48ba
--- /dev/null
+++ b/lib/color.c
@@ -0,0 +1,64 @@
+#include <stdio.h>
+#include <stdarg.h>
+
+#include "color.h"
+
+enum color {
+ C_RED,
+ C_GREEN,
+ C_YELLOW,
+ C_BLUE,
+ C_MAGENTA,
+ C_CYAN,
+ C_WHITE,
+ C_CLEAR
+};
+
+static const char * const color_codes[] = {
+ "\e[31m",
+ "\e[32m",
+ "\e[33m",
+ "\e[34m",
+ "\e[35m",
+ "\e[36m",
+ "\e[37m",
+ "\e[0m",
+ NULL,
+};
+
+static enum color attr_colors[] = {
+ C_CYAN,
+ C_YELLOW,
+ C_MAGENTA,
+ C_BLUE,
+ C_GREEN,
+ C_RED
+};
+
+static int color_is_enabled;
+
+void enable_color(void)
+{
+ color_is_enabled = 1;
+}
+
+/* fprintf() variant. With color disabled it returns vfprintf()'s result; with color enabled it
+ * wraps the text in the code chosen by attr plus C_CLEAR and returns the sum of the three calls. */
+int color_fprintf(FILE *fp, enum color_attr attr, const char *fmt, ...)
+{
+ int ret = 0;
+ va_list args;
+
+ va_start(args, fmt);
+ if (!color_is_enabled) {
+ ret = vfprintf(fp, fmt, args);
+ goto end;
+ }
+ /* attr indexes attr_colors[] directly; no range check is done here */
+ ret += fprintf(fp, "%s", color_codes[attr_colors[attr]]);
+ ret += vfprintf(fp, fmt, args);
+ ret += fprintf(fp, "%s", color_codes[C_CLEAR]);
+end:
+ va_end(args);
+ return ret;
+}
diff --git a/lib/coverity_model.c b/lib/coverity_model.c
new file mode 100644
index 00000000..c8963020
--- /dev/null
+++ b/lib/coverity_model.c
@@ -0,0 +1,19 @@
+/*
+ * Coverity Scan model
+ *
+ * This is a modeling file for Coverity Scan. Modeling helps to avoid false
+ * positives.
+ *
+ * - A model file can't import any header files.
+ * - Therefore only some built-in primitives like int, char and void are
+ * available but not wchar_t, NULL etc.
+ * - Modeling doesn't need full structs and typedefs. Rudimentary structs
+ * and similar types are sufficient.
+ * - An uninitialized local pointer is not an error. It signifies that the
+ * variable could be either NULL or have some data.
+ *
+ * Coverity Scan doesn't pick up modifications automatically. The model file
+ * must be uploaded by an admin.
+ */
+
+
diff --git a/lib/json_writer.c b/lib/json_writer.c
new file mode 100644
index 00000000..2af16e10
--- /dev/null
+++ b/lib/json_writer.c
@@ -0,0 +1,312 @@
+/*
+ * Simple streaming JSON writer
+ *
+ * This takes care of the annoying bits of JSON syntax like the commas
+ * after elements
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Stephen Hemminger <stephen@networkplumber.org>
+ */
+
+#include <stdio.h>
+#include <stdbool.h>
+#include <stdarg.h>
+#include <assert.h>
+#include <malloc.h>
+#include <inttypes.h>
+#include <stdint.h>
+
+#include "json_writer.h"
+
+struct json_writer {
+ FILE *out; /* stream that all output is written to */
+ unsigned depth; /* nesting: raised by jsonw_begin(), lowered by jsonw_end() */
+ bool pretty; /* emit optional whitespace (newlines and indentation) */
+ char sep; /* printed before the next element: either nul or comma */
+};
+
+/* indentation for pretty print */
+static void jsonw_indent(json_writer_t *self)
+{
+ unsigned i;
+ for (i = 0; i <= self->depth; ++i)
+ fputs(" ", self->out);
+}
+
+/* end current line and indent if pretty printing */
+static void jsonw_eol(json_writer_t *self)
+{
+ if (!self->pretty)
+ return;
+
+ putc('\n', self->out);
+ jsonw_indent(self);
+}
+
+/* If current object is not empty print a comma */
+static void jsonw_eor(json_writer_t *self)
+{
+ if (self->sep != '\0')
+ putc(self->sep, self->out);
+ self->sep = ',';
+}
+
+
+/* Output JSON encoded string.
+ * Escapes tab, newline, carriage return, form feed, backspace, backslash and
+ * double quote; every other byte is copied through unchanged (no Unicode
+ * handling). A single quote needs no escape: JSON has no \' sequence. */
+static void jsonw_puts(json_writer_t *self, const char *str)
+{
+	putc('"', self->out);
+	for (; *str; ++str)
+		switch (*str) {
+		case '\t':
+			fputs("\\t", self->out);
+			break;
+		case '\n':
+			fputs("\\n", self->out);
+			break;
+		case '\r':
+			fputs("\\r", self->out);
+			break;
+		case '\f':
+			fputs("\\f", self->out);
+			break;
+		case '\b':
+			fputs("\\b", self->out);
+			break;
+		case '\\':
+			/* emit two backslashes; "\\n" here would turn a backslash into a newline escape */
+			fputs("\\\\", self->out);
+			break;
+		case '"':
+			fputs("\\\"", self->out);
+			break;
+		default:
+			putc(*str, self->out);
+		}
+	putc('"', self->out);
+}
+
+/* Create a new JSON stream on f and emit the opening '{'; returns NULL if malloc() fails */
+json_writer_t *jsonw_new(FILE *f)
+{
+ json_writer_t *self = malloc(sizeof(*self));
+ if (self) {
+ self->out = f;
+ self->depth = 0;
+ self->pretty = false;
+ self->sep = '\0';
+ putc('{', self->out); /* closed again by jsonw_destroy() */
+ }
+ return self;
+}
+
+/* End output to JSON stream: write the closing "}\n", flush, free the writer and set *self_p to NULL */
+void jsonw_destroy(json_writer_t **self_p)
+{
+ json_writer_t *self = *self_p;
+ /* all nested objects/arrays must already be closed; the FILE itself is not closed here */
+ assert(self->depth == 0);
+ jsonw_eol(self);
+ fputs("}\n", self->out);
+ fflush(self->out);
+ free(self);
+ *self_p = NULL;
+}
+
+void jsonw_pretty(json_writer_t *self, bool on)
+{
+ self->pretty = on;
+}
+
+/* Basic blocks: open a nested collection by writing bracket character c and going one level deeper */
+static void jsonw_begin(json_writer_t *self, int c)
+{
+ jsonw_eor(self);
+ putc(c, self->out);
+ ++self->depth;
+ self->sep = '\0'; /* the first element inside gets no leading comma */
+}
+
+/* Close the current collection by writing bracket character c and going one level up */
+static void jsonw_end(json_writer_t *self, int c)
+{
+ assert(self->depth > 0);
+ --self->depth;
+ if (self->sep != '\0') /* only break the line when sep was set by a preceding element */
+ jsonw_eol(self);
+ putc(c, self->out);
+ self->sep = ',';
+}
+
+
+/* Add a JSON property name: writes the pending separator, the quoted name and ':' (plus a space when pretty) */
+void jsonw_name(json_writer_t *self, const char *name)
+{
+ jsonw_eor(self);
+ jsonw_eol(self);
+ self->sep = '\0'; /* the value written next must not be preceded by a comma */
+ jsonw_puts(self, name);
+ putc(':', self->out);
+ if (self->pretty)
+ putc(' ', self->out);
+}
+
+static void jsonw_printf(json_writer_t *self, const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ jsonw_eor(self);
+ vfprintf(self->out, fmt, ap);
+ va_end(ap);
+}
+
+/* Collections */
+void jsonw_start_object(json_writer_t *self)
+{
+ jsonw_begin(self, '{');
+}
+
+void jsonw_end_object(json_writer_t *self)
+{
+ jsonw_end(self, '}');
+}
+
+void jsonw_start_array(json_writer_t *self)
+{
+ jsonw_begin(self, '[');
+}
+
+void jsonw_end_array(json_writer_t *self)
+{
+ jsonw_end(self, ']');
+}
+
+/* JSON value types */
+void jsonw_string(json_writer_t *self, const char *value)
+{
+ jsonw_eor(self);
+ jsonw_puts(self, value);
+}
+
+void jsonw_bool(json_writer_t *self, bool val)
+{
+ jsonw_printf(self, "%s", val ? "true" : "false");
+}
+
+#ifdef notused
+void jsonw_null(json_writer_t *self)
+{
+ jsonw_printf(self, "null");
+}
+
+void jsonw_float(json_writer_t *self, double num)
+{
+ jsonw_printf(self, "%g", num);
+}
+#endif
+
+void jsonw_uint(json_writer_t *self, uint64_t num)
+{
+ jsonw_printf(self, "%"PRIu64, num);
+}
+
+void jsonw_int(json_writer_t *self, int64_t num)
+{
+ jsonw_printf(self, "%"PRId64, num);
+}
+
+/* Basic name/value objects */
+void jsonw_string_field(json_writer_t *self, const char *prop, const char *val)
+{
+ jsonw_name(self, prop);
+ jsonw_string(self, val);
+}
+
+void jsonw_bool_field(json_writer_t *self, const char *prop, bool val)
+{
+ jsonw_name(self, prop);
+ jsonw_bool(self, val);
+}
+
+#ifdef notused
+void jsonw_float_field(json_writer_t *self, const char *prop, double val)
+{
+ jsonw_name(self, prop);
+ jsonw_float(self, val);
+}
+#endif
+
+void jsonw_uint_field(json_writer_t *self, const char *prop, uint64_t num)
+{
+ jsonw_name(self, prop);
+ jsonw_uint(self, num);
+}
+
+void jsonw_int_field(json_writer_t *self, const char *prop, int64_t num)
+{
+ jsonw_name(self, prop);
+ jsonw_int(self, num);
+}
+
+#ifdef notused
+void jsonw_null_field(json_writer_t *self, const char *prop)
+{
+ jsonw_name(self, prop);
+ jsonw_null(self);
+}
+#endif
+
+#ifdef TEST
+/* Self-test (built with TEST defined): prints a sample document to stdout */
+int main(int argc, char **argv)
+{
+	json_writer_t *wr = jsonw_new(stdout);
+
+	jsonw_pretty(wr, true);
+	jsonw_name(wr, "Vyatta");
+	jsonw_start_object(wr);
+	jsonw_string_field(wr, "url", "http://vyatta.com");
+	jsonw_uint_field(wr, "downloads", 2000000ul);
+#ifdef notused	/* jsonw_float_field() is only compiled when notused is defined */
+	jsonw_float_field(wr, "stock", 8.16);
+#endif
+	jsonw_name(wr, "ARGV");
+	jsonw_start_array(wr);
+	while (--argc)
+		jsonw_string(wr, *++argv);
+	jsonw_end_array(wr);
+
+	jsonw_name(wr, "empty");
+	jsonw_start_array(wr);
+	jsonw_end_array(wr);
+
+	jsonw_name(wr, "NIL");
+	jsonw_start_object(wr);
+	jsonw_end_object(wr);
+#ifdef notused	/* likewise jsonw_null_field() */
+	jsonw_null_field(wr, "my_null");
+#endif
+	jsonw_name(wr, "special chars");
+	jsonw_start_object(wr);	/* named fields are only valid inside an object, not an array */
+	jsonw_string_field(wr, "slash", "/");
+	jsonw_string_field(wr, "newline", "\n");
+	jsonw_string_field(wr, "tab", "\t");
+	jsonw_string_field(wr, "ff", "\f");
+	jsonw_string_field(wr, "quote", "\"");
+	jsonw_string_field(wr, "tick", "\'");
+	jsonw_string_field(wr, "backslash", "\\");
+	jsonw_end_object(wr);
+	jsonw_end_object(wr);
+	jsonw_destroy(&wr);
+	return 0;
+}
+
+#endif
diff --git a/lib/libgenl.c b/lib/libgenl.c
index ef3e5db6..acb14783 100644
--- a/lib/libgenl.c
+++ b/lib/libgenl.c
@@ -53,7 +53,7 @@ int genl_resolve_family(struct rtnl_handle *grth, const char *family)
addattr_l(&req.n, sizeof(req), CTRL_ATTR_FAMILY_NAME,
family, strlen(family) + 1);
- if (rtnl_talk(grth, &req.n, 0, 0, &req.n) < 0) {
+ if (rtnl_talk(grth, &req.n, &req.n, sizeof(req)) < 0) {
fprintf(stderr, "Error talking to the kernel\n");
return -2;
}
diff --git a/lib/libnetlink.c b/lib/libnetlink.c
index 77e07ef7..d6b5fd3e 100644
--- a/lib/libnetlink.c
+++ b/lib/libnetlink.c
@@ -25,6 +25,14 @@
#include "libnetlink.h"
+#ifndef SOL_NETLINK
+#define SOL_NETLINK 270
+#endif
+
+#ifndef MIN
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+#endif
+
int rcvbuf = 1024 * 1024;
void rtnl_close(struct rtnl_handle *rth)
@@ -183,6 +191,27 @@ int rtnl_dump_request(struct rtnl_handle *rth, int type, void *req, int len)
return sendmsg(rth->fd, &msg, 0);
}
+/* Send the caller-built message n as a dump request on rth->fd and return sendmsg()'s result. */
+int rtnl_dump_request_n(struct rtnl_handle *rth, struct nlmsghdr *n)
+{
+ struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
+ struct iovec iov = {
+ .iov_base = (void*) n,
+ .iov_len = n->nlmsg_len
+ };
+ struct msghdr msg = {
+ .msg_name = &nladdr,
+ .msg_namelen = sizeof(nladdr),
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ };
+ /* flags, pid and seq of n are overwritten here; rth->dump records the sequence number used */
+ n->nlmsg_flags = NLM_F_DUMP|NLM_F_REQUEST;
+ n->nlmsg_pid = 0;
+ n->nlmsg_seq = rth->dump = ++rth->seq;
+ return sendmsg(rth->fd, &msg, 0);
+}
+
int rtnl_dump_filter_l(struct rtnl_handle *rth,
const struct rtnl_dump_filter_arg *arg)
{
@@ -230,6 +259,8 @@ int rtnl_dump_filter_l(struct rtnl_handle *rth,
while (NLMSG_OK(h, msglen)) {
int err = 0;
+ h->nlmsg_flags &= ~a->nc_flags;
+
if (nladdr.nl_pid != 0 ||
h->nlmsg_pid != rth->local.nl_pid ||
h->nlmsg_seq != rth->dump)
@@ -288,20 +319,20 @@ skip_it:
}
}
-int rtnl_dump_filter(struct rtnl_handle *rth,
+int rtnl_dump_filter_nc(struct rtnl_handle *rth,
rtnl_filter_t filter,
- void *arg1)
+ void *arg1, __u16 nc_flags)
{
const struct rtnl_dump_filter_arg a[2] = {
- { .filter = filter, .arg1 = arg1, },
- { .filter = NULL, .arg1 = NULL, },
+ { .filter = filter, .arg1 = arg1, .nc_flags = nc_flags, },
+ { .filter = NULL, .arg1 = NULL, .nc_flags = 0, },
};
return rtnl_dump_filter_l(rth, a);
}
-int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n, pid_t peer,
- unsigned groups, struct nlmsghdr *answer)
+int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n,
+ struct nlmsghdr *answer, size_t maxlen)
{
int status;
unsigned seq;
@@ -317,12 +348,10 @@ int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n, pid_t peer,
.msg_iov = &iov,
.msg_iovlen = 1,
};
- char buf[16384];
+ char buf[32768];
memset(&nladdr, 0, sizeof(nladdr));
nladdr.nl_family = AF_NETLINK;
- nladdr.nl_pid = peer;
- nladdr.nl_groups = groups;
n->nlmsg_seq = seq = ++rtnl->seq;
@@ -330,7 +359,6 @@ int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n, pid_t peer,
n->nlmsg_flags |= NLM_F_ACK;
status = sendmsg(rtnl->fd, &msg, 0);
-
if (status < 0) {
perror("Cannot talk to rtnetlink");
return -1;
@@ -339,7 +367,6 @@ int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n, pid_t peer,
memset(buf,0,sizeof(buf));
iov.iov_base = buf;
-
while (1) {
iov.iov_len = sizeof(buf);
status = recvmsg(rtnl->fd, &msg, 0);
@@ -372,7 +399,7 @@ int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n, pid_t peer,
exit(1);
}
- if (nladdr.nl_pid != peer ||
+ if (nladdr.nl_pid != 0 ||
h->nlmsg_pid != rtnl->local.nl_pid ||
h->nlmsg_seq != seq) {
/* Don't forget to skip that message. */
@@ -385,20 +412,24 @@ int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n, pid_t peer,
struct nlmsgerr *err = (struct nlmsgerr*)NLMSG_DATA(h);
if (l < sizeof(struct nlmsgerr)) {
fprintf(stderr, "ERROR truncated\n");
- } else {
- if (!err->error) {
- if (answer)
- memcpy(answer, h, h->nlmsg_len);
- return 0;
- }
-
- fprintf(stderr, "RTNETLINK answers: %s\n", strerror(-err->error));
- errno = -err->error;
+ } else if (!err->error) {
+ if (answer)
+ memcpy(answer, h,
+ MIN(maxlen, h->nlmsg_len));
+ return 0;
}
+
+ if (rtnl->proto != NETLINK_SOCK_DIAG)
+ fprintf(stderr,
+ "RTNETLINK answers: %s\n",
+ strerror(-err->error));
+ errno = -err->error;
return -1;
}
+
if (answer) {
- memcpy(answer, h, h->nlmsg_len);
+ memcpy(answer, h,
+ MIN(maxlen, h->nlmsg_len));
return 0;
}
@@ -407,10 +438,12 @@ int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n, pid_t peer,
status -= NLMSG_ALIGN(len);
h = (struct nlmsghdr*)((char*)h + NLMSG_ALIGN(len));
}
+
if (msg.msg_flags & MSG_TRUNC) {
fprintf(stderr, "Message truncated\n");
continue;
}
+
if (status) {
fprintf(stderr, "!!!Remnant of size %d\n", status);
exit(1);
@@ -418,8 +451,21 @@ int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n, pid_t peer,
}
}
+/* Set NETLINK_LISTEN_ALL_NSID on rth's socket. Returns 0 on success, -1 (after perror) on failure. */
+int rtnl_listen_all_nsid(struct rtnl_handle *rth)
+{
+ unsigned int on = 1;
+ if (setsockopt(rth->fd, SOL_NETLINK, NETLINK_LISTEN_ALL_NSID, &on,
+ sizeof(on)) < 0) {
+ perror("NETLINK_LISTEN_ALL_NSID");
+ return -1;
+ }
+ rth->flags |= RTNL_HANDLE_F_LISTEN_ALL_NSID; /* rtnl_listen() tests this flag before reading control messages */
+ return 0;
+}
+
int rtnl_listen(struct rtnl_handle *rtnl,
- rtnl_filter_t handler,
+ rtnl_listen_filter_t handler,
void *jarg)
{
int status;
@@ -433,6 +479,12 @@ int rtnl_listen(struct rtnl_handle *rtnl,
.msg_iovlen = 1,
};
char buf[16384];
+ char cmsgbuf[BUFSIZ];
+
+ if (rtnl->flags & RTNL_HANDLE_F_LISTEN_ALL_NSID) {
+ msg.msg_control = &cmsgbuf;
+ msg.msg_controllen = sizeof(cmsgbuf);
+ }
memset(&nladdr, 0, sizeof(nladdr));
nladdr.nl_family = AF_NETLINK;
@@ -441,6 +493,9 @@ int rtnl_listen(struct rtnl_handle *rtnl,
iov.iov_base = buf;
while (1) {
+ struct rtnl_ctrl_data ctrl;
+ struct cmsghdr *cmsg;
+
iov.iov_len = sizeof(buf);
status = recvmsg(rtnl->fd, &msg, 0);
@@ -461,6 +516,21 @@ int rtnl_listen(struct rtnl_handle *rtnl,
fprintf(stderr, "Sender address length == %d\n", msg.msg_namelen);
exit(1);
}
+
+ if (rtnl->flags & RTNL_HANDLE_F_LISTEN_ALL_NSID) {
+ memset(&ctrl, 0, sizeof(ctrl));
+ ctrl.nsid = -1;
+ for (cmsg = CMSG_FIRSTHDR(&msg); cmsg;
+ cmsg = CMSG_NXTHDR(&msg, cmsg))
+ if (cmsg->cmsg_level == SOL_NETLINK &&
+ cmsg->cmsg_type == NETLINK_LISTEN_ALL_NSID &&
+ cmsg->cmsg_len == CMSG_LEN(sizeof(int))) {
+ int *data = (int *)CMSG_DATA(cmsg);
+
+ ctrl.nsid = *data;
+ }
+ }
+
for (h = (struct nlmsghdr*)buf; status >= sizeof(*h); ) {
int err;
int len = h->nlmsg_len;
@@ -475,7 +545,7 @@ int rtnl_listen(struct rtnl_handle *rtnl,
exit(1);
}
- err = handler(&nladdr, h, jarg);
+ err = handler(&nladdr, &ctrl, h, jarg);
if (err < 0)
return err;
@@ -493,7 +563,7 @@ int rtnl_listen(struct rtnl_handle *rtnl,
}
}
-int rtnl_from_file(FILE *rtnl, rtnl_filter_t handler,
+int rtnl_from_file(FILE *rtnl, rtnl_listen_filter_t handler,
void *jarg)
{
int status;
@@ -541,7 +611,7 @@ int rtnl_from_file(FILE *rtnl, rtnl_filter_t handler,
return -1;
}
- err = handler(&nladdr, h, jarg);
+ err = handler(&nladdr, NULL, h, jarg);
if (err < 0)
return err;
}
@@ -676,6 +746,37 @@ int rta_addattr_l(struct rtattr *rta, int maxlen, int type,
return 0;
}
+int rta_addattr8(struct rtattr *rta, int maxlen, int type, __u8 data)
+{
+ return rta_addattr_l(rta, maxlen, type, &data, sizeof(__u8));
+}
+
+int rta_addattr16(struct rtattr *rta, int maxlen, int type, __u16 data)
+{
+ return rta_addattr_l(rta, maxlen, type, &data, sizeof(__u16));
+}
+
+int rta_addattr64(struct rtattr *rta, int maxlen, int type, __u64 data)
+{
+ return rta_addattr_l(rta, maxlen, type, &data, sizeof(__u64));
+}
+
+/* Append an empty attribute of the given type to rta and return the RTA_TAIL(rta) position taken before the append. */
+struct rtattr *rta_nest(struct rtattr *rta, int maxlen, int type)
+{
+ struct rtattr *nest = RTA_TAIL(rta);
+
+ rta_addattr_l(rta, maxlen, type, NULL, 0); /* NOTE(review): result ignored, so nest is returned even if this fails */
+ return nest;
+}
+
+/* Set nest->rta_len to the byte distance from nest to RTA_TAIL(rta); returns rta->rta_len. */
+int rta_nest_end(struct rtattr *rta, struct rtattr *nest)
+{
+ nest->rta_len = (void *)RTA_TAIL(rta) - (void *)nest; /* void * arithmetic: relies on a compiler extension */
+ return rta->rta_len;
+}
+
int parse_rtattr(struct rtattr *tb[], int max, struct rtattr *rta, int len)
{
return parse_rtattr_flags(tb, max, rta, len, 0);
diff --git a/lib/ll_addr.c b/lib/ll_addr.c
index c12ab075..2ce9abfb 100644
--- a/lib/ll_addr.c
+++ b/lib/ll_addr.c
@@ -29,7 +29,7 @@
#include "utils.h"
-const char *ll_addr_n2a(unsigned char *addr, int alen, int type, char *buf, int blen)
+const char *ll_addr_n2a(const unsigned char *addr, int alen, int type, char *buf, int blen)
{
int i;
int l;
diff --git a/lib/ll_map.c b/lib/ll_map.c
index db34a2aa..c6f70274 100644
--- a/lib/ll_map.c
+++ b/lib/ll_map.c
@@ -52,7 +52,7 @@ static struct ll_cache *ll_get_by_index(unsigned index)
return NULL;
}
-static unsigned namehash(const char *str)
+unsigned namehash(const char *str)
{
unsigned hash = 5381;
diff --git a/lib/mpls_ntop.c b/lib/mpls_ntop.c
new file mode 100644
index 00000000..945d6d5e
--- /dev/null
+++ b/lib/mpls_ntop.c
@@ -0,0 +1,48 @@
+#include <errno.h>
+#include <string.h>
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <linux/mpls.h>
+
+#include "utils.h"
+
+/* Format the label stack at addr into buf as "l1/l2/..."; returns buf, or NULL with errno = E2BIG if it does not fit */
+static const char *mpls_ntop1(const struct mpls_label *addr, char *buf, size_t buflen)
+{
+	size_t destlen = buflen;
+	char *dest = buf;
+	int count;
+
+	for (count = 0; count < MPLS_MAX_LABELS; count++) {
+		uint32_t entry = ntohl(addr[count].entry);
+		uint32_t label = (entry & MPLS_LS_LABEL_MASK) >> MPLS_LS_LABEL_SHIFT;
+		int len = snprintf(dest, destlen, "%u", label);
+		if (len < 0 || (size_t)len >= destlen)	/* error or truncated: stop before destlen underflows */
+			break;
+		/* Is this the end? */
+		if (entry & MPLS_LS_S_MASK)
+			return buf;
+		dest += len;
+		destlen -= len;
+		if (destlen) {
+			*dest = '/';
+			dest++;
+			destlen--;
+		}
+	}
+	errno = E2BIG;	/* errno values are positive; the old -E2BIG stored a bogus code */
+	return NULL;
+}
+
+/* Format an MPLS address: AF_MPLS returns mpls_ntop1()'s result; any other af returns NULL with errno = EAFNOSUPPORT. */
+const char *mpls_ntop(int af, const void *addr, char *buf, size_t buflen)
+{
+ switch(af) {
+ case AF_MPLS:
+ errno = 0;
+ return mpls_ntop1((struct mpls_label *)addr, buf, buflen);
+ default:
+ errno = EAFNOSUPPORT;
+ }
+ return NULL;
+}
diff --git a/lib/mpls_pton.c b/lib/mpls_pton.c
new file mode 100644
index 00000000..bd448cfc
--- /dev/null
+++ b/lib/mpls_pton.c
@@ -0,0 +1,58 @@
+#include <errno.h>
+#include <string.h>
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <linux/mpls.h>
+
+#include "utils.h"
+
+
+/* Parse "label[/label...]" from name into addr[], one entry per label.
+ * Returns 1 on success, 0 on any parse error or when more than MPLS_MAX_LABELS labels are given. */
+static int mpls_pton1(const char *name, struct mpls_label *addr)
+{
+ char *endp;
+ unsigned count;
+
+ for (count = 0; count < MPLS_MAX_LABELS; count++) {
+ unsigned long label;
+ label = strtoul(name, &endp, 0);
+ /* Fail when the label value is out of range */
+ if (label >= (1 << 20))
+ return 0;
+ if (endp == name) /* no digits */
+ return 0;
+
+ addr->entry = htonl(label << MPLS_LS_LABEL_SHIFT);
+ if (*endp == '\0') {
+ addr->entry |= htonl(1 << MPLS_LS_S_SHIFT); /* end of input: flag this entry as the last one */
+ return 1;
+ }
+
+ /* Bad character in the address */
+ if (*endp != '/')
+ return 0;
+
+ name = endp + 1;
+ addr += 1;
+ }
+ /* The address was too long */
+ return 0;
+}
+
+/* Parse an MPLS address: AF_MPLS returns mpls_pton1()'s result; any other af returns -1 with errno = EAFNOSUPPORT. */
+int mpls_pton(int af, const char *src, void *addr)
+{
+ int err;
+
+ switch(af) {
+ case AF_MPLS:
+ errno = 0;
+ err = mpls_pton1(src, (struct mpls_label *)addr);
+ break;
+ default:
+ errno = EAFNOSUPPORT;
+ err = -1;
+ }
+ return err;
+}
diff --git a/lib/namespace.c b/lib/namespace.c
index c03a103a..30b51388 100644
--- a/lib/namespace.c
+++ b/lib/namespace.c
@@ -9,15 +9,16 @@
#include <fcntl.h>
#include <dirent.h>
+#include <limits.h>
#include "utils.h"
#include "namespace.h"
static void bind_etc(const char *name)
{
- char etc_netns_path[MAXPATHLEN];
- char netns_name[MAXPATHLEN];
- char etc_name[MAXPATHLEN];
+ char etc_netns_path[PATH_MAX];
+ char netns_name[PATH_MAX];
+ char etc_name[PATH_MAX];
struct dirent *entry;
DIR *dir;
@@ -43,7 +44,7 @@ static void bind_etc(const char *name)
int netns_switch(char *name)
{
- char net_path[MAXPATHLEN];
+ char net_path[PATH_MAX];
int netns;
snprintf(net_path, sizeof(net_path), "%s/%s", NETNS_RUN_DIR, name);
@@ -57,8 +58,10 @@ int netns_switch(char *name)
if (setns(netns, CLONE_NEWNET) < 0) {
fprintf(stderr, "setting the network namespace \"%s\" failed: %s\n",
name, strerror(errno));
+ close(netns);
return -1;
}
+ close(netns);
if (unshare(CLONE_NEWNS) < 0) {
fprintf(stderr, "unshare failed: %s\n", strerror(errno));
@@ -87,7 +90,7 @@ int netns_switch(char *name)
int netns_get_fd(const char *name)
{
- char pathbuf[MAXPATHLEN];
+ char pathbuf[PATH_MAX];
const char *path, *ptr;
path = name;
diff --git a/lib/rt_names.c b/lib/rt_names.c
index e87c65da..f6d17c0e 100644
--- a/lib/rt_names.c
+++ b/lib/rt_names.c
@@ -17,21 +17,19 @@
#include <string.h>
#include <sys/time.h>
#include <sys/socket.h>
+#include <dirent.h>
#include <asm/types.h>
#include <linux/rtnetlink.h>
#include "rt_names.h"
-
-#ifndef CONFDIR
-#define CONFDIR "/etc/iproute2"
-#endif
+#include "utils.h"
#define NAME_MAX_LEN 512
struct rtnl_hash_entry {
- struct rtnl_hash_entry *next;
- const char * name;
+ struct rtnl_hash_entry *next;
+ const char *name;
unsigned int id;
};
@@ -81,7 +79,7 @@ rtnl_hash_initialize(const char *file, struct rtnl_hash_entry **hash, int size)
return;
}
- if (id<0)
+ if (id < 0)
continue;
entry = malloc(sizeof(*entry));
@@ -111,7 +109,7 @@ static void rtnl_tab_initialize(const char *file, char **tab, int size)
fclose(fp);
return;
}
- if (id<0 || id>size)
+ if (id < 0 || id > size)
continue;
tab[id] = strdup(namebuf);
@@ -119,23 +117,23 @@ static void rtnl_tab_initialize(const char *file, char **tab, int size)
fclose(fp);
}
-static char * rtnl_rtprot_tab[256] = {
- [RTPROT_UNSPEC] = "none",
- [RTPROT_REDIRECT] ="redirect",
- [RTPROT_KERNEL] = "kernel",
- [RTPROT_BOOT] = "boot",
- [RTPROT_STATIC] = "static",
-
- [RTPROT_GATED] = "gated",
- [RTPROT_RA] = "ra",
- [RTPROT_MRT] = "mrt",
- [RTPROT_ZEBRA] ="zebra",
- [RTPROT_BIRD] = "bird",
- [RTPROT_BABEL] = "babel",
+static char *rtnl_rtprot_tab[256] = {
+ [RTPROT_UNSPEC] = "none",
+ [RTPROT_REDIRECT] = "redirect",
+ [RTPROT_KERNEL] = "kernel",
+ [RTPROT_BOOT] = "boot",
+ [RTPROT_STATIC] = "static",
+
+ [RTPROT_GATED] = "gated",
+ [RTPROT_RA] = "ra",
+ [RTPROT_MRT] = "mrt",
+ [RTPROT_ZEBRA] = "zebra",
+ [RTPROT_BIRD] = "bird",
+ [RTPROT_BABEL] = "babel",
[RTPROT_DNROUTED] = "dnrouted",
- [RTPROT_XORP] = "xorp",
- [RTPROT_NTK] = "ntk",
- [RTPROT_DHCP] = "dhcp",
+ [RTPROT_XORP] = "xorp",
+ [RTPROT_NTK] = "ntk",
+ [RTPROT_DHCP] = "dhcp",
};
@@ -148,9 +146,9 @@ static void rtnl_rtprot_initialize(void)
rtnl_rtprot_tab, 256);
}
-const char * rtnl_rtprot_n2a(int id, char *buf, int len)
+const char *rtnl_rtprot_n2a(int id, char *buf, int len)
{
- if (id<0 || id>=256) {
+ if (id < 0 || id >= 256) {
snprintf(buf, len, "%u", id);
return buf;
}
@@ -166,7 +164,7 @@ const char * rtnl_rtprot_n2a(int id, char *buf, int len)
int rtnl_rtprot_a2n(__u32 *id, const char *arg)
{
- static char *cache = NULL;
+ static char *cache;
static unsigned long res;
char *end;
int i;
@@ -179,7 +177,7 @@ int rtnl_rtprot_a2n(__u32 *id, const char *arg)
if (!rtnl_rtprot_init)
rtnl_rtprot_initialize();
- for (i=0; i<256; i++) {
+ for (i = 0; i < 256; i++) {
if (rtnl_rtprot_tab[i] &&
strcmp(rtnl_rtprot_tab[i], arg) == 0) {
cache = rtnl_rtprot_tab[i];
@@ -196,8 +194,13 @@ int rtnl_rtprot_a2n(__u32 *id, const char *arg)
return 0;
}
-static char * rtnl_rtscope_tab[256] = {
- "global",
+
+static char *rtnl_rtscope_tab[256] = {
+ [RT_SCOPE_UNIVERSE] = "global",
+ [RT_SCOPE_NOWHERE] = "nowhere",
+ [RT_SCOPE_HOST] = "host",
+ [RT_SCOPE_LINK] = "link",
+ [RT_SCOPE_SITE] = "site",
};
static int rtnl_rtscope_init;
@@ -205,33 +208,32 @@ static int rtnl_rtscope_init;
static void rtnl_rtscope_initialize(void)
{
rtnl_rtscope_init = 1;
- rtnl_rtscope_tab[RT_SCOPE_NOWHERE] = "nowhere";
- rtnl_rtscope_tab[RT_SCOPE_HOST] = "host";
- rtnl_rtscope_tab[RT_SCOPE_LINK] = "link";
- rtnl_rtscope_tab[RT_SCOPE_SITE] = "site";
rtnl_tab_initialize(CONFDIR "/rt_scopes",
rtnl_rtscope_tab, 256);
}
const char *rtnl_rtscope_n2a(int id, char *buf, int len)
{
- if (id<0 || id>=256) {
+ if (id < 0 || id >= 256) {
snprintf(buf, len, "%d", id);
return buf;
}
+
if (!rtnl_rtscope_tab[id]) {
if (!rtnl_rtscope_init)
rtnl_rtscope_initialize();
}
+
if (rtnl_rtscope_tab[id])
return rtnl_rtscope_tab[id];
+
snprintf(buf, len, "%d", id);
return buf;
}
int rtnl_rtscope_a2n(__u32 *id, const char *arg)
{
- static const char *cache = NULL;
+ static const char *cache;
static unsigned long res;
char *end;
int i;
@@ -244,7 +246,7 @@ int rtnl_rtscope_a2n(__u32 *id, const char *arg)
if (!rtnl_rtscope_init)
rtnl_rtscope_initialize();
- for (i=0; i<256; i++) {
+ for (i = 0; i < 256; i++) {
if (rtnl_rtscope_tab[i] &&
strcmp(rtnl_rtscope_tab[i], arg) == 0) {
cache = rtnl_rtscope_tab[i];
@@ -262,7 +264,7 @@ int rtnl_rtscope_a2n(__u32 *id, const char *arg)
}
-static char * rtnl_rtrealm_tab[256] = {
+static char *rtnl_rtrealm_tab[256] = {
"unknown",
};
@@ -277,7 +279,7 @@ static void rtnl_rtrealm_initialize(void)
const char *rtnl_rtrealm_n2a(int id, char *buf, int len)
{
- if (id<0 || id>=256) {
+ if (id < 0 || id >= 256) {
snprintf(buf, len, "%d", id);
return buf;
}
@@ -294,7 +296,7 @@ const char *rtnl_rtrealm_n2a(int id, char *buf, int len)
int rtnl_rtrealm_a2n(__u32 *id, const char *arg)
{
- static char *cache = NULL;
+ static char *cache;
static unsigned long res;
char *end;
int i;
@@ -307,7 +309,7 @@ int rtnl_rtrealm_a2n(__u32 *id, const char *arg)
if (!rtnl_rtrealm_init)
rtnl_rtrealm_initialize();
- for (i=0; i<256; i++) {
+ for (i = 0; i < 256; i++) {
if (rtnl_rtrealm_tab[i] &&
strcmp(rtnl_rtrealm_tab[i], arg) == 0) {
cache = rtnl_rtrealm_tab[i];
@@ -329,7 +331,7 @@ static struct rtnl_hash_entry dflt_table_entry = { .name = "default" };
static struct rtnl_hash_entry main_table_entry = { .name = "main" };
static struct rtnl_hash_entry local_table_entry = { .name = "local" };
-static struct rtnl_hash_entry * rtnl_rttable_hash[256] = {
+static struct rtnl_hash_entry *rtnl_rttable_hash[256] = {
[RT_TABLE_DEFAULT] = &dflt_table_entry,
[RT_TABLE_MAIN] = &main_table_entry,
[RT_TABLE_LOCAL] = &local_table_entry,
@@ -339,6 +341,8 @@ static int rtnl_rttable_init;
static void rtnl_rttable_initialize(void)
{
+ struct dirent *de;
+ DIR *d;
int i;
rtnl_rttable_init = 1;
@@ -348,9 +352,33 @@ static void rtnl_rttable_initialize(void)
}
rtnl_hash_initialize(CONFDIR "/rt_tables",
rtnl_rttable_hash, 256);
+
+ d = opendir(CONFDIR "/rt_tables.d");
+ if (!d)
+ return;
+
+ while ((de = readdir(d)) != NULL) {
+ char path[PATH_MAX];
+ size_t len;
+
+ if (*de->d_name == '.')
+ continue;
+
+ /* only consider filenames ending in '.conf' */
+ len = strlen(de->d_name);
+ if (len <= 5)
+ continue;
+ if (strcmp(de->d_name + len - 5, ".conf"))
+ continue;
+
+ snprintf(path, sizeof(path),
+ CONFDIR "/rt_tables.d/%s", de->d_name);
+ rtnl_hash_initialize(path, rtnl_rttable_hash, 256);
+ }
+ closedir(d);
}
-const char * rtnl_rttable_n2a(__u32 id, char *buf, int len)
+const char *rtnl_rttable_n2a(__u32 id, char *buf, int len)
{
struct rtnl_hash_entry *entry;
@@ -371,7 +399,7 @@ const char * rtnl_rttable_n2a(__u32 id, char *buf, int len)
int rtnl_rttable_a2n(__u32 *id, const char *arg)
{
- static const char *cache = NULL;
+ static const char *cache;
static unsigned long res;
struct rtnl_hash_entry *entry;
char *end;
@@ -385,7 +413,7 @@ int rtnl_rttable_a2n(__u32 *id, const char *arg)
if (!rtnl_rttable_init)
rtnl_rttable_initialize();
- for (i=0; i<256; i++) {
+ for (i = 0; i < 256; i++) {
entry = rtnl_rttable_hash[i];
while (entry && strcmp(entry->name, arg))
entry = entry->next;
@@ -405,7 +433,7 @@ int rtnl_rttable_a2n(__u32 *id, const char *arg)
}
-static char * rtnl_rtdsfield_tab[256] = {
+static char *rtnl_rtdsfield_tab[256] = {
"0",
};
@@ -420,7 +448,7 @@ static void rtnl_rtdsfield_initialize(void)
const char *rtnl_dsfield_n2a(int id, char *buf, int len)
{
- if (id<0 || id>=256) {
+ if (id < 0 || id >= 256) {
snprintf(buf, len, "%d", id);
return buf;
}
@@ -437,7 +465,7 @@ const char *rtnl_dsfield_n2a(int id, char *buf, int len)
int rtnl_dsfield_a2n(__u32 *id, const char *arg)
{
- static char *cache = NULL;
+ static char *cache;
static unsigned long res;
char *end;
int i;
@@ -450,7 +478,7 @@ int rtnl_dsfield_a2n(__u32 *id, const char *arg)
if (!rtnl_rtdsfield_init)
rtnl_rtdsfield_initialize();
- for (i=0; i<256; i++) {
+ for (i = 0; i < 256; i++) {
if (rtnl_rtdsfield_tab[i] &&
strcmp(rtnl_rtdsfield_tab[i], arg) == 0) {
cache = rtnl_rtdsfield_tab[i];
@@ -468,9 +496,11 @@ int rtnl_dsfield_a2n(__u32 *id, const char *arg)
}
-static struct rtnl_hash_entry dflt_group_entry = { .id = 0, .name = "default" };
+static struct rtnl_hash_entry dflt_group_entry = {
+ .id = 0, .name = "default"
+};
-static struct rtnl_hash_entry * rtnl_group_hash[256] = {
+static struct rtnl_hash_entry *rtnl_group_hash[256] = {
[0] = &dflt_group_entry,
};
@@ -485,7 +515,7 @@ static void rtnl_group_initialize(void)
int rtnl_group_a2n(int *id, const char *arg)
{
- static const char *cache = NULL;
+ static const char *cache;
static unsigned long res;
struct rtnl_hash_entry *entry;
char *end;
@@ -499,7 +529,7 @@ int rtnl_group_a2n(int *id, const char *arg)
if (!rtnl_group_init)
rtnl_group_initialize();
- for (i=0; i<256; i++) {
+ for (i = 0; i < 256; i++) {
entry = rtnl_group_hash[i];
while (entry && strcmp(entry->name, arg))
entry = entry->next;
@@ -526,11 +556,10 @@ const char *rtnl_group_n2a(int id, char *buf, int len)
if (!rtnl_group_init)
rtnl_group_initialize();
- for (i=0; i<256; i++) {
+ for (i = 0; i < 256; i++) {
entry = rtnl_group_hash[i];
- if (entry && entry->id == id) {
+ if (entry && entry->id == id)
return entry->name;
- }
}
snprintf(buf, len, "%d", id);
@@ -589,7 +618,7 @@ const char *nl_proto_n2a(int id, char *buf, int len)
int nl_proto_a2n(__u32 *id, const char *arg)
{
- static char *cache = NULL;
+ static char *cache;
static unsigned long res;
char *end;
int i;
diff --git a/lib/utils.c b/lib/utils.c
index a3f4268e..46a20de7 100644
--- a/lib/utils.c
+++ b/lib/utils.c
@@ -25,11 +25,13 @@
#include <asm/types.h>
#include <linux/pkt_sched.h>
#include <linux/param.h>
+#include <linux/if_arp.h>
+#include <linux/mpls.h>
#include <time.h>
#include <sys/time.h>
#include <errno.h>
-
+#include "rt_names.h"
#include "utils.h"
#include "namespace.h"
@@ -382,6 +384,41 @@ static int get_addr_ipv4(__u8 *ap, const char *cp)
return 1;
}
+int get_addr64(__u64 *ap, const char *cp)
+{
+ int i;
+
+ union {
+ __u16 v16[4];
+ __u64 v64;
+ } val;
+
+ for (i = 0; i < 4; i++) {
+ unsigned long n;
+ char *endp;
+
+ n = strtoul(cp, &endp, 16);
+ if (n > 0xffff)
+ return -1; /* bogus network value */
+
+ if (endp == cp) /* no digits */
+ return -1;
+
+ val.v16[i] = htons(n);
+
+ if (*endp == '\0')
+ break;
+
+ if (i == 3 || *endp != ':')
+ return -1; /* extra characters */
+ cp = endp + 1;
+ }
+
+ *ap = val.v64;
+
+ return 1;
+}
+
int get_addr_1(inet_prefix *addr, const char *name, int family)
{
memset(addr, 0, sizeof(*addr));
@@ -389,7 +426,7 @@ int get_addr_1(inet_prefix *addr, const char *name, int family)
if (strcmp(name, "default") == 0 ||
strcmp(name, "all") == 0 ||
strcmp(name, "any") == 0) {
- if (family == AF_DECnet)
+ if ((family == AF_DECnet) || (family == AF_MPLS))
return -1;
addr->family = family;
addr->bytelen = (family == AF_INET6 ? 16 : 4);
@@ -397,6 +434,18 @@ int get_addr_1(inet_prefix *addr, const char *name, int family)
return 0;
}
+ if (family == AF_PACKET) {
+ int len;
+ len = ll_addr_a2n((char *)&addr->data, sizeof(addr->data), name);
+ if (len < 0)
+ return -1;
+
+ addr->family = AF_PACKET;
+ addr->bytelen = len;
+ addr->bitlen = len * 8;
+ return 0;
+ }
+
if (strchr(name, ':')) {
addr->family = AF_INET6;
if (family != AF_UNSPEC && family != AF_INET6)
@@ -421,6 +470,23 @@ int get_addr_1(inet_prefix *addr, const char *name, int family)
}
#endif
+ if (family == AF_MPLS) {
+ int i;
+ addr->family = AF_MPLS;
+ if (mpls_pton(AF_MPLS, name, addr->data) <= 0)
+ return -1;
+ addr->bytelen = 4;
+ addr->bitlen = 20;
+ /* How many bytes do I need? */
+ for (i = 0; i < 8; i++) {
+ if (ntohl(addr->data[i]) & MPLS_LS_S_MASK) {
+ addr->bytelen = (i + 1)*4;
+ break;
+ }
+ }
+ return 0;
+ }
+
addr->family = AF_INET;
if (family != AF_UNSPEC && family != AF_INET)
return -1;
@@ -444,6 +510,8 @@ int af_bit_len(int af)
return 16;
case AF_IPX:
return 80;
+ case AF_MPLS:
+ return 20;
}
return 0;
@@ -465,7 +533,7 @@ int get_prefix_1(inet_prefix *dst, char *arg, int family)
if (strcmp(arg, "default") == 0 ||
strcmp(arg, "any") == 0 ||
strcmp(arg, "all") == 0) {
- if (family == AF_DECnet)
+ if ((family == AF_DECnet) || (family == AF_MPLS))
return -1;
dst->family = family;
dst->bytelen = 0;
@@ -499,12 +567,9 @@ done:
int get_addr(inet_prefix *dst, const char *arg, int family)
{
- if (family == AF_PACKET) {
- fprintf(stderr, "Error: \"%s\" may be inet address, but it is not allowed in this context.\n", arg);
- exit(1);
- }
if (get_addr_1(dst, arg, family)) {
- fprintf(stderr, "Error: an inet address is expected rather than \"%s\".\n", arg);
+ fprintf(stderr, "Error: %s address is expected rather than \"%s\".\n",
+ family_name(family) ,arg);
exit(1);
}
return 0;
@@ -517,7 +582,8 @@ int get_prefix(inet_prefix *dst, char *arg, int family)
exit(1);
}
if (get_prefix_1(dst, arg, family)) {
- fprintf(stderr, "Error: an inet prefix is expected rather than \"%s\".\n", arg);
+ fprintf(stderr, "Error: %s prefix is expected rather than \"%s\".\n",
+ family_name(family) ,arg);
exit(1);
}
return 0;
@@ -638,13 +704,15 @@ int __get_user_hz(void)
return sysconf(_SC_CLK_TCK);
}
-const char *rt_addr_n2a(int af, const void *addr, char *buf, int buflen)
+const char *rt_addr_n2a(int af, int len, const void *addr, char *buf, int buflen)
{
switch (af) {
case AF_INET:
case AF_INET6:
return inet_ntop(af, addr, buf, buflen);
#ifndef ANDROID
+ case AF_MPLS:
+ return mpls_ntop(af, addr, buf, buflen);
case AF_IPX:
return ipx_ntop(af, addr, buf, buflen);
case AF_DECnet:
@@ -654,11 +722,52 @@ const char *rt_addr_n2a(int af, const void *addr, char *buf, int buflen)
return dnet_ntop(af, &dna, buf, buflen);
}
#endif
+ case AF_PACKET:
+ return ll_addr_n2a(addr, len, ARPHRD_VOID, buf, buflen);
default:
return "???";
}
}
+int read_family(const char *name)
+{
+ int family = AF_UNSPEC;
+ if (strcmp(name, "inet") == 0)
+ family = AF_INET;
+ else if (strcmp(name, "inet6") == 0)
+ family = AF_INET6;
+ else if (strcmp(name, "dnet") == 0)
+ family = AF_DECnet;
+ else if (strcmp(name, "link") == 0)
+ family = AF_PACKET;
+ else if (strcmp(name, "ipx") == 0)
+ family = AF_IPX;
+ else if (strcmp(name, "mpls") == 0)
+ family = AF_MPLS;
+ else if (strcmp(name, "bridge") == 0)
+ family = AF_BRIDGE;
+ return family;
+}
+
+const char *family_name(int family)
+{
+ if (family == AF_INET)
+ return "inet";
+ if (family == AF_INET6)
+ return "inet6";
+ if (family == AF_DECnet)
+ return "dnet";
+ if (family == AF_PACKET)
+ return "link";
+ if (family == AF_IPX)
+ return "ipx";
+ if (family == AF_MPLS)
+ return "mpls";
+ if (family == AF_BRIDGE)
+ return "bridge";
+ return "???";
+}
+
#ifdef RESOLVE_HOSTNAMES
struct namerec
{
@@ -727,7 +836,7 @@ const char *format_host(int af, int len, const void *addr,
return n;
}
#endif
- return rt_addr_n2a(af, addr, buf, buflen);
+ return rt_addr_n2a(af, len, addr, buf, buflen);
}
@@ -768,6 +877,30 @@ __u8* hexstring_a2n(const char *str, __u8 *buf, int blen)
return buf;
}
+int addr64_n2a(__u64 addr, char *buff, size_t len)
+{
+ __u16 *words = (__u16 *)&addr;
+ __u16 v;
+ int i, ret;
+ size_t written = 0;
+ char *sep = ":";
+
+ for (i = 0; i < 4; i++) {
+ v = ntohs(words[i]);
+
+ if (i == 3)
+ sep = "";
+
+ ret = snprintf(&buff[written], len - written, "%x%s", v, sep);
+ if (ret < 0)
+ return ret;
+
+ written += ret;
+ }
+
+ return written;
+}
+
int print_timestamp(FILE *fp)
{
struct timeval tv;
@@ -794,7 +927,6 @@ int print_timestamp(FILE *fp)
int cmdlineno;
-#ifndef ANDROID
/* Like glibc getline but handle continuation lines and comments */
ssize_t getcmdline(char **linep, size_t *lenp, FILE *in)
{
@@ -839,7 +971,6 @@ ssize_t getcmdline(char **linep, size_t *lenp, FILE *in)
}
return cc;
}
-#endif
/* split command line into argument vector */
int makeargs(char *line, char *argv[], int maxargs)
@@ -848,12 +979,31 @@ int makeargs(char *line, char *argv[], int maxargs)
char *cp;
int argc = 0;
- for (cp = strtok(line, ws); cp; cp = strtok(NULL, ws)) {
+ for (cp = line + strspn(line, ws); *cp; cp += strspn(cp, ws)) {
if (argc >= (maxargs - 1)) {
fprintf(stderr, "Too many arguments to command\n");
exit(1);
}
+
+ /* word begins with quote */
+ if (*cp == '\'' || *cp == '"') {
+ char quote = *cp++;
+
+ argv[argc++] = cp;
+ /* find ending quote */
+ cp = strchr(cp, quote);
+ if (cp == NULL) {
+ fprintf(stderr, "Unterminated quoted string\n");
+ exit(1);
+ }
+ *cp++ = 0;
+ continue;
+ }
+
argv[argc++] = cp;
+ /* find end of word */
+ cp += strcspn(cp, ws);
+ *cp++ = 0;
}
argv[argc] = NULL;
diff --git a/man/man3/libnetlink.3 b/man/man3/libnetlink.3
index e999bd68..99be9cc9 100644
--- a/man/man3/libnetlink.3
+++ b/man/man3/libnetlink.3
@@ -33,7 +33,8 @@ int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n, pid_t peer,
void *jarg)
.sp
int rtnl_listen(struct rtnl_handle *rtnl,
- int (*handler)(struct sockaddr_nl *,struct nlmsghdr *n, void *),
+ int (*handler)(struct sockaddr_nl *, struct rtnl_ctrl_data *,
+ struct nlmsghdr *n, void *),
void *jarg)
.sp
int rtnl_from_file(FILE *rtnl,
@@ -108,8 +109,8 @@ rtnl_listen
Receive netlink data after a request and pass it to
.I handler.
.B handler
-is a callback that gets the message source address, the message itself,
-and the
+is a callback that gets the message source address, ancillary data, the message
+itself, and the
.B jarg
cookie as arguments. It will get called for all received messages.
Only one message bundle is received. If there is a message
diff --git a/man/man8/Makefile b/man/man8/Makefile
index 152747a3..2f776406 100644
--- a/man/man8/Makefile
+++ b/man/man8/Makefile
@@ -1,7 +1,8 @@
TARGETS = ip-address.8 ip-link.8 ip-route.8
-MAN8PAGES = $(TARGETS) ip.8 arpd.8 lnstat.8 routel.8 rtacct.8 rtmon.8 ss.8 \
- tc.8 tc-bfifo.8 tc-cbq.8 tc-cbq-details.8 tc-choke.8 tc-codel.8 \
+MAN8PAGES = $(TARGETS) ip.8 arpd.8 lnstat.8 routel.8 rtacct.8 rtmon.8 rtpr.8 ss.8 \
+ tc.8 tc-bfifo.8 tc-bpf.8 tc-cbq.8 tc-cbq-details.8 tc-choke.8 tc-codel.8 \
+ tc-fq.8 \
tc-drr.8 tc-ematch.8 tc-fq_codel.8 tc-hfsc.8 tc-htb.8 tc-pie.8 \
tc-mqprio.8 tc-netem.8 tc-pfifo.8 tc-pfifo_fast.8 tc-prio.8 tc-red.8 \
tc-sfb.8 tc-sfq.8 tc-stab.8 tc-tbf.8 \
@@ -9,7 +10,11 @@ MAN8PAGES = $(TARGETS) ip.8 arpd.8 lnstat.8 routel.8 rtacct.8 rtmon.8 ss.8 \
ip-addrlabel.8 ip-fou.8 ip-gue.8 ip-l2tp.8 \
ip-maddress.8 ip-monitor.8 ip-mroute.8 ip-neighbour.8 \
ip-netns.8 ip-ntable.8 ip-rule.8 ip-tunnel.8 ip-xfrm.8 \
- ip-tcp_metrics.8 ip-netconf.8 ip-token.8
+ ip-tcp_metrics.8 ip-netconf.8 ip-token.8 \
+ tipc.8 tipc-bearer.8 tipc-link.8 tipc-media.8 tipc-nametable.8 \
+ tipc-node.8 tipc-socket.8 \
+ tc-basic.8 tc-cgroup.8 tc-flow.8 tc-flower.8 tc-fw.8 tc-route.8 \
+ tc-tcindex.8 tc-u32.8
all: $(TARGETS)
diff --git a/man/man8/bridge.8 b/man/man8/bridge.8
index 4135d01a..0ec6f174 100644
--- a/man/man8/bridge.8
+++ b/man/man8/bridge.8
@@ -21,17 +21,18 @@ bridge \- show / manipulate bridge addresses and devices
\fB\-V\fR[\fIersion\fR] |
\fB\-s\fR[\fItatistics\fR] |
\fB\-n\fR[\fIetns\fR] name |
+\fB\-b\fR[\fIatch\fR] filename }
.ti -8
.BR "bridge link set"
-.B dev
+.B dev
.IR DEV
.IR " [ "
-.B cost
+.B cost
.IR COST " ] [ "
-.B priority
-.IR PRIO " ] [ "
-.B state
+.B priority
+.IR PRIO " ] [ "
+.B state
.IR STATE "] ["
.BR guard " { " on " | " off " } ] [ "
.BR hairpin " { " on " | " off " } ] [ "
@@ -41,21 +42,21 @@ bridge \- show / manipulate bridge addresses and devices
.BR learning_sync " { " on " | " off " } ] [ "
.BR flood " { " on " | " off " } ] [ "
.BR hwmode " { " vepa " | " veb " } ] [ "
-.BR self " ] [ " master " ] "
+.BR self " ] [ " master " ] "
.ti -8
.BR "bridge link" " [ " show " ] [ "
-.B dev
+.B dev
.IR DEV " ]"
.ti -8
-.BR "bridge fdb" " { " add " | " append " | " del " } "
+.BR "bridge fdb" " { " add " | " append " | " del " | " replace " } "
.I LLADDR
-.B dev
+.B dev
.IR DEV " { "
-.BR local " | " temp " } { "
-.BR self " } { " router " } [ "
-.B dst
+.BR local " | " temp " } [ "
+.BR self " ] [ " master " ] [ " router " ] [ " use " ] [ "
+.B dst
.IR IPADDR " ] [ "
.B vni
.IR VNI " ] ["
@@ -66,36 +67,38 @@ bridge \- show / manipulate bridge addresses and devices
.ti -8
.BR "bridge fdb" " [ " show " ] [ "
-.B dev
+.B dev
.IR DEV " ]"
.ti -8
.BR "bridge mdb" " { " add " | " del " } "
-.B dev
+.B dev
.IR DEV
.B port
.IR PORT
.B grp
.IR GROUP " [ "
-.BR permanent " | " temp " ]"
+.BR permanent " | " temp " ] [ "
+.B vid
+.IR VID " ] "
.ti -8
.BR "bridge mdb show " [ "
-.B dev
+.B dev
.IR DEV " ]"
.ti -8
.BR "bridge vlan" " { " add " | " del " } "
-.B dev
+.B dev
.IR DEV
-.B vid
+.B vid
.IR VID " [ "
-.BR pvid " ] [ " untagged " ] [ "
-.BR self " ] [ " master " ] "
+.BR pvid " ] [ " untagged " ] [ "
+.BR self " ] [ " master " ] "
.ti -8
.BR "bridge vlan" " [ " show " ] [ "
-.B dev
+.B dev
.IR DEV " ]"
.ti -8
@@ -135,6 +138,16 @@ to
.RI "-n[etns] " NETNS " [ " OPTIONS " ] " OBJECT " { " COMMAND " | "
.BR help " }"
+.TP
+.BR "\-b", " \-batch " <FILENAME>
+Read commands from provided file or standard input and invoke them.
+First failure will cause termination of bridge command.
+
+.TP
+.BR "\-force"
+Don't terminate bridge command on errors in batch mode.
+If there were any errors during execution of the commands, the application
+return code will be non zero.
.SH BRIDGE - COMMAND SYNTAX
@@ -146,7 +159,7 @@ to
- Bridge port.
.TP
-.B fdb
+.B fdb
- Forwarding Database entry.
.TP
@@ -224,7 +237,7 @@ state the port for list for STP BPDUs and drop all other traffic.
.B 2
- STP LEARNING state. Only valid if STP is enabled on the bridge. In this
state the port will accept traffic only for the purpose of updating MAC
-adress tables.
+address tables.
.sp
.B 3
@@ -295,6 +308,9 @@ link setting is configured on specified physical device
.BI master
link setting is configured on the software bridge (default)
+.TP
+.BR "\-t" , " \-timestamp"
+display current time when using monitor option.
.SS bridge link show - list bridge port configuration.
@@ -303,7 +319,7 @@ This command displays the current bridge port configuration and flags.
.SH bridge fdb - forwarding database management
.B fdb
-objects contain known Ethernet addresses on a link.
+objects contain known Ethernet addresses on a link.
.P
The corresponding commands display fdb entries, add new entries,
@@ -323,7 +339,11 @@ the Ethernet MAC address.
the interface to which this address is associated.
.B self
-- the address is associated with a software fdb (default)
+- the address is associated with the port driver's fdb. Usually hardware.
+.sp
+
+.B master
+- the address is associated with the master device's fdb. Usually software (default).
.sp
.B router
@@ -332,6 +352,11 @@ Valid if the referenced device is a VXLAN type device and has
route shortcircuit enabled.
.sp
+.B use
+- the address is in use. User space can use this option to
+indicate to the kernel that the fdb entry is in use.
+.sp
+
.in -8
The next command line parameters apply only
when the specified device
@@ -359,7 +384,7 @@ If omitted the default value is used.
.BI via " DEVICE"
device name of the outgoing interface for the
VXLAN device driver to reach the
-remote VXLAN tunnel endpoint.
+remote VXLAN tunnel endpoint.
.SS bridge fdb append - append a forwarding database entry
This command adds a new fdb entry with an already known
@@ -373,14 +398,21 @@ sends a copy of the data packet to each entry found.
.PP
The arguments are the same as with
-.BR "bridge fdb add" ,
+.BR "bridge fdb add" .
.SS bridge fdb delete - delete a forwarding database entry
This command removes an existing fdb entry.
.PP
The arguments are the same as with
-.BR "bridge fdb add" ,
+.BR "bridge fdb add" .
+
+.SS bridge fdb replace - replace a forwarding database entry
+If no matching entry is found, a new one will be created instead.
+
+.PP
+The arguments are the same as with
+.BR "bridge fdb add" .
.SS bridge fdb show - list forwarding entries.
@@ -426,6 +458,10 @@ the port.
- the mdb entry is temporary (default)
.sp
+.TP
+.BI vid " VID"
+the VLAN ID which is known to have members of this multicast group.
+
.in -8
.SS bridge mdb delete - delete a multicast group database entry
This command removes an existing mdb entry.
@@ -512,7 +548,7 @@ This command displays the current VLAN filter table.
The
.B bridge
-utility can monitor the state of devices and addresses
+utility can monitor the state of devices and addresses
continuously. This option has a slightly different format.
Namely, the
.B monitor
@@ -524,7 +560,7 @@ command is the first in the command line and then the object list follows:
.I OBJECT-LIST
is the list of object types that we want to monitor.
It may contain
-.BR link ", " fdb ", and " mdb "."
+.BR link ", " fdb ", and " mdb "."
If no
.B file
argument is given,
@@ -535,8 +571,7 @@ described in previous sections.
.P
If a file name is given, it does not listen on RTNETLINK,
but opens the file containing RTNETLINK messages saved in binary format
-and dumps them. Such a history file can be generated with the
-
+and dumps them.
.SH NOTES
This command uses facilities added in Linux 3.0.
diff --git a/man/man8/genl.8 b/man/man8/genl.8
new file mode 100644
index 00000000..b9de594d
--- /dev/null
+++ b/man/man8/genl.8
@@ -0,0 +1,77 @@
+.TH GENL 8 "29 Oct 2015" "iproute2" "Linux"
+.SH NAME
+genl \- generic netlink utility frontend
+.SH SYNOPSIS
+.in +8
+.ti -8
+.BR genl " [ " -s [ tatistics "] ] [ " -d [ etails "] ] [ " -r [ aw "] ] " OBJECT
+
+.ti -8
+.BR genl " { " -V [ ersion "] | " -h [ elp "] }"
+
+.ti -8
+.IR OBJECT " := { "
+.B ctrl
+.IR CTRL_OPTS " }"
+
+.ti -8
+.IR CTRL_OPTS " := { "
+.BR help " | " list " | " monitor " | " get
+.IR PARMS " }"
+
+.ti -8
+.IR PARMS " := { "
+.B name
+.IR NAME " | "
+.B id
+.IR ID " }"
+.SH DESCRIPTION
+The
+.B genl
+utility provides a simple frontend to the generic netlink library. Although it's
+designed to support multiple
+.IR OBJECT s,
+for now only the
+.B ctrl
+object is available, which is used to query the generic netlink controller.
+.SS ctrl
+The generic netlink controller can be queried in various ways:
+.TP
+.B help
+This command just prints a help text for the
+.B ctrl
+object.
+.TP
+.B list
+Show the registered netlink users.
+.TP
+.B monitor
+Listen for generic netlink notifications.
+.TP
+.B get
+Query the controller for a given user, identified either by
+.BR name " or " id .
+.SH OPTIONS
+genl supports the following options.
+.TP
+.B \-h, \-help
+Show summary of options.
+.TP
+.B \-V, \-Version
+Show version of program.
+.TP
+.B \-s, \-stats, \-statistics
+Show object statistics.
+.TP
+.B \-d, \-details
+Show object details.
+.TP
+.B \-r, \-raw
+Dump raw output only.
+.SH SEE ALSO
+.BR ip (8)
+.br
+.SH AUTHOR
+genl was written by Jamal Hadi Salim <hadi@cyberus.ca>.
+.PP
+This manual page was written by Petr Sabata <contyk@redhat.com>.
diff --git a/man/man8/ifcfg.8 b/man/man8/ifcfg.8
new file mode 100644
index 00000000..1a3786c1
--- /dev/null
+++ b/man/man8/ifcfg.8
@@ -0,0 +1,48 @@
+.TH IFCFG 8 "September 24 2009" "iproute2" "Linux"
+.SH NAME
+ifcfg \- simplistic script which replaces ifconfig IP management
+.SH SYNOPSIS
+.ad l
+.in +8
+.ti -8
+.B ifcfg
+.RI "[ " DEVICE " ] [ " command " ] " ADDRESS " [ " PEER " ] "
+.sp
+
+.SH DESCRIPTION
+This manual page documents briefly the
+.B ifcfg
+command.
+.PP
+This is a simplistic script replacing one option of
+.B ifconfig
+, namely, IP address management. It not only adds
+addresses, but also carries out Duplicate Address Detection RFC-DHCP,
+sends unsolicited ARP to update the caches of other hosts sharing
+the interface, adds some control routes and restarts Router Discovery
+when it is necessary.
+
+.SH IFCONFIG - COMMAND SYNTAX
+
+.SS
+.TP
+.B DEVICE
+- it may have alias, suffix, separated by colon.
+
+.TP
+.B command
+- add, delete or stop.
+
+.TP
+.B ADDRESS
+- optionally followed by prefix length.
+
+.TP
+.B peer
+- optional peer address for point-to-point interfaces.
+
+.SH NOTES
+This script is not suitable for use with IPv6.
+
+.SH SEE ALSO
+.RB "IP Command reference " ip-cref.ps
diff --git a/man/man8/ifstat.8 b/man/man8/ifstat.8
new file mode 100644
index 00000000..e49d8680
--- /dev/null
+++ b/man/man8/ifstat.8
@@ -0,0 +1,59 @@
+.TH IFSTAT 8 "28 Oct 2015" "iproute2" "Linux"
+.SH NAME
+ifstat \- handy utility to read network interface statistics
+.SH SYNOPSIS
+.in +8
+.ti -8
+.BR ifstat " [ "
+.IR OPTIONS " ] [ " INTERFACE_LIST " ]"
+
+.ti -8
+.IR INTERFACE_LIST " := " INTERFACE_LIST " | " interface
+.SH DESCRIPTION
+\fBifstat\fP neatly prints out network interface statistics.
+The utility keeps records of the previous data displayed in history files and
+by default only shows difference between the last and the current call.
+Location of the history files defaults to /tmp/.ifstat.u$UID but may be
+overridden with the IFSTAT_HISTORY environment variable.
+.SH OPTIONS
+.TP
+.B \-h, \-\-help
+Show summary of options.
+.TP
+.B \-V, \-\-version
+Show version of program.
+.TP
+.B \-a, \-\-ignore
+Ignore the history file.
+.TP
+.B \-d, \-\-scan=SECS
+Sample statistics every SECS seconds.
+.TP
+.B \-e, \-\-errors
+Show errors.
+.TP
+.B \-n, \-\-nooutput
+Don't display any output. Update the history file only.
+.TP
+.B \-r, \-\-reset
+Reset history.
+.TP
+.B \-s, \-\-noupdate
+Don't update the history file.
+.TP
+.B \-t, \-\-interval=SECS
+Report average over the last SECS seconds.
+.TP
+.B \-z, \-\-zeros
+Show entries with zero activity.
+.SH ENVIRONMENT
+.TP
+.B IFSTAT_HISTORY
+If set, its value is interpreted as an alternate history file path.
+.SH SEE ALSO
+.BR ip (8)
+.br
+.SH AUTHOR
+ifstat was written by Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>.
+.PP
+This manual page was written by Petr Sabata <contyk@redhat.com>.
diff --git a/man/man8/ip-address.8.in b/man/man8/ip-address.8.in
index 6e46af88..159d9065 100644
--- a/man/man8/ip-address.8.in
+++ b/man/man8/ip-address.8.in
@@ -14,18 +14,26 @@ ip-address \- protocol address management
.sp
.ti -8
-.BR "ip address" " { " add " | " del " } "
-.IB IFADDR " dev " STRING
+.BR "ip address" " { " add " | " change " | " replace " } "
+.IB IFADDR " dev " IFNAME
+.RI "[ " LIFETIME " ] [ " CONFFLAG-LIST " ]"
.ti -8
-.BR "ip address" " { " show " | " flush " } [ " dev
-.IR STRING " ] [ "
+.BR "ip address del"
+.IB IFADDR " dev " IFNAME " [ " mngtmpaddr " ]"
+
+.ti -8
+.BR "ip address" " { " show " | " save " | " flush " } [ " dev
+.IR IFNAME " ] [ "
.B scope
.IR SCOPE-ID " ] [ "
.B to
.IR PREFIX " ] [ " FLAG-LIST " ] [ "
.B label
-.IR PATTERN " ]"
+.IR PATTERN " ] [ " up " ]"
+
+.ti -8
+.BR "ip address" " { " showdump " | " restore " }"
.ti -8
.IR IFADDR " := " PREFIX " | " ADDR
@@ -36,7 +44,7 @@ ip-address \- protocol address management
.B anycast
.IR ADDR " ] [ "
.B label
-.IR STRING " ] [ "
+.IR LABEL " ] [ "
.B scope
.IR SCOPE-ID " ]"
@@ -52,15 +60,33 @@ ip-address \- protocol address management
.IR FLAG " := "
.RB "[ " permanent " | " dynamic " | " secondary " | " primary " | \
[ - ] " tentative " | [ - ] " deprecated " | [ - ] " dadfailed " | "\
-temporary " ]"
+temporary " | " CONFFLAG-LIST " ]"
+
+.ti -8
+.IR CONFFLAG-LIST " := [ " CONFFLAG-LIST " ] " CONFFLAG
+
+.ti -8
+.IR CONFFLAG " := "
+.RB "[ " home " | " mngtmpaddr " | " nodad " | " noprefixroute " ]"
+
+.ti -8
+.IR LIFETIME " := [ "
+.BI valid_lft " LFT"
+.RB "| " preferred_lft
+.IR LFT " ]"
+
+.ti -8
+.IR LFT " := [ "
+.BR forever " |"
+.IR SECONDS " ]"
.SH "DESCRIPTION"
The
.B address
-is a protocol (IP or IPv6) address attached
-to a network device. Each device must have at least one address
-to use the corresponding protocol. It is possible to have several
-different addresses attached to one device. These addresses are not
+is a protocol (IPv4 or IPv6) address attached
+to a network device. Each device must have at least one address
+to use the corresponding protocol. It is possible to have several
+different addresses attached to one device. These addresses are not
discriminated, so that the term
.B alias
is not quite appropriate for them and we do not use it in this document.
@@ -73,7 +99,7 @@ and deletes old ones.
.SS ip address add - add new protocol address.
.TP
-.BI dev " NAME"
+.BI dev " IFNAME "
the name of the device to add the address to.
.TP
@@ -107,7 +133,7 @@ instead of the broadcast address. In this case, the broadcast address
is derived by setting/resetting the host bits of the interface prefix.
.TP
-.BI label " NAME"
+.BI label " LABEL"
Each address may be tagged with a label string.
In order to preserve compatibility with Linux-2.0 net aliases,
this string must coincide with the name of the device or must be prefixed
@@ -125,7 +151,7 @@ Predefined scope values are:
- the address is globally valid.
.sp
.B site
-- (IPv6 only) the address is site local, i.e. it is
+- (IPv6 only, deprecated) the address is site local, i.e. it is
valid inside this site.
.sp
.B link
@@ -135,6 +161,46 @@ valid inside this site.
- the address is valid only inside this host.
.in -8
+.TP
+.BI valid_lft " LFT"
+the valid lifetime of this address; see section 5.5.4 of
+RFC 4862. When it expires, the address is removed by the kernel.
+Defaults to
+.BR "forever" .
+
+.TP
+.BI preferred_lft " LFT"
+the preferred lifetime of this address; see section 5.5.4
+of RFC 4862. When it expires, the address is no longer used for new
+outgoing connections. Defaults to
+.BR "forever" .
+
+.TP
+.B home
+(IPv6 only) designates this address the "home address" as defined in
+RFC 6275.
+
+.TP
+.B mngtmpaddr
+(IPv6 only) make the kernel manage temporary addresses created from this one as
+template on behalf of Privacy Extensions (RFC3041). For this to become active,
+the \fBuse_tempaddr\fP sysctl setting has to be set to a value greater than
+zero. The given address needs to have a prefix length of 64. This flag allows
+privacy extensions to be used in a manually configured network, just as if
+stateless auto-configuration were active.
+
+.TP
+.B nodad
+(IPv6 only) do not perform Duplicate Address Detection (RFC 4862) when
+adding this address.
+
+.TP
+.B noprefixroute
+Do not automatically create a route for the network prefix of the added
+address, and don't search for one to delete when removing the address. Changing
+an address to add this flag will remove the automatically added prefix route,
+changing it to remove this flag will create the prefix route automatically.
+
.SS ip address delete - delete protocol address
.B Arguments:
coincide with the arguments of
@@ -145,7 +211,7 @@ If no arguments are given, the first address is deleted.
.SS ip address show - look at protocol addresses
.TP
-.BI dev " NAME " (default)
+.BI dev " IFNAME " (default)
name of device.
.TP
@@ -219,36 +285,53 @@ The difference is that it does not run when no arguments are given.
.PP
.B Warning:
-This command (and other
+This command and other
.B flush
-commands described below) is pretty dangerous. If you make a mistake,
-it will not forgive it, but will cruelly purge all the addresses.
+commands are unforgiving. They will cruelly purge all the addresses.
.PP
With the
.B -statistics
option, the command becomes verbose. It prints out the number of deleted
-addresses and the number of rounds made to flush the address list. If
-this option is given twice,
+addresses and the number of rounds made to flush the address list.
+If this option is given twice,
.B ip address flush
also dumps all the deleted addresses in the format described in the
previous subsection.
.SH "EXAMPLES"
.PP
+ip address show
+.RS 4
+Shows IPv4 and IPv6 addresses assigned to all network interfaces. The 'show'
+subcommand can be omitted.
+.RE
+.PP
+ip address show up
+.RS 4
+Same as above except that only addresses assigned to active network interfaces
+are shown.
+.RE
+.PP
ip address show dev eth0
.RS 4
-Shows the addresses assigned to network interface eth0
+Shows IPv4 and IPv6 addresses assigned to network interface eth0.
+.RE
+.PP
+ip address add 2001:0db8:85a3::0370:7334/64 dev eth1
+.RS 4
+Adds an IPv6 address to network interface eth1.
.RE
.PP
-ip addr add 2001:0db8:85a3::0370:7334/64 dev eth1
+ip address delete 2001:0db8:85a3::0370:7334/64 dev eth1
.RS 4
-Adds an IPv6 address to network interface eth1
+Delete the IPv6 address added above.
.RE
.PP
-ip addr flush dev eth4
+ip address flush dev eth4 scope global
.RS 4
-Removes all addresses from device eth4
+Removes all global IPv4 and IPv6 addresses from device eth4. Without 'scope
+global' it would remove all addresses including IPv6 link-local ones.
.RE
.SH SEE ALSO
diff --git a/man/man8/ip-addrlabel.8 b/man/man8/ip-addrlabel.8
index 5fc18fec..51ef5727 100644
--- a/man/man8/ip-addrlabel.8
+++ b/man/man8/ip-addrlabel.8
@@ -7,8 +7,8 @@ ip-addrlabel \- protocol address label management
.in +8
.ti -8
.B ip
-.RI "[ " OPTIONS " ]"
-.B addrlabel
+.RI "[ " OPTIONS " ]"
+.B addrlabel
.RI " { " COMMAND " | "
.BR help " }"
.sp
@@ -66,4 +66,3 @@ flush all address labels in the kernel. This does not restore any default settin
.SH AUTHOR
Manpage by Yoshifuji Hideaki / 吉藤英明
-
diff --git a/man/man8/ip-link.8.in b/man/man8/ip-link.8.in
index 5ad372c0..4d323435 100644
--- a/man/man8/ip-link.8.in
+++ b/man/man8/ip-link.8.in
@@ -21,7 +21,8 @@ ip-link \- network device configuration
\fB\-r\fR[\fIesolve\fR] |
\fB\-f\fR[\fIamily\fR] {
.BR inet " | " inet6 " | " ipx " | " dnet " | " link " } | "
-\fB\-o\fR[\fIneline\fR] }
+\fB\-o\fR[\fIneline\fR] |
+\fB\-br\fR[\fIief\fR] }
.ti -8
.BI "ip link add"
@@ -75,10 +76,15 @@ ip-link \- network device configuration
.BR ip6gretap " |"
.BR vti " |"
.BR nlmon " |"
-.BR ipvlan " ]"
+.BR ipvlan " |"
+.BR lowpan " |"
+.BR geneve " ]"
.ti -8
-.BI "ip link delete " DEVICE
+.BR "ip link delete " {
+.IR DEVICE " | "
+.BI "group " GROUP
+}
.BI type " TYPE"
.RI "[ " ARGS " ]"
@@ -96,6 +102,8 @@ ip-link \- network device configuration
.br
.BR multicast " { " on " | " off " } |"
.br
+.BR protodown " { " on " | " off " } |"
+.br
.B txqueuelen
.IR PACKETS " |"
.br
@@ -138,9 +146,13 @@ ip-link \- network device configuration
] |
.br
.B master
-.IR DEVICE
+.IR DEVICE " |"
+.br
+.BR nomaster " |"
.br
-.B nomaster
+.B addrgenmode { eui64 | none | stable_secret | random }
+.br
+.B link-netnsid ID
.BR " }"
@@ -180,6 +192,8 @@ Link types:
.sp
.B bond
- Bonding device
+.B can
+- Controller Area Network interface
.sp
.B dummy
- Dummy network interface
@@ -240,6 +254,12 @@ Link types:
.sp
.BR ipvlan
- Interface for L3 (IPv6/IPv4) based VLANs
+.sp
+.BR lowpan
+- Interface for 6LoWPAN (IPv6) over IEEE 802.15.4 / Bluetooth
+.sp
+.BR geneve
+- GEneric NEtwork Virtualization Encapsulation
.in -8
.TP
@@ -255,6 +275,110 @@ specifies the number of receive queues for new device.
specifies the desired index of the new virtual device. The link creation fails, if the index is busy.
.TP
+VLAN Type Support
+For a link of type
+.I VLAN
+the following additional arguments are supported:
+
+.BI "ip link add"
+.BI link " DEVICE "
+.BI name " NAME "
+.BI type " vlan "
+[
+.BI protocol " VLAN_PROTO "
+]
+.BI id " VLANID "
+[
+.BR reorder_hdr " { " on " | " off " } "
+]
+[
+.BR gvrp " { " on " | " off " } "
+]
+[
+.BR mvrp " { " on " | " off " } "
+]
+[
+.BR loose_binding " { " on " | " off " } "
+]
+[
+.BI ingress-qos-map " QOS-MAP "
+]
+[
+.BI egress-qos-map " QOS-MAP "
+]
+
+.in +8
+.sp
+.BI protocol " VLAN_PROTO "
+- either 802.1Q or 802.1ad.
+
+.BI id " VLANID "
+- specifies the VLAN Identifier to use. Note that numbers with a leading " 0 " or " 0x " are interpreted as octal or hexadecimal, respectively.
+
+.BR reorder_hdr " { " on " | " off " } "
+- specifies whether ethernet headers are reordered or not (default is
+.BR on ")."
+
+.in +4
+If
+.BR reorder_hdr " is " on
+then VLAN header will be not inserted immediately but only before passing to the
+physical device (if this device does not support VLAN offloading), the similar
+on the RX direction - by default the packet will be untagged before being
+received by VLAN device. Reordering allows to accelerate tagging on egress and
+to hide VLAN header on ingress so the packet looks like regular Ethernet packet,
+at the same time it might be confusing for packet capture as the VLAN header
+does not exist within the packet.
+
+VLAN offloading can be checked by
+.BR ethtool "(8):"
+.in +4
+.sp
+.B ethtool -k
+<phy_dev> |
+.RB grep " tx-vlan-offload"
+.sp
+.in -4
+where <phy_dev> is the physical device to which VLAN device is bound.
+.in -4
+
+.BR gvrp " { " on " | " off " } "
+- specifies whether this VLAN should be registered using GARP VLAN Registration Protocol.
+
+.BR mvrp " { " on " | " off " } "
+- specifies whether this VLAN should be registered using Multiple VLAN Registration Protocol.
+
+.BR loose_binding " { " on " | " off " } "
+- specifies whether the VLAN device state is bound to the physical device state.
+
+.BI ingress-qos-map " QOS-MAP "
+- defines a mapping of VLAN header prio field to the Linux internal packet
+priority on incoming frames. The format is FROM:TO with multiple mappings
+separated by spaces.
+
+.BI egress-qos-map " QOS-MAP "
+- defines a mapping of Linux internal packet priority to VLAN header prio field
+but for outgoing frames. The format is the same as for ingress-qos-map.
+.in +4
+
+Linux packet priority can be set by
+.BR iptables "(8):"
+.in +4
+.sp
+.B iptables
+-t mangle -A POSTROUTING [...] -j CLASSIFY --set-class 0:4
+.sp
+.in -4
+and this "4" priority can be used in the egress qos mapping to set VLAN prio "5":
+.sp
+.in +4
+.B ip
+link set veth0.10 type vlan egress 4:5
+.in -4
+.in -4
+.in -8
+
+.TP
VXLAN Type Support
For a link of type
.I VXLAN
@@ -262,35 +386,44 @@ the following additional arguments are supported:
.BI "ip link add " DEVICE
.BI type " vxlan " id " ID"
-.R " [ "
+[
.BI dev " PHYS_DEV "
.RB " ] [ { " group " | " remote " } "
.I IPADDR
-.R " ] [ "
-.BI local " IPADDR "
-.R " ] [ "
+] [
+.B local
+.RI "{ " IPADDR " | " any " } "
+] [
.BI ttl " TTL "
-.R " ] [ "
+] [
.BI tos " TOS "
-.R " ] [ "
-.BI port " MIN MAX "
-.R " ] [ "
+] [
+.BI dstport " PORT "
+] [
+.BI srcport " MIN MAX "
+] [
.I "[no]learning "
-.R " ] [ "
+] [
.I "[no]proxy "
-.R " ] [ "
+] [
.I "[no]rsc "
-.R " ] [ "
+] [
.I "[no]l2miss "
-.R " ] [ "
+] [
.I "[no]l3miss "
-.R " ] [ "
+] [
+.I "[no]udpcsum "
+] [
+.I "[no]udp6zerocsumtx "
+] [
+.I "[no]udp6zerocsumrx "
+] [
.BI ageing " SECONDS "
-.R " ] [ "
+] [
.BI maxaddress " NUMBER "
-.R " ] [ "
+] [
.B gbp
-.R " ]"
+]
.in +8
.sp
@@ -329,7 +462,11 @@ parameter.
- specifies the TOS value to use in outgoing packets.
.sp
-.BI port " MIN MAX"
+.BI dstport " PORT"
+- specifies the UDP destination port to communicate to the remote VXLAN tunnel endpoint.
+
+.sp
+.BI srcport " MIN MAX"
- specifies the range of port numbers to use as UDP
source ports to communicate to the remote VXLAN tunnel endpoint.
@@ -355,6 +492,18 @@ are entered into the VXLAN device forwarding database.
- specifies if netlink IP ADDR miss notifications are generated.
.sp
+.I [no]udpcsum
+- specifies if UDP checksum is filled in
+
+.sp
+.I [no]udp6zerocsumtx
+- skip UDP checksum calculation for transmitted packets over IPv6
+
+.sp
+.I [no]udp6zerocsumrx
+- allow incoming UDP packets over IPv6 with zero checksum field
+
+.sp
.BI ageing " SECONDS"
- specifies the lifetime in seconds of FDB entries learnt by the kernel.
@@ -416,17 +565,17 @@ the following additional arguments are supported:
.BI "ip link add " DEVICE
.BR type " { gre | ipip | sit } "
.BI " remote " ADDR " local " ADDR
-.R " [ "
+[
.BR encap " { fou | gue | none } "
-.R " ] [ "
+] [
.BI "encap-sport { " PORT " | auto } "
-.R " ] [ "
+] [
.BI "encap-dport " PORT
-.R " ] [ "
+] [
.I " [no]encap-csum "
-.R " ] [ "
+] [
.I " [no]encap-remcsum "
-.R " ]"
+]
.in +8
.sp
@@ -472,25 +621,25 @@ the following additional arguments are supported:
.BI "ip link add " DEVICE
.BI type " { ip6gre | ip6gretap } " remote " ADDR " local " ADDR
-.R " [ "
+[
.I "[i|o]seq]"
-.R " ] [ "
+] [
.I "[i|o]key" KEY
-.R " ] [ "
+] [
.I " [i|o]csum "
-.R " ] [ "
+] [
.BI hoplimit " TTL "
-.R " ] [ "
+] [
.BI encaplimit " ELIM "
-.R " ] [ "
+] [
.BI tclass " TCLASS "
-.R " ] [ "
+] [
.BI flowlabel " FLOWLABEL "
-.R " ] [ "
+] [
.BI "dscp inherit"
-.R " ] [ "
+] [
.BI dev " PHYS_DEV "
-.R " ]"
+]
.in +8
.sp
@@ -587,16 +736,103 @@ the following additional arguments are supported:
.BI mode " MODE "
- specifies the mode (datagram or connected) to use.
-.SS ip link delete - delete virtual link
-.I DEVICE
-specifies the virtual device to act operate on.
-.I TYPE
-specifies the type of the device.
+.TP
+GENEVE Type Support
+For a link of type
+.I GENEVE
+the following additional arguments are supported:
+.BI "ip link add " DEVICE
+.BI type " geneve " id " ID " remote " IPADDR"
+[
+.BI ttl " TTL "
+] [
+.BI tos " TOS "
+]
+
+.in +8
+.sp
+.BI id " VNI "
+- specifies the Virtual Network Identifier to use.
+
+.sp
+.BI remote " IPADDR"
+- specifies the unicast destination IP address to use in outgoing packets.
+
+.sp
+.BI ttl " TTL"
+- specifies the TTL value to use in outgoing packets.
+
+.sp
+.BI tos " TOS"
+- specifies the TOS value to use in outgoing packets.
+
+.in -8
+
+.TP
+MACVLAN and MACVTAP Type Support
+For a link of type
+.I MACVLAN
+or
+.I MACVTAP
+the following additional arguments are supported:
+
+.BI "ip link add link " DEVICE " name " NAME
+.BR type " { " macvlan " | " macvtap " } "
+.BR mode " { " private " | " vepa " | " bridge " | " passthru
+.BR " [ " nopromisc " ] } "
+
+.in +8
+.sp
+.BR type " { " macvlan " | " macvtap " } "
+- specifies the link type to use.
+.BR macvlan " creates just a virtual interface, while "
+.BR macvtap " in addition creates a character device "
+.BR /dev/tapX " to be used just like a " tuntap " device."
+
+.B mode private
+- Do not allow communication between
+.B macvlan
+instances on the same physical interface, even if the external switch supports
+hairpin mode.
+
+.B mode vepa
+- Virtual Ethernet Port Aggregator mode. Data from one
+.B macvlan
+instance to the other on the same physical interface is transmitted over the
+physical interface. Either the attached switch needs to support hairpin mode,
+or there must be a TCP/IP router forwarding the packets in order to allow
+communication. This is the default mode.
+
+.B mode bridge
+- In bridge mode, all endpoints are directly connected to each other,
+communication is not redirected through the physical interface's peer.
+
+.BR mode " " passthru " [ " nopromisc " ] "
+- This mode gives more power to a single endpoint, usually in
+.BR macvtap " mode. It is not allowed for more than one endpoint on the same "
+physical interface. All traffic will be forwarded to this endpoint, allowing
+virtio guests to change MAC address or set promiscuous mode in order to bridge
+the interface or create vlan interfaces on top of it. By default, this mode
+forces the underlying interface into promiscuous mode. Passing the
+.BR nopromisc " flag prevents this, so the promisc flag may be controlled "
+using standard tools.
+.in -8
+
+.SS ip link delete - delete virtual link
.TP
.BI dev " DEVICE "
-specifies the physical device to act operate on.
+specifies the virtual device to operate on.
+
+.TP
+.BI group " GROUP "
+specifies the group of virtual links to delete. Group 0 is not allowed to be
+deleted since it is the default group.
+
+.TP
+.BI type " TYPE "
+specifies the type of the device.
.SS ip link set - change device attributes
@@ -634,6 +870,12 @@ change the
flag on the device.
.TP
+.BR "protodown on " or " protodown off"
+change the
+.B PROTODOWN
+state on the device. Indicates that a protocol error has been detected on the port. Switch drivers can react to this error by doing a phys down on the switch port.
+
+.TP
.BR "dynamic on " or " dynamic off"
change the
.B DYNAMIC
@@ -787,6 +1029,26 @@ set master device of the device (enslave device).
.BI nomaster
unset master device of the device (release device).
+.TP
+.BI addrgenmode " eui64|none|stable_secret|random"
+set the IPv6 address generation mode
+
+.I eui64
+- use a Modified EUI-64 format interface identifier
+
+.I none
+- disable automatic address generation
+
+.I stable_secret
+- generate the interface identifier based on a preset /proc/sys/net/ipv6/conf/{default,DEVICE}/stable_secret
+
+.I random
+- like stable_secret, but auto-generate a new random secret if none is set
+
+.TP
+.BR "link-netnsid "
+set peer netnsid for a cross-netns interface
+
.PP
.B Warning:
If multiple parameter changes are requested,
@@ -839,7 +1101,7 @@ output more statistics about packet usage.
output more detailed information.
.TP
-.BR "\-h", " \-human", " \-human-readble"
+.BR "\-h", " \-human", " \-human-readable"
output statistics with human readable values number followed by suffix
.TP
@@ -856,7 +1118,7 @@ specifies which help of link type to dislpay.
.SS
.I GROUP
may be a number or a string from the file
-.B /etc/iproute2/group
+.B @SYSCONFDIR@/group
which can be manually filled.
.SH "EXAMPLES"
@@ -909,11 +1171,19 @@ Creates an IPIP that is encapsulated with Generic UDP Encapsulation,
and the outer UDP checksum and remote checksum offload are enabled.
.RE
+.PP
+ip link add link wpan0 lowpan0 type lowpan
+.RS 4
+Creates a 6LoWPAN interface named lowpan0 on the underlying
+IEEE 802.15.4 device wpan0.
+.RE
.SH SEE ALSO
.br
.BR ip (8),
-.BR ip-netns (8)
+.BR ip-netns (8),
+.BR ethtool (8),
+.BR iptables (8)
.SH AUTHOR
Original Manpage by Michail Litvak <mci@owl.openwall.com>
diff --git a/man/man8/ip-monitor.8 b/man/man8/ip-monitor.8
index 1de0ca92..d2bd381a 100644
--- a/man/man8/ip-monitor.8
+++ b/man/man8/ip-monitor.8
@@ -12,6 +12,10 @@ ip-monitor, rtmon \- state monitoring
.IR OBJECT-LIST " ] ["
.BI file " FILENAME "
] [
+.BI label
+] [
+.BI all-nsid
+] [
.BI dev " DEVICE "
]
.sp
@@ -42,6 +46,10 @@ command is the first in the command line and then the object list follows:
.IR OBJECT-LIST " ] ["
.BI file " FILENAME "
] [
+.BI label
+] [
+.BI all-nsid
+] [
.BI dev " DEVICE "
]
@@ -49,7 +57,7 @@ command is the first in the command line and then the object list follows:
is the list of object types that we want to monitor.
It may contain
.BR link ", " address ", " route ", " mroute ", " prefix ", "
-.BR neigh ", " netconf " and " rule "."
+.BR neigh ", " netconf ", " rule " and " nsid "."
If no
.B file
argument is given,
@@ -59,6 +67,32 @@ described in previous sections.
.P
If the
+.BI label
+option is set, a prefix is displayed before each message to
+show the family of the message. For example:
+.sp
+.in +2
+[NEIGH]10.16.0.112 dev eth0 lladdr 00:04:23:df:2f:d0 REACHABLE
+[LINK]3: eth1: <BROADCAST,MULTICAST> mtu 1500 qdisc pfifo_fast state DOWN group default
+ link/ether 52:54:00:12:34:57 brd ff:ff:ff:ff:ff:ff
+.in -2
+.sp
+
+.P
+If the
+.BI all-nsid
+option is set, the program listens to all network namespaces that have a
+nsid assigned into the network namespace where the program is running.
+A prefix is displayed to show the network namespace where the message
+originates. Example:
+.sp
+.in +2
+[nsid 0]10.16.0.112 dev eth0 lladdr 00:04:23:df:2f:d0 REACHABLE
+.in -2
+.sp
+
+.P
+If the
.BI file
option is given, the program does not listen on RTNETLINK,
but opens the given file, and dumps its contents. The file
@@ -97,3 +131,5 @@ option is given, the program prints only events related to this device.
.SH AUTHOR
Original Manpage by Michail Litvak <mci@owl.openwall.com>
+.br
+Manpage revised by Nicolas Dichtel <nicolas.dichtel@6wind.com>
diff --git a/man/man8/ip-neighbour.8 b/man/man8/ip-neighbour.8
index b0fc0dd8..c9b0256e 100644
--- a/man/man8/ip-neighbour.8
+++ b/man/man8/ip-neighbour.8
@@ -33,9 +33,9 @@ ip-neighbour \- neighbour/arp tables management.
.SH DESCRIPTION
-The
+The
.B ip neigh
-command manipulates
+command manipulates
.I neighbour
objects that establish bindings between protocol addresses and
link layer addresses for hosts sharing the same link.
diff --git a/man/man8/ip-netns.8 b/man/man8/ip-netns.8
index 80a4ad12..c9b0fbc2 100644
--- a/man/man8/ip-netns.8
+++ b/man/man8/ip-netns.8
@@ -42,6 +42,9 @@ ip-netns \- process network namespace management
.ti -8
.BR "ip netns monitor"
+.ti -8
+.BR "ip netns list-id"
+
.SH DESCRIPTION
A network namespace is logically another copy of the network stack,
with its own routes, firewall rules, and network devices.
@@ -178,6 +181,13 @@ executing.
This command watches network namespace name addition and deletion events
and prints a line for each event it sees.
+.TP
+.B ip netns list-id - list network namespace ids (nsid)
+.sp
+Network namespace ids are used to identify a peer network namespace. This
+command displays nsid of the current network namespace and provides the
+corresponding iproute2 netns name (from /var/run/netns) if any.
+
.SH EXAMPLES
.PP
ip netns list
diff --git a/man/man8/ip-ntable.8 b/man/man8/ip-ntable.8
index d903a170..462e5896 100644
--- a/man/man8/ip-ntable.8
+++ b/man/man8/ip-ntable.8
@@ -55,7 +55,7 @@ ip-ntable - neighbour table configuration
.SH DESCRIPTION
.I ip ntable
-controls the parameters for the neighbour tables.
+controls the parameters for the neighbour tables.
.SS ip ntable show - list the ip neighbour tables
@@ -98,4 +98,4 @@ default value (3) to 8 packets.
.BR ip (8)
.SH AUTHOR
-Manpage by Stephen Hemminger
+Manpage by Stephen Hemminger
diff --git a/man/man8/ip-route.8.in b/man/man8/ip-route.8.in
index d53cc769..c764bfc8 100644
--- a/man/man8/ip-route.8.in
+++ b/man/man8/ip-route.8.in
@@ -80,19 +80,31 @@ replace " } "
.ti -8
.IR NH " := [ "
+.B encap
+.IR ENCAP " ] [ "
.B via
-.IR ADDRESS " ] [ "
+[
+.IR FAMILY " ] " ADDRESS " ] [ "
.B dev
.IR STRING " ] [ "
.B weight
.IR NUMBER " ] " NHFLAGS
.ti -8
+.IR FAMILY " := [ "
+.BR inet " | " inet6 " | " ipx " | " dnet " | " mpls " | " bridge " | " link " ]"
+
+.ti -8
.IR OPTIONS " := " FLAGS " [ "
.B mtu
.IR NUMBER " ] [ "
.B advmss
.IR NUMBER " ] [ "
+.B as
+[
+.B to
+]
+.IR ADDRESS " ] [ "
.B rtt
.IR TIME " ] [ "
.B rttvar
@@ -118,7 +130,11 @@ replace " } "
.B quickack
.IR BOOL " ] [ "
.B congctl
-.IR NAME " ]"
+.IR NAME " ] [ "
+.B pref
+.IR PREF " ] [ "
+.B expires
+.IR TIME " ]"
.ti -8
.IR TYPE " := [ "
@@ -148,6 +164,30 @@ throw " | " unreachable " | " prohibit " | " blackhole " | " nat " ]"
.IR FEATURES " := [ "
.BR ecn " | ]"
+.ti -8
+.IR PREF " := [ "
+.BR low " | " medium " | " high " ]"
+
+.ti -8
+.IR ENCAP " := [ "
+.IR ENCAP_MPLS " | " ENCAP_IP " ]"
+
+.ti -8
+.IR ENCAP_MPLS " := "
+.BR mpls " [ "
+.IR LABEL " ]"
+
+.ti -8
+.IR ENCAP_IP " := "
+.B ip
+.B id
+.IR TUNNEL_ID
+.B dst
+.IR REMOTE_IP " [ "
+.B tos
+.IR TOS " ] ["
+.B ttl
+.IR TTL " ]"
.SH DESCRIPTION
.B ip route
@@ -333,9 +373,10 @@ table by default.
the output device name.
.TP
-.BI via " ADDRESS"
-the address of the nexthop router. Actually, the sense of this field
-depends on the route type. For normal
+.BI via " [ FAMILY ] ADDRESS"
+the address of the nexthop router, in the address family FAMILY.
+Actually, the sense of this field depends on the route type. For
+normal
.B unicast
routes it is either the true next hop router or, if it is a direct
route installed in BSD compatibility mode, it can be a local address
@@ -472,7 +513,7 @@ is a complex value with its own syntax similar to the top level
argument lists:
.in +8
-.BI via " ADDRESS"
+.BI via " [ FAMILY ] ADDRESS"
- is the nexthop router.
.sp
@@ -551,9 +592,79 @@ to assign (or not to assign) protocol tags.
.B onlink
pretend that the nexthop is directly attached to this link,
even if it does not match any interface prefix.
+
+.TP
+.BI pref " PREF"
+the IPv6 route preference.
+.I PREF
+is a string specifying the route preference as defined in RFC4191 for Router
+Discovery messages. Namely:
+
+.in +8
+.B low
+- the route has the lowest priority
+.sp
+
+.B medium
+- the route has a default priority
+.sp
+
+.B high
+- the route has the highest priority
+.sp
+
+.TP
+.BI encap " ENCAPTYPE ENCAPHDR"
+attach tunnel encapsulation attributes to this route.
+.sp
+.I ENCAPTYPE
+is a string specifying the supported encapsulation type. Namely:
+
+.in +8
+.BI mpls
+- encapsulation type MPLS
+.sp
+.BI ip
+- IP encapsulation (Geneve, GRE, VXLAN, ...)
+.sp
+
+.in -8
+.I ENCAPHDR
+is a set of encapsulation attributes specific to the
+.I ENCAPTYPE.
+
+.in +8
+.B mpls
+.in +2
+.I MPLSLABEL
+- mpls label stack with labels separated by
+.I "/"
+.in -2
+.sp
+
+.B ip
+.in +2
+.B id
+.I TUNNEL_ID
+.B dst
+.IR REMOTE_IP " [ "
+.B tos
+.IR TOS " ] ["
+.B ttl
+.IR TTL " ]"
+.in -2
+.sp
+
+.in -8
.RE
.TP
+.BI expires " TIME " "(4.4+ only)"
+the route will be deleted after the expires time.
+.B Only
+supported for IPv6 at present.
+
+.TP
ip route delete
delete route
.RS
@@ -669,7 +780,7 @@ only list routes of this type.
only list routes going via this device.
.TP
-.BI via " PREFIX"
+.BI via " [ FAMILY ] PREFIX"
only list routes going via the nexthop routers selected by
.IR PREFIX "."
@@ -808,7 +919,11 @@ ip route add default via 192.168.1.1 dev eth0
Adds a default route (for all addresses) via the local gateway 192.168.1.1 that can
be reached on device eth0.
.RE
-
+.PP
+ip route add 10.1.1.0/30 encap mpls 200/300 via 10.1.1.1 dev eth0
+.RS 4
+Adds an IPv4 route with MPLS encapsulation attributes attached to it.
+.RE
.SH SEE ALSO
.br
.BR ip (8)
diff --git a/man/man8/ip-rule.8 b/man/man8/ip-rule.8
index dd925be6..b7008c6a 100644
--- a/man/man8/ip-rule.8
+++ b/man/man8/ip-rule.8
@@ -15,10 +15,13 @@ ip-rule \- routing policy database management
.ti -8
.B ip rule
-.RB " [ " list " | " add " | " del " | " flush " ]"
+.RB " [ " list " | " add " | " del " | " flush " | " save " ]"
.I SELECTOR ACTION
.ti -8
+.B ip rule " restore "
+
+.ti -8
.IR SELECTOR " := [ "
.B from
.IR PREFIX " ] [ "
@@ -41,7 +44,7 @@ ip-rule \- routing policy database management
.IR TABLE_ID " ] [ "
.B nat
.IR ADDRESS " ] [ "
-.BR prohibit " | " reject " | " unreachable " ] [ " realms
+.B realms
.RI "[" SRCREALM "/]" DSTREALM " ]"
.I SUPPRESSOR
@@ -59,7 +62,7 @@ ip-rule \- routing policy database management
.SH DESCRIPTION
.I ip rule
-manipulates rules
+manipulates rules
in the routing policy database control the route selection algorithm.
.P
@@ -265,6 +268,27 @@ This command has no arguments.
This command has no arguments.
The options list or lst are synonyms with show.
+.TP
+.B ip rule save
+save rules table information to stdout
+.RS
+This command behaves like
+.BR "ip rule show"
+except that the output is raw data suitable for passing to
+.BR "ip rule restore" .
+.RE
+
+.TP
+.B ip rule restore
+restore rules table information from stdin
+.RS
+This command expects to read a data stream as returned from
+.BR "ip rule save" .
+It will attempt to restore the rules table information exactly as
+it was at the time of the save. Any rules already in the table are
+left unchanged, and duplicates are not ignored.
+.RE
+
.SH SEE ALSO
.br
.BR ip (8)
diff --git a/man/man8/ip-tunnel.8 b/man/man8/ip-tunnel.8
index c97c28ca..8b746cb0 100644
--- a/man/man8/ip-tunnel.8
+++ b/man/man8/ip-tunnel.8
@@ -6,14 +6,12 @@ ip-tunnel - tunnel configuration
.ad l
.in +8
.ti -8
-.B ip
-.RI "[ " OPTIONS " ]"
-.B tunnel
-.RI " { " COMMAND " | "
-.BR help " }"
+.B ip tunnel help
.sp
.ti -8
-.BR "ip tunnel" " { " add " | " change " | " del " | " show " | " prl " }"
+.BR "ip "
+.RI "[ " OPTIONS " ]"
+.BR "tunnel" " { " add " | " change " | " del " | " show " | " prl " }"
.RI "[ " NAME " ]"
.br
.RB "[ " mode
@@ -29,7 +27,7 @@ ip-tunnel - tunnel configuration
.br
.RB "[ " encaplimit
.IR ELIM " ]"
-.RB "[ " ttl
+.RB "[ " ttl "|" hoplimit
.IR TTL " ]"
.br
.RB "[ " tos
@@ -50,7 +48,7 @@ ip-tunnel - tunnel configuration
.ti -8
.IR MODE " := "
-.RB " { " ipip " | " gre " | " sit " | " isatap " | " ip6ip6 " | " ipip6 " | " ip6gre " | " any " }"
+.RB " { " ipip " | " gre " | " sit " | " isatap " | " vti " | " ip6ip6 " | " ipip6 " | " ip6gre " | " vti6 " | " any " }"
.ti -8
.IR ADDR " := { " IP_ADDRESS " |"
@@ -107,10 +105,10 @@ select the tunnel device name.
set the tunnel mode. Available modes depend on the encapsulating address family.
.br
Modes for IPv4 encapsulation available:
-.BR ipip ", " sit ", " isatap " and " gre "."
+.BR ipip ", " sit ", " isatap ", " vti ", and " gre "."
.br
Modes for IPv6 encapsulation available:
-.BR ip6ip6 ", " ipip6 ", " ip6gre ", and " any "."
+.BR ip6ip6 ", " ipip6 ", " ip6gre ", " vti6 ", and " any "."
.TP
.BI remote " ADDRESS"
@@ -123,7 +121,9 @@ It must be an address on another interface of this host.
.TP
.BI ttl " N"
-set a fixed TTL
+.TP
+.BI hoplimit " N"
+set a fixed TTL (IPv4) or hoplimit (IPv6)
.I N
on tunneled packets.
.I N
@@ -218,7 +218,7 @@ The
.B seq
flag is equivalent to the combination
.BR "iseq oseq" .
-.B It isn't work. Don't use it.
+.B It doesn't work. Don't use it.
.TP
.BI encaplim " ELIM"
diff --git a/man/man8/ip-xfrm.8 b/man/man8/ip-xfrm.8
index c9d2a2e1..dae07288 100644
--- a/man/man8/ip-xfrm.8
+++ b/man/man8/ip-xfrm.8
@@ -121,7 +121,7 @@ ip-xfrm \- transform configuration
.ti -8
.IR ALGO " :="
-.RB "{ " enc " | " auth " } "
+.RB "{ " enc " | " auth " } "
.IR ALGO-NAME " " ALGO-KEYMAT " |"
.br
.B auth-trunc
@@ -257,6 +257,13 @@ ip-xfrm \- transform configuration
.B "ip xfrm policy count"
.ti -8
+.B "ip xfrm policy set"
+.RB "[ " hthresh4
+.IR LBITS " " RBITS " ]"
+.RB "[ " hthresh6
+.IR LBITS " " RBITS " ]"
+
+.ti -8
.IR SELECTOR " :="
.RB "[ " src
.IR ADDR "[/" PLEN "] ]"
@@ -357,9 +364,20 @@ ip-xfrm \- transform configuration
.BR required " | " use
.ti -8
-.BR "ip xfrm monitor" " [ " all " |"
+.BR "ip xfrm monitor" " ["
+.BI all-nsid
+] [
+.BI all
+ |
.IR LISTofXFRM-OBJECTS " ]"
+.ti -8
+.IR LISTofXFRM-OBJECTS " := [ " LISTofXFRM-OBJECTS " ] " XFRM-OBJECT
+
+.ti -8
+.IR XFRM-OBJECT " := "
+.BR acquire " | " expire " | " SA " | " policy " | " aevent " | " report
+
.in -8
.ad b
@@ -385,7 +403,6 @@ ip xfrm state deleteall delete all existing state in xfrm
ip xfrm state list print out the list of existing state in xfrm
ip xfrm state flush flush all state in xfrm
ip xfrm state count count all existing state in xfrm
-ip xfrm monitor state monitoring for xfrm objects
.TE
.TP
@@ -507,7 +524,9 @@ encapsulates packets with protocol
.BR espinudp " or " espinudp-nonike ","
.RI "using source port " SPORT ", destination port " DPORT
.RI ", and original address " OADDR "."
+
.sp
+.PP
.TS
l l.
ip xfrm policy add add a new policy
@@ -517,7 +536,6 @@ ip xfrm policy get get an existing policy
ip xfrm policy deleteall delete all existing xfrm policies
ip xfrm policy list print out the list of xfrm policies
ip xfrm policy flush flush policies
-ip xfrm policy count count existing policies
.TE
.TP
@@ -612,7 +630,65 @@ and inbound trigger
can be
.BR required " (default) or " use "."
+.sp
+.PP
+.TS
+l l.
+ip xfrm policy count count existing policies
+.TE
+
+.PP
+Use one or more -s options to display more details, including policy hash table
+information.
+
+.sp
+.PP
+.TS
+l l.
+ip xfrm policy set configure the policy hash table
+.TE
+
+.PP
+Security policies whose address prefix lengths are greater than or equal to the
+policy hash table thresholds are hashed. Others are stored in the
+policy_inexact chained list.
+
+.TP
+.I LBITS
+specifies the minimum local address prefix length of policies that are
+stored in the Security Policy Database hash table.
+
+.TP
+.I RBITS
+specifies the minimum remote address prefix length of policies that are
+stored in the Security Policy Database hash table.
+
+.sp
+.PP
+.TS
+l l.
+ip xfrm monitor state monitoring for xfrm objects
+.TE
+
+.PP
The xfrm objects to monitor can be optionally specified.
+.P
+If the
+.BI all-nsid
+option is set, the program listens to all network namespaces that have a
+nsid assigned into the network namespace where the program is running.
+A prefix is displayed to show the network namespace where the message
+originates. Example:
+.sp
+.in +2
+[nsid 1]Flushed state proto 0
+.in -2
+.sp
+
.SH AUTHOR
Manpage revised by David Ward <david.ward@ll.mit.edu>
+.br
+Manpage revised by Christophe Gouault <christophe.gouault@6wind.com>
+.br
+Manpage revised by Nicolas Dichtel <nicolas.dichtel@6wind.com>
diff --git a/man/man8/ip.8 b/man/man8/ip.8
index 4cd71de2..b1f69073 100644
--- a/man/man8/ip.8
+++ b/man/man8/ip.8
@@ -12,28 +12,30 @@ ip \- show / manipulate routing, devices, policy routing and tunnels
.sp
.ti -8
-.B ip
+.B ip
.RB "[ " -force " ] "
.BI "-batch " filename
.sp
.ti -8
.IR OBJECT " := { "
-.BR link " | " addr " | " addrlabel " | " route " | " rule " | " neigh " | "\
- ntable " | " tunnel " | " tuntap " | " maddr " | " mroute " | " mrule " | "\
+.BR link " | " address " | " addrlabel " | " route " | " rule " | " neigh " | "\
+ ntable " | " tunnel " | " tuntap " | " maddress " | " mroute " | " mrule " | "\
monitor " | " xfrm " | " netns " | " l2tp " | " tcp_metrics " }"
.sp
.ti -8
.IR OPTIONS " := { "
\fB\-V\fR[\fIersion\fR] |
+\fB\-h\fR[\fIuman-readable\fR] |
\fB\-s\fR[\fItatistics\fR] |
\fB\-r\fR[\fIesolve\fR] |
\fB\-f\fR[\fIamily\fR] {
.BR inet " | " inet6 " | " ipx " | " dnet " | " link " } | "
\fB\-o\fR[\fIneline\fR] |
\fB\-n\fR[\fIetns\fR] name |
-\fB\-a\fR[\fIll\fR] }
+\fB\-a\fR[\fIll\fR] |
+\fB\-c\fR[\fIolor\fR] }
.SH OPTIONS
@@ -45,6 +47,10 @@ Print the version of the
utility and exit.
.TP
+.BR "\-h", " \-human", " \-human-readable"
+output statistics with human readable values followed by suffix.
+
+.TP
.BR "\-b", " \-batch " <FILENAME>
Read commands from provided file or standard input and invoke them.
First failure will cause termination of ip.
@@ -66,14 +72,14 @@ Output more detailed information.
.TP
.BR "\-l" , " \-loops " <COUNT>
-Specify maximum number of loops the 'ip addr flush' logic
+Specify maximum number of loops the 'ip address flush' logic
will attempt before giving up. The default is 10.
Zero (0) means loop until all addresses are removed.
.TP
.BR "\-f" , " \-family " <FAMILY>
Specifies the protocol family to use. The protocol family identifier can be one of
-.BR "inet" , " inet6" , " bridge" , " ipx" , " dnet"
+.BR "inet" , " inet6" , " bridge" , " ipx" , " dnet" , " mpls"
or
.BR link .
If this option is not present,
@@ -115,6 +121,11 @@ shortcut for
.BR "\-family ipx" .
.TP
+.B \-M
+shortcut for
+.BR "\-family mpls" .
+
+.TP
.B \-0
shortcut for
.BR "\-family link" .
@@ -137,7 +148,7 @@ use the system's name resolver to print DNS names instead of
host addresses.
.TP
-.BR "\-n" , " \-net" , " \-netns " <NETNS>
+.BR "\-n" , " \-netns " <NETNS>
switches
.B ip
to the specified network namespace
@@ -160,6 +171,14 @@ to
.BR "\-a" , " \-all"
executes specified command over all objects, it depends if command supports this option.
+.TP
+.BR "\-c" , " \-color"
+Use color output.
+
+.TP
+.BR "\-t" , " \-timestamp"
+display current time when using monitor option.
+
.SH IP - COMMAND SYNTAX
.SS
@@ -235,7 +254,7 @@ executes specified command over all objects, it depends if command supports this
.PP
The names of all objects may be written in full or
-abbreviated form, for exampe
+abbreviated form, for example
.B address
can be abbreviated as
.B addr
@@ -265,6 +284,10 @@ Usually it is
or, if the objects of this class cannot be listed,
.BR "help" .
+.SH EXIT STATUS
+Exit status is 0 if command was successful, and 1 if there is a syntax error.
+If an error was reported by the kernel exit status is 2.
+
.SH HISTORY
.B ip
was written by Alexey N. Kuznetsov and added in Linux 2.2.
diff --git a/man/man8/lnstat.8 b/man/man8/lnstat.8
index 699ddf4c..acd5f4a2 100644
--- a/man/man8/lnstat.8
+++ b/man/man8/lnstat.8
@@ -9,9 +9,21 @@ This manual page documents briefly the
.B lnstat
command.
.PP
-\fBlnstat\fP is a generalized and more feature-complete replacement for the old rtstat program.
-In addition to routing cache statistics, it supports any kind of statistics the linux kernel
-exports via a file in /proc/net/stat/.
+\fBlnstat\fP is a generalized and more feature-complete replacement for the old
+rtstat program. It is commonly used to periodically print a selection of
+statistical values exported by the kernel.
+In addition to routing cache statistics, it supports any kind of statistics the
+linux kernel exports via a file in /proc/net/stat/.
+.PP
+Each file in /proc/net/stat/ contains a header line listing the column names.
+These names are used by \fBlnstat\fP as keys for selecting which statistics to
+print. For every CPU present in the system, a line follows which lists the
+actual values for each column of the file. \fBlnstat\fP sums these values up
+(which in fact are counters) before printing them. After each interval, only
+the difference to the last value is printed.
+.PP
+Files and columns may be selected by using the \fB-f\fP and \fB-k\fP
+parameters. By default, all columns of all files are printed.
.SH OPTIONS
lnstat supports the following options.
.TP
@@ -28,7 +40,7 @@ Print <count> number of intervals.
Dump list of available files/keys.
.TP
.B \-f, \-\-file <file>
-Statistics file to use.
+Statistics file to use, may be specified multiple times. By default all files in /proc/net/stat are scanned.
.TP
.B \-i, \-\-interval <intv>
Set interval to 'intv' seconds.
@@ -37,7 +49,9 @@ Set interval to 'intv' seconds.
Display results in JSON format
.TP
.B \-k, \-\-keys k,k,k,...
-Display only keys specified.
+Display only keys specified. Each key \fBk\fP is of the form \fB[file:]key\fP. If \fB<file>\fP
+is given, the search for the given key is limited to that file. Otherwise the first file containing
+the searched key is used.
.TP
.B \-s, \-\-subject [0-2]
Specify display of subject/header. '0' means no header at all, '1' prints a header only at start of the program and '2' prints a header every 20 lines.
@@ -66,6 +80,179 @@ Print a header at start and every 20 lines.
.TP
.B # lnstat -c -1 -i 1 -f rt_cache -k entries,in_hit,in_slow_tot
Display statistics for keys entries, in_hit and in_slow_tot of file rt_cache every second.
+
+.SH FILES
+.TP
+.B /proc/net/stat/arp_cache, /proc/net/stat/ndisc_cache
+Statistics around neighbor cache and ARP. \fBarp_cache\fP is for IPv4, \fBndisc_cache\fP is the same for IPv6.
+.sp
+.B entries
+Number of entries in the neighbor table.
+.sp
+.B allocs
+How many neighbor entries have been allocated.
+.sp
+.B destroys
+How many neighbor entries have been removed.
+.sp
+.B hash_grows
+How often the neighbor (hash) table was increased.
+.sp
+.B lookups
+How many lookups were performed.
+.sp
+.B hits
+How many \fBlookups\fP were successful.
+.sp
+.B res_failed
+How many neighbor lookups failed.
+.sp
+.B rcv_probes_mcast
+How many multicast neighbor solicitations were received. (IPv6 only.)
+.sp
+.B rcv_probes_ucast
+How many unicast neighbor solicitations were received. (IPv6 only.)
+.sp
+.B periodic_gc_runs
+How many garbage collection runs were executed.
+.sp
+.B forced_gc_runs
+How many forced garbage collection runs were executed. Happens when adding an
+entry and the table is too full.
+.sp
+.B unresolved_discards
+How many neighbor table entries were discarded due to lookup failure.
+.sp
+.B table_fulls
+Number of table overflows. Happens if table is full and forced GC run (see
+\fBforced_gc_runs\fP) has failed.
+
+.TP
+.B /proc/net/stat/ip_conntrack, /proc/net/stat/nf_conntrack
+Conntrack related counters. \fBip_conntrack\fP is for backwards compatibility
+with older userspace only and shows the same data as \fBnf_conntrack\fP.
+.sp
+.B entries
+Number of entries in conntrack table.
+.sp
+.B searched
+Number of conntrack table lookups performed.
+.sp
+.B found
+Number of \fBsearched\fP entries which were successful.
+.sp
+.B new
+Number of conntrack entries added which were not expected before.
+.sp
+.B invalid
+Number of packets seen which can not be tracked.
+.sp
+.B ignore
+Number of packets seen which are already connected to a conntrack entry.
+.sp
+.B delete
+Number of conntrack entries which were removed.
+.sp
+.B delete_list
+Number of conntrack entries which were put to dying list.
+.sp
+.B insert
+Number of entries inserted into the list.
+.sp
+.B insert_failed
+Number of entries for which list insertion was attempted but failed (happens if
+the same entry is already present).
+.sp
+.B drop
+Number of packets dropped due to conntrack failure. Either new conntrack entry
+allocation failed, or protocol helper dropped the packet.
+.sp
+.B early_drop
+Number of dropped conntrack entries to make room for new ones, if maximum table
+size was reached.
+.sp
+.B icmp_error
+Number of packets which could not be tracked due to error situation. This is a
+subset of \fBinvalid\fP.
+.sp
+.B expect_new
+Number of conntrack entries added after an expectation for them was already
+present.
+.sp
+.B expect_create
+Number of expectations added.
+.sp
+.B expect_delete
+Number of expectations deleted.
+.sp
+.B search_restart
+Number of conntrack table lookups which had to be restarted due to hashtable
+resizes.
+
+.TP
+.B /proc/net/stat/rt_cache
+Routing cache statistics.
+.sp
+.B entries
+Number of entries in routing cache.
+.sp
+.B in_hit
+Number of route cache hits for incoming packets. Deprecated since IP route
+cache removal, therefore always zero.
+.sp
+.B in_slow_tot
+Number of routing cache entries added for input traffic.
+.sp
+.B in_slow_mc
+Number of multicast routing cache entries added for input traffic.
+.sp
+.B in_no_route
+Number of input packets for which no routing table entry was found.
+.sp
+.B in_brd
+Number of matched input broadcast packets.
+.sp
+.B in_martian_dst
+Number of incoming martian destination packets.
+.sp
+.B in_martian_src
+Number of incoming martian source packets.
+.sp
+.B out_hit
+Number of route cache hits for outgoing packets. Deprecated since IP route
+cache removal, therefore always zero.
+.sp
+.B out_slow_tot
+Number of routing cache entries added for output traffic.
+.sp
+.B out_slow_mc
+Number of multicast routing cache entries added for output traffic.
+.sp
+.B gc_total
+Total number of garbage collection runs. Deprecated since IP route cache
+removal, therefore always zero.
+.sp
+.B gc_ignored
+Number of ignored garbage collection runs due to minimum GC interval not
+reached and routing cache not full. Deprecated since IP route cache removal,
+therefore always zero.
+.sp
+.B gc_goal_miss
+Number of garbage collector goal misses. Deprecated since IP route cache
+removal, therefore always zero.
+.sp
+.B gc_dst_overflow
+Number of destination cache overflows. Deprecated since IP route cache removal,
+therefore always zero.
+.sp
+.B in_hlist_search
+Number of hash table list traversals for input traffic. Deprecated since IP
+route cache removal, therefore always zero.
+.sp
+.B out_hlist_search
+Number of hash table list traversals for output traffic. Deprecated since IP
+route cache removal, therefore always zero.
+
.SH SEE ALSO
.BR ip (8),
and /usr/share/doc/iproute-doc/README.lnstat (package iproute-doc on Debian)
diff --git a/man/man8/routel.8 b/man/man8/routel.8
index cdf8f55b..82d580fb 100644
--- a/man/man8/routel.8
+++ b/man/man8/routel.8
@@ -1,16 +1,16 @@
.TH "ROUTEL" "8" "3 Jan, 2008" "iproute2" "Linux"
.SH "NAME"
-.LP
+.LP
routel \- list routes with pretty output format
.br
routef \- flush routes
.SH "SYNTAX"
-.LP
+.LP
routel [\fItablenr\fP [\fIraw ip args...\fP]]
-.br
+.br
routef
.SH "DESCRIPTION"
-.LP
+.LP
These programs are a set of helper scripts you can use instead of raw iproute2 commands.
.br
The routel script will list routes in a format that some might consider easier to interpret than the ip route list equivalent.
@@ -18,15 +18,15 @@ The routel script will list routes in a format that some might consider easier t
The routef script does not take any arguments and will simply flush the routing table down the drain. Beware! This means deleting all routes which will make your network unusable!
.SH "FILES"
-.LP
-\fI/usr/bin/routef\fP
-.br
-\fI/usr/bin/routel\fP
+.LP
+\fI/usr/bin/routef\fP
+.br
+\fI/usr/bin/routel\fP
.SH "AUTHORS"
-.LP
+.LP
The routel script was written by Stephen R. van den Berg <srb@cuci.nl>, 1999/04/18 and donated to the public domain.
.br
This manual page was written by Andreas Henriksson <andreas@fatal.se>, for the Debian GNU/Linux system.
.SH "SEE ALSO"
-.LP
+.LP
ip(8)
diff --git a/man/man8/rtacct.8 b/man/man8/rtacct.8
index c3ab03de..7cf97aa4 100644
--- a/man/man8/rtacct.8
+++ b/man/man8/rtacct.8
@@ -47,4 +47,3 @@ Time interval to average rates. Default value is 60 seconds.
.SH SEE ALSO
lnstat(8)
-
diff --git a/man/man8/rtmon.8 b/man/man8/rtmon.8
index 05387520..38a2b774 100644
--- a/man/man8/rtmon.8
+++ b/man/man8/rtmon.8
@@ -10,11 +10,11 @@ This manual page documents briefly the
command.
.PP
.B rtmon
-listens on
-.I netlink
+listens on
+.I netlink
socket and monitors routing table changes.
-.I rtmon
+.I rtmon
can be started before the first network configuration command is issued.
For example if you insert:
@@ -61,7 +61,7 @@ to display logged output from file.
.SH SEE ALSO
.BR ip (8)
.SH AUTHOR
-.B rtmon
+.B rtmon
was written by Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>.
.PP
This manual page was written by Michael Prokop <mika@grml.org>,
diff --git a/man/man8/rtpr.8 b/man/man8/rtpr.8
new file mode 100644
index 00000000..5e32b2ee
--- /dev/null
+++ b/man/man8/rtpr.8
@@ -0,0 +1,25 @@
+.TH RTPR 8 "18 September, 2015"
+
+.SH NAME
+rtpr \- replace backslashes with newlines.
+
+.SH DESCRIPTION
+.B rtpr
+is a trivial bash script which converts backslashes in standard input to newlines. Its sole purpose is to be fed with input from
+.B ip
+when executed with its
+.B --oneline
+flag.
+
+.SH EXAMPLES
+.TP
+ip --oneline address show | rtpr
+Undo oneline converted
+.B ip-address
+output.
+
+.SH SEE ALSO
+.BR ip (8)
+
+.SH AUTHORS
+Stephen Hemminger <shemming@brocade.com>
diff --git a/man/man8/ss.8 b/man/man8/ss.8
index b7fbaefa..758460c2 100644
--- a/man/man8/ss.8
+++ b/man/man8/ss.8
@@ -12,8 +12,8 @@ to
It can display more TCP and state information than other tools.
.SH OPTIONS
-When no option is used ss displays a list of
-open non-listening TCP sockets that have established connection.
+When no option is used ss displays a list of
+open non-listening sockets (e.g. TCP/UNIX/UDP) that have established connection.
.TP
.B \-h, \-\-help
Show summary of options.
@@ -48,6 +48,11 @@ Show process using socket.
.B \-i, \-\-info
Show internal TCP information.
.TP
+.B \-K, \-\-kill
+Attempts to forcibly close sockets. This option displays sockets that are
+successfully closed and silently skips sockets that the kernel does not support
+closing. It supports IPv4 and IPv6 sockets only.
+.TP
.B \-s, \-\-summary
Print summary statistics. This option does not parse socket lists obtaining
summary from various sources. It is useful when amount of sockets is so huge
@@ -189,10 +194,10 @@ List all the tcp sockets in state FIN-WAIT-1 for our apache to network 193.233.7
.BR /usr/share/doc/iproute-doc/ss.html " (package iproute-doc)",
.br
.BR RFC " 793 "
-- https://tools.ietf.org/rfc/rfc793.txt (TCP states)
+- https://tools.ietf.org/rfc/rfc793.txt (TCP states)
.SH AUTHOR
-.I ss
+.I ss
was written by Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>.
.PP
This manual page was written by Michael Prokop <mika@grml.org>
diff --git a/man/man8/tc-basic.8 b/man/man8/tc-basic.8
new file mode 100644
index 00000000..fb39eaa9
--- /dev/null
+++ b/man/man8/tc-basic.8
@@ -0,0 +1,34 @@
+.TH "Basic classifier in tc" 8 "21 Oct 2015" "iproute2" "Linux"
+
+.SH NAME
+basic \- basic traffic control filter
+.SH SYNOPSIS
+.in +8
+.ti -8
+.BR tc " " filter " ... " basic " [ " match
+.IR EMATCH_TREE " ] [ "
+.B action
+.IR ACTION_SPEC " ] [ "
+.B classid
+.IR CLASSID " ]"
+.SH DESCRIPTION
+The
+.B basic
+filter allows classifying packets using the extended match infrastructure.
+.SH OPTIONS
+.TP
+.BI action " ACTION_SPEC"
+Apply an action from the generic actions framework on matching packets.
+.TP
+.BI classid " CLASSID"
+Push matching packets into the class identified by
+.IR CLASSID .
+.TP
+.BI match " EMATCH_TREE"
+Match packets using the extended match infrastructure. See
+.BR tc-ematch (8)
+for a detailed description of the allowed syntax in
+.IR EMATCH_TREE .
+.SH SEE ALSO
+.BR tc (8),
+.BR tc-ematch (8)
diff --git a/man/man8/tc-bfifo.8 b/man/man8/tc-bfifo.8
index f04090c0..3e290322 100644
--- a/man/man8/tc-bfifo.8
+++ b/man/man8/tc-bfifo.8
@@ -6,37 +6,37 @@ bfifo \- Byte limited First In, First Out queue
.SH SYNOPSIS
.B tc qdisc ... add pfifo
-.B [ limit
+.B [ limit
packets
.B ]
.P
.B tc qdisc ... add bfifo
-.B [ limit
+.B [ limit
bytes
.B ]
.SH DESCRIPTION
The pfifo and bfifo qdiscs are unadorned First In, First Out queues. They are the
-simplest queues possible and therefore have no overhead.
+simplest queues possible and therefore have no overhead.
.B pfifo
-constrains the queue size as measured in packets.
+constrains the queue size as measured in packets.
.B bfifo
does so as measured in bytes.
-Like all non-default qdiscs, they maintain statistics. This might be a reason to prefer
+Like all non-default qdiscs, they maintain statistics. This might be a reason to prefer
pfifo or bfifo over the default.
.SH ALGORITHM
A list of packets is maintained, when a packet is enqueued it gets inserted at the tail of
-a list. When a packet needs to be sent out to the network, it is taken from the head of the list.
+a list. When a packet needs to be sent out to the network, it is taken from the head of the list.
If the list is too long, no further packets are allowed on. This is called 'tail drop'.
.SH PARAMETERS
-.TP
+.TP
limit
-Maximum queue size. Specified in bytes for bfifo, in packets for pfifo. For pfifo, defaults
-to the interface txqueuelen, as specified with
+Maximum queue size. Specified in bytes for bfifo, in packets for pfifo. For pfifo, defaults
+to the interface txqueuelen, as specified with
.BR ifconfig (8)
or
.BR ip (8).
@@ -48,20 +48,20 @@ The range for this parameter is [0, UINT32_MAX] bytes.
Note: The link layer header was considered when counting packets length.
.SH OUTPUT
-The output of
+The output of
.B tc -s qdisc ls
-contains the limit, either in packets or in bytes, and the number of bytes
-and packets actually sent. An unsent and dropped packet only appears between braces
+contains the limit, either in packets or in bytes, and the number of bytes
+and packets actually sent. An unsent and dropped packet only appears between braces
and is not counted as 'Sent'.
-In this example, the queue length is 100 packets, 45894 bytes were sent over 681 packets.
+In this example, the queue length is 100 packets, 45894 bytes were sent over 681 packets.
No packets were dropped, and as the pfifo queue does not slow down packets, there were also no
overlimits:
.P
.nf
-# tc -s qdisc ls dev eth0
+# tc -s qdisc ls dev eth0
qdisc pfifo 8001: dev eth0 limit 100p
- Sent 45894 bytes 681 pkts (dropped 0, overlimits 0)
+ Sent 45894 bytes 681 pkts (dropped 0, overlimits 0)
.fi
If a backlog occurs, this is displayed as well.
@@ -72,5 +72,3 @@ If a backlog occurs, this is displayed as well.
Alexey N. Kuznetsov, <kuznet@ms2.inr.ac.ru>
This manpage maintained by bert hubert <ahu@ds9a.nl>
-
-
diff --git a/man/man8/tc-bpf.8 b/man/man8/tc-bpf.8
new file mode 100644
index 00000000..c8d5c5f9
--- /dev/null
+++ b/man/man8/tc-bpf.8
@@ -0,0 +1,924 @@
+.TH "BPF classifier and actions in tc" 8 "18 May 2015" "iproute2" "Linux"
+.SH NAME
+BPF \- BPF programmable classifier and actions for ingress/egress
+queueing disciplines
+.SH SYNOPSIS
+.SS eBPF classifier (filter) or action:
+.B tc filter ... bpf
+[
+.B object-file
+OBJ_FILE ] [
+.B section
+CLS_NAME ] [
+.B export
+UDS_FILE ] [
+.B verbose
+] [
+.B police
+POLICE_SPEC ] [
+.B action
+ACTION_SPEC ] [
+.B classid
+CLASSID ]
+.br
+.B tc action ... bpf
+[
+.B object-file
+OBJ_FILE ] [
+.B section
+CLS_NAME ] [
+.B export
+UDS_FILE ] [
+.B verbose
+]
+
+.SS cBPF classifier (filter) or action:
+.B tc filter ... bpf
+[
+.B bytecode-file
+BPF_FILE |
+.B bytecode
+BPF_BYTECODE ] [
+.B police
+POLICE_SPEC ] [
+.B action
+ACTION_SPEC ] [
+.B classid
+CLASSID ]
+.br
+.B tc action ... bpf
+[
+.B bytecode-file
+BPF_FILE |
+.B bytecode
+BPF_BYTECODE ]
+
+.SH DESCRIPTION
+
+Extended Berkeley Packet Filter (
+.B eBPF
+) and classic Berkeley Packet Filter
+(originally known as BPF, for better distinction referred to as
+.B cBPF
+here) are both available as a fully programmable and highly efficient
+classifier and actions. They both offer a minimal instruction set for
+implementing small programs which can safely be loaded into the kernel
+and thus executed in a tiny virtual machine from kernel space. An in-kernel
+verifier guarantees that a specified program always terminates and neither
+crashes nor leaks data from the kernel.
+
+In Linux, it's generally considered that eBPF is the successor of cBPF.
+The kernel internally transforms cBPF expressions into eBPF expressions and
+executes the latter. Execution of them can be performed in an interpreter
+or at setup time, they can be just-in-time compiled (JIT'ed) to run as
+native machine code. Currently, x86_64, ARM64 and s390 architectures have
+eBPF JIT support, whereas PPC, SPARC, ARM and MIPS have cBPF JIT support, but did not
+(yet) switch to eBPF JIT support.
+
+eBPF's instruction set has similar underlying principles as the cBPF
+instruction set, it however is modelled closer to the underlying
+architecture to better mimic native instruction sets with the aim to
+achieve a better run-time performance. It is designed to be JIT'ed with
+a one to one mapping, which can also open up the possibility for compilers
+to generate optimized eBPF code through an eBPF backend that performs
+almost as fast as natively compiled code. Given that LLVM provides such
+an eBPF backend, eBPF programs can therefore easily be programmed in a
+subset of the C language. Other than that, eBPF infrastructure also comes
+with a construct called "maps". eBPF maps are key/value stores that are
+shared between multiple eBPF programs, but also between eBPF programs and
+user space applications.
+
+For the traffic control subsystem, classifier and actions that can be
+attached to ingress and egress qdiscs can be written in eBPF or cBPF. The
+advantage over other classifier and actions is that eBPF/cBPF provides the
+generic framework, while users can implement their highly specialized use
+cases efficiently. This means that the classifier or action written that
+way will not suffer from feature bloat, and can therefore execute its task
+highly efficiently. It allows for non-linear classification and even merging
+the action part into the classification. Combined with efficient eBPF map
+data structures, user space can push new policies like classids into the
+kernel without reloading a classifier, or it can gather statistics that
+are pushed into one map and use another one for dynamically load balancing
+traffic based on the determined load, just to provide a few examples.
+
+.SH PARAMETERS
+.SS object-file
+points to an object file that has an executable and linkable format (ELF)
+and contains eBPF opcodes and eBPF map definitions. The LLVM compiler
+infrastructure with
+.B clang(1)
+as a C language front end is one project that supports emitting eBPF object
+files that can be passed to the eBPF classifier (more details in the
+.B EXAMPLES
+section). This option is mandatory when an eBPF classifier or action is
+to be loaded.
+
+.SS section
+is the name of the ELF section from the object file, where the eBPF
+classifier or action resides. By default the section name for the
+classifier is called "classifier", and for the action "action". Given
+that a single object file can contain multiple classifier and actions,
+the corresponding section name needs to be specified, if it differs
+from the defaults.
+
+.SS export
+points to a Unix domain socket file. In case the eBPF object file also
+contains a section named "maps" with eBPF map specifications, then the
+map file descriptors can be handed off via the Unix domain socket to
+an eBPF "agent" herding all descriptors after tc lifetime. This can be
+some third party application implementing the IPC counterpart for the
+import, that uses them for calling into
+.B bpf(2)
+system call to read out or update eBPF map data from user space, for
+example, for monitoring purposes or to push down new policies.
+
+.SS verbose
+if set, it will dump the eBPF verifier output, even if loading the eBPF
+program was successful. By default, only on error, the verifier log is
+being emitted to the user.
+
+.SS police
+is an optional parameter for an eBPF/cBPF classifier that specifies a
+police in
+.B tc(1)
+which is attached to the classifier, for example, on an ingress qdisc.
+
+.SS action
+is an optional parameter for an eBPF/cBPF classifier that specifies a
+subsequent action in
+.B tc(1)
+which is attached to a classifier.
+
+.SS classid
+.SS flowid
+provides the default traffic control class identifier for this eBPF/cBPF
+classifier. The default class identifier can also be overwritten by the
+return code of the eBPF/cBPF program. A default return code of
+.B -1
+specifies the here provided default class identifier to be used. A return
+code of the eBPF/cBPF program of 0 implies that no match took place, and
+a return code other than these two will override the default classid. This
+allows for efficient, non-linear classification with only a single eBPF/cBPF
+program as opposed to having multiple individual programs for various class
+identifiers which would need to reparse packet contents.
+
+.SS bytecode
+is being used for loading cBPF classifier and actions only. The cBPF bytecode
+is directly passed as a text string in the form of
+.B \'s,c t f k,c t f k,c t f k,...\'
+, where
+.B s
+denotes the number of subsequent 4-tuples. One such 4-tuple consists of
+.B c t f k
+decimals, where
+.B c
+represents the cBPF opcode,
+.B t
+the jump true offset target,
+.B f
+the jump false offset target and
+.B k
+the immediate constant/literal. There are various tools that generate code
+in this loadable format, for example,
+.B bpf_asm
+that ships with the Linux kernel source tree under
+.B tools/net/
+, so it is certainly not expected to hack this by hand. The
+.B bytecode
+or
+.B bytecode-file
+option is mandatory when a cBPF classifier or action is to be loaded.
+
+.SS bytecode-file
+is also being used to load a cBPF classifier or action. It's effectively the
+same as
+.B bytecode
+only that the cBPF bytecode is not passed directly via command line, but
+rather resides in a text file.
+
+.SH EXAMPLES
+.SS eBPF TOOLING
+A full blown example including eBPF agent code can be found inside the
+iproute2 source package under:
+.B examples/bpf/
+
+As prerequisites, the kernel needs to have the eBPF system call namely
+.B bpf(2)
+enabled and ships with
+.B cls_bpf
+and
+.B act_bpf
+kernel modules for the traffic control subsystem. To enable cBPF/eBPF JIT
+support, depending on which of the two the given architecture supports:
+
+.in +4n
+.B echo 1 > /proc/sys/net/core/bpf_jit_enable
+.in
+
+A given restricted C file can be compiled via LLVM as:
+
+.in +4n
+.B clang -O2 -emit-llvm -c bpf.c -o - | llc -march=bpf -filetype=obj -o bpf.o
+.in
+
+The compiler invocation might still simplify in future, so for now,
+it's quite handy to alias this construct in one way or another, for
+example:
+.in +4n
+.nf
+.sp
+__bcc() {
+ clang -O2 -emit-llvm -c $1 -o - | \\
+ llc -march=bpf -filetype=obj -o "`basename $1 .c`.o"
+}
+
+alias bcc=__bcc
+.fi
+.in
+
+A minimal, stand-alone unit, which matches on all traffic with the
+default classid (return code of -1) looks like:
+
+.in +4n
+.nf
+.sp
+#include <linux/bpf.h>
+
+#ifndef __section
+# define __section(x) __attribute__((section(x), used))
+#endif
+
+__section("classifier") int cls_main(struct __sk_buff *skb)
+{
+ return -1;
+}
+
+char __license[] __section("license") = "GPL";
+.fi
+.in
+
+More examples can be found further below in subsection
+.B eBPF PROGRAMMING
+as focus here will be on tooling.
+
+There can be various other sections, for example, also for actions.
+Thus, an object file in eBPF can contain multiple entrance points.
+Always a specific entrance point, however, must be specified when
+configuring with tc. A license must be part of the restricted C code
+and the license string syntax is the same as with Linux kernel modules.
+The kernel reserves its right that some eBPF helper functions can be
+restricted to GPL compatible licenses only, and thus may reject a program
+from loading into the kernel when such a license mismatch occurs.
+
+The resulting object file from the compilation can be inspected with
+the usual set of tools that also operate on normal object files, for
+example
+.B objdump(1)
+for inspecting ELF section headers:
+
+.in +4n
+.nf
+.sp
+objdump -h bpf.o
+[...]
+3 classifier 000007f8 0000000000000000 0000000000000000 00000040 2**3
+ CONTENTS, ALLOC, LOAD, RELOC, READONLY, CODE
+4 action-mark 00000088 0000000000000000 0000000000000000 00000838 2**3
+ CONTENTS, ALLOC, LOAD, RELOC, READONLY, CODE
+5 action-rand 00000098 0000000000000000 0000000000000000 000008c0 2**3
+ CONTENTS, ALLOC, LOAD, RELOC, READONLY, CODE
+6 maps 00000030 0000000000000000 0000000000000000 00000958 2**2
+ CONTENTS, ALLOC, LOAD, DATA
+7 license 00000004 0000000000000000 0000000000000000 00000988 2**0
+ CONTENTS, ALLOC, LOAD, DATA
+[...]
+.fi
+.in
+
+Adding an eBPF classifier from an object file that contains a classifier
+in the default ELF section is trivial (note that instead of "object-file"
+also shortcuts such as "obj" can be used):
+
+.in +4n
+.B bcc bpf.c
+.br
+.B tc filter add dev em1 parent 1: bpf obj bpf.o flowid 1:1
+.in
+
+In case the classifier resides in ELF section "mycls", then that same
+command needs to be invoked as:
+
+.in +4n
+.B tc filter add dev em1 parent 1: bpf obj bpf.o sec mycls flowid 1:1
+.in
+
+Dumping the classifier configuration will tell the location of the
+classifier, in other words that it's from object file "bpf.o" under
+section "mycls":
+
+.in +4n
+.B tc filter show dev em1
+.br
+.B filter parent 1: protocol all pref 49152 bpf
+.br
+.B filter parent 1: protocol all pref 49152 bpf handle 0x1 flowid 1:1 bpf.o:[mycls]
+.in
+
+The same program can also be installed on ingress qdisc side as opposed
+to egress ...
+
+.in +4n
+.B tc qdisc add dev em1 handle ffff: ingress
+.br
+.B tc filter add dev em1 parent ffff: bpf obj bpf.o sec mycls flowid ffff:1
+.in
+
+\&... and again dumped from there:
+
+.in +4n
+.B tc filter show dev em1 parent ffff:
+.br
+.B filter protocol all pref 49152 bpf
+.br
+.B filter protocol all pref 49152 bpf handle 0x1 flowid ffff:1 bpf.o:[mycls]
+.in
+
+Attaching a classifier and action on ingress has the restriction that
+it doesn't have an actual underlying queueing discipline. What ingress
+can do is to classify, mangle, redirect or drop packets. When queueing
+is required on ingress side, then ingress must redirect packets to the
+.B ifb
+device, otherwise policing can be used. Moreover, ingress can be used to
+have an early drop point of unwanted packets before they hit upper layers
+of the networking stack, perform network accounting with eBPF maps that
+could be shared with egress, or have an early mangle and/or redirection
+point to different networking devices.
+
+Multiple eBPF actions and classifier can be placed into a single
+object file within various sections. In that case, non-default section
+names must be provided, which is the case for both actions in this
+example:
+
+.in +4n
+.B tc filter add dev em1 parent 1: bpf obj bpf.o flowid 1:1 \e
+.br
+.in +25n
+.B action bpf obj bpf.o sec action-mark \e
+.br
+.B action bpf obj bpf.o sec action-rand ok
+.in -25n
+.in -4n
+
+The advantage of this is that the classifier and the two actions can
+then share eBPF maps with each other, if implemented in the programs.
+
+In order to access eBPF maps from user space beyond
+.B tc(8)
+setup lifetime, the ownership can be transferred to an eBPF agent via
+Unix domain sockets. There are two possibilities for implementing this:
+
+.B 1)
+implementation of an own eBPF agent that takes care of setting up
+the Unix domain socket and implementing the protocol that
+.B tc(8)
+dictates. A code example of this can be found inside the iproute2
+source package under:
+.B examples/bpf/
+
+.B 2)
+use
+.B tc exec
+for transferring the eBPF map file descriptors through a Unix domain
+socket, and spawning an application such as
+.B sh(1)
+\&. This approach's advantage is that tc will place the file descriptors
+into the environment and thus make them available just like stdin, stdout,
+stderr file descriptors, meaning, in case user applications run from within
+this fd-owner shell, they can terminate and restart without losing eBPF
+maps file descriptors. Example invocation with the previous classifier and
+action mixture:
+
+.in +4n
+.B tc exec bpf imp /tmp/bpf
+.br
+.B tc filter add dev em1 parent 1: bpf obj bpf.o exp /tmp/bpf flowid 1:1 \e
+.br
+.in +25n
+.B action bpf obj bpf.o sec action-mark \e
+.br
+.B action bpf obj bpf.o sec action-rand ok
+.in -25n
+.in -4n
+
+Assuming that eBPF maps are shared with classifier and actions, it's
+enough to export them once, for example, from within the classifier
+or action command. tc will setup all eBPF map file descriptors at the
+time when the object file is first parsed.
+
+When a shell has been spawned, the environment will have a couple of
+eBPF related variables. BPF_NUM_MAPS provides the total number of maps
+that have been transferred over the Unix domain socket. BPF_MAP<X>'s
+value is the file descriptor number that can be accessed in eBPF agent
+applications, in other words, it can directly be used as the file
+descriptor value for the
+.B bpf(2)
+system call to retrieve or alter eBPF map values. <X> denotes the
+identifier of the eBPF map. It corresponds to the
+.B id
+member of
+.B struct bpf_elf_map
+\& from the tc eBPF map specification.
+
+The environment in this example looks as follows:
+
+.in +4n
+.nf
+.sp
+sh# env | grep BPF
+ BPF_NUM_MAPS=3
+ BPF_MAP1=6
+ BPF_MAP0=5
+ BPF_MAP2=7
+sh# ls -la /proc/self/fd
+ [...]
+ lrwx------. 1 root root 64 Apr 14 16:46 5 -> anon_inode:bpf-map
+ lrwx------. 1 root root 64 Apr 14 16:46 6 -> anon_inode:bpf-map
+ lrwx------. 1 root root 64 Apr 14 16:46 7 -> anon_inode:bpf-map
+sh# my_bpf_agent
+.fi
+.in
+
+eBPF agents are very useful in that they can prepopulate eBPF maps from
+user space, monitor statistics via maps and based on that feedback, for
+example, rewrite classids in eBPF map values during runtime. Given that eBPF
+agents are implemented as normal applications, they can also dynamically
+receive traffic control policies from external controllers and thus push
+them down into eBPF maps to dynamically adapt to network conditions. Moreover,
+eBPF maps can also be shared with other eBPF program types (e.g. tracing),
+thus very powerful combinations can be implemented.
+
+.SS eBPF PROGRAMMING
+
+eBPF classifier and actions are being implemented in restricted C syntax
+(in future, there could additionally be new language frontends supported).
+
+The header file
+.B linux/bpf.h
+provides eBPF helper functions that can be called from an eBPF program.
+This man page will only provide two minimal, stand-alone examples, have a
+look at
+.B examples/bpf
+from the iproute2 source package for a fully fledged flow dissector
+example to better demonstrate some of the possibilities with eBPF.
+
+Supported 32 bit classifier return codes from the C program and their meanings:
+.in +4n
+.B 0
+, denotes a mismatch
+.br
+.B -1
+, denotes the default classid configured from the command line
+.br
+.B else
+, everything else will override the default classid to provide a facility for
+non-linear matching
+.in
+
+Supported 32 bit action return codes from the C program and their meanings (
+.B linux/pkt_cls.h
+):
+.in +4n
+.B TC_ACT_OK (0)
+, will terminate the packet processing pipeline and allows the packet to
+proceed
+.br
+.B TC_ACT_SHOT (2)
+, will terminate the packet processing pipeline and drops the packet
+.br
+.B TC_ACT_UNSPEC (-1)
+, will use the default action configured from tc (similarly as returning
+.B -1
+from a classifier)
+.br
+.B TC_ACT_PIPE (3)
+, will iterate to the next action, if available
+.br
+.B TC_ACT_RECLASSIFY (1)
+, will terminate the packet processing pipeline and start classification
+from the beginning
+.br
+.B else
+, everything else is an unspecified return code
+.in
+
+Both classifier and action return codes are supported in eBPF and cBPF
+programs.
+
+To demonstrate restricted C syntax, a minimal toy classifier example is
+provided, which assumes that egress packets, for instance originating
+from a container, have previously been marked in interval [0, 255]. The
+program keeps statistics on different marks for user space and maps the
+classid to the root qdisc with the marking itself as the minor handle:
+
+.in +4n
+.nf
+.sp
+#include <stdint.h>
+#include <asm/types.h>
+
+#include <linux/bpf.h>
+#include <linux/pkt_sched.h>
+
+#include "helpers.h"
+
+struct tuple {
+ long packets;
+ long bytes;
+};
+
+#define BPF_MAP_ID_STATS 1 /* agent's map identifier */
+#define BPF_MAX_MARK 256
+
+struct bpf_elf_map __section("maps") map_stats = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .id = BPF_MAP_ID_STATS,
+ .size_key = sizeof(uint32_t),
+ .size_value = sizeof(struct tuple),
+ .max_elem = BPF_MAX_MARK,
+};
+
+static inline void cls_update_stats(const struct __sk_buff *skb,
+ uint32_t mark)
+{
+ struct tuple *tu;
+
+ tu = bpf_map_lookup_elem(&map_stats, &mark);
+ if (likely(tu)) {
+ __sync_fetch_and_add(&tu->packets, 1);
+ __sync_fetch_and_add(&tu->bytes, skb->len);
+ }
+}
+
+__section("cls") int cls_main(struct __sk_buff *skb)
+{
+ uint32_t mark = skb->mark;
+
+ if (unlikely(mark >= BPF_MAX_MARK))
+ return 0;
+
+ cls_update_stats(skb, mark);
+
+ return TC_H_MAKE(TC_H_ROOT, mark);
+}
+
+char __license[] __section("license") = "GPL";
+.fi
+.in
+
+Another small example is a port redirector which demuxes destination port
+80 into the interval [8080, 8087] steered by RSS, and which can then be attached
+to the ingress qdisc. The exercise of adding the egress counterpart and IPv6
+support is left to the reader:
+
+.in +4n
+.nf
+.sp
+#include <asm/types.h>
+#include <asm/byteorder.h>
+
+#include <linux/bpf.h>
+#include <linux/filter.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+
+#include "helpers.h"
+
+static inline void set_tcp_dport(struct __sk_buff *skb, int nh_off,
+ __u16 old_port, __u16 new_port)
+{
+ bpf_l4_csum_replace(skb, nh_off + offsetof(struct tcphdr, check),
+ old_port, new_port, sizeof(new_port));
+ bpf_skb_store_bytes(skb, nh_off + offsetof(struct tcphdr, dest),
+ &new_port, sizeof(new_port), 0);
+}
+
+static inline int lb_do_ipv4(struct __sk_buff *skb, int nh_off)
+{
+ __u16 dport, dport_new = 8080, off;
+ __u8 ip_proto, ip_vl;
+
+ ip_proto = load_byte(skb, nh_off +
+ offsetof(struct iphdr, protocol));
+ if (ip_proto != IPPROTO_TCP)
+ return 0;
+
+ ip_vl = load_byte(skb, nh_off);
+ if (likely(ip_vl == 0x45))
+ nh_off += sizeof(struct iphdr);
+ else
+ nh_off += (ip_vl & 0xF) << 2;
+
+ dport = load_half(skb, nh_off + offsetof(struct tcphdr, dest));
+ if (dport != 80)
+ return 0;
+
+ off = skb->queue_mapping & 7;
+ set_tcp_dport(skb, nh_off - BPF_LL_OFF, __constant_htons(80),
+ __cpu_to_be16(dport_new + off));
+ return -1;
+}
+
+__section("lb") int lb_main(struct __sk_buff *skb)
+{
+ int ret = 0, nh_off = BPF_LL_OFF + ETH_HLEN;
+
+ if (likely(skb->protocol == __constant_htons(ETH_P_IP)))
+ ret = lb_do_ipv4(skb, nh_off);
+
+ return ret;
+}
+
+char __license[] __section("license") = "GPL";
+.fi
+.in
+
+The related helper header file
+.B helpers.h
+in both examples was:
+
+.in +4n
+.nf
+.sp
+/* Misc helper macros. */
+#define __section(x) __attribute__((section(x), used))
+#define offsetof(x, y) __builtin_offsetof(x, y)
+#define likely(x) __builtin_expect(!!(x), 1)
+#define unlikely(x) __builtin_expect(!!(x), 0)
+
+/* Used map structure */
+struct bpf_elf_map {
+ __u32 type;
+ __u32 size_key;
+ __u32 size_value;
+ __u32 max_elem;
+ __u32 id;
+};
+
+/* Some used BPF function calls. */
+static int (*bpf_skb_store_bytes)(void *ctx, int off, void *from,
+ int len, int flags) =
+ (void *) BPF_FUNC_skb_store_bytes;
+static int (*bpf_l4_csum_replace)(void *ctx, int off, int from,
+ int to, int flags) =
+ (void *) BPF_FUNC_l4_csum_replace;
+static void *(*bpf_map_lookup_elem)(void *map, void *key) =
+ (void *) BPF_FUNC_map_lookup_elem;
+
+/* Some used BPF intrinsics. */
+unsigned long long load_byte(void *skb, unsigned long long off)
+ asm ("llvm.bpf.load.byte");
+unsigned long long load_half(void *skb, unsigned long long off)
+ asm ("llvm.bpf.load.half");
+.fi
+.in
+
+As a best practice, we recommend having only a single eBPF classifier loaded
+in tc and performing
+.B all
+necessary matching and mangling from there instead of using a list of individual
+classifiers and separate actions. A single classifier tailored for a
+given use-case will be most efficient to run.
+
+.SS eBPF DEBUGGING
+
+Both tc
+.B filter
+and
+.B action
+commands for
+.B bpf
+support an optional
+.B verbose
+parameter that can be used to inspect the eBPF verifier log. It is dumped
+by default in case of an error.
+
+In case the eBPF/cBPF JIT compiler has been enabled, it can also be
+instructed to emit a debug output of the resulting opcode image into
+the kernel log, which can be read via
+.B dmesg(1)
+:
+
+.in +4n
+.B echo 2 > /proc/sys/net/core/bpf_jit_enable
+.in
+
+The Linux kernel source tree ships additionally under
+.B tools/net/
+a small helper called
+.B bpf_jit_disasm
+that reads out the opcode image dump from the kernel log and dumps the
+resulting disassembly:
+
+.in +4n
+.B bpf_jit_disasm -o
+.in
+
+Other than that, the Linux kernel also contains an extensive eBPF/cBPF
+test suite module called
+.B test_bpf
+\&. Upon ...
+
+.in +4n
+.B modprobe test_bpf
+.in
+
+\&... it runs a variety of test cases and dumps the results into
+the kernel log that can be inspected with
+.B dmesg(1)
+\&. The results can differ depending on whether the JIT compiler is enabled
+or not. In case of failed test cases, the module will fail to load. In
+such cases, we urge you to file a bug report to the related JIT authors,
+Linux kernel and networking mailing lists.
+
+.SS cBPF
+
+Although we generally recommend switching to implementing
+.B eBPF
+classifiers and actions, for the sake of completeness, a few words on how to
+program in cBPF are given here.
+
+Likewise, the
+.B bpf_jit_enable
+switch can be enabled as mentioned already. Tooling such as
+.B bpf_jit_disasm
+is also independent of whether eBPF or cBPF code is being loaded.
+
+Unlike in eBPF, classifier and action are not implemented in restricted C,
+but rather in a minimal assembler-like language or with the help of other
+tooling.
+
+The raw interface with tc takes opcodes directly. For example, the most
+minimal classifier matching on every packet resulting in the default
+classid of 1:1 looks like:
+
+.in +4n
+.B tc filter add dev em1 parent 1: bpf bytecode '1,6 0 0 4294967295,' flowid 1:1
+.in
+
+The first decimal of the bytecode sequence denotes the number of subsequent
+4-tuples of cBPF opcodes. As mentioned, such a 4-tuple consists of
+.B c t f k
+decimals, where
+.B c
+represents the cBPF opcode,
+.B t
+the jump true offset target,
+.B f
+the jump false offset target and
+.B k
+the immediate constant/literal. Here, this denotes an unconditional return
+from the program with immediate value of -1.
+
+Thus, for egress classification, Willem de Bruijn implemented a minimal stand-alone
+helper tool under the GNU General Public License version 2 for the
+.B iptables(8)
+BPF extension, which abuses the
+.B libpcap
+internal classic BPF compiler. His code, adapted here for usage with
+.B tc(8)
+, is:
+
+.in +4n
+.nf
+.sp
+#include <pcap.h>
+#include <stdio.h>
+
+int main(int argc, char **argv)
+{
+ struct bpf_program prog;
+ struct bpf_insn *ins;
+ int i, ret, dlt = DLT_RAW;
+
+ if (argc < 2 || argc > 3)
+ return 1;
+ if (argc == 3) {
+ dlt = pcap_datalink_name_to_val(argv[1]);
+ if (dlt == -1)
+ return 1;
+ }
+
+ ret = pcap_compile_nopcap(-1, dlt, &prog, argv[argc - 1],
+ 1, PCAP_NETMASK_UNKNOWN);
+ if (ret)
+ return 1;
+
+ printf("%d,", prog.bf_len);
+ ins = prog.bf_insns;
+
+ for (i = 0; i < prog.bf_len - 1; ++ins, ++i)
+ printf("%u %u %u %u,", ins->code,
+ ins->jt, ins->jf, ins->k);
+ printf("%u %u %u %u",
+ ins->code, ins->jt, ins->jf, ins->k);
+
+ pcap_freecode(&prog);
+ return 0;
+}
+.fi
+.in
+
+Given this small helper, any
+.B tcpdump(8)
+filter expression can be abused as a classifier where a match will
+result in the default classid:
+
+.in +4n
+.B bpftool EN10MB 'tcp[tcpflags] & tcp-syn != 0' > /var/bpf/tcp-syn
+.br
+.B tc filter add dev em1 parent 1: bpf bytecode-file /var/bpf/tcp-syn flowid 1:1
+.in
+
+Basically, such a minimal generator is equivalent to:
+
+.in +4n
+.B tcpdump -iem1 -ddd 'tcp[tcpflags] & tcp-syn != 0' | tr '\\\\n' ',' > /var/bpf/tcp-syn
+.in
+
+Since
+.B libpcap
+does not support all Linux-specific cBPF extensions in its compiler, the
+Linux kernel also ships under
+.B tools/net/
+a minimal BPF assembler called
+.B bpf_asm
+for providing full control. For detailed syntax and semantics on implementing
+such programs by hand, see references under
+.B FURTHER READING
+\&.
+
+Trivial toy example in
+.B bpf_asm
+for classifying IPv4/TCP packets, saved in a text file called
+.B foobar
+:
+
+.in +4n
+.nf
+.sp
+ldh [12]
+jne #0x800, drop
+ldb [23]
+jneq #6, drop
+ret #-1
+drop: ret #0
+.fi
+.in
+
+Similarly, such a classifier can be loaded as:
+
+.in +4n
+.B bpf_asm foobar > /var/bpf/tcp-syn
+.br
+.B tc filter add dev em1 parent 1: bpf bytecode-file /var/bpf/tcp-syn flowid 1:1
+.in
+
+For BPF classifiers, the Linux kernel provides additionally under
+.B tools/net/
+a small BPF debugger called
+.B bpf_dbg
+, which can be used to test a classifier against pcap files, single-step
+or add various breakpoints into the classifier program and dump register
+contents during runtime.
+
+Implementing an action in classic BPF is rather limited in the sense that
+packet mangling is not supported. Therefore, it's generally recommended to
+make the switch to eBPF, whenever possible.
+
+.SH FURTHER READING
+Further and more technical details about the BPF architecture can be found
+in the Linux kernel source tree under
+.B Documentation/networking/filter.txt
+\&.
+
+Further details on eBPF
+.B tc(8)
+examples can be found in the iproute2 source
+tree under
+.B examples/bpf/
+\&.
+
+.SH SEE ALSO
+.BR tc (8),
+.BR tc-ematch (8),
+.BR bpf (2),
+.BR bpf (4)
+
+.SH AUTHORS
+Manpage written by Daniel Borkmann.
+
+Please report corrections or improvements to the Linux kernel networking
+mailing list:
+.B <netdev@vger.kernel.org>
diff --git a/man/man8/tc-cbq-details.8 b/man/man8/tc-cbq-details.8
index ddaf3ca7..9368103b 100644
--- a/man/man8/tc-cbq-details.8
+++ b/man/man8/tc-cbq-details.8
@@ -5,54 +5,54 @@ CBQ \- Class Based Queueing
.B tc qdisc ... dev
dev
.B ( parent
-classid
-.B | root) [ handle
-major:
+classid
+.B | root) [ handle
+major:
.B ] cbq avpkt
bytes
.B bandwidth
rate
-.B [ cell
+.B [ cell
bytes
.B ] [ ewma
log
.B ] [ mpu
bytes
-.B ]
+.B ]
.B tc class ... dev
dev
-.B parent
+.B parent
major:[minor]
-.B [ classid
+.B [ classid
major:minor
.B ] cbq allot
bytes
-.B [ bandwidth
-rate
-.B ] [ rate
+.B [ bandwidth
+rate
+.B ] [ rate
rate
.B ] prio
priority
.B [ weight
weight
-.B ] [ minburst
+.B ] [ minburst
+packets
+.B ] [ maxburst
packets
-.B ] [ maxburst
-packets
-.B ] [ ewma
+.B ] [ ewma
log
.B ] [ cell
bytes
.B ] avpkt
bytes
.B [ mpu
-bytes
+bytes
.B ] [ bounded isolated ] [ split
handle
.B & defmap
defmap
-.B ] [ estimator
+.B ] [ estimator
interval timeconstant
.B ]
@@ -60,7 +60,7 @@ interval timeconstant
Class Based Queueing is a classful qdisc that implements a rich
linksharing hierarchy of classes. It contains shaping elements as
well as prioritizing capabilities. Shaping is performed using link
-idle time calculations based on the timing of dequeue events and
+idle time calculations based on the timing of dequeue events and
underlying link bandwidth.
.SH SHAPING ALGORITHM
@@ -71,10 +71,10 @@ When shaping a 10mbit/s connection to 1mbit/s, the link will
be idle 90% of the time. If it isn't, it needs to be throttled so that it
IS idle 90% of the time.
-From the kernel's perspective, this is hard to measure, so CBQ instead
-derives the idle time from the number of microseconds (in fact, jiffies)
-that elapse between requests from the device driver for more data. Combined
-with the knowledge of packet sizes, this is used to approximate how full or
+From the kernel's perspective, this is hard to measure, so CBQ instead
+derives the idle time from the number of microseconds (in fact, jiffies)
+that elapse between requests from the device driver for more data. Combined
+with the knowledge of packet sizes, this is used to approximate how full or
empty the link is.
This is rather circumspect and doesn't always arrive at proper
@@ -84,9 +84,9 @@ perhaps because of a badly implemented driver? A PCMCIA network card
will also never achieve 100mbit/s because of the way the bus is
designed - again, how do we calculate the idle time?
-The physical link bandwidth may be ill defined in case of not-quite-real
-network devices like PPP over Ethernet or PPTP over TCP/IP. The effective
-bandwidth in that case is probably determined by the efficiency of pipes
+The physical link bandwidth may be ill defined in case of not-quite-real
+network devices like PPP over Ethernet or PPTP over TCP/IP. The effective
+bandwidth in that case is probably determined by the efficiency of pipes
to userspace - which not defined.
During operations, the effective idletime is measured using an
@@ -104,59 +104,59 @@ CBQ throttles and is then 'overlimit'.
Conversely, an idle link might amass a huge avgidle, which would then
allow infinite bandwidths after a few hours of silence. To prevent
-this, avgidle is capped at
+this, avgidle is capped at
.B maxidle.
If overlimit, in theory, the CBQ could throttle itself for exactly the
amount of time that was calculated to pass between packets, and then
pass one packet, and throttle again. Due to timer resolution constraints,
-this may not be feasible, see the
+this may not be feasible, see the
.B minburst
parameter below.
.SH CLASSIFICATION
Within the one CBQ instance many classes may exist. Each of these classes
-contains another qdisc, by default
+contains another qdisc, by default
.BR tc-pfifo (8).
-When enqueueing a packet, CBQ starts at the root and uses various methods to
+When enqueueing a packet, CBQ starts at the root and uses various methods to
determine which class should receive the data. If a verdict is reached, this
process is repeated for the recipient class which might have further
means of classifying traffic to its children, if any.
-CBQ has the following methods available to classify a packet to any child
+CBQ has the following methods available to classify a packet to any child
classes.
.TP
(i)
.B skb->priority class encoding.
-Can be set from userspace by an application with the
+Can be set from userspace by an application with the
.B SO_PRIORITY
setsockopt.
-The
+The
.B skb->priority class encoding
-only applies if the skb->priority holds a major:minor handle of an existing
+only applies if the skb->priority holds a major:minor handle of an existing
class within this qdisc.
.TP
(ii)
tc filters attached to the class.
.TP
(iii)
-The defmap of a class, as set with the
+The defmap of a class, as set with the
.B split & defmap
parameters. The defmap may contain instructions for each possible Linux packet
priority.
.P
-Each class also has a
+Each class also has a
.B level.
Leaf nodes, attached to the bottom of the class hierarchy, have a level of 0.
.SH CLASSIFICATION ALGORITHM
-Classification is a loop, which terminates when a leaf class is found. At any
+Classification is a loop, which terminates when a leaf class is found. At any
point the loop may jump to the fallback algorithm.
The loop consists of the following steps:
-.TP
+.TP
(i)
If the packet is generated locally and has a valid classid encoded within its
.B skb->priority,
@@ -169,40 +169,40 @@ a class which is not a leaf class, restart loop from the class returned.
If it is a leaf, choose it and terminate.
.TP
(iii)
-If the tc filters did not return a class, but did return a classid,
-try to find a class with that id within this qdisc.
+If the tc filters did not return a class, but did return a classid,
+try to find a class with that id within this qdisc.
Check if the found class is of a lower
.B level
than the current class. If so, and the returned class is not a leaf node,
restart the loop at the found class. If it is a leaf node, terminate.
-If we found an upward reference to a higher level, enter the fallback
+If we found an upward reference to a higher level, enter the fallback
algorithm.
.TP
(iv)
If the tc filters did not return a class, nor a valid reference to one,
consider the minor number of the reference to be the priority. Retrieve
a class from the defmap of this class for the priority. If this did not
-contain a class, consult the defmap of this class for the
+contain a class, consult the defmap of this class for the
+.B BEST_EFFORT
+class. If this is an upward reference, or no
.B BEST_EFFORT
-class. If this is an upward reference, or no
-.B BEST_EFFORT
class was defined,
enter the fallback algorithm. If a valid class was found, and it is not a
-leaf node, restart the loop at this class. If it is a leaf, choose it and
+leaf node, restart the loop at this class. If it is a leaf, choose it and
terminate. If
-neither the priority distilled from the classid, nor the
-.B BEST_EFFORT
+neither the priority distilled from the classid, nor the
+.B BEST_EFFORT
priority yielded a class, enter the fallback algorithm.
.P
The fallback algorithm resides outside of the loop and is as follows.
.TP
(i)
-Consult the defmap of the class at which the jump to fallback occured. If
-the defmap contains a class for the
+Consult the defmap of the class at which the jump to fallback occurred. If
+the defmap contains a class for the
.B
priority
-of the class (which is related to the TOS field), choose this class and
-terminate.
+of the class (which is related to the TOS field), choose this class and
+terminate.
.TP
(ii)
Consult the map for a class for the
@@ -212,28 +212,28 @@ priority. If found, choose it, and terminate.
(iii)
Choose the class at which break out to the fallback algorithm occurred. Terminate.
.P
-The packet is enqueued to the class which was chosen when either algorithm
+The packet is enqueued to the class which was chosen when either algorithm
terminated. It is therefore possible for a packet to be enqueued *not* at a
leaf node, but in the middle of the hierarchy.
.SH LINK SHARING ALGORITHM
-When dequeuing for sending to the network device, CBQ decides which of its
+When dequeuing for sending to the network device, CBQ decides which of its
classes will be allowed to send. It does so with a Weighted Round Robin process
in which each class with packets gets a chance to send in turn. The WRR process
-starts by asking the highest priority classes (lowest numerically -
+starts by asking the highest priority classes (lowest numerically -
highest semantically) for packets, and will continue to do so until they
-have no more data to offer, in which case the process repeats for lower
+have no more data to offer, in which case the process repeats for lower
priorities.
.B CERTAINTY ENDS HERE, ANK PLEASE HELP
Each class is not allowed to send at length though - they can only dequeue a
-configurable amount of data during each round.
+configurable amount of data during each round.
If a class is about to go overlimit, and it is not
.B bounded
it will try to borrow avgidle from siblings that are not
-.B isolated.
+.B isolated.
This process is repeated from the bottom upwards. If a class is unable
to borrow enough avgidle to send a packet, it is throttled and not asked
for a packet for enough time for the avgidle to increase above zero.
@@ -244,7 +244,7 @@ for a packet for enough time for the avgidle to increase above zero.
.SH QDISC
The root qdisc of a CBQ class tree has the following parameters:
-.TP
+.TP
parent major:minor | root
This mandatory parameter determines the place of the CBQ instance, either at the
.B root
@@ -259,22 +259,22 @@ For calculations, the average packet size must be known. It is silently capped
at a minimum of 2/3 of the interface MTU. Mandatory.
.TP
bandwidth rate
-To determine the idle time, CBQ must know the bandwidth of your underlying
+To determine the idle time, CBQ must know the bandwidth of your underlying
physical interface, or parent qdisc. This is a vital parameter, more about it
later. Mandatory.
.TP
cell
The cell size determines he granularity of packet transmission time calculations. Has a sensible default.
-.TP
+.TP
mpu
A zero sized packet may still take time to transmit. This value is the lower
cap for packet transmission time calculations - packets smaller than this value
are still deemed to have this size. Defaults to zero.
.TP
ewma log
-When CBQ needs to measure the average idle time, it does so using an
+When CBQ needs to measure the average idle time, it does so using an
Exponentially Weighted Moving Average which smooths out measurements into
-a moving average. The EWMA LOG determines how much smoothing occurs. Defaults
+a moving average. The EWMA LOG determines how much smoothing occurs. Defaults
to 5. Lower values imply greater sensitivity. Must be between 0 and 31.
.P
A CBQ qdisc does not shape out of its own accord. It only needs to know certain
@@ -283,35 +283,35 @@ parameters about the underlying link. Actual shaping is done in classes.
.SH CLASSES
Classes have a host of parameters to configure their operation.
-.TP
+.TP
parent major:minor
-Place of this class within the hierarchy. If attached directly to a qdisc
+Place of this class within the hierarchy. If attached directly to a qdisc
and not to another class, minor can be omitted. Mandatory.
-.TP
+.TP
classid major:minor
Like qdiscs, classes can be named. The major number must be equal to the
-major number of the qdisc to which it belongs. Optional, but needed if this
+major number of the qdisc to which it belongs. Optional, but needed if this
class is going to have children.
-.TP
+.TP
weight weight
-When dequeuing to the interface, classes are tried for traffic in a
+When dequeuing to the interface, classes are tried for traffic in a
round-robin fashion. Classes with a higher configured qdisc will generally
have more traffic to offer during each round, so it makes sense to allow
it to dequeue more traffic. All weights under a class are normalized, so
-only the ratios matter. Defaults to the configured rate, unless the priority
+only the ratios matter. Defaults to the configured rate, unless the priority
of this class is maximal, in which case it is set to 1.
-.TP
+.TP
allot bytes
Allot specifies how many bytes a qdisc can dequeue
-during each round of the process. This parameter is weighted using the
+during each round of the process. This parameter is weighted using the
renormalized class weight described above.
-.TP
+.TP
priority priority
-In the round-robin process, classes with the lowest priority field are tried
+In the round-robin process, classes with the lowest priority field are tried
for packets first. Mandatory.
-.TP
+.TP
rate rate
Maximum rate this class and all its children combined can send at. Mandatory.
@@ -321,7 +321,7 @@ This is different from the bandwidth specified when creating a CBQ disc. Only
used to determine maxidle and offtime, which are only calculated when
specifying maxburst or minburst. Mandatory if specifying maxburst or minburst.
-.TP
+.TP
maxburst
This number of packets is used to calculate maxidle so that when
avgidle is at maxidle, this number of average packets can be burst
@@ -329,7 +329,7 @@ before avgidle drops to 0. Set it higher to be more tolerant of
bursts. You can't set maxidle directly, only via this parameter.
.TP
-minburst
+minburst
As mentioned before, CBQ needs to throttle in case of
overlimit. The ideal solution is to do so for exactly the calculated
idle time, and pass 1 packet. However, Unix kernels generally have a
@@ -352,21 +352,21 @@ Minidle is specified in negative microseconds, so 10 means that
avgidle is capped at -10us.
.TP
-bounded
+bounded
Signifies that this class will not borrow bandwidth from its siblings.
-.TP
+.TP
isolated
Means that this class will not borrow bandwidth to its siblings
-.TP
+.TP
split major:minor & defmap bitmap[/bitmap]
-If consulting filters attached to a class did not give a verdict,
+If consulting filters attached to a class did not give a verdict,
CBQ can also classify based on the packet's priority. There are 16
-priorities available, numbered from 0 to 15.
+priorities available, numbered from 0 to 15.
-The defmap specifies which priorities this class wants to receive,
-specified as a bitmap. The Least Significant Bit corresponds to priority
-zero. The
+The defmap specifies which priorities this class wants to receive,
+specified as a bitmap. The Least Significant Bit corresponds to priority
+zero. The
.B split
parameter tells CBQ at which class the decision must be made, which should
be a (grand)parent of the class you are adding.
@@ -374,7 +374,7 @@ be a (grand)parent of the class you are adding.
As an example, 'tc class add ... classid 10:1 cbq .. split 10:0 defmap c0'
configures class 10:0 to send packets with priorities 6 and 7 to 10:1.
-The complimentary configuration would then
+The complementary configuration would then
be: 'tc class add ... classid 10:2 cbq ... split 10:0 defmap 3f'
Which would send all packets 0, 1, 2, 3, 4 and 5 to 10:1.
.TP
@@ -384,11 +384,11 @@ can use to classify packets with. In order to determine the bandwidth
it uses a very simple estimator that measures once every
.B interval
microseconds how much traffic has passed. This again is a EWMA, for which
-the time constant can be specified, also in microseconds. The
+the time constant can be specified, also in microseconds. The
.B time constant
-corresponds to the sluggishness of the measurement or, conversely, to the
+corresponds to the sluggishness of the measurement or, conversely, to the
sensitivity of the average to short bursts. Higher values mean less
-sensitivity.
+sensitivity.
@@ -399,7 +399,7 @@ Sally Floyd and Van Jacobson, "Link-sharing and Resource
Management Models for Packet Networks",
IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
-.TP
+.TP
o
Sally Floyd, "Notes on CBQ and Guarantee Service", 1995
@@ -408,7 +408,7 @@ o
Sally Floyd, "Notes on Class-Based Queueing: Setting
Parameters", 1996
-.TP
+.TP
o
Sally Floyd and Michael Speer, "Experimental Results
for Class-Based Queueing", 1998, not published.
@@ -421,5 +421,3 @@ for Class-Based Queueing", 1998, not published.
.SH AUTHOR
Alexey N. Kuznetsov, <kuznet@ms2.inr.ac.ru>. This manpage maintained by
bert hubert <ahu@ds9a.nl>
-
-
diff --git a/man/man8/tc-cbq.8 b/man/man8/tc-cbq.8
index b900e1c3..301265d8 100644
--- a/man/man8/tc-cbq.8
+++ b/man/man8/tc-cbq.8
@@ -5,56 +5,56 @@ CBQ \- Class Based Queueing
.B tc qdisc ... dev
dev
.B ( parent
-classid
-.B | root) [ handle
-major:
-.B ] cbq [ allot
+classid
+.B | root) [ handle
+major:
+.B ] cbq [ allot
bytes
.B ] avpkt
bytes
.B bandwidth
rate
-.B [ cell
+.B [ cell
bytes
.B ] [ ewma
log
.B ] [ mpu
bytes
-.B ]
+.B ]
.B tc class ... dev
dev
-.B parent
+.B parent
major:[minor]
-.B [ classid
+.B [ classid
major:minor
.B ] cbq allot
bytes
-.B [ bandwidth
-rate
-.B ] [ rate
+.B [ bandwidth
+rate
+.B ] [ rate
rate
.B ] prio
priority
.B [ weight
weight
-.B ] [ minburst
+.B ] [ minburst
+packets
+.B ] [ maxburst
packets
-.B ] [ maxburst
-packets
-.B ] [ ewma
+.B ] [ ewma
log
.B ] [ cell
bytes
.B ] avpkt
bytes
.B [ mpu
-bytes
+bytes
.B ] [ bounded isolated ] [ split
handle
.B & defmap
defmap
-.B ] [ estimator
+.B ] [ estimator
interval timeconstant
.B ]
@@ -62,7 +62,7 @@ interval timeconstant
Class Based Queueing is a classful qdisc that implements a rich
linksharing hierarchy of classes. It contains shaping elements as
well as prioritizing capabilities. Shaping is performed using link
-idle time calculations based on the timing of dequeue events and
+idle time calculations based on the timing of dequeue events and
underlying link bandwidth.
.SH SHAPING ALGORITHM
@@ -85,71 +85,71 @@ CBQ throttles and is then 'overlimit'.
Conversely, an idle link might amass a huge avgidle, which would then
allow infinite bandwidths after a few hours of silence. To prevent
-this, avgidle is capped at
+this, avgidle is capped at
.B maxidle.
If overlimit, in theory, the CBQ could throttle itself for exactly the
amount of time that was calculated to pass between packets, and then
pass one packet, and throttle again. Due to timer resolution constraints,
-this may not be feasible, see the
+this may not be feasible, see the
.B minburst
parameter below.
.SH CLASSIFICATION
Within the one CBQ instance many classes may exist. Each of these classes
-contains another qdisc, by default
+contains another qdisc, by default
.BR tc-pfifo (8).
-When enqueueing a packet, CBQ starts at the root and uses various methods to
-determine which class should receive the data.
+When enqueueing a packet, CBQ starts at the root and uses various methods to
+determine which class should receive the data.
-In the absence of uncommon configuration options, the process is rather easy.
-At each node we look for an instruction, and then go to the class the
-instruction refers us to. If the class found is a barren leaf-node (without
-children), we enqueue the packet there. If it is not yet a leaf node, we do
-the whole thing over again starting from that node.
+In the absence of uncommon configuration options, the process is rather easy.
+At each node we look for an instruction, and then go to the class the
+instruction refers us to. If the class found is a barren leaf-node (without
+children), we enqueue the packet there. If it is not yet a leaf node, we do
+the whole thing over again starting from that node.
-The following actions are performed, in order at each node we visit, until one
+The following actions are performed, in order at each node we visit, until one
sends us to another node, or terminates the process.
.TP
(i)
-Consult filters attached to the class. If sent to a leafnode, we are done.
+Consult filters attached to the class. If sent to a leafnode, we are done.
Otherwise, restart.
.TP
(ii)
-Consult the defmap for the priority assigned to this packet, which depends
+Consult the defmap for the priority assigned to this packet, which depends
on the TOS bits. Check if the referral is leafless, otherwise restart.
.TP
(iii)
-Ask the defmap for instructions for the 'best effort' priority. Check the
+Ask the defmap for instructions for the 'best effort' priority. Check the
answer for leafness, otherwise restart.
.TP
(iv)
If none of the above returned with an instruction, enqueue at this node.
.P
This algorithm makes sure that a packet always ends up somewhere, even while
-you are busy building your configuration.
+you are busy building your configuration.
For more details, see
.BR tc-cbq-details(8).
.SH LINK SHARING ALGORITHM
-When dequeuing for sending to the network device, CBQ decides which of its
+When dequeuing for sending to the network device, CBQ decides which of its
classes will be allowed to send. It does so with a Weighted Round Robin process
in which each class with packets gets a chance to send in turn. The WRR process
-starts by asking the highest priority classes (lowest numerically -
+starts by asking the highest priority classes (lowest numerically -
highest semantically) for packets, and will continue to do so until they
-have no more data to offer, in which case the process repeats for lower
+have no more data to offer, in which case the process repeats for lower
priorities.
-Classes by default borrow bandwidth from their siblings. A class can be
-prevented from doing so by declaring it 'bounded'. A class can also indicate
+Classes by default borrow bandwidth from their siblings. A class can be
+prevented from doing so by declaring it 'bounded'. A class can also indicate
its unwillingness to lend out bandwidth by being 'isolated'.
.SH QDISC
The root of a CBQ qdisc class tree has the following parameters:
-.TP
+.TP
parent major:minor | root
This mandatory parameter determines the place of the CBQ instance, either at the
.B root
@@ -159,7 +159,7 @@ handle major:
Like all other qdiscs, the CBQ can be assigned a handle. Should consist only
of a major number, followed by a colon. Optional, but very useful if classes
will be generated within this qdisc.
-.TP
+.TP
allot bytes
This allotment is the 'chunkiness' of link sharing and is used for determining packet
transmission time tables. The qdisc allot differs slightly from the class allot discussed
@@ -170,23 +170,23 @@ The average size of a packet is needed for calculating maxidle, and is also used
for making sure 'allot' has a safe value. Mandatory.
.TP
bandwidth rate
-To determine the idle time, CBQ must know the bandwidth of your underlying
+To determine the idle time, CBQ must know the bandwidth of your underlying
physical interface, or parent qdisc. This is a vital parameter, more about it
later. Mandatory.
.TP
cell
The cell size determines the granularity of packet transmission time calculations. Has a sensible default.
-.TP
+.TP
mpu
A zero sized packet may still take time to transmit. This value is the lower
cap for packet transmission time calculations - packets smaller than this value
are still deemed to have this size. Defaults to zero.
.TP
ewma log
-When CBQ needs to measure the average idle time, it does so using an
+When CBQ needs to measure the average idle time, it does so using an
Exponentially Weighted Moving Average which smooths out measurements into
-a moving average. The EWMA LOG determines how much smoothing occurs. Lower
-values imply greater sensitivity. Must be between 0 and 31. Defaults
+a moving average. The EWMA LOG determines how much smoothing occurs. Lower
+values imply greater sensitivity. Must be between 0 and 31. Defaults
to 5.
.P
A CBQ qdisc does not shape out of its own accord. It only needs to know certain
@@ -195,40 +195,40 @@ parameters about the underlying link. Actual shaping is done in classes.
.SH CLASSES
Classes have a host of parameters to configure their operation.
-.TP
+.TP
parent major:minor
-Place of this class within the hierarchy. If attached directly to a qdisc
+Place of this class within the hierarchy. If attached directly to a qdisc
and not to another class, minor can be omitted. Mandatory.
-.TP
+.TP
classid major:minor
Like qdiscs, classes can be named. The major number must be equal to the
-major number of the qdisc to which it belongs. Optional, but needed if this
+major number of the qdisc to which it belongs. Optional, but needed if this
class is going to have children.
-.TP
+.TP
weight weight
-When dequeuing to the interface, classes are tried for traffic in a
+When dequeuing to the interface, classes are tried for traffic in a
round-robin fashion. Classes with a higher configured qdisc will generally
have more traffic to offer during each round, so it makes sense to allow
it to dequeue more traffic. All weights under a class are normalized, so
-only the ratios matter. Defaults to the configured rate, unless the priority
+only the ratios matter. Defaults to the configured rate, unless the priority
of this class is maximal, in which case it is set to 1.
-.TP
+.TP
allot bytes
Allot specifies how many bytes a qdisc can dequeue
-during each round of the process. This parameter is weighted using the
+during each round of the process. This parameter is weighted using the
renormalized class weight described above. Silently capped at a minimum of
3/2 avpkt. Mandatory.
-.TP
+.TP
prio priority
-In the round-robin process, classes with the lowest priority field are tried
+In the round-robin process, classes with the lowest priority field are tried
for packets first. Mandatory.
-.TP
+.TP
avpkt
See the QDISC section.
-.TP
+.TP
rate rate
Maximum rate this class and all its children combined can send at. Mandatory.
@@ -238,7 +238,7 @@ This is different from the bandwidth specified when creating a CBQ disc! Only
used to determine maxidle and offtime, which are only calculated when
specifying maxburst or minburst. Mandatory if specifying maxburst or minburst.
-.TP
+.TP
maxburst
This number of packets is used to calculate maxidle so that when
avgidle is at maxidle, this number of average packets can be burst
@@ -246,7 +246,7 @@ before avgidle drops to 0. Set it higher to be more tolerant of
bursts. You can't set maxidle directly, only via this parameter.
.TP
-minburst
+minburst
As mentioned before, CBQ needs to throttle in case of
overlimit. The ideal solution is to do so for exactly the calculated
idle time, and pass 1 packet. However, Unix kernels generally have a
@@ -269,21 +269,21 @@ Minidle is specified in negative microseconds, so 10 means that
avgidle is capped at -10us. Optional.
.TP
-bounded
+bounded
Signifies that this class will not borrow bandwidth from its siblings.
-.TP
+.TP
isolated
Means that this class will not lend bandwidth to its siblings
-.TP
+.TP
split major:minor & defmap bitmap[/bitmap]
-If consulting filters attached to a class did not give a verdict,
+If consulting filters attached to a class did not give a verdict,
CBQ can also classify based on the packet's priority. There are 16
-priorities available, numbered from 0 to 15.
+priorities available, numbered from 0 to 15.
-The defmap specifies which priorities this class wants to receive,
-specified as a bitmap. The Least Significant Bit corresponds to priority
-zero. The
+The defmap specifies which priorities this class wants to receive,
+specified as a bitmap. The Least Significant Bit corresponds to priority
+zero. The
.B split
parameter tells CBQ at which class the decision must be made, which should
be a (grand)parent of the class you are adding.
@@ -291,7 +291,7 @@ be a (grand)parent of the class you are adding.
As an example, 'tc class add ... classid 10:1 cbq .. split 10:0 defmap c0'
configures class 10:0 to send packets with priorities 6 and 7 to 10:1.
-The complimentary configuration would then
+The complementary configuration would then
be: 'tc class add ... classid 10:2 cbq ... split 10:0 defmap 3f'
Which would send all packets with priorities 0, 1, 2, 3, 4 and 5 to 10:2.
.TP
@@ -301,22 +301,22 @@ can use to classify packets with. In order to determine the bandwidth
it uses a very simple estimator that measures once every
.B interval
microseconds how much traffic has passed. This again is an EWMA, for which
-the time constant can be specified, also in microseconds. The
+the time constant can be specified, also in microseconds. The
.B time constant
-corresponds to the sluggishness of the measurement or, conversely, to the
+corresponds to the sluggishness of the measurement or, conversely, to the
sensitivity of the average to short bursts. Higher values mean less
-sensitivity.
+sensitivity.
.SH BUGS
-The actual bandwidth of the underlying link may not be known, for example
-in the case of PPoE or PPTP connections which in fact may send over a
+The actual bandwidth of the underlying link may not be known, for example
+in the case of PPPoE or PPTP connections which in fact may send over a
pipe, instead of over a physical device. CBQ is quite resilient to major
errors in the configured bandwidth, probably at the cost of coarser shaping.
-Default kernels rely on coarse timing information for making decisions. These
+Default kernels rely on coarse timing information for making decisions. These
may make shaping precise in the long term, but inaccurate on second long scales.
-See
+See
.BR tc-cbq-details(8)
for hints on how to improve this.
@@ -327,7 +327,7 @@ Sally Floyd and Van Jacobson, "Link-sharing and Resource
Management Models for Packet Networks",
IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
-.TP
+.TP
o
Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
@@ -336,7 +336,7 @@ o
Sally Floyd, "Notes on Class-Based Queueing: Setting
Parameters", 1996
-.TP
+.TP
o
Sally Floyd and Michael Speer, "Experimental Results
for Class-Based Queueing", 1998, not published.
@@ -349,5 +349,3 @@ for Class-Based Queueing", 1998, not published.
.SH AUTHOR
Alexey N. Kuznetsov, <kuznet@ms2.inr.ac.ru>. This manpage maintained by
bert hubert <ahu@ds9a.nl>
-
-
diff --git a/man/man8/tc-cgroup.8 b/man/man8/tc-cgroup.8
new file mode 100644
index 00000000..2bea7d4a
--- /dev/null
+++ b/man/man8/tc-cgroup.8
@@ -0,0 +1,80 @@
+.TH "Cgroup classifier in tc" 8 " 21 Oct 2015" "iproute2" "Linux"
+
+.SH NAME
+cgroup \- control group based traffic control filter
+.SH SYNOPSIS
+.in +8
+.ti -8
+.BR tc " " filter " ... " cgroup " [ " match
+.IR EMATCH_TREE " ] [ "
+.B action
+.IR ACTION_SPEC " ]"
+.SH DESCRIPTION
+This filter serves as a hint to
+.B tc
+that the assigned class ID of the net_cls control group the process the packet
+originates from belongs to should be used for classification. Obviously, it is
+useful for locally generated packets only.
+.SH OPTIONS
+.TP
+.BI action " ACTION_SPEC"
+Apply an action from the generic actions framework on matching packets.
+.TP
+.BI match " EMATCH_TREE"
+Match packets using the extended match infrastructure. See
+.BR tc-ematch (8)
+for a detailed description of the allowed syntax in
+.IR EMATCH_TREE .
+.SH EXAMPLES
+In order to use this filter, a net_cls control group has to be created first and
+class as well as process ID(s) assigned to it. The following creates a net_cls
+cgroup named "foobar":
+
+.RS
+.EX
+modprobe cls_cgroup
+mkdir /sys/fs/cgroup/net_cls
+mount -t cgroup -onet_cls net_cls /sys/fs/cgroup/net_cls
+mkdir /sys/fs/cgroup/net_cls/foobar
+.EE
+.RE
+
+To assign a class ID to the created cgroup, a file named
+.I net_cls.classid
+has to be created which contains the class ID to be assigned as a hexadecimal,
+32bit wide number. The upper 16bits are reserved for the major handle, the
+remaining hold the minor. So a class ID of e.g.
+.B ff:be
+has to be written like so:
+.B 0xff00be
+(leading zeroes may be omitted). To continue the above example, the following
+assigns class ID 1:2 to foobar cgroup:
+
+.RS
+.EX
+echo 0x10002 > /sys/fs/cgroup/net_cls/foobar/net_cls.classid
+.EE
+.RE
+
+Finally some PIDs can be assigned to the given cgroup:
+
+.RS
+.EX
+echo 1234 > /sys/fs/cgroup/net_cls/foobar/tasks
+echo 5678 > /sys/fs/cgroup/net_cls/foobar/tasks
+.EE
+.RE
+
+Now simply attaching a
+.B cgroup
+filter to a
+.B qdisc
+makes packets from PIDs 1234 and 5678 be pushed into class 1:2.
+
+.SH SEE ALSO
+.BR tc (8),
+.BR tc-ematch (8),
+.br
+the file
+.I Documentation/cgroups/net_cls.txt
+of the Linux kernel tree
diff --git a/man/man8/tc-drr.8 b/man/man8/tc-drr.8
index f550a35d..2fea4ee2 100644
--- a/man/man8/tc-drr.8
+++ b/man/man8/tc-drr.8
@@ -92,4 +92,3 @@ as limits are handled by the individual child qdiscs.
.SH AUTHOR
sched_drr was written by Patrick McHardy.
-
diff --git a/man/man8/tc-flow.8 b/man/man8/tc-flow.8
new file mode 100644
index 00000000..f1b7e2a4
--- /dev/null
+++ b/man/man8/tc-flow.8
@@ -0,0 +1,265 @@
+.TH "Flow filter in tc" 8 "20 Oct 2015" "iproute2" "Linux"
+
+.SH NAME
+flow \- flow based traffic control filter
+.SH SYNOPSIS
+.TP
+Mapping mode:
+
+.RS
+.in +8
+.ti -8
+.BR tc " " filter " ... " "flow map key "
+.IR KEY " [ " OPS " ] [ " OPTIONS " ] "
+.RE
+.TP
+Hashing mode:
+
+.RS
+.in +8
+.ti -8
+.BR tc " " filter " ... " "flow hash keys "
+.IR KEY_LIST " [ "
+.B perturb
+.IR secs " ] [ " OPTIONS " ] "
+.RE
+
+.in +8
+.ti -8
+.IR OPS " := [ " OPS " ] " OP
+
+.ti -8
+.IR OPTIONS " := [ "
+.B divisor
+.IR NUM " ] [ "
+.B baseclass
+.IR ID " ] [ "
+.B match
+.IR EMATCH_TREE " ] [ "
+.B action
+.IR ACTION_SPEC " ]"
+
+.ti -8
+.IR KEY_LIST " := [ " KEY_LIST " ] " KEY
+
+.ti -8
+.IR OP " := { "
+.BR or " | " and " | " xor " | " rshift " | " addend " } "
+.I NUM
+
+.ti -8
+.IR ID " := " X : Y
+
+.ti -8
+.IR KEY " := { "
+.BR src " | " dst " | " proto " | " proto-src " | " proto-dst " | " iif " | "
+.BR priority " | " mark " | " nfct " | " nfct-src " | " nfct-dst " | "
+.BR nfct-proto-src " | " nfct-proto-dst " | " rt-classid " | " sk-uid " | "
+.BR sk-gid " | " vlan-tag " | " rxhash " }"
+.SH DESCRIPTION
+The
+.B flow
+classifier is meant to extend the
+.B SFQ
+hashing capabilities without hard-coding new hash functions. It also allows
+deterministic mappings of keys to classes.
+.SH OPTIONS
+.TP
+.BI action " ACTION_SPEC"
+Apply an action from the generic actions framework on matching packets.
+.TP
+.BI baseclass " ID"
+An offset for the resulting class ID.
+.I ID
+may be
+.BR root ", " none
+or a hexadecimal class ID in the form [\fIX\fB:\fR]\fIY\fR. If \fIX\fR is
+omitted, it is assumed to be zero.
+.TP
+.BI divisor " NUM"
+Number of buckets to use for sorting into. Keys are calculated modulo
+.IR NUM .
+.TP
+.BI "hash keys " KEY_LIST
+Perform a
+.B jhash2
+operation over the keys in
+.IR KEY_LIST ,
+the result (modulo the
+.B divisor
+if given) is taken as class ID, optionally offset by the value of
+.BR baseclass .
+It is possible to specify an interval (in seconds) after which
+.BR jhash2 's
+entropy source is recreated using the
+.B perturb
+parameter.
+.TP
+.BI "map key " KEY
+Packet data identified by
+.I KEY
+is translated into class IDs to push the packet into. The value may be mangled by
+.I OPS
+before using it for the mapping. They are applied in the order listed here:
+.RS
+.TP 4
+.BI and " NUM"
+Perform bitwise
+.B AND
+operation with numeric value
+.IR NUM .
+.TP
+.BI or " NUM"
+Perform bitwise
+.B OR
+operation with numeric value
+.IR NUM .
+.TP
+.BI xor " NUM"
+Perform bitwise
+.B XOR
+operation with numeric value
+.IR NUM .
+.TP
+.BI rshift " NUM"
+Shift the value of
+.I KEY
+to the right by
+.I NUM
+bits.
+.TP
+.BI addend " NUM"
+Add
+.I NUM
+to the value of
+.IR KEY .
+
+.RE
+.RS
+For the
+.BR or ", " and ", " xor " and " rshift
+operations,
+.I NUM
+is assumed to be an unsigned, 32bit integer value. For the
+.B addend
+operation,
+.I NUM
+may be much more complex: It may be prefixed by a minus ('-') sign to cause
+subtraction instead of addition and for keys of
+.BR src ", " dst ", " nfct-src " and " nfct-dst
+it may be given in IP address notation. See below for an illustrating example.
+.RE
+.TP
+.BI match " EMATCH_TREE"
+Match packets using the extended match infrastructure. See
+.BR tc-ematch (8)
+for a detailed description of the allowed syntax in
+.IR EMATCH_TREE .
+.SH KEYS
+In mapping mode, a single key is used (after optional permutation) to build a
+class ID. The resulting ID is deducible in most cases. In hashing mode, a number
+of keys may be specified which are then hashed and the output used as class ID.
+This ID is not deducible beforehand, and may even change over time for a
+given flow if a
+.B perturb
+interval has been given.
+
+The range of class IDs can be limited by the
+.B divisor
+option, which is used for a modulus.
+.TP
+.BR src ", " dst
+Use source or destination address as key. In case of IPv4 and TIPC, this is the
+actual address value. For IPv6, the 128bit address is folded into a 32bit value
+by XOR'ing the four 32bit words. In all other cases, the kernel-internal socket
+address is used (after folding into 32bits on 64bit systems).
+.TP
+.B proto
+Use the layer four protocol number as key.
+.TP
+.B proto-src
+Use the layer four source port as key. If not available, the kernel-internal
+socket address is used instead.
+.TP
+.B proto-dst
+Use the layer four destination port as key. If not available, the associated
+kernel-internal dst_entry address is used after XOR'ing with the packet's
+layer three protocol number.
+.TP
+.B iif
+Use the incoming interface index as key.
+.TP
+.B priority
+Use the packet's priority as key. Usually this is the IP header's DSCP/ECN
+value.
+.TP
+.B mark
+Use the netfilter
+.B fwmark
+as key.
+.TP
+.B nfct
+Use the associated conntrack entry address as key.
+.TP
+.BR nfct-src ", " nfct-dst ", " nfct-proto-src ", " nfct-proto-dst
+These are conntrack-aware variants of
+.BR src ", " dst ", " proto-src " and " proto-dst .
+In case of NAT, these are basically the packet header's values before NAT was
+applied.
+.TP
+.B rt-classid
+Use the packet's destination routing table entry's realm as key.
+.TP
+.B sk-uid
+.TQ
+.B sk-gid
+For locally generated packets, use the user or group ID the originating socket
+belongs to as key.
+.TP
+.B vlan-tag
+Use the packet's vlan ID as key.
+.TP
+.B rxhash
+Use the flow hash as key.
+
+.SH EXAMPLES
+.TP
+Classic SFQ hash:
+
+.EX
+tc filter add ... flow hash \\
+ keys src,dst,proto,proto-src,proto-dst divisor 1024
+.EE
+.TP
+Classic SFQ hash, but using information from conntrack to work properly in combination with NAT:
+
+.EX
+tc filter add ... flow hash \\
+ keys nfct-src,nfct-dst,proto,nfct-proto-src,nfct-proto-dst \\
+ divisor 1024
+.EE
+.TP
+Map destination IPs of 192.168.0.0/24 to classids 1-256:
+
+.EX
+tc filter add ... flow map \\
+ key dst addend -192.168.0.0 divisor 256
+.EE
+.TP
+Alternative to the above:
+
+.EX
+tc filter add ... flow map \\
+ key dst and 0xff
+.EE
+.TP
+The same, but in reverse order:
+
+.EX
+tc filter add ... flow map \\
+ key dst and 0xff xor 0xff
+.EE
+.SH SEE ALSO
+.BR tc (8),
+.BR tc-ematch (8),
+.BR tc-sfq (8)
diff --git a/man/man8/tc-flower.8 b/man/man8/tc-flower.8
new file mode 100644
index 00000000..df4d8e19
--- /dev/null
+++ b/man/man8/tc-flower.8
@@ -0,0 +1,113 @@
+.TH "Flower filter in tc" 8 "22 Oct 2015" "iproute2" "Linux"
+
+.SH NAME
+flower \- flow based traffic control filter
+.SH SYNOPSIS
+.in +8
+.ti -8
+.BR tc " " filter " ... " flower " [ "
+.IR MATCH_LIST " ] [ "
+.B action
+.IR ACTION_SPEC " ] [ "
+.B classid
+.IR CLASSID " ]"
+
+.ti -8
+.IR MATCH_LIST " := [ " MATCH_LIST " ] " MATCH
+
+.ti -8
+.IR MATCH " := { "
+.B indev
+.IR ifname " | { "
+.BR dst_mac " | " src_mac " } "
+.IR mac_address " | "
+.BR eth_type " { " ipv4 " | " ipv6 " | "
+.IR ETH_TYPE " } | "
+.BR ip_proto " { " tcp " | " udp " | "
+.IR IP_PROTO " } | { "
+.BR dst_ip " | " src_ip " } { "
+.IR ipv4_address " | " ipv6_address " } | { "
+.BR dst_port " | " src_port " } "
+.IR port_number " }"
+.SH DESCRIPTION
+The
+.B flower
+filter matches flows to the set of keys specified and assigns an arbitrarily
+chosen class ID to packets belonging to them. Additionally (or alternatively) an
+action from the generic action framework may be called.
+.SH OPTIONS
+.TP
+.BI action " ACTION_SPEC"
+Apply an action from the generic actions framework on matching packets.
+.TP
+.BI classid " CLASSID"
+Specify a class to pass matching packets on to.
+.I CLASSID
+is in the form
+.BR X : Y ", while " X " and " Y
+are interpreted as numbers in hexadecimal format.
+.TP
+.BI indev " ifname"
+Match on incoming interface name. Obviously this makes sense only for forwarded
+flows.
+.I ifname
+is the name of an interface which must exist at the time of
+.B tc
+invocation.
+.TP
+.BI dst_mac " mac_address"
+.TQ
+.BI src_mac " mac_address"
+Match on source or destination MAC address.
+.TP
+.BI eth_type " ETH_TYPE"
+Match on layer three protocol.
+.I ETH_TYPE
+may be either
+.BR ipv4 ", " ipv6
+or an unsigned 16bit value in hexadecimal format.
+.TP
+.BI ip_proto " IP_PROTO"
+Match on layer four protocol.
+.I IP_PROTO
+may be either
+.BR tcp ", " udp
+or an unsigned 8bit value in hexadecimal format.
+.TP
+.BI dst_ip " ADDRESS"
+.TQ
+.BI src_ip " ADDRESS"
+Match on source or destination IP address.
+.I ADDRESS
+must be a valid IPv4 or IPv6 address, depending on
+.BR eth_type ,
+which has to be specified beforehand.
+.TP
+.BI dst_port " NUMBER"
+.TQ
+.BI src_port " NUMBER"
+Match on layer 4 protocol source or destination port number. Only available for
+.BR ip_proto " values " udp " and " tcp ,
+which has to be specified beforehand.
+.SH NOTES
+As stated above where applicable, matches of a certain layer implicitly depend
+on the matches of the next lower layer. Precisely, layer one and two matches (
+.BR indev ", " dst_mac ", " src_mac " and " eth_type )
+have no dependency, layer three matches (
+.BR ip_proto ", " dst_ip " and " src_ip )
+require
+.B eth_type
+being set to either
+.BR ipv4 " or " ipv6 ,
+and finally layer four matches (
+.BR dst_port " and " src_port )
+depend on
+.B ip_proto
+being set to either
+.BR tcp " or " udp .
+.P
+Only one mask can be used per prio. If the user needs to specify a different
+mask, a different prio has to be used.
+.SH SEE ALSO
+.BR tc (8),
+.BR tc-flow (8)
diff --git a/man/man8/tc-fq.8 b/man/man8/tc-fq.8
new file mode 100644
index 00000000..f058a05a
--- /dev/null
+++ b/man/man8/tc-fq.8
@@ -0,0 +1,92 @@
+.TH FQ 8 "10 Sept 2015" "iproute2" "Linux"
+.SH NAME
+FQ \- Fair Queue traffic policing
+.SH SYNOPSIS
+.B tc qdisc ... fq
+[
+.B limit
+PACKETS ] [
+.B flow_limit
+PACKETS ] [
+.B quantum
+BYTES ] [
+.B initial_quantum
+BYTES ] [
+.B maxrate
+RATE ] [
+.B buckets
+NUMBER ] [
+.B pacing
+|
+.B nopacing
+]
+
+.SH DESCRIPTION
+FQ (Fair Queue) is a classless packet scheduler meant to be mostly
+used for locally generated traffic. It is designed to achieve per flow pacing.
+FQ does flow separation, and is able to respect pacing requirements set by TCP stack.
+All packets belonging to a socket are considered as a 'flow'.
+For non local packets (router workload), packet rxhash is used as fallback.
+
+An application can specify a maximum pacing rate using the
+.B SO_MAX_PACING_RATE
+setsockopt call. This packet scheduler adds delay between packets to
+respect rate limitation set by TCP stack.
+
+Dequeueing happens in a round-robin fashion.
+A special FIFO queue is reserved for high priority packets (
+.B TC_PRIO_CONTROL
+priority), such packets are always dequeued first.
+
+FQ is non-work-conserving.
+
+TCP pacing is good for flows having idle times, as the congestion
+window permits TCP stack to queue a possibly large number of packets.
+This removes the 'slow start after idle' choice, badly hitting
+large BDP flows and applications delivering chunks of data such as video streams.
+
+.SH PARAMETERS
+.SS limit
+Hard limit on the real queue size. When this limit is reached, new packets
+are dropped. If the value is lowered, packets are dropped so that the new limit is
+met. Default is 10000 packets.
+.SS flow_limit
+Hard limit on the maximum number of packets queued per flow.
+Default value is 100.
+.SS quantum
+The credit per dequeue RR round, i.e. the amount of bytes a flow is allowed to
+dequeue at once. A larger value means a longer time period before the next flow
+will be served.
+Default is 2 * interface MTU bytes.
+.SS initial_quantum
+The initial sending rate credit, i.e. the amount of bytes a new flow is allowed
+to dequeue initially.
+This is specifically meant to allow using IW10 without added delay.
+Default is 10 * interface MTU, i.e. 15140 for 'standard' ethernet.
+.SS maxrate
+Maximum sending rate of a flow. Default is unlimited.
+Application specific setting via
+.B SO_MAX_PACING_RATE
+is ignored only if it is larger than this value.
+.SS buckets
+The size of the hash table used for flow lookups. Each bucket is assigned a
+red-black tree for efficient collision sorting.
+Default: 1024.
+.SS [no]pacing
+Enable or disable flow pacing. Default is enabled.
+.SH EXAMPLES
+#tc qdisc add dev eth0 root fq
+.br
+#tc -s -d qdisc
+.br
+qdisc fq 8003: dev eth0 root refcnt 2 limit 10000p flow_limit 100p buckets 1024 quantum 3028 initial_quantum 15140
+ Sent 503727981 bytes 1146972 pkt (dropped 0, overlimits 0 requeues 54452)
+ backlog 0b 0p requeues 54452
+ 1289 flows (1289 inactive, 0 throttled)
+ 0 gc, 31 highprio, 27411 throttled
+.br
+.SH SEE ALSO
+.BR tc (8),
+.BR socket (7)
+.SH AUTHORS
+FQ was written by Eric Dumazet.
diff --git a/man/man8/tc-fw.8 b/man/man8/tc-fw.8
new file mode 100644
index 00000000..d742b473
--- /dev/null
+++ b/man/man8/tc-fw.8
@@ -0,0 +1,66 @@
+.TH "Firewall mark classifier in tc" 8 "21 Oct 2015" "iproute2" "Linux"
+
+.SH NAME
+fw \- fwmark traffic control filter
+.SH SYNOPSIS
+.in +8
+.ti -8
+.BR tc " " filter " ... " fw " [ " classid
+.IR CLASSID " ] [ "
+.B action
+.IR ACTION_SPEC " ]"
+.SH DESCRIPTION
+The
+.B fw
+filter allows classifying packets based on a previously set
+.BR fwmark " by " iptables .
+If it is identical to the filter's
+.BR handle ,
+the filter matches.
+.B iptables
+allows marking single packets with the
+.B MARK
+target, or whole connections using
+.BR CONNMARK .
+The benefit of using this filter instead of doing the
+heavy-lifting with
+.B tc
+itself is that on one hand it might be convenient to keep packet filtering and
+classification in one place, possibly having to match a packet just once, and on
+the other hand, users familiar with
+.BR iptables " but not " tc
+will have a less hard time adding QoS to their setups.
+.SH OPTIONS
+.TP
+.BI classid " CLASSID"
+Push matching packets to the class identified by
+.IR CLASSID .
+.TP
+.BI action " ACTION_SPEC"
+Apply an action from the generic actions framework on matching packets.
+.SH EXAMPLES
+Take e.g. the following tc filter statement:
+
+.RS
+.EX
+tc filter add ... handle 6 fw classid 1:1
+.EE
+.RE
+
+will match if the packet's
+.B fwmark
+value is
+.BR 6 .
+This is a sample
+.B iptables
+statement marking packets coming in on eth0:
+
+.RS
+.EX
+iptables -t mangle -A PREROUTING -i eth0 -j MARK --set-mark 6
+.EE
+.RE
+.SH SEE ALSO
+.BR tc (8),
+.BR iptables (8),
+.BR iptables-extensions (8)
diff --git a/man/man8/tc-htb.8 b/man/man8/tc-htb.8
index d196ecd4..ae310f43 100644
--- a/man/man8/tc-htb.8
+++ b/man/man8/tc-htb.8
@@ -5,30 +5,30 @@ HTB \- Hierarchy Token Bucket
.B tc qdisc ... dev
dev
.B ( parent
-classid
-.B | root) [ handle
-major:
-.B ] htb [ default
+classid
+.B | root) [ handle
+major:
+.B ] htb [ default
minor-id
-.B ]
+.B ]
.B tc class ... dev
dev
-.B parent
+.B parent
major:[minor]
-.B [ classid
+.B [ classid
major:minor
.B ] htb rate
rate
.B [ ceil
-rate
-.B ] burst
+rate
+.B ] burst
bytes
.B [ cburst
bytes
.B ] [ prio
priority
-.B ]
+.B ]
.SH DESCRIPTION
HTB is meant as a more understandable and intuitive replacement for
@@ -37,9 +37,9 @@ of the outbound bandwidth on a given link. Both allow you to use one
physical link to simulate several slower links and to send different
kinds of traffic on different simulated links. In both cases, you have
to specify how to divide the physical link into simulated links and
-how to decide which simulated link to use for a given packet to be sent.
+how to decide which simulated link to use for a given packet to be sent.
-Unlike CBQ, HTB shapes traffic based on the Token Bucket Filter algorithm
+Unlike CBQ, HTB shapes traffic based on the Token Bucket Filter algorithm
which does not depend on interface characteristics and so does not need to
know the underlying bandwidth of the outgoing interface.
@@ -48,31 +48,31 @@ Shaping works as documented in
.B tc-tbf (8).
.SH CLASSIFICATION
-Within the one HRB instance many classes may exist. Each of these classes
-contains another qdisc, by default
+Within the one HTB instance many classes may exist. Each of these classes
+contains another qdisc, by default
.BR tc-pfifo (8).
-When enqueueing a packet, HTB starts at the root and uses various methods to
-determine which class should receive the data.
+When enqueueing a packet, HTB starts at the root and uses various methods to
+determine which class should receive the data.
-In the absence of uncommon configuration options, the process is rather easy.
-At each node we look for an instruction, and then go to the class the
-instruction refers us to. If the class found is a barren leaf-node (without
-children), we enqueue the packet there. If it is not yet a leaf node, we do
-the whole thing over again starting from that node.
+In the absence of uncommon configuration options, the process is rather easy.
+At each node we look for an instruction, and then go to the class the
+instruction refers us to. If the class found is a barren leaf-node (without
+children), we enqueue the packet there. If it is not yet a leaf node, we do
+the whole thing over again starting from that node.
-The following actions are performed, in order at each node we visit, until one
+The following actions are performed, in order at each node we visit, until one
sends us to another node, or terminates the process.
.TP
(i)
-Consult filters attached to the class. If sent to a leafnode, we are done.
+Consult filters attached to the class. If sent to a leafnode, we are done.
Otherwise, restart.
.TP
(ii)
If none of the above returned with an instruction, enqueue at this node.
.P
This algorithm makes sure that a packet always ends up somewhere, even while
-you are busy building your configuration.
+you are busy building your configuration.
.SH LINK SHARING ALGORITHM
FIXME
@@ -80,7 +80,7 @@ FIXME
.SH QDISC
The root of a HTB qdisc class tree has the following parameters:
-.TP
+.TP
parent major:minor | root
This mandatory parameter determines the place of the HTB instance, either at the
.B root
@@ -90,54 +90,54 @@ handle major:
Like all other qdiscs, the HTB can be assigned a handle. Should consist only
of a major number, followed by a colon. Optional, but very useful if classes
will be generated within this qdisc.
-.TP
+.TP
default minor-id
Unclassified traffic gets sent to the class with this minor-id.
.SH CLASSES
Classes have a host of parameters to configure their operation.
-.TP
+.TP
parent major:minor
-Place of this class within the hierarchy. If attached directly to a qdisc
+Place of this class within the hierarchy. If attached directly to a qdisc
and not to another class, minor can be omitted. Mandatory.
-.TP
+.TP
classid major:minor
Like qdiscs, classes can be named. The major number must be equal to the
-major number of the qdisc to which it belongs. Optional, but needed if this
+major number of the qdisc to which it belongs. Optional, but needed if this
class is going to have children.
-.TP
+.TP
prio priority
-In the round-robin process, classes with the lowest priority field are tried
+In the round-robin process, classes with the lowest priority field are tried
for packets first. Mandatory.
-.TP
+.TP
rate rate
Maximum rate this class and all its children are guaranteed. Mandatory.
.TP
ceil rate
-Maximum rate at which a class can send, if its parent has bandwidth to spare.
+Maximum rate at which a class can send, if its parent has bandwidth to spare.
Defaults to the configured rate, which implies no borrowing
-.TP
+.TP
burst bytes
-Amount of bytes that can be burst at
+Amount of bytes that can be burst at
.B ceil
speed, in excess of the configured
-.B rate.
+.B rate.
Should be at least as high as the highest burst of all children.
-.TP
+.TP
cburst bytes
Amount of bytes that can be burst at 'infinite' speed, in other words, as fast
as the interface can transmit them. For perfect evening out, should be equal to at most one average
packet. Should be at least as high as the highest cburst of all children.
.SH NOTES
-Due to Unix timing constraints, the maximum ceil rate is not infinite and may in fact be quite low. On Intel,
+Due to Unix timing constraints, the maximum ceil rate is not infinite and may in fact be quite low. On Intel,
there are 100 timer events per second, the maximum rate is that rate at which 'burst' bytes are sent each timer tick.
-From this, the minimum burst size for a specified rate can be calculated. For i386, a 10mbit rate requires a 12 kilobyte
+From this, the minimum burst size for a specified rate can be calculated. For i386, a 10mbit rate requires a 12 kilobyte
burst as 100*12kb*8 equals 10mbit.
.SH SEE ALSO
@@ -146,5 +146,3 @@ burst as 100*12kb*8 equals 10mbit.
HTB website: http://luxik.cdi.cz/~devik/qos/htb/
.SH AUTHOR
Martin Devera <devik@cdi.cz>. This manpage maintained by bert hubert <ahu@ds9a.nl>
-
-
diff --git a/man/man8/tc-mqprio.8 b/man/man8/tc-mqprio.8
index da3bf089..0e1d305d 100644
--- a/man/man8/tc-mqprio.8
+++ b/man/man8/tc-mqprio.8
@@ -85,7 +85,7 @@ belong to an application. See kernel and cgroup documentation for details.
.SH QDISC PARAMETERS
.TP
num_tc
-Number of traffic classes to use upto 16 classes supported.
+Number of traffic classes to use. Up to 16 classes supported.
.TP
map
diff --git a/man/man8/tc-netem.8 b/man/man8/tc-netem.8
index 53c4de97..b31384f5 100644
--- a/man/man8/tc-netem.8
+++ b/man/man8/tc-netem.8
@@ -2,9 +2,9 @@
.SH NAME
NetEm \- Network Emulator
.SH SYNOPSIS
-.B "tc qdisc ... dev"
+.B "tc qdisc ... dev"
.IR DEVICE " ] "
-.BR "add netem"
+.BR "add netem"
.I OPTIONS
.IR OPTIONS " := [ " LIMIT " ] [ " DELAY " ] [ " LOSS \
@@ -15,15 +15,15 @@ NetEm \- Network Emulator
.I packets
.IR DELAY " := "
-.BI delay
+.BI delay
.IR TIME " [ " JITTER " [ " CORRELATION " ]]]"
.br
- [
+ [
.BR distribution " { "uniform " | " normal " | " pareto " | " paretonormal " } ]"
.IR LOSS " := "
.BR loss " { "
-.BI random
+.BI random
.IR PERCENT " [ " CORRELATION " ] |"
.br
.RB " " state
@@ -44,13 +44,13 @@ NetEm \- Network Emulator
.IR REORDERING " := "
.B reorder
.IR PERCENT " [ " CORRELATION " ] [ "
-.B gap
+.B gap
.IR DISTANCE " ]"
.IR RATE " := "
.B rate
.IR RATE " [ " PACKETOVERHEAD " [ " CELLSIZE " [ " CELLOVERHEAD " ]]]]"
-
+
.SH DESCRIPTION
NetEm is an enhancement of the Linux traffic control facilities
@@ -139,11 +139,11 @@ in this second example 25% of packets are sent immediately (with correlation of
50%) while the others are delayed by 10 ms.
.SS rate
-delay packets based on packet size and is a replacement for
+delay packets based on packet size and is a replacement for
.IR TBF .
Rate can be
-specified in common units (e.g. 100kbit). Optional
-.I PACKETOVERHEAD
+specified in common units (e.g. 100kbit). Optional
+.I PACKETOVERHEAD
(in bytes) specify an per packet overhead and can be negative. A positive value can be
used to simulate additional link layer headers. A negative value can be used to
artificial strip the Ethernet header (e.g. -14) and/or simulate a link layer
@@ -152,7 +152,7 @@ the cellsize. Cellsize can be used to simulate link layer schemes. ATM for
example has an payload cellsize of 48 bytes and 5 byte per cell header. If a
packet is 50 byte then ATM must use two cells: 2 * 48 bytes payload including 2
* 5 byte header, thus consume 106 byte on the wire. The last optional value
-.I CELLOVERHEAD
+.I CELLOVERHEAD
can be used to specify per cell overhead - for our ATM example 5.
.I CELLOVERHEAD
can be negative, but use negative values with caution.
diff --git a/man/man8/tc-pfifo_fast.8 b/man/man8/tc-pfifo_fast.8
index 43ab166e..baf34b1d 100644
--- a/man/man8/tc-pfifo_fast.8
+++ b/man/man8/tc-pfifo_fast.8
@@ -13,14 +13,14 @@ is detached.
In this sense this qdisc is magic, and unlike other qdiscs.
.SH ALGORITHM
-The algorithm is very similar to that of the classful
+The algorithm is very similar to that of the classful
.BR tc-prio (8)
-qdisc.
+qdisc.
.B pfifo_fast
is like three
.BR tc-pfifo (8)
queues side by side, where packets can be enqueued in any of the three bands
-based on their Type of Service bits or assigned priority.
+based on their Type of Service bits or assigned priority.
Not all three bands are dequeued simultaneously - as long as lower bands
have traffic, higher bands are never dequeued. This can be used to
@@ -28,7 +28,7 @@ prioritize interactive traffic or penalize 'lowest cost' traffic.
Each band can be txqueuelen packets long, as configured with
.BR ifconfig (8)
-or
+or
.BR ip (8).
Additional packets coming in are not enqueued but are instead dropped.
@@ -36,7 +36,7 @@ See
.BR tc-prio (8)
for complete details on how TOS bits are translated into bands.
.SH PARAMETERS
-.TP
+.TP
txqueuelen
The length of the three bands depends on the interface txqueuelen, as
specified with
@@ -46,7 +46,7 @@ or
.SH BUGS
Does not maintain statistics and does not show up in tc qdisc ls. This is because
-it is the automatic default in the absence of a configured qdisc.
+it is the automatic default in the absence of a configured qdisc.
.SH SEE ALSO
.BR tc (8)
@@ -55,5 +55,3 @@ it is the automatic default in the absence of a configured qdisc.
Alexey N. Kuznetsov, <kuznet@ms2.inr.ac.ru>
This manpage maintained by bert hubert <ahu@ds9a.nl>
-
-
diff --git a/man/man8/tc-prio.8 b/man/man8/tc-prio.8
index 99a4a261..605f3d39 100644
--- a/man/man8/tc-prio.8
+++ b/man/man8/tc-prio.8
@@ -5,21 +5,21 @@ PRIO \- Priority qdisc
.B tc qdisc ... dev
dev
.B ( parent
-classid
-.B | root) [ handle
-major:
-.B ] prio [ bands
+classid
+.B | root) [ handle
+major:
+.B ] prio [ bands
bands
.B ] [ priomap
band band band...
-.B ] [ estimator
+.B ] [ estimator
interval timeconstant
.B ]
.SH DESCRIPTION
The PRIO qdisc is a simple classful queueing discipline that contains
an arbitrary number of classes of differing priority. The classes are
-dequeued in numerical descending order of priority. PRIO is a scheduler
+dequeued in numerical descending order of priority. PRIO is a scheduler
and never delays packets - it is a work-conserving qdisc, though the qdiscs
contained in the classes may not be.
@@ -51,22 +51,22 @@ From userspace
A process with sufficient privileges can encode the destination class
directly with SO_PRIORITY, see
.BR socket(7).
-.TP
+.TP
with a tc filter
A tc filter attached to the root qdisc can point traffic directly to a class
-.TP
+.TP
with the priomap
Based on the packet priority, which in turn is derived from the Type of
Service assigned to the packet.
.P
-Only the priomap is specific to this qdisc.
+Only the priomap is specific to this qdisc.
.SH QDISC PARAMETERS
.TP
bands
Number of bands. If changed from the default of 3,
.B priomap
must be updated as well.
-.TP
+.TP
priomap
The priomap maps the priority of
a packet to a class. The priority can either be set directly from userspace,
@@ -126,7 +126,7 @@ TOS Bits Means Linux Priority Band
The second column contains the value of the relevant
four TOS bits, followed by their translated meaning. For example, 15 stands
for a packet wanting Minimal Monetary Cost, Maximum Reliability, Maximum
-Throughput AND Minimum Delay.
+Throughput AND Minimum Delay.
The fourth column lists the way the Linux kernel interprets the TOS bits, by
showing to which Priority they are mapped.
@@ -151,7 +151,7 @@ FTP
TFTP 1000 (minimize delay)
-SMTP
+SMTP
Command phase 1000 (minimize delay)
DATA phase 0100 (maximize throughput)
@@ -176,12 +176,10 @@ further qdisc.
.SH BUGS
Large amounts of traffic in the lower bands can cause starvation of higher
-bands. Can be prevented by attaching a shaper (for example,
+bands. Can be prevented by attaching a shaper (for example,
.BR tc-tbf(8)
to these bands to make sure they cannot dominate the link.
.SH AUTHORS
Alexey N. Kuznetsov, <kuznet@ms2.inr.ac.ru>, J Hadi Salim
<hadi@cyberus.ca>. This manpage maintained by bert hubert <ahu@ds9a.nl>
-
-
diff --git a/man/man8/tc-red.8 b/man/man8/tc-red.8
index d001c498..dd1ab74c 100644
--- a/man/man8/tc-red.8
+++ b/man/man8/tc-red.8
@@ -1,17 +1,17 @@
.TH RED 8 "13 December 2001" "iproute2" "Linux"
.SH NAME
-red \- Random Early Detection
+red \- Random Early Detection
.SH SYNOPSIS
.B tc qdisc ... red
-.B limit
+.B limit
+bytes
+.B [ min
+bytes
+.B ] [ max
bytes
-.B [ min
-bytes
-.B ] [ max
-bytes
.B ] avpkt
bytes
-.B [ burst
+.B [ burst
packets
.B ] [ ecn ] [ harddrop] [ bandwidth
rate
@@ -46,51 +46,51 @@ The average queue size is used for determining the marking
probability. This is calculated using an Exponential Weighted Moving
Average, which can be more or less sensitive to bursts.
-When the average queue size is below
+When the average queue size is below
.B min
-bytes, no packet will ever be marked. When it exceeds
-.B min,
+bytes, no packet will ever be marked. When it exceeds
+.B min,
the probability of doing so climbs linearly up
-to
-.B probability,
+to
+.B probability,
until the average queue size hits
.B max
-bytes. Because
-.B probability
+bytes. Because
+.B probability
is normally not set to 100%, the queue size might
-conceivably rise above
+conceivably rise above
.B max
-bytes, so the
+bytes, so the
.B limit
parameter is provided to set a hard maximum for the size of the queue.
.SH PARAMETERS
-.TP
+.TP
min
Average queue size at which marking becomes a possibility. Defaults to
.B max
/3
-.TP
+.TP
max
At this average queue size, the marking probability is maximal. Should be at
least twice
.B min
-to prevent synchronous retransmits, higher for low
+to prevent synchronous retransmits, higher for low
.B min.
-Default to
+Default to
.B limit
/4
-.TP
+.TP
probability
Maximum probability for marking, specified as a floating point
number from 0.0 to 1.0. Suggested values are 0.01 or 0.02 (1 or 2%,
respectively). Default : 0.02
-.TP
+.TP
limit
Hard limit on the real (not average) queue size in bytes. Further packets
are dropped. Should be set higher than max+burst. It is advised to set this
-a few times higher than
+a few times higher than
.B max.
.TP
burst
@@ -98,7 +98,7 @@ Used for determining how fast the average queue size is influenced by the
real queue size. Larger values make the calculation more sluggish, allowing
longer bursts of traffic before marking starts. Real life experiments
support the following guideline: (min+min+max)/(3*avpkt).
-.TP
+.TP
avpkt
Specified in bytes. Used with burst to determine the time constant for
average queue size calculations. 1000 is a good value.
@@ -126,15 +126,15 @@ bytes, this parameter forces a drop instead of ecn marking.
adaptive
(Added in linux-3.3) Sets RED in adaptive mode as described in http://icir.org/floyd/papers/adaptiveRed.pdf
.nf
-Goal of Adaptive RED is to make 'probability' dynamic value between 1% and 50% to reach the target average queue :
+Goal of Adaptive RED is to make 'probability' dynamic value between 1% and 50% to reach the target average queue :
.B (max - min) / 2
.fi
.SH EXAMPLE
.P
-# tc qdisc add dev eth0 parent 1:1 handle 10: red
- limit 400000 min 30000 max 90000 avpkt 1000
+# tc qdisc add dev eth0 parent 1:1 handle 10: red
+ limit 400000 min 30000 max 90000 avpkt 1000
burst 55 ecn adaptive bandwidth 10Mbit
.SH SEE ALSO
@@ -142,11 +142,11 @@ Goal of Adaptive RED is to make 'probability' dynamic value between 1% and 50% t
.BR tc-choke (8)
.SH SOURCES
-.TP
+.TP
o
Floyd, S., and Jacobson, V., Random Early Detection gateways for
Congestion Avoidance. http://www.aciri.org/floyd/papers/red/red.html
-.TP
+.TP
o
Some changes to the algorithm by Alexey N. Kuznetsov.
.TP
@@ -156,7 +156,5 @@ Adaptive RED : http://icir.org/floyd/papers/adaptiveRed.pdf
.SH AUTHORS
Alexey N. Kuznetsov, <kuznet@ms2.inr.ac.ru>, Alexey Makarenko
<makar@phoenix.kharkov.ua>, J Hadi Salim <hadi@nortelnetworks.com>,
-Eric Dumazet <eric.dumazet@gmail.com>.
+Eric Dumazet <eric.dumazet@gmail.com>.
This manpage maintained by bert hubert <ahu@ds9a.nl>
-
-
diff --git a/man/man8/tc-route.8 b/man/man8/tc-route.8
new file mode 100644
index 00000000..b865cd11
--- /dev/null
+++ b/man/man8/tc-route.8
@@ -0,0 +1,74 @@
+.TH "Route classifier in tc" 8 "21 Oct 2015" "iproute2" "Linux"
+
+.SH NAME
+route \- route traffic control filter
+.SH SYNOPSIS
+.in +8
+.ti -8
+.BR tc " " filter " ... " route " [ " from
+.IR REALM " | "
+.B fromif
+.IR TAG " ] [ "
+.B to
+.IR REALM " ] [ "
+.B classid
+.IR CLASSID " ] [ "
+.B action
+.IR ACTION_SPEC " ]"
+.SH DESCRIPTION
+Match packets based on routing table entries. This filter centers around the
+possibility to assign a
+.B realm
+to routing table entries. For any packet to be classified by this filter, a
+routing table lookup is performed and the returned
+.B realm
+is used to decide on whether the packet is a match or not.
+.SH OPTIONS
+.TP
+.BI action " ACTION_SPEC"
+Apply an action from the generic actions framework on matching packets.
+.TP
+.BI classid " CLASSID"
+Push matching packets into the class identified by
+.IR CLASSID .
+.TP
+.BI from " REALM"
+.TQ
+.BI fromif " TAG"
+Perform source route lookups.
+.I TAG
+is the name of an interface which must be present on the system at the time of
+.B tc
+invocation.
+.TP
+.BI to " REALM"
+Match if normal (i.e., destination) routing returns the given
+.IR REALM .
+.SH EXAMPLES
+Consider the subnet 192.168.2.0/24 being attached to eth0:
+
+.RS
+.EX
+ip route add 192.168.2.0/24 dev eth0 realm 2
+.EE
+.RE
+
+The following
+.B route
+filter will then match packets from that subnet:
+
+.RS
+.EX
+tc filter add ... route from 2 classid 1:2
+.EE
+.RE
+
+and pass packets on to class 1:2.
+.SH NOTES
+Due to implementation details,
+.B realm
+values must be in a range from 0 to 255, inclusive. Alternatively, a verbose
+name defined in /etc/iproute2/rt_realms may be given instead.
+.SH SEE ALSO
+.BR tc (8),
+.BR ip-route (8)
diff --git a/man/man8/tc-sfq.8 b/man/man8/tc-sfq.8
index 9afb5b24..ec4d8b8d 100644
--- a/man/man8/tc-sfq.8
+++ b/man/man8/tc-sfq.8
@@ -33,11 +33,11 @@ P
.SH DESCRIPTION
Stochastic Fairness Queueing is a classless queueing discipline available for
-traffic control with the
+traffic control with the
.BR tc (8)
command.
-SFQ does not shape traffic but only schedules the transmission of packets, based on 'flows'.
+SFQ does not shape traffic but only schedules the transmission of packets, based on 'flows'.
The goal is to ensure fairness so that each flow is able to send data in turn, thus preventing
any single flow from drowning out the rest.
@@ -62,13 +62,13 @@ Destination address
(iii)
Source and Destination port
.P
-If these are available. SFQ knows about ipv4 and ipv6 and also UDP, TCP and ESP.
-Packets with other protocols are hashed based on the 32bits representation of their
+If these are available. SFQ knows about ipv4 and ipv6 and also UDP, TCP and ESP.
+Packets with other protocols are hashed based on the 32bits representation of their
destination and source. A flow corresponds mostly to a TCP/IP connection.
Each of these buckets should represent a unique flow. Because multiple flows may
-get hashed to the same bucket, sfqs internal hashing algorithm may be perturbed at configurable
-intervals so that the unfairness lasts only for a short while. Perturbation may
+get hashed to the same bucket, sfqs internal hashing algorithm may be perturbed at configurable
+intervals so that the unfairness lasts only for a short while. Perturbation may
however cause some inadvertent packet reordering to occur. After linux-3.3, there is
no packet reordering problem, but possible packet drops if rehashing hits one limit
(number of flows or packets per flow)
@@ -88,7 +88,7 @@ divisor
Can be used to set a different hash table size, available from kernel 2.6.39 onwards.
The specified divisor must be a power of two and cannot be larger than 65536.
Default value: 1024.
-.TP
+.TP
limit
Upper limit of the SFQ. Can be used to reduce the default length of 127 packets.
After linux-3.3, it can be raised.
@@ -97,12 +97,12 @@ depth
Limit of packets per flow (after linux-3.3). Default to 127 and can be lowered.
.TP
perturb
-Interval in seconds for queue algorithm perturbation. Defaults to 0, which means that
+Interval in seconds for queue algorithm perturbation. Defaults to 0, which means that
no perturbation occurs. Do not set too low for each perturbation may cause some packet
reordering or losses. Advised value: 60
This value has no effect when external flow classification is used.
Its better to increase divisor value to lower risk of hash collisions.
-.TP
+.TP
quantum
Amount of bytes a flow is allowed to dequeue during a round of the round robin process.
Defaults to the MTU of the interface which is also the advised value and the minimum value.
@@ -142,7 +142,7 @@ Specified in bytes. Used with burst to determine the time constant for average q
burst
Used for determining how fast the average queue size is influenced by the real queue size.
.nf
-Default value is :
+Default value is :
.B (2 * min + max) / (3 * avpkt)
.fi
.TP
@@ -166,16 +166,16 @@ To attach to device ppp0:
.P
# tc qdisc add dev ppp0 root sfq
.P
-Please note that SFQ, like all non-shaping (work-conserving) qdiscs, is only useful
+Please note that SFQ, like all non-shaping (work-conserving) qdiscs, is only useful
if it owns the queue.
-This is the case when the link speed equals the actually available bandwidth. This holds
-for regular phone modems, ISDN connections and direct non-switched ethernet links.
+This is the case when the link speed equals the actually available bandwidth. This holds
+for regular phone modems, ISDN connections and direct non-switched ethernet links.
.P
-Most often, cable modems and DSL devices do not fall into this category. The same holds
-for when connected to a switch and trying to send data to a congested segment also
+Most often, cable modems and DSL devices do not fall into this category. The same holds
+for when connected to a switch and trying to send data to a congested segment also
connected to the switch.
.P
-In this case, the effective queue does not reside within Linux and is therefore not
+In this case, the effective queue does not reside within Linux and is therefore not
available for scheduling.
.P
Embed SFQ in a classful qdisc to make sure it owns the queue.
@@ -191,11 +191,11 @@ changed the sfq default of 1024, use the same value for the flow hash filter, to
.P
Example of sfq with optional RED mode :
.P
-# tc qdisc add dev eth0 parent 1:1 handle 10: sfq limit 3000 flows 512 divisor 16384
+# tc qdisc add dev eth0 parent 1:1 handle 10: sfq limit 3000 flows 512 divisor 16384
redflowlimit 100000 min 8000 max 60000 probability 0.20 ecn headdrop
.SH SOURCE
-.TP
+.TP
o
Paul E. McKenney "Stochastic Fairness Queuing",
IEEE INFOCOMM'90 Proceedings, San Francisco, 1990.
@@ -205,7 +205,7 @@ o
Paul E. McKenney "Stochastic Fairness Queuing",
"Interworking: Research and Experience", v.2, 1991, p.113-131.
-.TP
+.TP
o
See also:
M. Shreedhar and George Varghese "Efficient Fair
@@ -220,5 +220,3 @@ Alexey N. Kuznetsov, <kuznet@ms2.inr.ac.ru>,
Eric Dumazet <eric.dumazet@gmail.com>.
.P
This manpage maintained by bert hubert <ahu@ds9a.nl>
-
-
diff --git a/man/man8/tc-tbf.8 b/man/man8/tc-tbf.8
index fc2c8372..d721b5d9 100644
--- a/man/man8/tc-tbf.8
+++ b/man/man8/tc-tbf.8
@@ -6,11 +6,11 @@ tbf \- Token Bucket Filter
rate
.B burst
bytes/cell
-.B ( latency
-ms
+.B ( latency
+ms
.B | limit
bytes
-.B ) [ mpu
+.B ) [ mpu
bytes
.B [ peakrate
rate
@@ -22,46 +22,46 @@ burst is also known as buffer and maxburst. mtu is also known as minburst.
.SH DESCRIPTION
The Token Bucket Filter is a classful queueing discipline available for
-traffic control with the
+traffic control with the
.BR tc (8)
command.
TBF is a pure shaper and never schedules traffic. It is non-work-conserving and may throttle
-itself, although packets are available, to ensure that the configured rate is not exceeded.
-It is able to shape up to 1mbit/s of normal traffic with ideal minimal burstiness,
+itself, although packets are available, to ensure that the configured rate is not exceeded.
+It is able to shape up to 1mbit/s of normal traffic with ideal minimal burstiness,
sending out data exactly at the configured rates.
Much higher rates are possible but at the cost of losing the minimal burstiness. In that
-case, data is on average dequeued at the configured rate but may be sent much faster at millisecond
+case, data is on average dequeued at the configured rate but may be sent much faster at millisecond
timescales. Because of further queues living in network adaptors, this is often not a problem.
.SH ALGORITHM
-As the name implies, traffic is filtered based on the expenditure of
+As the name implies, traffic is filtered based on the expenditure of
.B tokens.
Tokens roughly correspond to bytes, with the additional constraint
that each packet consumes some tokens, no matter how small it is. This
reflects the fact that even a zero-sized packet occupies the link for
some time.
-On creation, the TBF is stocked with tokens which correspond to the amount of traffic that can be burst
+On creation, the TBF is stocked with tokens which correspond to the amount of traffic that can be burst
in one go. Tokens arrive at a steady rate, until the bucket is full.
-If no tokens are available, packets are queued, up to a configured limit. The TBF now
+If no tokens are available, packets are queued, up to a configured limit. The TBF now
calculates the token deficit, and throttles until the first packet in the queue can be sent.
-If it is not acceptable to burst out packets at maximum speed, a peakrate can be configured
+If it is not acceptable to burst out packets at maximum speed, a peakrate can be configured
to limit the speed at which the bucket empties. This peakrate is implemented as a second TBF
with a very small bucket, so that it doesn't burst.
-To achieve perfection, the second bucket may contain only a single packet, which leads to
-the earlier mentioned 1mbit/s limit.
+To achieve perfection, the second bucket may contain only a single packet, which leads to
+the earlier mentioned 1mbit/s limit.
This limit is caused by the fact that the kernel can only throttle for at minimum 1 'jiffy', which depends
-on HZ as 1/HZ. For perfect shaping, only a single packet can get sent per jiffy - for HZ=100, this means 100
+on HZ as 1/HZ. For perfect shaping, only a single packet can get sent per jiffy - for HZ=100, this means 100
packets of on average 1000 bytes each, which roughly corresponds to 1mbit/s.
.SH PARAMETERS
-See
+See
.BR tc (8)
for how to specify the units of these values.
.TP
@@ -71,30 +71,30 @@ available. You can also specify this the other way around by setting the
latency parameter, which specifies the maximum amount of time a packet can
sit in the TBF. The latter calculation takes into account the size of the
bucket, the rate and possibly the peakrate (if set). These two parameters
-are mutually exclusive.
+are mutually exclusive.
.TP
burst
Also known as buffer or maxburst.
-Size of the bucket, in bytes. This is the maximum amount of bytes that tokens can be available for instantaneously.
-In general, larger shaping rates require a larger buffer. For 10mbit/s on Intel, you need at least 10kbyte buffer
+Size of the bucket, in bytes. This is the maximum amount of bytes that tokens can be available for instantaneously.
+In general, larger shaping rates require a larger buffer. For 10mbit/s on Intel, you need at least 10kbyte buffer
if you want to reach your configured rate!
If your buffer is too small, packets may be dropped because more tokens arrive per timer tick than fit in your bucket.
The minimum buffer size can be calculated by dividing the rate by HZ.
-Token usage calculations are performed using a table which by default has a resolution of 8 packets.
-This resolution can be changed by specifying the
+Token usage calculations are performed using a table which by default has a resolution of 8 packets.
+This resolution can be changed by specifying the
.B cell
size with the burst. For example, to specify a 6000 byte buffer with a 16
byte cell size, set a burst of 6000/16. You will probably never have to set
this. Must be an integral power of 2.
.TP
mpu
-A zero-sized packet does not use zero bandwidth. For ethernet, no packet uses less than 64 bytes. The Minimum Packet Unit
+A zero-sized packet does not use zero bandwidth. For ethernet, no packet uses less than 64 bytes. The Minimum Packet Unit
determines the minimal token usage (specified in bytes) for a packet. Defaults to zero.
.TP
rate
-The speed knob. See remarks above about limits! See
+The speed knob. See remarks above about limits! See
.BR tc (8)
for units.
.PP
@@ -112,7 +112,7 @@ Specifies the size of the peakrate bucket. For perfect accuracy, should be set t
If a peakrate is needed, but some burstiness is acceptable, this size can be raised. A 3000 byte minburst
allows around 3mbit/s of peakrate, given 1000 byte packets.
-Like the regular burstsize you can also specify a
+Like the regular burstsize you can also specify a
.B cell
size.
.SH EXAMPLE & USAGE
@@ -139,5 +139,3 @@ the limit/latency is not effective anymore.
.SH AUTHOR
Alexey N. Kuznetsov, <kuznet@ms2.inr.ac.ru>. This manpage maintained by
bert hubert <ahu@ds9a.nl>
-
-
diff --git a/man/man8/tc-tcindex.8 b/man/man8/tc-tcindex.8
new file mode 100644
index 00000000..7fcf8254
--- /dev/null
+++ b/man/man8/tc-tcindex.8
@@ -0,0 +1,58 @@
+.TH "Traffic control index filter" 8 "21 Oct 2015" "iproute2" "Linux"
+
+.SH NAME
+tcindex \- traffic control index filter
+.SH SYNOPSIS
+.in +8
+.ti -8
+.BR tc " " filter " ... " tcindex " [ " hash
+.IR SIZE " ] [ "
+.B mask
+.IR MASK " ] [ "
+.B shift
+.IR SHIFT " ] [ "
+.BR pass_on " | " fall_through " ] [ " classid
+.IR CLASSID " ] [ "
+.B action
+.IR ACTION_SPEC " ]"
+.SH DESCRIPTION
+This filter allows matching packets based on their
+.B tcindex
+field value, i.e. the combination of the DSCP and ECN fields as present in IPv4
+and IPv6 headers.
+.SH OPTIONS
+.TP
+.BI action " ACTION_SPEC"
+Apply an action from the generic actions framework on matching packets.
+.TP
+.BI classid " CLASSID"
+Push matching packets into the class identified by
+.IR CLASSID .
+.TP
+.BI hash " SIZE"
+Hash table size in entries to use. Defaults to 64.
+.TP
+.BI mask " MASK"
+An optional bitmask to binary
+.BR AND " to the packet's " tcindex
+field before use.
+.TP
+.BI shift " SHIFT"
+The number of bits to right-shift a packet's
+.B tcindex
+value before use. If a
+.B mask
+has been set, masking is done before shifting.
+.TP
+.B pass_on
+If this flag is set, failure to find a class for the resulting ID will make the
+filter fail and lead to the next filter being consulted.
+.TP
+.B fall_through
+This is the opposite of
+.B pass_on
+and the default. The filter will classify the packet even if there is no class
+present for the resulting class ID.
+
+.SH SEE ALSO
+.BR tc (8)
diff --git a/man/man8/tc-u32.8 b/man/man8/tc-u32.8
new file mode 100644
index 00000000..47c8f2d0
--- /dev/null
+++ b/man/man8/tc-u32.8
@@ -0,0 +1,663 @@
+.TH "Universal 32bit classifier in tc" 8 "25 Sep 2015" "iproute2" "Linux"
+
+.SH NAME
+u32 \- universal 32bit traffic control filter
+.SH SYNOPSIS
+.in +8
+.ti -8
+.BR tc " " filter " ... [ " handle
+.IR HANDLE " ] "
+.B u32
+.IR OPTION_LIST " [ "
+.B offset
+.IR OFFSET " ] [ "
+.B hashkey
+.IR HASHKEY " ] [ "
+.B classid
+.IR CLASSID " ] [ "
+.B divisor
+.IR uint_value " ] [ "
+.B order
+.IR u32_value " ] [ "
+.B ht
+.IR HANDLE " ] [ "
+.B sample
+.IR SELECTOR " [ "
+.B divisor
+.IR uint_value " ] ] [ "
+.B link
+.IR HANDLE " ] [ "
+.B indev
+.IR ifname " ] [ "
+.BR help " ]"
+
+.ti -8
+.IR HANDLE " := { "
+\fIu12_hex_htid\fB:\fR[\fIu8_hex_hash\fB:\fR[\fIu12_hex_nodeid\fR]] | \fB0x\fIu32_hex_value\fR }
+
+.ti -8
+.IR OPTION_LIST " := [ " OPTION_LIST " ] " OPTION
+
+.ti -8
+.IR HASHKEY " := [ "
+.B mask
+.IR u32_hex_value " ] [ "
+.B at
+.IR 4*int_value " ]"
+
+.ti -8
+.IR CLASSID " := { "
+.BR root " | "
+.BR none " | "
+[\fIu16_major\fR]\fB:\fIu16_minor\fR | \fIu32_hex_value\fR }
+
+.ti -8
+.IR OFFSET " := [ "
+.B plus
+.IR int_value " ] [ "
+.B at
+.IR 2*int_value " ] [ "
+.B mask
+.IR u16_hex_value " ] [ "
+.B shift
+.IR int_value " ] [ "
+.BR eat " ]"
+
+.ti -8
+.IR OPTION " := { "
+.B match
+.IR SELECTOR " | "
+.B action
+.IR ACTION " } "
+
+.ti -8
+.IR SELECTOR " := { "
+.B u32
+.IR VAL_MASK_32 " | "
+.B u16
+.IR VAL_MASK_16 " | "
+.B u8
+.IR VAL_MASK_8 " | "
+.B ip
+.IR IP " | "
+.B ip6
+.IR IP6 " | { "
+.BR tcp " | " udp " } "
+.IR TCPUDP " | "
+.B icmp
+.IR ICMP " | "
+.B mark
+.IR VAL_MASK_32 " | "
+.B ether
+.IR ETHER " }"
+
+.ti -8
+.IR IP " := { { "
+.BR src " | " dst " } { " default " | " any " | " all " | "
+.IR ip_address " [ "
+.BR / " { "
+.IR prefixlen " | " netmask " } ] } " AT " | { "
+.BR dsfield " | " ihl " | " protocol " | " precedence " | "
+.BR icmp_type " | " icmp_code " } "
+.IR VAL_MASK_8 " | { "
+.BR sport " | " dport " } "
+.IR VAL_MASK_16 " | "
+.BR nofrag " | " firstfrag " | " df " | " mf " }"
+
+.ti -8
+.IR IP6 " := { { "
+.BR src " | " dst " } { " default " | " any " | " all " | "
+.IR ip6_address " [/" prefixlen " ] } " AT " | "
+.B priority
+.IR VAL_MASK_8 " | { "
+.BR protocol " | " icmp_type " | " icmp_code " } "
+.IR VAL_MASK_8 " | "
+.B flowlabel
+.IR VAL_MASK_32 " | { "
+.BR sport " | " dport " } "
+.IR VAL_MASK_16 " }"
+
+.ti -8
+.IR TCPUDP " := { "
+.BR src " | " dst " } "
+.I VAL_MASK_16
+
+.ti -8
+.IR ICMP " := { "
+.B type
+.IR VAL_MASK_8 " | "
+.B code
+.IR VAL_MASK_8 " }"
+
+.ti -8
+.IR ETHER " := { "
+.BR src " | " dst " } "
+.IR ether_address " " AT
+
+.ti -8
+.IR VAL_MASK_32 " := " u32_value " " u32_hex_mask " [ " AT " ]"
+
+.ti -8
+.IR VAL_MASK_16 " := " u16_value " " u16_hex_mask " [ " AT " ]"
+
+.ti -8
+.IR VAL_MASK_8 " := " u8_value " " u8_hex_mask " [ " AT " ]"
+
+.ti -8
+.IR AT " := [ "
+.BR at " [ " nexthdr+ " ] "
+.IR int_value " ]"
+.SH DESCRIPTION
+The Universal/Ugly 32bit filter allows matching arbitrary bitfields in the
+packet. Due to breaking everything down to values, masks and offsets, it is
+equally powerful and hard to use. Luckily many abstracting directives are
+present which allow defining rules on a higher level and therefore free the
+user from having to fiddle with bits and masks in many cases.
+
+There are two general modes of invocation: The first mode creates a new filter
+to delegate packets to different destinations. Apart from the obvious ones,
+namely classifying the packet by specifying a
+.I CLASSID
+or calling an
+.BR action ,
+one may
+.B link
+one filter to another one (or even a list of them), effectively organizing
+filters into a tree-like hierarchy.
+
+Typically filter delegation is done by means of a hash table, which leads to the
+second mode of invocation: it merely serves to set up these hash tables. Filters
+can select a hash table and provide a key selector from which a hash is to be
+computed and used as key to look up the table's bucket which contains filters for
+further processing. This is useful if a high number of filters is in use, as the
+overhead of performing the hash operation and table lookup becomes negligible in
+that case. Using hashtables with
+.B u32
+basically involves the following pattern:
+.IP (1) 4
+Creating a new hash table, specifying its size using the
+.B divisor
+parameter and ideally a handle by which the table can be identified. If the
+latter is not given, the kernel chooses one on its own, which has to be
+guessed later.
+.IP (2) 4
+Creating filters which link to the created table in
+.I (1)
+using the
+.B link
+parameter and defining the packet data which the kernel will use to calculate
+the
+.BR hashkey .
+.IP (3) 4
+Adding filters to buckets in the hash table from
+.IR (1) .
+In order to avoid having to know how exactly the kernel creates the hash key,
+there is the
+.B sample
+parameter, which gives sample data to hash and thereby define the table bucket
+the filter should be added to.
+
+.RE
+In fact, even if not explicitly requested
+.B u32
+creates a hash table for every
+.B priority
+a filter is being added with. The table's size is 1 though, so it is in fact
+merely a linked list.
+.SH VALUES
+Options and selectors require values to be specified in a specific format, which
+is often non-intuitive. Therefore the terminals in
+.I SYNOPSIS
+have been given descriptive names to indicate the required format and/or maximum
+allowed numeric value: Prefixes
+.IR u32 ", " u16 " and " u8
+indicate four, two and single byte unsigned values. E.g.
+.I u16
+indicates a two byte-sized value in range between 0 and 65535 (0xFFFF)
+inclusive. A prefix of
+.I int
+indicates a four byte signed value. A middle part of
+.I _hex_
+indicates that the value is parsed in hexadecimal format. Otherwise, the
+value's base is automatically detected, i.e. values prefixed with
+.I 0x
+are considered hexadecimal, a leading
+.I 0
+indicates octal format and decimal format otherwise. There are some values with
+special formatting as well:
+.IR ip_address " and " netmask
+are in dotted-quad formatting as usual for IPv4 addresses. An
+.I ip6_address
+is specified in common, colon-separated hexadecimal format. Finally,
+.I prefixlen
+is an unsigned, decimal integer value in range from 0 to the address width in
+bits (32 for IPv4 and 128 for IPv6).
+
+Sometimes values need to be divisible by a certain number. In that case a name
+of the form
+.I N*val
+was chosen, indicating that
+.I val
+must be divisible by
+.IR N .
+Or the other way around: the resulting value must be a multiple of
+.IR N .
+.SH OPTIONS
+.B U32
+recognizes the following options:
+.TP
+.BI handle " HANDLE"
+The handle is used to reference a filter and therefore must be unique. It
+consists of a hash table identifier
+.B htid
+and optional
+.B hash
+(which identifies the hash table's bucket) and
+.BR nodeid .
+All these values are parsed as unsigned, hexadecimal numbers with length 12bits
+(
+.BR htid " and " nodeid )
+or 8bits (
+.BR hash ).
+Alternatively one may specify a single, 32bit long hex number which contains
+the three fields' bits in concatenated form. Other than the fields themselves, it
+has to be prefixed by
+.BR 0x .
+.TP
+.BI offset " OFFSET"
+Set an offset which defines where matches of subsequent filters are applied to.
+Therefore this option is useful only when combined with
+.BR link " or a combination of " ht " and " sample .
+The offset may be given explicitly by using the
+.B plus
+keyword, or extracted from the packet data with
+.BR at .
+It is possible to mangle the latter using
+.BR mask " and/or " shift
+keywords. By default, this offset is recorded but not implicitly applied. It is
+used only to substitute the
+.B nexthdr+
+statement. Using the keyword
+.B eat
+though inverts this behaviour: the offset is applied always, and
+.B nexthdr+
+will fall back to zero.
+.TP
+.BI hashkey " HASHKEY"
+Specify what packet data to use to calculate a hash key for bucket lookup. The
+kernel adjusts the value according to the hash table's size. For this to work,
+the option
+.B link
+must be given.
+.TP
+.BI classid " CLASSID"
+Classify matching packets into the given
+.IR CLASSID ,
+which consists of either 16bit
+.BR major " and " minor
+numbers or a single 32bit value combining both.
+.TP
+.BI divisor " u32_value"
+Specify a modulo value. Used when creating hash tables to define their size or
+for declaring a
+.B sample
+to calculate hash table keys from. Must be a power of two with exponent not
+exceeding eight.
+.TP
+.BI order " u32_value"
+A value to order filters by, ascending. Conflicts with
+.B handle
+which serves the same purpose.
+.TP
+.BI sample " SELECTOR"
+Used together with
+.B ht
+to specify which bucket to add this filter to. This allows one to avoid having
+to know how exactly the kernel calculates hashes. The additional
+.B divisor
+defaults to 256, so must be given for hash tables of different size.
+.TP
+.BI link " HANDLE"
+Delegate matching packets to filters in a hash table.
+.I HANDLE
+is used to only specify the hash table, so only
+.BR htid " may be given, " hash " and " nodeid
+have to be omitted. By default, bucket number 0 will be used and can be
+overridden by the
+.B hashkey
+option.
+.TP
+.BI indev " ifname"
+Filter on the incoming interface of the packet. Obviously works only for
+forwarded traffic.
+.TP
+.BI help
+Print a brief help text about possible options.
+.SH SELECTORS
+Basically the only real selector is
+.BR u32 .
+All others merely provide a higher level syntax and are internally translated
+into
+.BR u32 .
+.TP
+.BI u32 " VAL_MASK_32"
+.TQ
+.BI u16 " VAL_MASK_16"
+.TQ
+.BI u8 " VAL_MASK_8"
+Match packet data to a given value. The selector name defines the sample length
+to extract (32bits for
+.BR u32 ,
+16bits for
+.B u16
+and 8bits for
+.BR u8 ).
+Before comparing, the sample is binary AND'ed with the given mask. This way
+uninteresting bits can be cleared before comparison. The position of the sample
+is defined by the offset specified in
+.IR AT .
+.TP
+.BI ip " IP"
+.TQ
+.BI ip6 " IP6"
+Assume packet starts with an IPv4 (
+.BR ip )
+or IPv6 (
+.BR ip6 )
+header.
+.IR IP / IP6
+then allows matching various header fields:
+.RS
+.TP
+.BI src " ADDR"
+.BI dst " ADDR"
+Compare Source or Destination Address fields against the value of
+.IR ADDR .
+The reserved words
+.BR default ", " any " and " all
+effectively match any address. Otherwise an IP address of the particular
+protocol is expected, optionally suffixed by a prefix length to match whole
+subnets. In case of IPv4 a netmask may also be given.
+.TP
+.BI dsfield " VAL_MASK_8"
+IPv4 only. Match the packet header's DSCP/ECN field. Synonyms to this are
+.BR tos " and " precedence .
+.TP
+.BI ihl " VAL_MASK_8"
+IPv4 only. Match the Internet Header Length field. Note that the value's unit is
+32bits, so to match a packet with 24byte header length
+.I u8_value
+has to be 6.
+.TP
+.BI protocol " VAL_MASK_8"
+Match the Protocol (IPv4) or Next Header (IPv6) field value, e.g. 6 for TCP.
+.TP
+.BI icmp_type " VAL_MASK_8"
+.TQ
+.BI icmp_code " VAL_MASK_8"
+Assume a next-header protocol of icmp or ipv6-icmp and match Type or Code
+field values. This is dangerous, as the code assumes minimal header size for
+IPv4 and lack of extension headers for IPv6.
+.TP
+.BI sport " VAL_MASK_16"
+.TQ
+.BI dport " VAL_MASK_16"
+Match layer four source or destination ports. This is dangerous as well, as it
+assumes a suitable layer four protocol is present (which has Source and
+Destination Port fields right at the start of the header and 16bit in size).
+Also minimal header size for IPv4 and lack of IPv6 extension headers is assumed.
+.TP
+.B nofrag
+.TQ
+.B firstfrag
+.TQ
+.B df
+.TQ
+.B mf
+IPv4 only, check certain flags and fragment offset values. Match if the packet
+is not a fragment
+.RB ( nofrag ),
+the first fragment
+.RB ( firstfrag ),
+if Don't Fragment
+.RB ( df )
+or More Fragments
+.RB ( mf )
+bits are set.
+.TP
+.BI priority " VAL_MASK_8"
+IPv6 only. Match the header's Traffic Class field, which has the same purpose
+and semantics as IPv4's ToS field since RFC 3168: upper six bits are DSCP, the
+lower two ECN.
+.TP
+.BI flowlabel " VAL_MASK_32"
+IPv6 only. Match the Flow Label field's value. Note that Flow Label itself is
+only 20bits long, which are the least significant ones here. The remaining
+upper 12bits match Version and Traffic Class fields.
+.RE
+.TP
+.BI tcp " TCPUDP"
+.TQ
+.BI udp " TCPUDP"
+Match fields of next header of protocol TCP or UDP. The possible values for
+.I TCPUDP
+are:
+.RS
+.TP
+.BI src " VAL_MASK_16"
+Match on Source Port field value.
+.TP
+.BI dst " VAL_MASK_16"
+Match on Destination Port field value.
+.RE
+.TP
+.BI icmp " ICMP"
+Match fields of next header of protocol ICMP. The possible values for
+.I ICMP
+are:
+.RS
+.TP
+.BI type " VAL_MASK_8"
+Match on ICMP Type field.
+.TP
+.BI code " VAL_MASK_8"
+Match on ICMP Code field.
+.RE
+.TP
+.BI mark " VAL_MASK_32"
+Match on netfilter fwmark value.
+.TP
+.BI ether " ETHER"
+Match on ethernet header fields. Possible values for
+.I ETHER
+are:
+.RS
+.TP
+.BI src " ether_address" " " AT
+.TQ
+.BI dst " ether_address" " " AT
+Match on source or destination ethernet address. This is dangerous: It assumes
+an ethernet header is present at the start of the packet. This will probably
+lead to unexpected things if used with layer three interfaces like e.g. tun or
+ppp.
+.SH EXAMPLES
+.RS
+.EX
+tc filter add dev eth0 parent 999:0 prio 99 protocol ip u32 \\
+ match ip src 192.168.8.0/24 classid 1:1
+.EE
+.RE
+
+This attaches a filter to the qdisc identified by
+.BR 999:0 .
+Its priority is
+.BR 99 ,
+which affects in which order multiple filters attached to the same
+.B parent
+are consulted (the lower the earlier). The filter handles packets of
+.B protocol
+type
+.BR ip ,
+and
+.BR match es
+if the IP header's source address is within the
+.B 192.168.8.0/24
+subnet. Matching packets are classified into class
+.BR 1:1 .
+The effect of this command might be surprising at first glance:
+
+.RS
+.EX
+filter parent 1: protocol ip pref 99 u32
+filter parent 1: protocol ip pref 99 u32 \\
+ fh 800: ht divisor 1
+filter parent 1: protocol ip pref 99 u32 \\
+ fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:1 \\
+ match c0a80800/ffffff00 at 12
+.EE
+.RE
+
+So parent
+.B 1:
+is assigned a new
+.B u32
+filter, which contains a hash table of size 1 (as the
+.B divisor
+indicates). The table ID is
+.BR 800 .
+The third line then shows the actual filter which was added above: it sits in
+table
+.B 800
+and bucket
+.BR 0 ,
+classifies packets into class ID
+.B 1:1
+and matches the upper three bytes of the four byte value at offset
+.B 12
+to be
+.BR 0xc0a808 ,
+which is 192, 168 and 8.
+
+Now for something more complicated, namely creating a custom hash table:
+
+.RS
+.EX
+tc filter add dev eth0 prio 99 handle 1: u32 divisor 256
+.EE
+.RE
+
+This creates a table of size 256 with handle
+.B 1:
+in priority
+.BR 99 .
+The effect is as follows:
+
+.RS
+.EX
+filter parent 1: protocol all pref 99 u32
+filter parent 1: protocol all pref 99 u32 fh 1: ht divisor 256
+filter parent 1: protocol all pref 99 u32 fh 800: ht divisor 1
+.EE
+.RE
+
+So along with the requested hash table (handle
+.BR 1: ),
+the kernel has created its own table of size 1 to hold other filters of the same
+priority.
+
+The next step is to create a filter which links to the created hash table:
+
+.RS
+.EX
+tc filter add dev eth0 parent 1: prio 1 u32 \\
+ link 1: hashkey mask 0x0000ff00 at 12 \\
+ match ip src 192.168.0.0/16
+.EE
+.RE
+
+The filter is given a lower priority than the hash table itself so
+.B u32
+consults it before manually traversing the hash table. The options
+.BR link " and " hashkey
+determine which table and bucket to redirect to. In this case the hash key
+should be constructed out of the second byte at offset 12, which corresponds to
+an IP packet's third byte of the source address field. Along with the
+.B match
+statement, this effectively maps all class C networks below 192.168.0.0/16 to
+different buckets of the hash table.
+
+Filters for certain subnets can be created like so:
+
+.RS
+.EX
+tc filter add dev eth0 parent 1: prio 99 u32 \\
+ ht 1: sample u32 0x00000800 0x0000ff00 at 12 \\
+ match ip src 192.168.8.0/24 classid 1:1
+.EE
+.RE
+
+The bucket is defined using the
+.B sample
+option: In this case, the second byte at offset 12 must be 0x08, exactly. In
+this case, the resulting bucket ID is obviously 8, but as soon as
+.B sample
+selects an amount of data which could exceed the
+.BR divisor ,
+one would have to know the kernel-internal algorithm to deduce the destination
+bucket. This filter's
+.B match
+statement is redundant in this case, as the entropy for the hash key does not
+exceed the table size and therefore no collisions can occur. Otherwise it's
+necessary to prevent matching unwanted packets.
+
+Matching upper layer fields is problematic since IPv4 header length is variable
+and IPv6 supports extension headers which affect upper layer header offset. To
+overcome this, there is the possibility to specify
+.B nexthdr+
+when giving an offset, and to make things easier there are the
+.BR tcp " and " udp
+matches which use
+.B nexthdr+
+implicitly. This offset has to be calculated beforehand though, and the only
+way to achieve that is by doing it in a separate filter which then links to the
+filter which wants to use it. Here is an example of doing so:
+
+.RS
+.EX
+tc filter add dev eth0 parent 1:0 protocol ip handle 1: \\
+ u32 divisor 1
+tc filter add dev eth0 parent 1:0 protocol ip \\
+ u32 ht 1: \\
+ match tcp src 22 FFFF \\
+ classid 1:2
+tc filter add dev eth0 parent 1:0 protocol ip \\
+ u32 ht 800: \\
+ match ip protocol 6 FF \\
+ match ip firstfrag \\
+ offset at 0 mask 0f00 shift 6 \\
+ link 1:
+.EE
+.RE
+
+This is what is being done: In the first call, a single element sized hash table
+is created so there is a place to hold the linked to filter and a known handle
+.RB ( 1: )
+to reference it. The second call then adds the actual filter, which pushes
+packets with TCP source port 22 into class
+.BR 1:2 .
+Using
+.BR ht ,
+it is moved into the hash table created by the first call. The third call then
+does the actual magic: It matches IPv4 packets with next layer protocol 6 (TCP),
+only if it's the first fragment (usually TCP sets DF bit, but if it doesn't and
+the packet is fragmented, only the first one contains the TCP header), and then
+sets the offset based on the IP header's IHL field (right-shifting by 6
+eliminates the offset of the field and at the same time converts the value into
+byte unit). Finally, using
+.BR link ,
+the hash table from first call is referenced which holds the filter from second
+call.
+.SH SEE ALSO
+.BR tc (8),
+.br
+.BR cls_u32.txt " at " http://linux-tc-notes.sourceforge.net/
diff --git a/man/man8/tc.8 b/man/man8/tc.8
index 434fe6c3..4e99dcad 100644
--- a/man/man8/tc.8
+++ b/man/man8/tc.8
@@ -144,23 +144,139 @@ It is important to notice that filters reside
.B within
qdiscs - they are not masters of what happens.
+The available filters are:
+.TP
+basic
+Filter packets based on an ematch expression. See
+.BR tc-ematch (8)
+for details.
+.TP
+bpf
+Filter packets using (e)BPF, see
+.BR tc-bpf (8)
+for details.
+.TP
+cgroup
+Filter packets based on the control group of their process. See
+.BR tc-cgroup (8)
+for details.
+.TP
+flow, flower
+Flow-based classifiers, filtering packets based on their flow (identified by selectable keys). See
+.BR tc-flow "(8) and"
+.BR tc-flower (8)
+for details.
+.TP
+fw
+Filter based on fwmark. Directly maps fwmark value to traffic class. See
+.BR tc-fw (8).
+.TP
+route
+Filter packets based on routing table. See
+.BR tc-route (8)
+for details.
+.TP
+rsvp
+Match Resource Reservation Protocol (RSVP) packets.
+.TP
+tcindex
+Filter packets based on traffic control index. See
+.BR tc-tcindex (8).
+.TP
+u32
+Generic filtering on arbitrary packet data, assisted by syntax to abstract common operations. See
+.BR tc-u32 (8)
+for details.
+
.SH CLASSLESS QDISCS
The classless qdiscs are:
.TP
+choke
+CHOKe (CHOose and Keep for responsive flows, CHOose and Kill for unresponsive
+flows) is a classless qdisc designed to both identify and penalize flows that
+monopolize the queue. CHOKe is a variation of RED, and the configuration is
+similar to RED.
+.TP
+codel
+CoDel (pronounced "coddle") is an adaptive "no-knobs" active queue management
+algorithm (AQM) scheme that was developed to address the shortcomings of
+RED and its variants.
+.TP
[p|b]fifo
Simplest usable qdisc, pure First In, First Out behaviour. Limited in
packets or in bytes.
.TP
+fq
+Fair Queue Scheduler realises TCP pacing and scales to millions of concurrent
+flows per qdisc.
+.TP
+fq_codel
+Fair Queuing Controlled Delay is a queuing discipline that combines Fair
+Queuing with the CoDel AQM scheme. FQ_Codel uses a stochastic model to classify
+incoming packets into different flows and is used to provide a fair share of the
+bandwidth to all the flows using the queue. Each such flow is managed by the
+CoDel queuing discipline. Reordering within a flow is avoided since Codel
+internally uses a FIFO queue.
+.TP
+gred
+Generalized Random Early Detection combines multiple RED queues in order to
+achieve multiple drop priorities. This is required to realize Assured
+Forwarding (RFC 2597).
+.TP
+hhf
+Heavy-Hitter Filter differentiates between small flows and the opposite,
+heavy-hitters. The goal is to catch the heavy-hitters and move them to a
+separate queue with less priority so that bulk traffic does not affect the
+latency of critical traffic.
+.TP
+ingress
+This is a special qdisc as it applies to incoming traffic on an interface, allowing for it to be filtered and policed.
+.TP
+mqprio
+The Multiqueue Priority Qdisc is a simple queuing discipline that allows
+mapping traffic flows to hardware queue ranges using priorities and a
+configurable priority to traffic class mapping. A traffic class in this context
+is a set of contiguous qdisc classes which map 1:1 to a set of hardware exposed
+queues.
+.TP
+multiq
+Multiqueue is a qdisc optimized for devices with multiple Tx queues. It has
+been added for hardware that wishes to avoid head-of-line blocking. It will
+cycle through the bands and verify that the hardware queue associated with the
+band is not stopped prior to dequeuing a packet.
+.TP
+netem
+Network Emulator is an enhancement of the Linux traffic control facilities that
+allow adding delay, packet loss, duplication and other characteristics to
+packets outgoing from a selected network interface.
+.TP
pfifo_fast
Standard qdisc for 'Advanced Router' enabled kernels. Consists of a three-band
queue which honors Type of Service flags, as well as the priority that may be
assigned to a packet.
.TP
+pie
+Proportional Integral controller-Enhanced (PIE) is a control theoretic active
+queue management scheme. It is based on the proportional integral controller but
+aims to control delay.
+.TP
red
Random Early Detection simulates physical congestion by randomly dropping
packets when nearing configured bandwidth allocation. Well suited to very
large bandwidth applications.
.TP
+rr
+Round-Robin qdisc with support for multiqueue network devices. Removed from
+Linux since kernel version 2.6.27.
+.TP
+sfb
+Stochastic Fair Blue is a classless qdisc to manage congestion based on
+packet loss and link utilization history while trying to prevent
+non-responsive flows (i.e. flows that do not react to congestion marking
+or dropped packets) from impacting performance of responsive flows.
+Unlike RED, where the marking probability has to be configured, BLUE
+tries to determine the ideal marking probability automatically.
+.TP
sfq
Stochastic Fairness Queueing reorders queued traffic so each 'session'
gets to send a packet in turn.
@@ -190,12 +306,31 @@ qdisc is the automatic default in the absence of a configured qdisc.
.SH CLASSFUL QDISCS
The classful qdiscs are:
.TP
+ATM
+Map flows to virtual circuits of an underlying asynchronous transfer mode
+device.
+.TP
CBQ
Class Based Queueing implements a rich linksharing hierarchy of classes.
It contains shaping elements as well as prioritizing capabilities. Shaping is
performed using link idle time calculations based on average packet size and
underlying link bandwidth. The latter may be ill-defined for some interfaces.
.TP
+DRR
+The Deficit Round Robin Scheduler is a more flexible replacement for Stochastic
+Fairness Queuing. Unlike SFQ, there are no built-in queues \-\- you need to add
+classes and then set up filters to classify packets accordingly. This can be
+useful e.g. for using RED qdiscs with different settings for particular
+traffic. There is no default class \-\- if a packet cannot be classified, it is
+dropped.
+.TP
+DSMARK
+Classify packets based on TOS field, change TOS field of packets based on
+classification.
+.TP
+HFSC
+Hierarchical Fair Service Curve guarantees precise bandwidth and delay allocation for leaf classes and allocates excess bandwidth fairly. Unlike HTB, it makes use of packet dropping to achieve low delays which interactive sessions benefit from.
+.TP
HTB
The Hierarchy Token Bucket implements a rich linksharing hierarchy of
classes with an emphasis on conforming to existing practices. HTB facilitates
@@ -209,6 +344,13 @@ classes which are dequeued in order. This allows for easy prioritization
of traffic, where lower classes are only able to send if higher ones have
no packets available. To facilitate configuration, Type Of Service bits are
honored by default.
+.TP
+QFQ
+Quick Fair Queueing is an O(1) scheduler that provides near-optimal guarantees,
+and is the first to achieve that goal with a constant cost also with respect to
+the number of groups and the packet length. The QFQ algorithm has no loops, and
+uses very simple instructions and data structures that lend themselves very
+well to a hardware implementation.
.SH THEORY OF OPERATION
Classes form a tree, where each class has a single parent.
A class may have multiple children. Some qdiscs allow for runtime addition
@@ -467,7 +609,7 @@ to
.TP
.BR "\-cf" , " \-conf " <FILENAME>
-specifies path to the config file. This option is used in conjuction with other options (e.g.
+specifies path to the config file. This option is used in conjunction with other options (e.g.
.BR -nm ")."
.SH FORMAT
@@ -557,13 +699,20 @@ Shows classes as ASCII graph with stats info under each class.
.B tc
was written by Alexey N. Kuznetsov and added in Linux 2.2.
.SH SEE ALSO
+.BR tc-basic (8),
.BR tc-bfifo (8),
+.BR tc-bpf (8),
.BR tc-cbq (8),
+.BR tc-cgroup (8),
.BR tc-choke (8),
.BR tc-codel (8),
.BR tc-drr (8),
.BR tc-ematch (8),
+.BR tc-flow (8),
+.BR tc-flower (8),
+.BR tc-fq (8),
.BR tc-fq_codel (8),
+.BR tc-fw (8),
.BR tc-hfsc (7),
.BR tc-hfsc (8),
.BR tc-htb (8),
@@ -571,13 +720,15 @@ was written by Alexey N. Kuznetsov and added in Linux 2.2.
.BR tc-pfifo (8),
.BR tc-pfifo_fast (8),
.BR tc-red (8),
+.BR tc-route (8),
.BR tc-sfb (8),
.BR tc-sfq (8),
.BR tc-stab (8),
.BR tc-tbf (8),
+.BR tc-tcindex (8),
+.BR tc-u32 (8),
.br
.RB "User documentation at " http://lartc.org/ ", but please direct bugreports and patches to: " <netdev@vger.kernel.org>
.SH AUTHOR
Manpage maintained by bert hubert (ahu@ds9a.nl)
-
diff --git a/man/man8/tipc-bearer.8 b/man/man8/tipc-bearer.8
new file mode 100644
index 00000000..565ee01d
--- /dev/null
+++ b/man/man8/tipc-bearer.8
@@ -0,0 +1,231 @@
+.TH TIPC-BEARER 8 "02 Jun 2015" "iproute2" "Linux"
+
+.\" For consistency, please keep padding right aligned.
+.\" For example '.B "foo " bar' and not '.B foo " bar"'
+
+.SH NAME
+tipc-bearer \- show or modify TIPC bearers
+
+.SH SYNOPSIS
+.ad l
+.in +8
+
+.ti -8
+.B tipc bearer enable
+.RB "[ " domain
+.IR DOMAIN " ]"
+.RB "[ " priority
+.IR PRIORITY " ]"
+.BR media
+.br
+.RB "{ { " eth " | " ib " } " device
+.IR "DEVICE" " }"
+.RB "|"
+.br
+.RB "{ " udp
+.B name
+.IR NAME
+.B localip
+.IR LOCALIP
+.RB "[ " localport
+.IR LOCALPORT " ]"
+.RB "[ " remoteip
+.IR REMOTEIP " ]"
+.RB "[ " remoteport
+.IR REMOTEPORT " ] }"
+.br
+
+.ti -8
+.B tipc bearer disable media
+.br
+.RB "{ { " eth " | " ib " } " device
+.IR DEVICE
+.RB "|"
+.br
+.RB "{ " udp
+.B name
+.IR NAME
+.B localip
+.IR LOCALIP " } }"
+.br
+
+.ti -8
+.B tipc bearer set
+.RB "{ " "priority "
+.IR PRIORITY
+.RB "| " tolerance
+.IR TOLERANCE
+.RB "| " window
+.IR WINDOW
+.RB "} " media
+.br
+.RB "{ { " eth " | " ib " } " device
+.IR "DEVICE" " }"
+.RB "|"
+.br
+.RB "{ " udp
+.B name
+.IR NAME
+.B localip
+.IR LOCALIP " } }"
+.br
+
+.ti -8
+.B tipc bearer get
+.RB "{ " "priority" " | " tolerance " | " window " } " media
+.br
+.RB "{ { " eth " | " ib " } " device
+.IR "DEVICE" " }"
+.RB "|"
+.br
+.RB "{ " udp
+.B name
+.IR NAME
+.B localip
+.IR LOCALIP " } }"
+.br
+
+.ti -8
+.B tipc bearer list
+.br
+
+.SH OPTIONS
+Options (flags) that can be passed anywhere in the command chain.
+.TP
+.BR "\-h" , " --help"
+Show help about last valid command. For example
+.B tipc bearer --help
+will show bearer help and
+.B tipc --help
+will show general help. The position of the option in the string is irrelevant.
+.SH DESCRIPTION
+
+.SS Bearer identification
+.TP
+.BI "media " MEDIA
+.br
+Specifies the TIPC media type for a particular bearer to operate on.
+Different media types have different ways of identifying a unique bearer.
+For example,
+.BR "ib " "and " eth
+identify a bearer with a
+.I DEVICE
+while
+.B udp
+identifies a bearer with a
+.IR "LOCALIP " "and a " NAME
+
+.B ib
+- Infiniband
+.sp
+.B eth
+- Ethernet
+.sp
+.B udp
+- User Datagram Protocol (UDP)
+.sp
+
+.TP
+.BI "name " NAME
+.br
+Logical bearer identifier valid for bearers on
+.B udp
+media.
+
+.TP
+.BI "device " DEVICE
+.br
+Physical bearer device valid for bearers on
+.B eth
+and
+.B ib
+media.
+
+.SS Bearer properties
+
+.TP
+.B domain
+.br
+The addressing domain (region) in which a bearer will establish links and accept
+link establish requests.
+
+.TP
+.B priority
+.br
+Default link priority inherited by all links subsequently established over a
+bearer. A single bearer can only host one link to a particular node. This means
+the default link priority for a bearer typically affects which bearer to use
+when communicating with a particular node in a multi bearer setup. For more
+info about link priority see
+.BR tipc-link (8)
+
+.TP
+.B tolerance
+.br
+Default link tolerance inherited by all links subsequently established over a
+bearer. For more info about link tolerance see
+.BR tipc-link (8)
+
+.TP
+.B window
+.br
+Default link window inherited by all links subsequently established over a
+bearer. For more info about the link window size see
+.BR tipc-link (8)
+
+.SS UDP bearer options
+
+.TP
+.BI "localip " LOCALIP
+.br
+Specify a local IP v4/v6 address for a
+.B udp
+bearer.
+
+.TP
+.BI "localport " LOCALPORT
+.br
+Specify the local port for a
+.B udp
+bearer. The default port 6118 is used if no port is specified.
+
+.TP
+.BI "remoteip " REMOTEIP
+.br
+Specify a remote IP for a
+.B udp
+bearer. If no remote IP is specified a
+.B udp
+bearer runs in multicast mode and tries to auto-discover its neighbours.
+The multicast IP address is generated based on the TIPC network ID. If a remote
+IP is specified the
+.B udp
+bearer runs in point-to-point mode.
+
+.TP
+.BI "remoteport " REMOTEPORT
+.br
+Specify the remote port for a
+.B udp
+bearer. The default port 6118 is used if no port is specified.
+
+.SH EXIT STATUS
+Exit status is 0 if command was successful or a positive integer upon failure.
+
+.SH SEE ALSO
+.BR tipc (8),
+.BR tipc-link (8),
+.BR tipc-media (8),
+.BR tipc-nametable (8),
+.BR tipc-node (8),
+.BR tipc-peer (8),
+.BR tipc-socket (8)
+.br
+.SH REPORTING BUGS
+Report any bugs to the Network Developers mailing list
+.B <netdev@vger.kernel.org>
+where the development and maintenance is primarily done.
+You do not have to be subscribed to the list to send a message there.
+
+.SH AUTHOR
+Richard Alpe <richard.alpe@ericsson.com>
diff --git a/man/man8/tipc-link.8 b/man/man8/tipc-link.8
new file mode 100644
index 00000000..2ee03a0b
--- /dev/null
+++ b/man/man8/tipc-link.8
@@ -0,0 +1,226 @@
+.TH TIPC-LINK 8 "02 Jun 2015" "iproute2" "Linux"
+
+.\" For consistency, please keep padding right aligned.
+.\" For example '.B "foo " bar' and not '.B foo " bar"'
+
+.SH NAME
+tipc-link \- show links or modify link properties
+
+.SH SYNOPSIS
+.ad l
+.in +8
+
+.ti -8
+
+.ti -8
+.B tipc link set
+.RB "{ " "priority "
+.IR PRIORITY
+.RB "| " tolerance
+.IR TOLERANCE
+.RB "| " window
+.IR "WINDOW " }
+.BI "link " LINK
+
+.ti -8
+.B tipc link get
+.RB "{ " "priority" " | " tolerance " | " window " } " link
+.I LINK
+
+.ti -8
+.B tipc link statistics
+.RB "{ " "show " "[ " link
+.I LINK
+.RB "] | " reset
+.BI "link " "LINK "
+}
+
+.ti -8
+.B tipc link list
+.br
+
+.SH OPTIONS
+Options (flags) that can be passed anywhere in the command chain.
+.TP
+.BR "\-h" , " --help"
+Show help about last valid command. For example
+.B tipc link --help
+will show link help and
+.B tipc --help
+will show general help. The position of the option in the string is irrelevant.
+.SH DESCRIPTION
+
+.SS Link statistics
+
+.TP
+.BR "ACTIVE " "link state"
+.br
+An
+.B ACTIVE
+link is serving traffic. Two links to the same node can become
+.B ACTIVE
+if they have the same link
+.BR priority .
+If there are more than two links with the same priority the additional links will
+be put in
+.B STANDBY
+state.
+
+.TP
+.BR "STANDBY " "link state"
+.br
+A
+.B STANDBY
+link has lower link priority than an
+.B ACTIVE
+link. A
+.B STANDBY
+link has control traffic flowing and is ready to take over should the
+.B ACTIVE
+link(s) go down.
+
+.TP
+.B MTU
+.br
+The Maximum Transmission Unit. The two endpoints advertise their default or
+configured
+.B MTU
+at initial link setup and will agree to use the lower of the two values should
+they differ.
+
+.TP
+.B Packets
+.br
+The total number of transmitted or received TIPC packets on a link, including
+.BR "fragmented " "and " "bundled " packets.
+
+.TP
+.B Fragments
+.br
+Represented in the form
+.BR fragments / fragmented .
+Where
+.B fragmented
+is the amount of data messages which have been broken into
+.BR fragments .
+Subsequently the
+.B fragments
+are the total amount of packets that the
+.B fragmented
+messages have been broken into.
+
+.TP
+.B Bundles
+.br
+Represented in the form
+.BR bundles / bundled .
+If a link becomes congested the link will attempt to bundle data from small
+.B bundled
+packets into
+.B bundles
+of full MTU size packets before they are transmitted.
+
+.TP
+.B Profile
+.br
+Shows the
+.B average
+packet size in octets/bytes for a
+.B sample
+of packets. It also shows the packet size distribution of the
+.B sampled
+packets in the intervals
+
+0-64 bytes
+.br
+64-256 bytes
+.br
+256-1024 bytes
+.br
+1024-4096 bytes
+.br
+4096-16384 bytes
+.br
+16384-32768 bytes
+.br
+32768-66000 bytes
+
+.TP
+.B Message counters
+
+.B states
+- Number of link state messages
+.sp
+
+.B probes
+- Link state messages with probe flag set. Typically sent when a link is idle
+.sp
+
+.B nacks
+- Number of negative acknowledgement (NACK) packets sent and received by the
+link
+.sp
+
+.B defs
+- Number of packets received out of order
+.sp
+
+.B dups
+- Number of duplicate packets received
+
+.TP
+.B Congestion link
+The number of times an application has tried to send data when the TIPC link
+was congested
+
+.TP
+.B Send queue
+.B Max
+is the maximum number of messages that have resided in the out queue during the
+statistics collection period of a link.
+
+.B Avg
+is the average outqueue size during the lifetime of a link.
+
+.SS Link properties
+
+.TP
+.B priority
+.br
+The priority between logical TIPC links to a particular node. Link priority can
+range from 0 (lowest) to 31 (highest).
+
+.TP
+.B tolerance
+.br
+Link tolerance specifies the maximum time in milliseconds that TIPC will allow
+a communication problem to exist before taking the link down. The default value
+is 1500 milliseconds.
+
+.TP
+.B window
+.br
+The link window controls how many unacknowledged messages a link endpoint can
+have in its transmit queue before TIPC's congestion control mechanism is
+activated.
+
+.SH EXIT STATUS
+Exit status is 0 if command was successful or a positive integer upon failure.
+
+.SH SEE ALSO
+.BR tipc (8),
+.BR tipc-media (8),
+.BR tipc-bearer (8),
+.BR tipc-nametable (8),
+.BR tipc-node (8),
+.BR tipc-peer (8),
+.BR tipc-socket (8)
+.br
+.SH REPORTING BUGS
+Report any bugs to the Network Developers mailing list
+.B <netdev@vger.kernel.org>
+where the development and maintenance is primarily done.
+You do not have to be subscribed to the list to send a message there.
+
+.SH AUTHOR
+Richard Alpe <richard.alpe@ericsson.com>
diff --git a/man/man8/tipc-media.8 b/man/man8/tipc-media.8
new file mode 100644
index 00000000..4689cb3f
--- /dev/null
+++ b/man/man8/tipc-media.8
@@ -0,0 +1,87 @@
+.TH TIPC-MEDIA 8 "02 Jun 2015" "iproute2" "Linux"
+
+.\" For consistency, please keep padding right aligned.
+.\" For example '.B "foo " bar' and not '.B foo " bar"'
+
+.SH NAME
+tipc-media \- list or modify media properties
+
+.SH SYNOPSIS
+.ad l
+.in +8
+
+.ti -8
+
+.ti -8
+.B tipc media set
+.RB "{ " "priority "
+.IR PRIORITY
+.RB "| " tolerance
+.IR TOLERANCE
+.RB "| " window
+.IR "WINDOW " }
+.BI "media " MEDIA
+
+.ti -8
+.B tipc media get
+.RB "{ " "priority" " | " tolerance " | " window " } " media
+.I MEDIA
+
+.ti -8
+.B tipc media list
+.br
+
+.SH OPTIONS
+Options (flags) that can be passed anywhere in the command chain.
+.TP
+.BR "\-h" , " --help"
+Show help about last valid command. For example
+.B tipc media --help
+will show media help and
+.B tipc --help
+will show general help. The position of the option in the string is irrelevant.
+.SH DESCRIPTION
+
+.SS Media properties
+
+.TP
+.B priority
+.br
+Default link priority inherited by all bearers subsequently enabled on a
+media. For more info about link priority see
+.BR tipc-link (8)
+
+.TP
+.B tolerance
+.br
+Default link tolerance inherited by all bearers subsequently enabled on a
+media. For more info about link tolerance see
+.BR tipc-link (8)
+
+.TP
+.B window
+.br
+Default link window inherited by all bearers subsequently enabled on a
+media. For more info about link window see
+.BR tipc-link (8)
+
+.SH EXIT STATUS
+Exit status is 0 if command was successful or a positive integer upon failure.
+
+.SH SEE ALSO
+.BR tipc (8),
+.BR tipc-bearer (8),
+.BR tipc-link (8),
+.BR tipc-nametable (8),
+.BR tipc-node (8),
+.BR tipc-peer (8),
+.BR tipc-socket (8)
+.br
+.SH REPORTING BUGS
+Report any bugs to the Network Developers mailing list
+.B <netdev@vger.kernel.org>
+where the development and maintenance is primarily done.
+You do not have to be subscribed to the list to send a message there.
+
+.SH AUTHOR
+Richard Alpe <richard.alpe@ericsson.com>
diff --git a/man/man8/tipc-nametable.8 b/man/man8/tipc-nametable.8
new file mode 100644
index 00000000..4bcefe47
--- /dev/null
+++ b/man/man8/tipc-nametable.8
@@ -0,0 +1,100 @@
+.TH TIPC-NAMETABLE 8 "02 Jun 2015" "iproute2" "Linux"
+
+.\" For consistency, please keep padding right aligned.
+.\" For example '.B "foo " bar' and not '.B foo " bar"'
+
+.SH NAME
+tipc-nametable \- show TIPC nametable
+
+.SH SYNOPSIS
+.ad l
+.in +8
+
+.ti -8
+.B tipc nametable show
+.br
+
+.SH OPTIONS
+Options (flags) that can be passed anywhere in the command chain.
+.TP
+.BR "\-h" , " --help"
+Show help about last valid command. For example
+.B tipc nametable --help
+will show nametable help and
+.B tipc --help
+will show general help. The position of the option in the string is irrelevant.
+
+.SH DESCRIPTION
+The nametable shows TIPC publication information.
+
+.SS Nametable format
+
+.TP
+.B Type
+.br
+The 32-bit type field of the port name. The type field often indicates the class of service
+provided by a port.
+
+.TP
+.B Lower
+.br
+The lower bound of the 32-bit instance field of the port name.
+The instance field is often used as a sub-class indicator.
+
+.TP
+.B Upper
+.br
+The upper bound of the 32-bit instance field of the port name.
+The instance field is often used as a sub-class indicator.
+A difference in
+.BR "lower " "and " upper
+means the socket is bound to the port name range [lower,upper].
+
+.TP
+.B Port Identity
+.br
+The unique socket (port) identifier within the TIPC cluster. The
+.B port identity
+consists of a node identity followed by a socket reference number.
+
+.TP
+.B Publication
+.br
+The
+.B publication
+ID is a random number used internally to represent a publication.
+
+.TP
+.B Scope
+.br
+The publication
+.B scope
+specifies the visibility of a bound port name.
+The
+.B scope
+can be specified to comprise three different domains:
+.BR node ", " "cluster " "and " zone.
+Applications residing within the specified
+.B scope
+can see and access the port using the displayed port name.
+
+.SH EXIT STATUS
+Exit status is 0 if command was successful or a positive integer upon failure.
+
+.SH SEE ALSO
+.BR tipc (8),
+.BR tipc-bearer (8),
+.BR tipc-link (8),
+.BR tipc-media (8),
+.BR tipc-node (8),
+.BR tipc-peer (8),
+.BR tipc-socket (8)
+.br
+.SH REPORTING BUGS
+Report any bugs to the Network Developers mailing list
+.B <netdev@vger.kernel.org>
+where the development and maintenance is primarily done.
+You do not have to be subscribed to the list to send a message there.
+
+.SH AUTHOR
+Richard Alpe <richard.alpe@ericsson.com>
diff --git a/man/man8/tipc-node.8 b/man/man8/tipc-node.8
new file mode 100644
index 00000000..a72a4099
--- /dev/null
+++ b/man/man8/tipc-node.8
@@ -0,0 +1,72 @@
+.TH TIPC-NODE 8 "02 Jun 2015" "iproute2" "Linux"
+
+.\" For consistency, please keep padding right aligned.
+.\" For example '.B "foo " bar' and not '.B foo " bar"'
+
+.SH NAME
+tipc-node \- modify and show local node parameters or list peer nodes
+
+.SH SYNOPSIS
+.ad l
+.in +8
+
+.ti -8
+.B tipc node set
+.RB "{ " "address "
+.IR ADDRESS
+.RB "| " netid
+.IR NETID
+.RB "} "
+
+.ti -8
+.B tipc node get
+.RB "{ " "address" " | " netid " } "
+
+.ti -8
+.B tipc node list
+.br
+
+.SH OPTIONS
+Options (flags) that can be passed anywhere in the command chain.
+.TP
+.BR "\-h" , " --help"
+Show help about last valid command. For example
+.B tipc node --help
+will show node help and
+.B tipc --help
+will show general help. The position of the option in the string is irrelevant.
+.SH DESCRIPTION
+
+.SS Node parameters
+.TP
+.BI address
+.br
+The TIPC logical address, in the form x.y.z where x, y and z are unsigned
+integers.
+
+.TP
+.BI netid
+.br
+Network identity. Can be used to create individual TIPC clusters on the same
+media.
+
+.SH EXIT STATUS
+Exit status is 0 if command was successful or a positive integer upon failure.
+
+.SH SEE ALSO
+.BR tipc (8),
+.BR tipc-bearer (8),
+.BR tipc-link (8),
+.BR tipc-media (8),
+.BR tipc-nametable (8),
+.BR tipc-peer (8),
+.BR tipc-socket (8)
+.br
+.SH REPORTING BUGS
+Report any bugs to the Network Developers mailing list
+.B <netdev@vger.kernel.org>
+where the development and maintenance is primarily done.
+You do not have to be subscribed to the list to send a message there.
+
+.SH AUTHOR
+Richard Alpe <richard.alpe@ericsson.com>
diff --git a/man/man8/tipc-peer.8 b/man/man8/tipc-peer.8
new file mode 100644
index 00000000..430651f7
--- /dev/null
+++ b/man/man8/tipc-peer.8
@@ -0,0 +1,52 @@
+.TH TIPC-PEER 8 "04 Dec 2015" "iproute2" "Linux"
+
+.\" For consistency, please keep padding right aligned.
+.\" For example '.B "foo " bar' and not '.B foo " bar"'
+
+.SH NAME
+tipc-peer \- modify peer information
+
+.SH SYNOPSIS
+.ad l
+.in +8
+
+.ti -8
+.B tipc peer remove address
+.IR ADDRESS
+
+.SH OPTIONS
+Options (flags) that can be passed anywhere in the command chain.
+.TP
+.BR "\-h" , " --help"
+Show help about last valid command. For example
+.B tipc peer --help
+will show peer help and
+.B tipc --help
+will show general help. The position of the option in the string is irrelevant.
+.SH DESCRIPTION
+
+.SS Peer remove
+Remove an offline peer node from the local data structures. The peer is
+identified by its
+.BR address .
+
+.SH EXIT STATUS
+Exit status is 0 if command was successful or a positive integer upon failure.
+
+.SH SEE ALSO
+.BR tipc (8),
+.BR tipc-bearer (8),
+.BR tipc-link (8),
+.BR tipc-media (8),
+.BR tipc-nametable (8),
+.BR tipc-node (8),
+.BR tipc-socket (8)
+.br
+.SH REPORTING BUGS
+Report any bugs to the Network Developers mailing list
+.B <netdev@vger.kernel.org>
+where the development and maintenance is primarily done.
+You do not have to be subscribed to the list to send a message there.
+
+.SH AUTHOR
+Richard Alpe <richard.alpe@ericsson.com>
diff --git a/man/man8/tipc-socket.8 b/man/man8/tipc-socket.8
new file mode 100644
index 00000000..23ec1e51
--- /dev/null
+++ b/man/man8/tipc-socket.8
@@ -0,0 +1,59 @@
+.TH TIPC-SOCKET 8 "02 Jun 2015" "iproute2" "Linux"
+
+.\" For consistency, please keep padding right aligned.
+.\" For example '.B "foo " bar' and not '.B foo " bar"'
+
+.SH NAME
+tipc-socket \- show TIPC socket (port) information
+
+.SH SYNOPSIS
+.ad l
+.in +8
+
+.ti -8
+.B tipc socket list
+
+.SH OPTIONS
+Options (flags) that can be passed anywhere in the command chain.
+.TP
+.BR "\-h" , " --help"
+Show help about last valid command. For example
+.B tipc socket --help
+will show socket help and
+.B tipc --help
+will show general help. The position of the option in the string is irrelevant.
+
+.SH DESCRIPTION
+A TIPC socket is represented by an unsigned integer.
+
+.TP
+.SS Bound state
+A bound socket has a logical TIPC port name associated with it.
+
+.TP
+.SS Connected state
+A connected socket is directly connected to another socket creating a point
+to point connection between TIPC sockets. If the connection to X was made using
+a logical port name Y that name will show up as
+.BR "connected to " "X " "via " "Y."
+.
+
+.SH EXIT STATUS
+Exit status is 0 if command was successful or a positive integer upon failure.
+
+.SH SEE ALSO
+.BR tipc (8),
+.BR tipc-bearer (8),
+.BR tipc-link (8),
+.BR tipc-media (8),
+.BR tipc-nametable (8),
+.BR tipc-node (8)
+.br
+.SH REPORTING BUGS
+Report any bugs to the Network Developers mailing list
+.B <netdev@vger.kernel.org>
+where the development and maintenance is primarily done.
+You do not have to be subscribed to the list to send a message there.
+
+.SH AUTHOR
+Richard Alpe <richard.alpe@ericsson.com>
diff --git a/man/man8/tipc.8 b/man/man8/tipc.8
new file mode 100644
index 00000000..32943fa5
--- /dev/null
+++ b/man/man8/tipc.8
@@ -0,0 +1,100 @@
+.TH TIPC 8 "02 Jun 2015" "iproute2" "Linux"
+.SH NAME
+tipc \- a TIPC configuration and management tool
+.SH SYNOPSIS
+
+.ad l
+.in +8
+.ti -8
+.B tipc
+.RI "[ " OPTIONS " ] " COMMAND " " ARGUMENTS
+.sp
+
+.ti -8
+.IR COMMAND " := { "
+.BR bearer " | " link " | " media " | " nametable " | " node " | " socket " }
+.sp
+
+.ti -8
+.IR OPTIONS " := { "
+\fB\-h\fR[\fIhelp\fR] }
+
+.SH DESCRIPTION
+The Transparent Inter-Process Communication (TIPC) protocol offers total address
+transparency between processes which allows applications in a clustered computer
+environment to communicate quickly and reliably with each other, regardless of
+their location within the cluster.
+
+TIPC originated at the telecommunications manufacturer Ericsson. The first open
+source version of TIPC was created in 2000 when Ericsson released its first
+Linux version of TIPC. TIPC was introduced in the mainline Linux kernel in 2006
+and is now widely used both within and outside of Ericsson.
+
+.SH OPTIONS
+
+.TP
+.BR "\-h" , " --help"
+Show help about last given command. For example
+.B tipc bearer --help
+will show bearer help and
+.B tipc --help
+will show general help. The position of the option in the string is irrelevant.
+
+.SH COMMANDS
+
+.TP
+.B BEARER
+- Show or modify TIPC bearers
+
+.TP
+.B LINK
+- Show or modify TIPC links
+
+.TP
+.B MEDIA
+- Show or modify TIPC media
+
+.TP
+.B NAMETABLE
+- Show TIPC nametable
+
+.TP
+.B NODE
+- Show or modify TIPC node parameters
+
+.TP
+.B SOCKET
+- Show TIPC sockets
+
+.SH ARGUMENTS
+
+Command arguments are described in a command specific man page and typically
+consist of nested commands along with key value pairs.
+If no arguments are given a command typically shows its help text. The explicit
+help option
+.B -h
+or
+.B --help
+can occur anywhere among the arguments and will show help for the last valid
+command given.
+
+.SH EXIT STATUS
+Exit status is 0 if command was successful or a positive integer upon failure.
+
+.SH SEE ALSO
+.BR tipc-bearer (8),
+.BR tipc-link (8),
+.BR tipc-media (8),
+.BR tipc-nametable (8),
+.BR tipc-node (8),
+.BR tipc-peer (8),
+.BR tipc-socket (8)
+.br
+.SH REPORTING BUGS
+Report any bugs to the Network Developers mailing list
+.B <netdev@vger.kernel.org>
+where the development and maintenance is primarily done.
+You do not have to be subscribed to the list to send a message there.
+
+.SH AUTHOR
+Richard Alpe <richard.alpe@ericsson.com>
diff --git a/misc/Makefile b/misc/Makefile
index b7ecba90..f50e7403 100644
--- a/misc/Makefile
+++ b/misc/Makefile
@@ -1,13 +1,17 @@
SSOBJ=ss.o ssfilter.o
LNSTATOBJ=lnstat.o lnstat_util.o
-TARGETS=ss nstat ifstat rtacct arpd lnstat
+TARGETS=ss nstat ifstat rtacct lnstat
include ../Config
+ifeq ($(HAVE_BERKELEY_DB),y)
+ TARGETS += arpd
+endif
+
ifeq ($(HAVE_SELINUX),y)
- LDLIBS += $(shell pkg-config --libs libselinux)
- CFLAGS += $(shell pkg-config --cflags libselinux) -DHAVE_SELINUX
+ LDLIBS += $(shell $(PKG_CONFIG) --libs libselinux)
+ CFLAGS += $(shell $(PKG_CONFIG) --cflags libselinux) -DHAVE_SELINUX
endif
ifeq ($(IP_CONFIG_SETNS),y)
@@ -19,7 +23,7 @@ all: $(TARGETS)
ss: $(SSOBJ)
nstat: nstat.c
- $(CC) $(CFLAGS) $(LDFLAGS) -o nstat nstat.c -lm
+ $(CC) $(CFLAGS) $(LDFLAGS) -o nstat nstat.c $(LIBNETLINK) -lm
ifstat: ifstat.c
$(CC) $(CFLAGS) $(LDFLAGS) -o ifstat ifstat.c $(LIBNETLINK) -lm
diff --git a/misc/arpd.c b/misc/arpd.c
index 7919eb8b..6bb9bd16 100644
--- a/misc/arpd.c
+++ b/misc/arpd.c
@@ -703,7 +703,7 @@ int main(int argc, char **argv)
}
buf[sizeof(buf)-1] = 0;
- while (fgets(buf, sizeof(buf)-1, fp)) {
+ while (fgets(buf, sizeof(buf), fp)) {
__u8 b1[6];
char ipbuf[128];
char macbuf[128];
diff --git a/misc/ifstat.c b/misc/ifstat.c
index ab2cbc74..ac5c29c8 100644
--- a/misc/ifstat.c
+++ b/misc/ifstat.c
@@ -29,6 +29,7 @@
#include <getopt.h>
#include <libnetlink.h>
+#include <json_writer.h>
#include <linux/if.h>
#include <linux/if_link.h>
@@ -43,6 +44,7 @@ int no_update = 0;
int scan_interval = 0;
int time_constant = 0;
int show_errors = 0;
+int pretty;
double W;
char **patterns;
int npatterns;
@@ -238,13 +240,15 @@ static void load_raw_table(FILE *fp)
static void dump_raw_db(FILE *fp, int to_hist)
{
+ json_writer_t *jw = json_output ? jsonw_new(fp) : NULL;
struct ifstat_ent *n, *h;
- const char *eol = "\n";
h = hist_db;
- if (json_output)
- fprintf(fp, "{ \"%s\":{", info_source);
- else
+ if (jw) {
+ jsonw_pretty(jw, pretty);
+ jsonw_name(jw, info_source);
+ jsonw_start_object(jw);
+ } else
fprintf(fp, "#%s\n", info_source);
for (n=kern_db; n; n=n->next) {
@@ -265,14 +269,13 @@ static void dump_raw_db(FILE *fp, int to_hist)
}
}
- if (json_output) {
- fprintf(fp, "%s \"%s\":{",
- eol, n->name);
- eol = ",\n";
+ if (jw) {
+ jsonw_name(jw, n->name);
+ jsonw_start_object(jw);
+
for (i=0; i<MAXS && stats[i]; i++)
- fprintf(fp, " \"%s\":%llu",
- stats[i], vals[i]);
- fprintf(fp, "}");
+ jsonw_uint_field(jw, stats[i], vals[i]);
+ jsonw_end_object(jw);
} else {
fprintf(fp, "%d %s ", n->ifindex, n->name);
for (i=0; i<MAXS; i++)
@@ -281,6 +284,10 @@ static void dump_raw_db(FILE *fp, int to_hist)
fprintf(fp, "\n");
}
}
+ if (jw) {
+ jsonw_end_object(jw);
+ jsonw_destroy(&jw);
+ }
}
/* use communication definitions of meg/kilo etc */
@@ -373,20 +380,18 @@ static void print_head(FILE *fp)
}
}
-static void print_one_json(FILE *fp, const struct ifstat_ent *n,
+static void print_one_json(json_writer_t *jw, const struct ifstat_ent *n,
const unsigned long long *vals)
{
- int i, m;
- const char *sep = " ";
-
- m = show_errors ? 20 : 10;
- fprintf(fp, " \"%s\":{", n->name);
- for (i=0; i < m && stats[i]; i++) {
- fprintf(fp, "%s\"%s\":%llu",
- sep, stats[i], vals[i]);
- sep = ", ";
- }
- fprintf(fp, " }");
+ int i, m = show_errors ? 20 : 10;
+
+ jsonw_name(jw, n->name);
+ jsonw_start_object(jw);
+
+ for (i=0; i < m && stats[i]; i++)
+ jsonw_uint_field(jw, stats[i], vals[i]);
+
+ jsonw_end_object(jw);
}
static void print_one_if(FILE *fp, const struct ifstat_ent *n,
@@ -439,39 +444,40 @@ static void print_one_if(FILE *fp, const struct ifstat_ent *n,
static void dump_kern_db(FILE *fp)
{
+ json_writer_t *jw = json_output ? jsonw_new(fp) : NULL;
struct ifstat_ent *n;
- const char *eol = "\n";
- if (json_output)
- fprintf(fp, "{ \"%s\": {", info_source);
- else
+ if (jw) {
+ jsonw_pretty(jw, pretty);
+ jsonw_name(jw, info_source);
+ jsonw_start_object(jw);
+ } else
print_head(fp);
for (n=kern_db; n; n=n->next) {
if (!match(n->name))
continue;
- if (json_output) {
- fprintf(fp, "%s", eol);
- eol = ",\n";
- print_one_json(fp, n, n->val);
- } else
+ if (jw)
+ print_one_json(jw, n, n->val);
+ else
print_one_if(fp, n, n->val);
}
if (json_output)
fprintf(fp, "\n} }\n");
}
-
static void dump_incr_db(FILE *fp)
{
struct ifstat_ent *n, *h;
- const char *eol = "\n";
+ json_writer_t *jw = json_output ? jsonw_new(fp) : NULL;
h = hist_db;
- if (json_output)
- fprintf(fp, "{ \"%s\":{", info_source);
- else
+ if (jw) {
+ jsonw_pretty(jw, pretty);
+ jsonw_name(jw, info_source);
+ jsonw_start_object(jw);
+ } else
print_head(fp);
for (n=kern_db; n; n=n->next) {
@@ -492,17 +498,17 @@ static void dump_incr_db(FILE *fp)
if (!match(n->name))
continue;
- if (json_output) {
- fprintf(fp, "%s", eol);
- eol = ",\n";
- print_one_json(fp, n, n->val);
- } else
+ if (jw)
+ print_one_json(jw, n, n->val);
+ else
print_one_if(fp, n, vals);
}
- if (json_output)
- fprintf(fp, "\n} }\n");
-}
+ if (jw) {
+ jsonw_end_object(jw);
+ jsonw_destroy(&jw);
+ }
+}
static int children;
@@ -646,6 +652,7 @@ static void usage(void)
" -e, --errors show errors\n"
" -j, --json format output in JSON\n"
" -n, --nooutput do history only\n"
+" -p, --pretty pretty print\n"
" -r, --reset reset history\n"
" -s, --noupdate don\'t update history\n"
" -t, --interval=SECS report average over the last SECS\n"
@@ -663,6 +670,7 @@ static const struct option longopts[] = {
{ "nooutput", 0, 0, 'n' },
{ "json", 0, 0, 'j' },
{ "reset", 0, 0, 'r' },
+ { "pretty", 0, 0, 'p' },
{ "noupdate", 0, 0, 's' },
{ "interval", 1, 0, 't' },
{ "version", 0, 0, 'V' },
@@ -678,7 +686,7 @@ int main(int argc, char *argv[])
int ch;
int fd;
- while ((ch = getopt_long(argc, argv, "hjvVzrnasd:t:e",
+ while ((ch = getopt_long(argc, argv, "hjpvVzrnasd:t:e",
longopts, NULL)) != EOF) {
switch(ch) {
case 'z':
@@ -702,6 +710,9 @@ int main(int argc, char *argv[])
case 'j':
json_output = 1;
break;
+ case 'p':
+ pretty = 1;
+ break;
case 'd':
scan_interval = atoi(optarg) * 1000;
if (scan_interval <= 0) {
@@ -808,7 +819,8 @@ int main(int argc, char *argv[])
}
if (uptime >= 0 && time(NULL) >= stb.st_mtime+uptime) {
fprintf(stderr, "ifstat: history is aged out, resetting\n");
- ftruncate(fileno(hist_fp), 0);
+ if (ftruncate(fileno(hist_fp), 0))
+ perror("ifstat: ftruncate");
}
}
@@ -851,7 +863,8 @@ int main(int argc, char *argv[])
}
if (!no_update) {
- ftruncate(fileno(hist_fp), 0);
+ if (ftruncate(fileno(hist_fp), 0))
+ perror("ifstat: ftruncate");
rewind(hist_fp);
json_output = 0;
diff --git a/misc/lnstat.c b/misc/lnstat.c
index 32b5cbe5..264c9531 100644
--- a/misc/lnstat.c
+++ b/misc/lnstat.c
@@ -36,6 +36,7 @@
#include <string.h>
#include <getopt.h>
+#include <json_writer.h>
#include "lnstat.h"
static struct option opts[] = {
@@ -49,6 +50,7 @@ static struct option opts[] = {
{ "keys", 1, NULL, 'k' },
{ "subject", 1, NULL, 's' },
{ "width", 1, NULL, 'w' },
+ { "oneline", 0, NULL, 0 },
};
static int usage(char *name, int exit_code)
@@ -71,7 +73,10 @@ static int usage(char *name, int exit_code)
fprintf(stderr, "\t-i --interval <intv>\t"
"Set interval to 'intv' seconds\n");
fprintf(stderr, "\t-k --keys k,k,k,...\tDisplay only keys specified\n");
- fprintf(stderr, "\t-s --subject [0-2]\t?\n");
+ fprintf(stderr, "\t-s --subject [0-2]\tControl header printing:\n");
+ fprintf(stderr, "\t\t\t\t0 = never\n");
+ fprintf(stderr, "\t\t\t\t1 = once\n");
+ fprintf(stderr, "\t\t\t\t2 = every 20 lines (default))\n");
fprintf(stderr, "\t-w --width n,n,n,...\tWidth for each field\n");
fprintf(stderr, "\n");
@@ -107,25 +112,17 @@ static void print_line(FILE *of, const struct lnstat_file *lnstat_files,
static void print_json(FILE *of, const struct lnstat_file *lnstat_files,
const struct field_params *fp)
{
+ json_writer_t *jw = jsonw_new(of);
int i;
- const char *sep;
- const char *base = NULL;
- fputs("{\n", of);
+ jsonw_start_object(jw);
for (i = 0; i < fp->num; i++) {
const struct lnstat_field *lf = fp->params[i].lf;
- if (!base || lf->file->basename != base) {
- if (base) fputs("},\n", of);
- base = lf->file->basename;
- sep = "\n\t";
- fprintf(of, " \"%s\":{", base);
- }
- fprintf(of, "%s\"%s\":%lu", sep,
- lf->name, lf->result);
- sep = ",\n\t";
+ jsonw_uint_field(jw, lf->name, lf->result);
}
- fputs("}\n}\n", of);
+ jsonw_end_object(jw);
+ jsonw_destroy(&jw);
}
/* find lnstat_field according to user specification */
@@ -249,7 +246,7 @@ int main(int argc, char **argv)
MODE_JSON,
MODE_NORMAL,
} mode = MODE_NORMAL;
- unsigned long count = 1;
+ unsigned long count = 0;
struct table_hdr *header;
static struct field_params fp;
int num_req_files = 0;
@@ -272,7 +269,7 @@ int main(int argc, char **argv)
num_req_files = 1;
}
- while ((c = getopt_long(argc, argv,"Vc:djf:h?i:k:s:w:",
+ while ((c = getopt_long(argc, argv,"Vc:djpf:h?i:k:s:w:",
opts, NULL)) != -1) {
int len = 0;
char *tmp, *tok;
@@ -347,7 +344,7 @@ int main(int argc, char **argv)
switch (mode) {
case MODE_DUMP:
- lnstat_dump(stderr, lnstat_files);
+ lnstat_dump(stdout, lnstat_files);
break;
case MODE_NORMAL:
@@ -362,18 +359,18 @@ int main(int argc, char **argv)
if (interval < 1 )
interval = 1;
- for (i = 0; i < count; i++) {
+ for (i = 0; i < count || !count; i++) {
lnstat_update(lnstat_files);
if (mode == MODE_JSON)
print_json(stdout, lnstat_files, &fp);
else {
- if ((hdr > 1 &&
- (! (i % 20))) || (hdr == 1 && i == 0))
+ if ((hdr > 1 && !(i % 20)) ||
+ (hdr == 1 && i == 0))
print_hdr(stdout, header);
print_line(stdout, lnstat_files, &fp);
}
fflush(stdout);
- if (i < count - 1)
+ if (i < count - 1 || !count)
sleep(interval);
}
break;
@@ -381,4 +378,3 @@ int main(int argc, char **argv)
return 1;
}
-
diff --git a/misc/lnstat_util.c b/misc/lnstat_util.c
index 9492baf0..a2583665 100644
--- a/misc/lnstat_util.c
+++ b/misc/lnstat_util.c
@@ -38,18 +38,22 @@
/* Read (and summarize for SMP) the different stats vars. */
static int scan_lines(struct lnstat_file *lf, int i)
{
+ char buf[FGETS_BUF_SIZE];
int j, num_lines = 0;
for (j = 0; j < lf->num_fields; j++)
lf->fields[j].values[i] = 0;
- while(!feof(lf->fp)) {
- char buf[FGETS_BUF_SIZE];
+ rewind(lf->fp);
+ /* skip first line */
+ if (!lf->compat && !fgets(buf, sizeof(buf)-1, lf->fp))
+ return -1;
+
+ while(!feof(lf->fp) && fgets(buf, sizeof(buf)-1, lf->fp)) {
char *ptr = buf;
num_lines++;
- fgets(buf, sizeof(buf)-1, lf->fp);
gettimeofday(&lf->last_read, NULL);
for (j = 0; j < lf->num_fields; j++) {
@@ -81,7 +85,6 @@ static int time_after(struct timeval *last,
int lnstat_update(struct lnstat_file *lnstat_files)
{
struct lnstat_file *lf;
- char buf[FGETS_BUF_SIZE];
struct timeval tv;
gettimeofday(&tv, NULL);
@@ -91,11 +94,6 @@ int lnstat_update(struct lnstat_file *lnstat_files)
int i;
struct lnstat_field *lfi;
- rewind(lf->fp);
- if (!lf->compat) {
- /* skip first line */
- fgets(buf, sizeof(buf)-1, lf->fp);
- }
scan_lines(lf, 1);
for (i = 0, lfi = &lf->fields[i];
@@ -107,8 +105,6 @@ int lnstat_update(struct lnstat_file *lnstat_files)
/ lf->interval.tv_sec;
}
- rewind(lf->fp);
- fgets(buf, sizeof(buf)-1, lf->fp);
scan_lines(lf, 0);
}
}
@@ -142,7 +138,8 @@ static int lnstat_scan_fields(struct lnstat_file *lf)
char buf[FGETS_BUF_SIZE];
rewind(lf->fp);
- fgets(buf, sizeof(buf)-1, lf->fp);
+ if (!fgets(buf, sizeof(buf)-1, lf->fp))
+ return -1;
return __lnstat_scan_fields(lf, buf);
}
@@ -175,8 +172,10 @@ static struct lnstat_file *alloc_and_open(const char *path, const char *file)
/* allocate */
lf = malloc(sizeof(*lf));
- if (!lf)
+ if (!lf) {
+ fprintf(stderr, "out of memory\n");
return NULL;
+ }
/* initialize */
memset(lf, 0, sizeof(*lf));
@@ -193,6 +192,7 @@ static struct lnstat_file *alloc_and_open(const char *path, const char *file)
/* open */
lf->fp = fopen(lf->path, "r");
if (!lf->fp) {
+ perror(lf->path);
free(lf);
return NULL;
}
@@ -259,12 +259,16 @@ struct lnstat_file *lnstat_scan_dir(const char *path, const int num_req_files,
continue;
lf = alloc_and_open(path, de->d_name);
- if (!lf)
+ if (!lf) {
+ closedir(dir);
return NULL;
+ }
/* fill in field structure */
- if (lnstat_scan_fields(lf) < 0)
+ if (lnstat_scan_fields(lf) < 0) {
+ closedir(dir);
return NULL;
+ }
/* prepend to global list */
lf->next = lnstat_files;
@@ -322,8 +326,7 @@ struct lnstat_field *lnstat_find_field(struct lnstat_file *lnstat_files,
}
}
out:
- if (file)
- free(file);
+ free(file);
return ret;
}
diff --git a/misc/nstat.c b/misc/nstat.c
index c2cb0564..99705286 100644
--- a/misc/nstat.c
+++ b/misc/nstat.c
@@ -28,6 +28,7 @@
#include <math.h>
#include <getopt.h>
+#include <json_writer.h>
#include <SNAPSHOT.h>
int dump_zeros = 0;
@@ -35,6 +36,7 @@ int reset_history = 0;
int ignore_history = 0;
int no_output = 0;
int json_output = 0;
+int pretty = 0;
int no_update = 0;
int scan_interval = 0;
int time_constant = 0;
@@ -271,13 +273,15 @@ static void load_netstat(void)
static void dump_kern_db(FILE *fp, int to_hist)
{
+ json_writer_t *jw = json_output ? jsonw_new(fp) : NULL;
struct nstat_ent *n, *h;
- const char *eol = "\n";
h = hist_db;
- if (json_output)
- fprintf(fp, "{ \"%s\":{", info_source);
- else
+ if (jw) {
+ jsonw_pretty(jw, pretty);
+ jsonw_name(jw, info_source);
+ jsonw_start_object(jw);
+ } else
fprintf(fp, "#%s\n", info_source);
for (n=kern_db; n; n=n->next) {
@@ -297,26 +301,29 @@ static void dump_kern_db(FILE *fp, int to_hist)
}
}
- if (json_output) {
- fprintf(fp, "%s \"%s\":%llu",
- eol, n->id, val);
- eol = ",\n";
- } else
+ if (jw)
+ jsonw_uint_field(jw, n->id, val);
+ else
fprintf(fp, "%-32s%-16llu%6.1f\n", n->id, val, n->rate);
}
- if (json_output)
- fprintf(fp, "\n} }\n");
+
+ if (jw) {
+ jsonw_end_object(jw);
+ jsonw_destroy(&jw);
+ }
}
static void dump_incr_db(FILE *fp)
{
+ json_writer_t *jw = json_output ? jsonw_new(fp) : NULL;
struct nstat_ent *n, *h;
- const char *eol = "\n";
h = hist_db;
- if (json_output)
- fprintf(fp, "{ \"%s\":{", info_source);
- else
+ if (jw) {
+ jsonw_pretty(jw, pretty);
+ jsonw_name(jw, info_source);
+ jsonw_start_object(jw);
+ } else
fprintf(fp, "#%s\n", info_source);
for (n=kern_db; n; n=n->next) {
@@ -339,16 +346,17 @@ static void dump_incr_db(FILE *fp)
if (!match(n->id))
continue;
- if (json_output) {
- fprintf(fp, "%s \"%s\":%llu",
- eol, n->id, val);
- eol = ",\n";
- } else
+ if (jw)
+ jsonw_uint_field(jw, n->id, val);
+ else
fprintf(fp, "%-32s%-16llu%6.1f%s\n", n->id, val,
n->rate, ovfl?" (overflow)":"");
}
- if (json_output)
- fprintf(fp, "\n} }\n");
+
+ if (jw) {
+ jsonw_end_object(jw);
+ jsonw_destroy(&jw);
+ }
}
static int children;
@@ -485,6 +493,7 @@ static void usage(void)
" -d, --scan=SECS sample every statistics every SECS\n"
" -j, --json format output in JSON\n"
" -n, --nooutput do history only\n"
+" -p, --pretty pretty print\n"
" -r, --reset reset history\n"
" -s, --noupdate don\'t update history\n"
" -t, --interval=SECS report average over the last SECS\n"
@@ -501,6 +510,7 @@ static const struct option longopts[] = {
{ "json", 0, 0, 'j' },
{ "reset", 0, 0, 'r' },
{ "noupdate", 0, 0, 's' },
+ { "pretty", 0, 0, 'p' },
{ "interval", 1, 0, 't' },
{ "version", 0, 0, 'V' },
{ "zeros", 0, 0, 'z' },
@@ -515,7 +525,7 @@ int main(int argc, char *argv[])
int ch;
int fd;
- while ((ch = getopt_long(argc, argv, "h?vVzrnasd:t:j",
+ while ((ch = getopt_long(argc, argv, "h?vVzrnasd:t:jp",
longopts, NULL)) != EOF) {
switch(ch) {
case 'z':
@@ -546,6 +556,9 @@ int main(int argc, char *argv[])
case 'j':
json_output = 1;
break;
+ case 'p':
+ pretty = 1;
+ break;
case 'v':
case 'V':
printf("nstat utility, iproute2-ss%s\n", SNAPSHOT);
@@ -636,7 +649,8 @@ int main(int argc, char *argv[])
}
if (uptime >= 0 && time(NULL) >= stb.st_mtime+uptime) {
fprintf(stderr, "nstat: history is aged out, resetting\n");
- ftruncate(fileno(hist_fp), 0);
+ if (ftruncate(fileno(hist_fp), 0) < 0)
+ perror("nstat: ftruncate");
}
}
@@ -680,7 +694,8 @@ int main(int argc, char *argv[])
dump_incr_db(stdout);
}
if (!no_update) {
- ftruncate(fileno(hist_fp), 0);
+ if (ftruncate(fileno(hist_fp), 0) < 0)
+ perror("nstat: ftruncate");
rewind(hist_fp);
json_output = 0;
diff --git a/misc/ss.c b/misc/ss.c
index 954a30bd..13fcc8f6 100644
--- a/misc/ss.c
+++ b/misc/ss.c
@@ -99,6 +99,7 @@ int show_proc_ctx = 0;
int show_sock_ctx = 0;
/* If show_users & show_proc_ctx only do user_ent_hash_build() once */
int user_ent_hash_build_init = 0;
+int follow_events = 0;
int netid_width;
int state_width;
@@ -159,6 +160,7 @@ struct filter
int states;
int families;
struct ssfilter *f;
+ bool kill;
};
static const struct filter default_dbs[MAX_DB] = {
@@ -233,14 +235,12 @@ static struct filter current_filter;
static void filter_db_set(struct filter *f, int db)
{
f->states |= default_dbs[db].states;
- f->families |= default_dbs[db].families;
f->dbs |= 1 << db;
do_default = 0;
}
static void filter_af_set(struct filter *f, int af)
{
- f->dbs |= default_afs[af].dbs;
f->states |= default_afs[af].states;
f->families |= 1 << af;
do_default = 0;
@@ -266,21 +266,31 @@ static void filter_default_dbs(struct filter *f)
filter_db_set(f, NETLINK_DB);
}
-static void filter_merge(struct filter *af, struct filter *dbf, int states)
+static void filter_states_set(struct filter *f, int states)
{
- if (af->families)
- af->families = (af->families | dbf->families) & af->families;
- else
- af->families = dbf->families;
+ if (states)
+ f->states = (f->states | states) & states;
+}
+
+static void filter_merge_defaults(struct filter *f)
+{
+ int db;
+ int af;
- if (dbf->dbs)
- af->dbs = (af->dbs | dbf->dbs) & dbf->dbs;
+ for (db = 0; db < MAX_DB; db++) {
+ if (!(f->dbs & (1 << db)))
+ continue;
- if (dbf->states)
- af->states = (af->states | dbf->states) & dbf->states;
+ if (!(default_dbs[db].families & f->families))
+ f->families |= default_dbs[db].families;
+ }
+ for (af = 0; af < AF_MAX; af++) {
+ if (!(f->families & (1 << af)))
+ continue;
- if (states)
- af->states = (af->states | states) & states;
+ if (!(default_afs[af].dbs & f->dbs))
+ f->dbs |= default_afs[af].dbs;
+ }
}
static FILE *generic_proc_open(const char *env, const char *name)
@@ -448,7 +458,9 @@ static void user_ent_hash_build(void)
user_ent_hash_build_init = 1;
- strcpy(name, root);
+ strncpy(name, root, sizeof(name)-1);
+ name[sizeof(name)-1] = 0;
+
if (strlen(name) == 0 || name[strlen(name)-1] != '/')
strcat(name, "/");
@@ -472,10 +484,12 @@ static void user_ent_hash_build(void)
if (getpidcon(pid, &pid_context) != 0)
pid_context = strdup(no_ctx);
- sprintf(name + nameoff, "%d/fd/", pid);
+ snprintf(name + nameoff, sizeof(name) - nameoff, "%d/fd/", pid);
pos = strlen(name);
- if ((dir1 = opendir(name)) == NULL)
+ if ((dir1 = opendir(name)) == NULL) {
+ free(pid_context);
continue;
+ }
process[0] = '\0';
p = process;
@@ -491,7 +505,7 @@ static void user_ent_hash_build(void)
if (sscanf(d1->d_name, "%d%c", &fd, &crap) != 1)
continue;
- sprintf(name+pos, "%d", fd);
+ snprintf(name+pos, sizeof(name) - pos, "%d", fd);
link_len = readlink(name, lnk, sizeof(lnk)-1);
if (link_len == -1)
@@ -515,7 +529,8 @@ static void user_ent_hash_build(void)
snprintf(tmp, sizeof(tmp), "%s/%d/stat",
root, pid);
if ((fp = fopen(tmp, "r")) != NULL) {
- fscanf(fp, "%*d (%[^)])", p);
+ if (fscanf(fp, "%*d (%[^)])", p) < 1)
+ ; /* ignore */
fclose(fp);
}
}
@@ -541,7 +556,7 @@ static int find_entry(unsigned ino, char **buf, int type)
struct user_ent *p;
int cnt = 0;
char *ptr;
- char **new_buf = buf;
+ char *new_buf;
int len, new_buf_len;
int buf_used = 0;
int buf_len = 0;
@@ -583,12 +598,12 @@ static int find_entry(unsigned ino, char **buf, int type)
if (len < 0 || len >= buf_len - buf_used) {
new_buf_len = buf_len + ENTRY_BUF_SIZE;
- *new_buf = realloc(*buf, new_buf_len);
+ new_buf = realloc(*buf, new_buf_len);
if (!new_buf) {
fprintf(stderr, "ss: failed to malloc buffer\n");
abort();
}
- **buf = **new_buf;
+ *buf = new_buf;
buf_len = new_buf_len;
continue;
} else {
@@ -647,7 +662,10 @@ static int get_slabstat(struct slabstat *s)
cnt = sizeof(*s)/sizeof(int);
- fgets(buf, sizeof(buf), fp);
+ if (!fgets(buf, sizeof(buf), fp)) {
+ fclose(fp);
+ return -1;
+ }
while(fgets(buf, sizeof(buf), fp) != NULL) {
int i;
for (i=0; i<sizeof(slabstat_ids)/sizeof(slabstat_ids[0]); i++) {
@@ -667,18 +685,6 @@ static int get_slabstat(struct slabstat *s)
return 0;
}
-static inline void sock_addr_set_str(inet_prefix *prefix, char **ptr)
-{
- memcpy(prefix->data, ptr, sizeof(char *));
-}
-
-static inline char *sock_addr_get_str(const inet_prefix *prefix)
-{
- char *tmp ;
- memcpy(&tmp, prefix->data, sizeof(char *));
- return tmp;
-}
-
static unsigned long long cookie_sk_get(const uint32_t *cookie)
{
return (((unsigned long long)cookie[1] << 31) << 1) | cookie[0];
@@ -730,6 +736,8 @@ struct sockstat
int refcnt;
unsigned int iface;
unsigned long long sk;
+ char *name;
+ char *peer_name;
};
struct dctcpstat
@@ -747,7 +755,7 @@ struct tcpstat
int timer;
int timeout;
int probes;
- char *cong_alg;
+ char cong_alg[16];
double rto, ato, rtt, rttvar;
int qack, cwnd, ssthresh, backoff;
double send_bps;
@@ -759,6 +767,10 @@ struct tcpstat
unsigned int lastack;
double pacing_rate;
double pacing_rate_max;
+ unsigned long long bytes_acked;
+ unsigned long long bytes_received;
+ unsigned int segs_out;
+ unsigned int segs_in;
unsigned int unacked;
unsigned int retrans;
unsigned int retrans_total;
@@ -850,8 +862,7 @@ static const char *print_ms_timer(int timeout)
return buf;
}
-struct scache
-{
+struct scache {
struct scache *next;
int port;
char *name;
@@ -864,34 +875,39 @@ static void init_service_resolver(void)
{
char buf[128];
FILE *fp = popen("/usr/sbin/rpcinfo -p 2>/dev/null", "r");
- if (fp) {
- fgets(buf, sizeof(buf), fp);
- while (fgets(buf, sizeof(buf), fp) != NULL) {
- unsigned int progn, port;
- char proto[128], prog[128];
- if (sscanf(buf, "%u %*d %s %u %s", &progn, proto,
- &port, prog+4) == 4) {
- struct scache *c = malloc(sizeof(*c));
- if (c) {
- c->port = port;
- memcpy(prog, "rpc.", 4);
- c->name = strdup(prog);
- if (strcmp(proto, TCP_PROTO) == 0)
- c->proto = TCP_PROTO;
- else if (strcmp(proto, UDP_PROTO) == 0)
- c->proto = UDP_PROTO;
- else
- c->proto = NULL;
- c->next = rlist;
- rlist = c;
- }
- }
- }
+
+ if (!fp)
+ return;
+
+ if (!fgets(buf, sizeof(buf), fp)) {
pclose(fp);
+ return;
}
-}
+ while (fgets(buf, sizeof(buf), fp) != NULL) {
+ unsigned int progn, port;
+ char proto[128], prog[128] = "rpc.";
+ struct scache *c;
-static int ip_local_port_min, ip_local_port_max;
+ if (sscanf(buf, "%u %*d %s %u %s",
+ &progn, proto, &port, prog+4) != 4)
+ continue;
+
+ if (!(c = malloc(sizeof(*c))))
+ continue;
+
+ c->port = port;
+ c->name = strdup(prog);
+ if (strcmp(proto, TCP_PROTO) == 0)
+ c->proto = TCP_PROTO;
+ else if (strcmp(proto, UDP_PROTO) == 0)
+ c->proto = UDP_PROTO;
+ else
+ c->proto = NULL;
+ c->next = rlist;
+ rlist = c;
+ }
+ pclose(fp);
+}
/* Even do not try default linux ephemeral port ranges:
* default /etc/services contains so much of useless crap
@@ -901,19 +917,18 @@ static int ip_local_port_min, ip_local_port_max;
*/
static int is_ephemeral(int port)
{
- if (!ip_local_port_min) {
+ static int min = 0, max = 0;
+
+ if (!min) {
FILE *f = ephemeral_ports_open();
- if (f) {
- fscanf(f, "%d %d",
- &ip_local_port_min, &ip_local_port_max);
- fclose(f);
- } else {
- ip_local_port_min = 1024;
- ip_local_port_max = 4999;
+ if (!f || fscanf(f, "%d %d", &min, &max) < 2) {
+ min = 1024;
+ max = 4999;
}
+ if (f)
+ fclose(f);
}
-
- return (port >= ip_local_port_min && port<= ip_local_port_max);
+ return port >= min && port <= max;
}
@@ -941,11 +956,15 @@ static const char *__resolve_service(int port)
return NULL;
}
+#define SCACHE_BUCKETS 1024
+static struct scache *cache_htab[SCACHE_BUCKETS];
static const char *resolve_service(int port)
{
static char buf[128];
- static struct scache cache[256];
+ struct scache *c;
+ const char *res;
+ int hash;
if (port == 0) {
buf[0] = '*';
@@ -953,45 +972,35 @@ static const char *resolve_service(int port)
return buf;
}
- if (resolve_services) {
- if (dg_proto == RAW_PROTO) {
- return inet_proto_n2a(port, buf, sizeof(buf));
- } else {
- struct scache *c;
- const char *res;
- int hash = (port^(((unsigned long)dg_proto)>>2))&255;
-
- for (c = &cache[hash]; c; c = c->next) {
- if (c->port == port &&
- c->proto == dg_proto) {
- if (c->name)
- return c->name;
- goto do_numeric;
- }
- }
+ if (!resolve_services)
+ goto do_numeric;
- if ((res = __resolve_service(port)) != NULL) {
- if ((c = malloc(sizeof(*c))) == NULL)
- goto do_numeric;
- } else {
- c = &cache[hash];
- if (c->name)
- free(c->name);
- }
- c->port = port;
- c->name = NULL;
- c->proto = dg_proto;
- if (res) {
- c->name = strdup(res);
- c->next = cache[hash].next;
- cache[hash].next = c;
- }
- if (c->name)
- return c->name;
- }
+ if (dg_proto == RAW_PROTO)
+ return inet_proto_n2a(port, buf, sizeof(buf));
+
+
+ hash = (port^(((unsigned long)dg_proto)>>2)) % SCACHE_BUCKETS;
+
+ for (c = cache_htab[hash]; c; c = c->next) {
+ if (c->port == port && c->proto == dg_proto)
+ goto do_cache;
}
- do_numeric:
+ c = malloc(sizeof(*c));
+ if (!c)
+ goto do_numeric;
+ res = __resolve_service(port);
+ c->port = port;
+ c->name = res ? strdup(res) : NULL;
+ c->proto = dg_proto;
+ c->next = cache_htab[hash];
+ cache_htab[hash] = c;
+
+do_cache:
+ if (c->name)
+ return c->name;
+
+do_numeric:
sprintf(buf, "%u", port);
return buf;
}
@@ -1022,6 +1031,8 @@ static void inet_addr_print(const inet_prefix *a, int port, unsigned int ifindex
if (ifindex) {
ifname = ll_index_to_name(ifindex);
est_len -= strlen(ifname) + 1; /* +1 for percent char */
+ if (est_len < 0)
+ est_len = 0;
}
sock_addr_print_width(est_len, ap, ":", serv_width, resolve_service(port),
@@ -1057,9 +1068,9 @@ static int inet2_addr_match(const inet_prefix *a, const inet_prefix *p,
static int unix_match(const inet_prefix *a, const inet_prefix *p)
{
- char *addr = sock_addr_get_str(a);
- char *pattern = sock_addr_get_str(p);
-
+ char *addr, *pattern;
+ memcpy(&addr, a->data, sizeof(addr));
+ memcpy(&pattern, p->data, sizeof(pattern));
if (pattern == NULL)
return 1;
if (addr == NULL)
@@ -1072,26 +1083,18 @@ static int run_ssfilter(struct ssfilter *f, struct sockstat *s)
switch (f->type) {
case SSF_S_AUTO:
{
- static int low, high=65535;
-
if (s->local.family == AF_UNIX) {
- char *p = sock_addr_get_str(&s->local);
+ char *p;
+ memcpy(&p, s->local.data, sizeof(p));
return p == NULL || (p[0] == '@' && strlen(p) == 6 &&
strspn(p+1, "0123456789abcdef") == 5);
}
if (s->local.family == AF_PACKET)
- return s->lport == 0 && s->local.data == 0;
+ return s->lport == 0 && s->local.data[0] == 0;
if (s->local.family == AF_NETLINK)
return s->lport < 0;
- if (!low) {
- FILE *fp = ephemeral_ports_open();
- if (fp) {
- fscanf(fp, "%d%d", &low, &high);
- fclose(fp);
- }
- }
- return s->lport >= low && s->lport <= high;
+ return is_ephemeral(s->lport);
}
case SSF_DCOND:
{
@@ -1395,7 +1398,7 @@ void *parse_hostcond(char *addr, bool is_port)
addr+=5;
p = strdup(addr);
a.addr.bitlen = 8*strlen(p);
- sock_addr_set_str(&a.addr, &p);
+ memcpy(a.addr.data, &p, sizeof(p));
fam = AF_UNIX;
goto out;
}
@@ -1534,7 +1537,7 @@ out:
if (fam != AF_UNSPEC) {
f->families = 0;
filter_af_set(f, fam);
- filter_merge(f, f, 0);
+ filter_states_set(f, 0);
}
res = malloc(sizeof(*res));
@@ -1546,6 +1549,8 @@ out:
static char *proto_name(int protocol)
{
switch (protocol) {
+ case 0:
+ return "raw";
case IPPROTO_UDP:
return "udp";
case IPPROTO_TCP:
@@ -1656,7 +1661,7 @@ static void tcp_stats_print(struct tcpstat *s)
printf(" ecnseen");
if (s->has_fastopen_opt)
printf(" fastopen");
- if (s->cong_alg)
+ if (s->cong_alg[0])
printf(" %s", s->cong_alg);
if (s->has_wscale_opt)
printf(" wscale:%d,%d", s->snd_wscale, s->rcv_wscale);
@@ -1676,19 +1681,28 @@ static void tcp_stats_print(struct tcpstat *s)
if (s->mss)
printf(" mss:%d", s->mss);
- if (s->cwnd && s->cwnd != 2)
+ if (s->cwnd)
printf(" cwnd:%d", s->cwnd);
if (s->ssthresh)
printf(" ssthresh:%d", s->ssthresh);
+ if (s->bytes_acked)
+ printf(" bytes_acked:%llu", s->bytes_acked);
+ if (s->bytes_received)
+ printf(" bytes_received:%llu", s->bytes_received);
+ if (s->segs_out)
+ printf(" segs_out:%u", s->segs_out);
+ if (s->segs_in)
+ printf(" segs_in:%u", s->segs_in);
+
if (s->dctcp && s->dctcp->enabled) {
struct dctcpstat *dctcp = s->dctcp;
- printf("dctcp:(ce_state:%u,alpha:%u,ab_ecn:%u,ab_tot:%u)",
+ printf(" dctcp:(ce_state:%u,alpha:%u,ab_ecn:%u,ab_tot:%u)",
dctcp->ce_state, dctcp->alpha, dctcp->ab_ecn,
dctcp->ab_tot);
} else if (s->dctcp) {
- printf("dctcp:fallback_mode");
+ printf(" dctcp:fallback_mode");
}
if (s->send_bps)
@@ -1885,8 +1899,8 @@ static void tcp_show_info(const struct nlmsghdr *nlh, struct inet_diag_msg *r,
/* workaround for older kernels with less fields */
if (len < sizeof(*info)) {
info = alloca(sizeof(*info));
- memset(info, 0, sizeof(*info));
memcpy(info, RTA_DATA(tb[INET_DIAG_INFO]), len);
+ memset((char *)info + len, 0, sizeof(*info) - len);
} else
info = RTA_DATA(tb[INET_DIAG_INFO]);
@@ -1898,11 +1912,10 @@ static void tcp_show_info(const struct nlmsghdr *nlh, struct inet_diag_msg *r,
s.has_fastopen_opt = TCPI_HAS_OPT(info, TCPI_OPT_SYN_DATA);
}
- if (tb[INET_DIAG_CONG]) {
- const char *cong_attr = rta_getattr_str(tb[INET_DIAG_CONG]);
- s.cong_alg = malloc(strlen(cong_attr + 1));
- strcpy(s.cong_alg, cong_attr);
- }
+ if (tb[INET_DIAG_CONG])
+ strncpy(s.cong_alg,
+ rta_getattr_str(tb[INET_DIAG_CONG]),
+ sizeof(s.cong_alg) - 1);
if (TCPI_HAS_OPT(info, TCPI_OPT_WSCALE)) {
s.has_wscale_opt = true;
@@ -1973,11 +1986,12 @@ static void tcp_show_info(const struct nlmsghdr *nlh, struct inet_diag_msg *r,
info->tcpi_max_pacing_rate != ~0ULL)
s.pacing_rate_max = info->tcpi_max_pacing_rate * 8.;
}
+ s.bytes_acked = info->tcpi_bytes_acked;
+ s.bytes_received = info->tcpi_bytes_received;
+ s.segs_out = info->tcpi_segs_out;
+ s.segs_in = info->tcpi_segs_in;
tcp_stats_print(&s);
- if (s.dctcp)
- free(s.dctcp);
- if (s.cong_alg)
- free(s.cong_alg);
+ free(s.dctcp);
}
}
@@ -2013,6 +2027,9 @@ static int inet_show_sock(struct nlmsghdr *nlh, struct filter *f, int protocol)
if (f && f->f && run_ssfilter(f->f, &s) == 0)
return 0;
+ if (tb[INET_DIAG_PROTOCOL])
+ protocol = *(__u8 *)RTA_DATA(tb[INET_DIAG_PROTOCOL]);
+
inet_stats_print(&s, protocol);
if (show_options) {
@@ -2026,6 +2043,11 @@ static int inet_show_sock(struct nlmsghdr *nlh, struct filter *f, int protocol)
if (show_details) {
sock_details_print(&s);
+ if (s.local.family == AF_INET6 && tb[INET_DIAG_SKV6ONLY]) {
+ unsigned char v6only;
+ v6only = *(__u8 *)RTA_DATA(tb[INET_DIAG_SKV6ONLY]);
+ printf(" v6only:%u", v6only);
+ }
if (tb[INET_DIAG_SHUTDOWN]) {
unsigned char mask;
mask = *(__u8 *)RTA_DATA(tb[INET_DIAG_SHUTDOWN]);
@@ -2173,8 +2195,27 @@ static int sockdiag_send(int family, int fd, int protocol, struct filter *f)
struct inet_diag_arg {
struct filter *f;
int protocol;
+ struct rtnl_handle *rth;
};
+static int kill_inet_sock(const struct sockaddr_nl *addr,
+ struct nlmsghdr *h, void *arg)
+{
+ struct inet_diag_msg *d = NLMSG_DATA(h);
+ struct inet_diag_arg *diag_arg = arg;
+ struct rtnl_handle *rth = diag_arg->rth;
+ DIAG_REQUEST(req, struct inet_diag_req_v2 r);
+
+ req.nlh.nlmsg_type = SOCK_DESTROY;
+ req.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+ req.nlh.nlmsg_seq = ++rth->seq;
+ req.r.sdiag_family = d->idiag_family;
+ req.r.sdiag_protocol = diag_arg->protocol;
+ req.r.id = d->id;
+
+ return rtnl_talk(rth, &req.nlh, NULL, 0);
+}
+
static int show_one_inet_sock(const struct sockaddr_nl *addr,
struct nlmsghdr *h, void *arg)
{
@@ -2184,7 +2225,16 @@ static int show_one_inet_sock(const struct sockaddr_nl *addr,
if (!(diag_arg->f->families & (1 << r->idiag_family)))
return 0;
- if ((err = inet_show_sock(h, NULL, diag_arg->protocol)) < 0)
+ if (diag_arg->f->kill && kill_inet_sock(addr, h, arg) != 0) {
+ if (errno == EOPNOTSUPP || errno == ENOENT) {
+ /* Socket can't be closed, or is already closed. */
+ return 0;
+ } else {
+ perror("SOCK_DESTROY answers");
+ return -1;
+ }
+ }
+ if ((err = inet_show_sock(h, diag_arg->f, diag_arg->protocol)) < 0)
return err;
return 0;
@@ -2193,14 +2243,25 @@ static int show_one_inet_sock(const struct sockaddr_nl *addr,
static int inet_show_netlink(struct filter *f, FILE *dump_fp, int protocol)
{
int err = 0;
- struct rtnl_handle rth;
+ struct rtnl_handle rth, rth2;
int family = PF_INET;
struct inet_diag_arg arg = { .f = f, .protocol = protocol };
if (rtnl_open_byproto(&rth, 0, NETLINK_SOCK_DIAG))
return -1;
+
+ if (f->kill) {
+ if (rtnl_open_byproto(&rth2, 0, NETLINK_SOCK_DIAG)) {
+ rtnl_close(&rth);
+ return -1;
+ }
+ arg.rth = &rth2;
+ }
+
rth.dump = MAGIC_SEQ;
rth.dump_fp = dump_fp;
+ if (preferred_family == PF_INET6)
+ family = PF_INET6;
again:
if ((err = sockdiag_send(family, rth.fd, protocol, f)))
@@ -2213,13 +2274,15 @@ again:
}
goto Exit;
}
- if (family == PF_INET) {
+ if (family == PF_INET && preferred_family != PF_INET) {
family = PF_INET6;
goto again;
}
Exit:
rtnl_close(&rth);
+ if (arg.rth)
+ rtnl_close(arg.rth);
return err;
}
@@ -2352,8 +2415,7 @@ static int tcp_show(struct filter *f, int socktype)
outerr:
do {
int saved_errno = errno;
- if (buf)
- free(buf);
+ free(buf);
if (fp)
fclose(fp);
errno = saved_errno;
@@ -2390,7 +2452,7 @@ static int dgram_show_line(char *line, const struct filter *f, int family)
if (n < 9)
opt[0] = 0;
- inet_stats_print(&s, IPPROTO_UDP);
+ inet_stats_print(&s, dg_proto == UDP_PROTO ? IPPROTO_UDP : 0);
if (show_details && opt[0])
printf(" opt:\"%s\"", opt);
@@ -2482,12 +2544,9 @@ static void unix_list_free(struct sockstat *list)
{
while (list) {
struct sockstat *s = list;
- char *name = sock_addr_get_str(&s->local);
list = list->next;
-
- if (name)
- free(name);
+ free(s->name);
free(s);
}
}
@@ -2530,7 +2589,7 @@ static bool unix_use_proc(void)
static void unix_stats_print(struct sockstat *list, struct filter *f)
{
struct sockstat *s;
- char *local, *peer;
+ char *peer;
char *ctx_buf = NULL;
bool use_proc = unix_use_proc();
char port_name[30] = {};
@@ -2541,8 +2600,9 @@ static void unix_stats_print(struct sockstat *list, struct filter *f)
if (unix_type_skip(s, f))
continue;
- local = sock_addr_get_str(&s->local);
- peer = "*";
+ peer = "*";
+ if (s->peer_name)
+ peer = s->peer_name;
if (s->rport && use_proc) {
struct sockstat *p;
@@ -2555,24 +2615,26 @@ static void unix_stats_print(struct sockstat *list, struct filter *f)
if (!p) {
peer = "?";
} else {
- peer = sock_addr_get_str(&p->local);
- peer = peer ? : "*";
+ peer = p->name ? : "*";
}
}
if (use_proc && f->f) {
+ struct sockstat st;
+ st.local.family = AF_UNIX;
+ st.remote.family = AF_UNIX;
+ memcpy(st.local.data, &s->name, sizeof(s->name));
if (strcmp(peer, "*") == 0)
- memset(s->remote.data, 0, sizeof(char *));
+ memset(st.remote.data, 0, sizeof(peer));
else
- sock_addr_set_str(&s->remote, &peer);
-
- if (run_ssfilter(f->f, s) == 0)
+ memcpy(st.remote.data, &peer, sizeof(peer));
+ if (run_ssfilter(f->f, &st) == 0)
continue;
}
sock_state_print(s, unix_netid_name(s->type));
- sock_addr_print(local ?: "*", " ",
+ sock_addr_print(s->name ?: "*", " ",
int_to_str(s->lport, port_name), NULL);
sock_addr_print(peer, " ", int_to_str(s->rport, port_name),
NULL);
@@ -2600,8 +2662,8 @@ static int unix_show_sock(const struct sockaddr_nl *addr, struct nlmsghdr *nlh,
struct filter *f = (struct filter *)arg;
struct unix_diag_msg *r = NLMSG_DATA(nlh);
struct rtattr *tb[UNIX_DIAG_MAX+1];
- char *name = NULL;
- struct sockstat stat = {};
+ char name[128];
+ struct sockstat stat = { .name = "*", .peer_name = "*" };
parse_rtattr(tb, UNIX_DIAG_MAX, (struct rtattr*)(r+1),
nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*r)));
@@ -2622,12 +2684,12 @@ static int unix_show_sock(const struct sockaddr_nl *addr, struct nlmsghdr *nlh,
if (tb[UNIX_DIAG_NAME]) {
int len = RTA_PAYLOAD(tb[UNIX_DIAG_NAME]);
- name = malloc(len + 1);
memcpy(name, RTA_DATA(tb[UNIX_DIAG_NAME]), len);
name[len] = '\0';
if (name[0] == '\0')
name[0] = '@';
- sock_addr_set_str(&stat.local, &name);
+ stat.name = &name[0];
+ memcpy(stat.local.data, &stat.name, sizeof(stat.name));
}
if (tb[UNIX_DIAG_PEER])
stat.rport = rta_getattr_u32(tb[UNIX_DIAG_PEER]);
@@ -2651,8 +2713,6 @@ static int unix_show_sock(const struct sockaddr_nl *addr, struct nlmsghdr *nlh,
if (show_mem || show_details)
printf("\n");
- if (name)
- free(name);
return 0;
}
@@ -2709,18 +2769,23 @@ static int unix_show(struct filter *f)
if ((fp = net_unix_open()) == NULL)
return -1;
- fgets(buf, sizeof(buf)-1, fp);
+ if (!fgets(buf, sizeof(buf), fp)) {
+ fclose(fp);
+ return -1;
+ }
if (memcmp(buf, "Peer", 4) == 0)
newformat = 1;
cnt = 0;
- while (fgets(buf, sizeof(buf)-1, fp)) {
+ while (fgets(buf, sizeof(buf), fp)) {
struct sockstat *u, **insp;
int flags;
- if (!(u = malloc(sizeof(*u))))
+ if (!(u = calloc(1, sizeof(*u))))
break;
+ u->name = NULL;
+ u->peer_name = NULL;
if (sscanf(buf, "%x: %x %x %x %x %x %d %s",
&u->rport, &u->rq, &u->wq, &flags, &u->type,
@@ -2756,8 +2821,9 @@ static int unix_show(struct filter *f)
*insp = u;
if (name[0]) {
- char *tmp = strdup(name);
- sock_addr_set_str(&u->local, &tmp);
+ if ((u->name = malloc(strlen(name)+1)) == NULL)
+ break;
+ strcpy(u->name, name);
}
if (++cnt > MAX_UNIX_REMEMBER) {
unix_stats_print(list, f);
@@ -2826,13 +2892,27 @@ static int packet_stats_print(struct sockstat *s, const struct filter *f)
return 0;
}
+static void packet_show_ring(struct packet_diag_ring *ring)
+{
+ printf("blk_size:%d", ring->pdr_block_size);
+ printf(",blk_nr:%d", ring->pdr_block_nr);
+ printf(",frm_size:%d", ring->pdr_frame_size);
+ printf(",frm_nr:%d", ring->pdr_frame_nr);
+ printf(",tmo:%d", ring->pdr_retire_tmo);
+ printf(",features:0x%x", ring->pdr_features);
+}
+
static int packet_show_sock(const struct sockaddr_nl *addr,
struct nlmsghdr *nlh, void *arg)
{
const struct filter *f = arg;
struct packet_diag_msg *r = NLMSG_DATA(nlh);
+ struct packet_diag_info *pinfo = NULL;
+ struct packet_diag_ring *ring_rx = NULL, *ring_tx = NULL;
struct rtattr *tb[PACKET_DIAG_MAX+1];
struct sockstat stat = {};
+ uint32_t fanout = 0;
+ bool has_fanout = false;
parse_rtattr(tb, PACKET_DIAG_MAX, (struct rtattr*)(r+1),
nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*r)));
@@ -2853,16 +2933,82 @@ static int packet_show_sock(const struct sockaddr_nl *addr,
}
if (tb[PACKET_DIAG_INFO]) {
- struct packet_diag_info *pinfo = RTA_DATA(tb[PACKET_DIAG_INFO]);
+ pinfo = RTA_DATA(tb[PACKET_DIAG_INFO]);
stat.lport = stat.iface = pinfo->pdi_index;
}
if (tb[PACKET_DIAG_UID])
stat.uid = *(__u32 *)RTA_DATA(tb[PACKET_DIAG_UID]);
+ if (tb[PACKET_DIAG_RX_RING])
+ ring_rx = RTA_DATA(tb[PACKET_DIAG_RX_RING]);
+
+ if (tb[PACKET_DIAG_TX_RING])
+ ring_tx = RTA_DATA(tb[PACKET_DIAG_TX_RING]);
+
+ if (tb[PACKET_DIAG_FANOUT]) {
+ has_fanout = true;
+ fanout = *(uint32_t *)RTA_DATA(tb[PACKET_DIAG_FANOUT]);
+ }
+
if (packet_stats_print(&stat, f))
return 0;
+ if (show_details) {
+ if (pinfo) {
+ printf("\n\tver:%d", pinfo->pdi_version);
+ printf(" cpy_thresh:%d", pinfo->pdi_copy_thresh);
+ printf(" flags( ");
+ if (pinfo->pdi_flags & PDI_RUNNING)
+ printf("running");
+ if (pinfo->pdi_flags & PDI_AUXDATA)
+ printf(" auxdata");
+ if (pinfo->pdi_flags & PDI_ORIGDEV)
+ printf(" origdev");
+ if (pinfo->pdi_flags & PDI_VNETHDR)
+ printf(" vnethdr");
+ if (pinfo->pdi_flags & PDI_LOSS)
+ printf(" loss");
+ if (!pinfo->pdi_flags)
+ printf("0");
+ printf(" )");
+ }
+ if (ring_rx) {
+ printf("\n\tring_rx(");
+ packet_show_ring(ring_rx);
+ printf(")");
+ }
+ if (ring_tx) {
+ printf("\n\tring_tx(");
+ packet_show_ring(ring_tx);
+ printf(")");
+ }
+ if (has_fanout) {
+ uint16_t type = (fanout >> 16) & 0xffff;
+
+ printf("\n\tfanout(");
+ printf("id:%d,", fanout & 0xffff);
+ printf("type:");
+
+ if (type == 0)
+ printf("hash");
+ else if (type == 1)
+ printf("lb");
+ else if (type == 2)
+ printf("cpu");
+ else if (type == 3)
+ printf("roll");
+ else if (type == 4)
+ printf("random");
+ else if (type == 5)
+ printf("qm");
+ else
+ printf("0x%x", type);
+
+ printf(")");
+ }
+ }
+
if (show_bpf && tb[PACKET_DIAG_FILTER]) {
struct sock_filter *fil =
RTA_DATA(tb[PACKET_DIAG_FILTER]);
@@ -2886,7 +3032,8 @@ static int packet_show_netlink(struct filter *f)
DIAG_REQUEST(req, struct packet_diag_req r);
req.r.sdiag_family = AF_PACKET;
- req.r.pdiag_show = PACKET_SHOW_INFO | PACKET_SHOW_MEMINFO | PACKET_SHOW_FILTER;
+ req.r.pdiag_show = PACKET_SHOW_INFO | PACKET_SHOW_MEMINFO |
+ PACKET_SHOW_FILTER | PACKET_SHOW_RING_CFG | PACKET_SHOW_FANOUT;
return handle_netlink_request(f, &req.nlh, sizeof(req), packet_show_sock);
}
@@ -2926,6 +3073,7 @@ static int packet_show_line(char *buf, const struct filter *f, int fam)
static int packet_show(struct filter *f)
{
FILE *fp;
+ int rc = 0;
if (!filter_af_get(f, AF_PACKET) || !(f->states & (1 << SS_CLOSE)))
return 0;
@@ -2937,9 +3085,10 @@ static int packet_show(struct filter *f)
if ((fp = net_packet_open()) == NULL)
return -1;
if (generic_record_read(fp, packet_show_line, f, AF_PACKET))
- return -1;
+ rc = -1;
- return 0;
+ fclose(fp);
+ return rc;
}
static int netlink_show_one(struct filter *f,
@@ -2983,11 +3132,13 @@ static int netlink_show_one(struct filter *f,
strncpy(procname, "kernel", 6);
} else if (pid > 0) {
FILE *fp;
- sprintf(procname, "%s/%d/stat",
+ snprintf(procname, sizeof(procname), "%s/%d/stat",
getenv("PROC_ROOT") ? : "/proc", pid);
if ((fp = fopen(procname, "r")) != NULL) {
if (fscanf(fp, "%*d (%[^)])", procname) == 1) {
- sprintf(procname+strlen(procname), "/%d", pid);
+ snprintf(procname+strlen(procname),
+ sizeof(procname)-strlen(procname),
+ "/%d", pid);
done = 1;
}
fclose(fp);
@@ -3106,9 +3257,12 @@ static int netlink_show(struct filter *f)
if ((fp = net_netlink_open()) == NULL)
return -1;
- fgets(buf, sizeof(buf)-1, fp);
+ if (!fgets(buf, sizeof(buf), fp)) {
+ fclose(fp);
+ return -1;
+ }
- while (fgets(buf, sizeof(buf)-1, fp)) {
+ while (fgets(buf, sizeof(buf), fp)) {
sscanf(buf, "%llx %d %d %x %d %d %llx %d",
&sk,
&prot, &pid, &groups, &rq, &wq, &cb, &rc);
@@ -3116,9 +3270,68 @@ static int netlink_show(struct filter *f)
netlink_show_one(f, prot, pid, groups, 0, 0, 0, rq, wq, sk, cb);
}
+ fclose(fp);
return 0;
}
+struct sock_diag_msg {
+ __u8 sdiag_family;
+};
+
+static int generic_show_sock(const struct sockaddr_nl *addr,
+ struct nlmsghdr *nlh, void *arg)
+{
+ struct sock_diag_msg *r = NLMSG_DATA(nlh);
+ struct inet_diag_arg inet_arg = { .f = arg, .protocol = IPPROTO_MAX };
+
+ switch (r->sdiag_family) {
+ case AF_INET:
+ case AF_INET6:
+ return show_one_inet_sock(addr, nlh, &inet_arg);
+ case AF_UNIX:
+ return unix_show_sock(addr, nlh, arg);
+ case AF_PACKET:
+ return packet_show_sock(addr, nlh, arg);
+ case AF_NETLINK:
+ return netlink_show_sock(addr, nlh, arg);
+ default:
+ return -1;
+ }
+}
+
+static int handle_follow_request(struct filter *f)
+{
+ int ret = -1;
+ int groups = 0;
+ struct rtnl_handle rth;
+
+ if (f->families & (1 << AF_INET) && f->dbs & (1 << TCP_DB))
+ groups |= 1 << (SKNLGRP_INET_TCP_DESTROY - 1);
+ if (f->families & (1 << AF_INET) && f->dbs & (1 << UDP_DB))
+ groups |= 1 << (SKNLGRP_INET_UDP_DESTROY - 1);
+ if (f->families & (1 << AF_INET6) && f->dbs & (1 << TCP_DB))
+ groups |= 1 << (SKNLGRP_INET6_TCP_DESTROY - 1);
+ if (f->families & (1 << AF_INET6) && f->dbs & (1 << UDP_DB))
+ groups |= 1 << (SKNLGRP_INET6_UDP_DESTROY - 1);
+
+ if (groups == 0)
+ return -1;
+
+ if (rtnl_open_byproto(&rth, groups, NETLINK_SOCK_DIAG))
+ return -1;
+
+ rth.dump = 0;
+ rth.local.nl_pid = 0;
+
+ if (rtnl_dump_filter(&rth, generic_show_sock, f))
+ goto Exit;
+
+ ret = 0;
+Exit:
+ rtnl_close(&rth);
+ return ret;
+}
+
struct snmpstat
{
int tcp_estab;
@@ -3301,6 +3514,7 @@ static void _usage(FILE *dest)
" -i, --info show internal TCP information\n"
" -s, --summary show socket usage summary\n"
" -b, --bpf show bpf filter socket information\n"
+" -E, --events continually display sockets as they are destroyed\n"
" -Z, --context display process SELinux security contexts\n"
" -z, --contexts display process and socket SELinux security contexts\n"
" -N, --net switch to the specified network namespace name\n"
@@ -3315,6 +3529,8 @@ static void _usage(FILE *dest)
" -x, --unix display only Unix domain sockets\n"
" -f, --family=FAMILY display sockets of type FAMILY\n"
"\n"
+" -K, --kill forcibly close sockets, display what was closed\n"
+"\n"
" -A, --query=QUERY, --socket=QUERY\n"
" QUERY := {all|inet|tcp|udp|raw|unix|unix_dgram|unix_stream|unix_seqpacket|packet|netlink}[,QUERY]\n"
"\n"
@@ -3383,6 +3599,7 @@ static const struct option long_opts[] = {
{ "info", 0, 0, 'i' },
{ "processes", 0, 0, 'p' },
{ "bpf", 0, 0, 'b' },
+ { "events", 0, 0, 'E' },
{ "dccp", 0, 0, 'd' },
{ "tcp", 0, 0, 't' },
{ "udp", 0, 0, 'u' },
@@ -3404,6 +3621,7 @@ static const struct option long_opts[] = {
{ "context", 0, 0, 'Z' },
{ "contexts", 0, 0, 'z' },
{ "net", 1, 0, 'N' },
+ { "kill", 0, 0, 'K' },
{ 0 }
};
@@ -3416,10 +3634,9 @@ int main(int argc, char *argv[])
const char *dump_tcpdiag = NULL;
FILE *filter_fp = NULL;
int ch;
- struct filter dbs_filter = {};
int state_filter = 0;
- while ((ch = getopt_long(argc, argv, "dhaletuwxnro460spbf:miA:D:F:vVzZN:",
+ while ((ch = getopt_long(argc, argv, "dhaletuwxnro460spbEf:miA:D:F:vVzZN:K",
long_opts, NULL)) != EOF) {
switch(ch) {
case 'n':
@@ -3449,17 +3666,20 @@ int main(int argc, char *argv[])
show_options = 1;
show_bpf++;
break;
+ case 'E':
+ follow_events = 1;
+ break;
case 'd':
- filter_db_set(&dbs_filter, DCCP_DB);
+ filter_db_set(&current_filter, DCCP_DB);
break;
case 't':
- filter_db_set(&dbs_filter, TCP_DB);
+ filter_db_set(&current_filter, TCP_DB);
break;
case 'u':
- filter_db_set(&dbs_filter, UDP_DB);
+ filter_db_set(&current_filter, UDP_DB);
break;
case 'w':
- filter_db_set(&dbs_filter, RAW_DB);
+ filter_db_set(&current_filter, RAW_DB);
break;
case 'x':
filter_af_set(&current_filter, AF_UNIX);
@@ -3503,6 +3723,8 @@ int main(int argc, char *argv[])
char *p, *p1;
if (!saw_query) {
current_filter.dbs = 0;
+ state_filter = state_filter ?
+ state_filter : SS_CONN;
saw_query = 1;
do_default = 0;
}
@@ -3511,44 +3733,44 @@ int main(int argc, char *argv[])
if ((p1 = strchr(p, ',')) != NULL)
*p1 = 0;
if (strcmp(p, "all") == 0) {
- filter_default_dbs(&dbs_filter);
+ filter_default_dbs(&current_filter);
} else if (strcmp(p, "inet") == 0) {
- filter_db_set(&dbs_filter, UDP_DB);
- filter_db_set(&dbs_filter, DCCP_DB);
- filter_db_set(&dbs_filter, TCP_DB);
- filter_db_set(&dbs_filter, RAW_DB);
+ filter_db_set(&current_filter, UDP_DB);
+ filter_db_set(&current_filter, DCCP_DB);
+ filter_db_set(&current_filter, TCP_DB);
+ filter_db_set(&current_filter, RAW_DB);
} else if (strcmp(p, "udp") == 0) {
- filter_db_set(&dbs_filter, UDP_DB);
+ filter_db_set(&current_filter, UDP_DB);
} else if (strcmp(p, "dccp") == 0) {
- filter_db_set(&dbs_filter, DCCP_DB);
+ filter_db_set(&current_filter, DCCP_DB);
} else if (strcmp(p, "tcp") == 0) {
- filter_db_set(&dbs_filter, TCP_DB);
+ filter_db_set(&current_filter, TCP_DB);
} else if (strcmp(p, "raw") == 0) {
- filter_db_set(&dbs_filter, RAW_DB);
+ filter_db_set(&current_filter, RAW_DB);
} else if (strcmp(p, "unix") == 0) {
- filter_db_set(&dbs_filter, UNIX_ST_DB);
- filter_db_set(&dbs_filter, UNIX_DG_DB);
- filter_db_set(&dbs_filter, UNIX_SQ_DB);
+ filter_db_set(&current_filter, UNIX_ST_DB);
+ filter_db_set(&current_filter, UNIX_DG_DB);
+ filter_db_set(&current_filter, UNIX_SQ_DB);
} else if (strcasecmp(p, "unix_stream") == 0 ||
strcmp(p, "u_str") == 0) {
- filter_db_set(&dbs_filter, UNIX_ST_DB);
+ filter_db_set(&current_filter, UNIX_ST_DB);
} else if (strcasecmp(p, "unix_dgram") == 0 ||
strcmp(p, "u_dgr") == 0) {
- filter_db_set(&dbs_filter, UNIX_DG_DB);
+ filter_db_set(&current_filter, UNIX_DG_DB);
} else if (strcasecmp(p, "unix_seqpacket") == 0 ||
strcmp(p, "u_seq") == 0) {
- filter_db_set(&dbs_filter, UNIX_SQ_DB);
+ filter_db_set(&current_filter, UNIX_SQ_DB);
} else if (strcmp(p, "packet") == 0) {
- filter_db_set(&dbs_filter, PACKET_R_DB);
- filter_db_set(&dbs_filter, PACKET_DG_DB);
+ filter_db_set(&current_filter, PACKET_R_DB);
+ filter_db_set(&current_filter, PACKET_DG_DB);
} else if (strcmp(p, "packet_raw") == 0 ||
strcmp(p, "p_raw") == 0) {
- filter_db_set(&dbs_filter, PACKET_R_DB);
+ filter_db_set(&current_filter, PACKET_R_DB);
} else if (strcmp(p, "packet_dgram") == 0 ||
strcmp(p, "p_dgr") == 0) {
- filter_db_set(&dbs_filter, PACKET_DG_DB);
+ filter_db_set(&current_filter, PACKET_DG_DB);
} else if (strcmp(p, "netlink") == 0) {
- filter_db_set(&dbs_filter, NETLINK_DB);
+ filter_db_set(&current_filter, NETLINK_DB);
} else {
fprintf(stderr, "ss: \"%s\" is illegal socket table id\n", p);
usage();
@@ -3595,9 +3817,12 @@ int main(int argc, char *argv[])
if (netns_switch(optarg))
exit(1);
break;
+ case 'K':
+ current_filter.kill = 1;
+ break;
case 'h':
- case '?':
help();
+ case '?':
default:
usage();
}
@@ -3612,12 +3837,6 @@ int main(int argc, char *argv[])
exit(0);
}
- /* Now parse filter... */
- if (argc == 0 && filter_fp) {
- if (ssfilter_parse(&current_filter.f, 0, NULL, filter_fp))
- usage();
- }
-
while (argc > 0) {
if (strcmp(*argv, "state") == 0) {
NEXT_ARG();
@@ -3641,11 +3860,11 @@ int main(int argc, char *argv[])
if (do_default) {
state_filter = state_filter ? state_filter : SS_CONN;
filter_default_dbs(&current_filter);
- filter_merge(&current_filter, &current_filter, state_filter);
- } else {
- filter_merge(&current_filter, &dbs_filter, state_filter);
}
+ filter_states_set(&current_filter, state_filter);
+ filter_merge_defaults(&current_filter);
+
if (resolve_services && resolve_hosts &&
(current_filter.dbs&(UNIX_DBM|(1<<TCP_DB)|(1<<UDP_DB)|(1<<DCCP_DB))))
init_service_resolver();
@@ -3741,6 +3960,9 @@ int main(int argc, char *argv[])
fflush(stdout);
+ if (follow_events)
+ exit(handle_follow_request(&current_filter));
+
if (current_filter.dbs & (1<<NETLINK_DB))
netlink_show(&current_filter);
if (current_filter.dbs & PACKET_DBM)
diff --git a/misc/ssfilter.h b/misc/ssfilter.h
index b20092bc..53922a84 100644
--- a/misc/ssfilter.h
+++ b/misc/ssfilter.h
@@ -20,4 +20,3 @@ struct ssfilter
int ssfilter_parse(struct ssfilter **f, int argc, char **argv, FILE *fp);
void *parse_hostcond(char *addr, bool is_port);
-
diff --git a/tc/Android.mk b/tc/Android.mk
index 1e8b3be0..467b08a0 100644
--- a/tc/Android.mk
+++ b/tc/Android.mk
@@ -1,7 +1,7 @@
LOCAL_PATH := $(call my-dir)
include $(CLEAR_VARS)
-LOCAL_SRC_FILES := tc.c tc_qdisc.c q_cbq.c tc_util.c tc_class.c tc_core.c m_action.c \
+LOCAL_SRC_FILES := tc.c tc_exec.c tc_qdisc.c q_cbq.c tc_util.c tc_class.c tc_core.c m_action.c \
m_estimator.c tc_filter.c tc_monitor.c tc_stab.c tc_cbq.c \
tc_estimator.c f_u32.c m_police.c q_ingress.c m_mirred.c q_htb.c
diff --git a/tc/Makefile b/tc/Makefile
index d831a153..f5bea877 100644
--- a/tc/Makefile
+++ b/tc/Makefile
@@ -1,6 +1,6 @@
-TCOBJ= tc.o tc_qdisc.o tc_class.o tc_filter.o tc_util.o \
- tc_monitor.o tc_bpf.o m_police.o m_estimator.o m_action.o \
- m_ematch.o emp_ematch.yacc.o emp_ematch.lex.o
+TCOBJ= tc.o tc_qdisc.o tc_class.o tc_filter.o tc_util.o tc_monitor.o \
+ tc_exec.o tc_bpf.o m_police.o m_estimator.o m_action.o m_ematch.o \
+ emp_ematch.yacc.o emp_ematch.lex.o
include ../Config
@@ -30,6 +30,7 @@ TCMODULES += f_basic.o
TCMODULES += f_bpf.o
TCMODULES += f_flow.o
TCMODULES += f_cgroup.o
+TCMODULES += f_flower.o
TCMODULES += q_dsmark.o
TCMODULES += q_gred.o
TCMODULES += f_tcindex.o
@@ -46,6 +47,7 @@ TCMODULES += m_skbedit.o
TCMODULES += m_csum.o
TCMODULES += m_simple.o
TCMODULES += m_vlan.o
+TCMODULES += m_connmark.o
TCMODULES += m_bpf.o
TCMODULES += p_ip.o
TCMODULES += p_icmp.o
@@ -62,6 +64,8 @@ TCMODULES += q_fq_codel.o
TCMODULES += q_fq.o
TCMODULES += q_pie.o
TCMODULES += q_hhf.o
+TCMODULES += q_clsact.o
+TCMODULES += e_bpf.o
ifeq ($(TC_CONFIG_IPSET), y)
ifeq ($(TC_CONFIG_XT), y)
@@ -89,6 +93,11 @@ else
endif
endif
+ifeq ($(TC_CONFIG_ELF),y)
+ CFLAGS += -DHAVE_ELF
+ LDLIBS += -lelf
+endif
+
TCOBJ += $(TCMODULES)
LDLIBS += -L. -ltc -lm
diff --git a/tc/README.last b/tc/README.last
index 9400438a..63f6f7b0 100644
--- a/tc/README.last
+++ b/tc/README.last
@@ -43,5 +43,3 @@ It is available only for alpha and pentiums with correct
CPU timestamp. It is the fastest way, use it when it is available,
but remember: not all pentiums have this facility, and
a lot of them have clock, broken by APM etc. etc.
-
-
diff --git a/tc/e_bpf.c b/tc/e_bpf.c
new file mode 100644
index 00000000..2d650a46
--- /dev/null
+++ b/tc/e_bpf.c
@@ -0,0 +1,179 @@
+/*
+ * e_bpf.c BPF exec proxy
+ *
+ * This program is free software; you can distribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Daniel Borkmann <daniel@iogearbox.net>
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+
+#include "utils.h"
+
+#include "tc_util.h"
+#include "tc_bpf.h"
+
+#include "bpf_elf.h"
+#include "bpf_scm.h"
+
+#define BPF_DEFAULT_CMD "/bin/sh"
+
+static char *argv_default[] = { BPF_DEFAULT_CMD, NULL };
+
+static void explain(void)
+{
+ fprintf(stderr, "Usage: ... bpf [ import UDS_FILE ] [ run CMD ]\n");
+ fprintf(stderr, " ... bpf [ debug ]\n");
+ fprintf(stderr, " ... bpf [ graft MAP_FILE ] [ key KEY ]\n");
+ fprintf(stderr, " `... [ object-file OBJ_FILE ] [ type TYPE ] [ section NAME ] [ verbose ]\n");
+ fprintf(stderr, " `... [ object-pinned PROG_FILE ]\n");
+ fprintf(stderr, "\n");
+ fprintf(stderr, "Where UDS_FILE provides the name of a unix domain socket file\n");
+ fprintf(stderr, "to import eBPF maps and the optional CMD denotes the command\n");
+ fprintf(stderr, "to be executed (default: \'%s\').\n", BPF_DEFAULT_CMD);
+ fprintf(stderr, "Where MAP_FILE points to a pinned map, OBJ_FILE to an object file\n");
+ fprintf(stderr, "and PROG_FILE to a pinned program. TYPE can be {cls, act}, where\n");
+ fprintf(stderr, "\'cls\' is default. KEY is optional and can be inferred from the\n");
+ fprintf(stderr, "section name, otherwise it needs to be provided.\n");
+}
+
+static int bpf_num_env_entries(void)
+{
+ char **envp;
+ int num;
+
+ for (num = 0, envp = environ; *envp != NULL; envp++)
+ num++;
+ return num;
+}
+
+static int parse_bpf(struct exec_util *eu, int argc, char **argv)
+{
+ char **argv_run = argv_default, **envp_run, *tmp;
+ int ret, i, env_old, env_num, env_map;
+ const char *bpf_uds_name = NULL;
+ int fds[BPF_SCM_MAX_FDS];
+ struct bpf_map_aux aux;
+
+ if (argc == 0)
+ return 0;
+
+ while (argc > 0) {
+ if (matches(*argv, "run") == 0) {
+ NEXT_ARG();
+ argv_run = argv;
+ break;
+ } else if (matches(*argv, "import") == 0) {
+ NEXT_ARG();
+ bpf_uds_name = *argv;
+ } else if (matches(*argv, "debug") == 0 ||
+ matches(*argv, "dbg") == 0) {
+ if (bpf_trace_pipe())
+ fprintf(stderr,
+ "No trace pipe, tracefs not mounted?\n");
+ return -1;
+ } else if (matches(*argv, "graft") == 0) {
+ const char *bpf_map_path;
+ bool has_key = false;
+ uint32_t key;
+
+ NEXT_ARG();
+ bpf_map_path = *argv;
+ NEXT_ARG();
+ if (matches(*argv, "key") == 0) {
+ NEXT_ARG();
+ if (get_unsigned(&key, *argv, 0)) {
+ fprintf(stderr, "Illegal \"key\"\n");
+ return -1;
+ }
+ has_key = true;
+ NEXT_ARG();
+ }
+ return bpf_graft_map(bpf_map_path, has_key ?
+ &key : NULL, argc, argv);
+ } else {
+ explain();
+ return -1;
+ }
+
+ NEXT_ARG_FWD();
+ }
+
+ if (!bpf_uds_name) {
+ fprintf(stderr, "bpf: No import parameter provided!\n");
+ explain();
+ return -1;
+ }
+
+ if (argv_run != argv_default && argc == 0) {
+ fprintf(stderr, "bpf: No run command provided!\n");
+ explain();
+ return -1;
+ }
+
+ memset(fds, 0, sizeof(fds));
+ memset(&aux, 0, sizeof(aux));
+
+ ret = bpf_recv_map_fds(bpf_uds_name, fds, &aux, ARRAY_SIZE(fds));
+ if (ret < 0) {
+ fprintf(stderr, "bpf: Could not receive fds!\n");
+ return -1;
+ }
+
+ if (aux.num_ent == 0) {
+ envp_run = environ;
+ goto out;
+ }
+
+ env_old = bpf_num_env_entries();
+ env_num = env_old + aux.num_ent + 2;
+ env_map = env_old + 1;
+
+ envp_run = malloc(sizeof(*envp_run) * env_num);
+ if (!envp_run) {
+ fprintf(stderr, "bpf: No memory left to allocate env!\n");
+ goto err;
+ }
+
+ for (i = 0; i < env_old; i++)
+ envp_run[i] = environ[i];
+
+ ret = asprintf(&tmp, "BPF_NUM_MAPS=%u", aux.num_ent);
+ if (ret < 0)
+ goto err_free;
+
+ envp_run[env_old] = tmp;
+
+ for (i = env_map; i < env_num - 1; i++) {
+ ret = asprintf(&tmp, "BPF_MAP%u=%u",
+ aux.ent[i - env_map].id,
+ fds[i - env_map]);
+ if (ret < 0)
+ goto err_free_env;
+
+ envp_run[i] = tmp;
+ }
+
+ envp_run[env_num - 1] = NULL;
+out:
+ return execvpe(argv_run[0], argv_run, envp_run);
+
+err_free_env:
+ for (--i; i >= env_old; i--)
+ free(envp_run[i]);
+err_free:
+ free(envp_run);
+err:
+ for (i = 0; i < aux.num_ent; i++)
+ close(fds[i]);
+ return -1;
+}
+
+struct exec_util bpf_exec_util = {
+ .id = "bpf",
+ .parse_eopt = parse_bpf,
+};
diff --git a/tc/emp_ematch.y b/tc/emp_ematch.y
index bc08da20..2e6cf353 100644
--- a/tc/emp_ematch.y
+++ b/tc/emp_ematch.y
@@ -98,4 +98,3 @@ invert:
{
ematch_err = strdup(s);
}
-
diff --git a/tc/f_basic.c b/tc/f_basic.c
index 1c33ca3d..4adf1d22 100644
--- a/tc/f_basic.c
+++ b/tc/f_basic.c
@@ -43,9 +43,6 @@ static int basic_parse_opt(struct filter_util *qu, char *handle,
struct rtattr *tail;
long h = 0;
- if (argc == 0)
- return 0;
-
if (handle) {
h = strtol(handle, NULL, 0);
if (h == LONG_MIN || h == LONG_MAX) {
@@ -54,9 +51,11 @@ static int basic_parse_opt(struct filter_util *qu, char *handle,
return -1;
}
}
-
t->tcm_handle = h;
+ if (argc == 0)
+ return 0;
+
tail = (struct rtattr*)(((void*)n)+NLMSG_ALIGN(n->nlmsg_len));
addattr_l(n, MAX_MSG, TCA_OPTIONS, NULL, 0);
diff --git a/tc/f_bpf.c b/tc/f_bpf.c
index e2af94e3..afc2e582 100644
--- a/tc/f_bpf.c
+++ b/tc/f_bpf.c
@@ -11,100 +11,102 @@
#include <stdio.h>
#include <stdlib.h>
-#include <unistd.h>
-#include <syslog.h>
-#include <fcntl.h>
-#include <sys/socket.h>
-#include <netinet/in.h>
-#include <arpa/inet.h>
-#include <string.h>
-#include <stdbool.h>
-#include <errno.h>
-#include <limits.h>
-#include <linux/filter.h>
-#include <linux/if.h>
+
+#include <linux/bpf.h>
#include "utils.h"
#include "tc_util.h"
#include "tc_bpf.h"
+static const enum bpf_prog_type bpf_type = BPF_PROG_TYPE_SCHED_CLS;
+
+static const int nla_tbl[BPF_NLA_MAX] = {
+ [BPF_NLA_OPS_LEN] = TCA_BPF_OPS_LEN,
+ [BPF_NLA_OPS] = TCA_BPF_OPS,
+ [BPF_NLA_FD] = TCA_BPF_FD,
+ [BPF_NLA_NAME] = TCA_BPF_NAME,
+};
+
static void explain(void)
{
fprintf(stderr, "Usage: ... bpf ...\n");
fprintf(stderr, "\n");
- fprintf(stderr, " [inline]: run bytecode BPF_BYTECODE\n");
- fprintf(stderr, " [from file]: run bytecode-file FILE\n");
+ fprintf(stderr, "BPF use case:\n");
+ fprintf(stderr, " bytecode BPF_BYTECODE\n");
+ fprintf(stderr, " bytecode-file FILE\n");
fprintf(stderr, "\n");
- fprintf(stderr, " [ action ACTION_SPEC ]\n");
- fprintf(stderr, " [ classid CLASSID ]\n");
+ fprintf(stderr, "eBPF use case:\n");
+ fprintf(stderr, " object-file FILE [ section CLS_NAME ] [ export UDS_FILE ]");
+ fprintf(stderr, " [ verbose ] [ direct-action ]\n");
+ fprintf(stderr, " object-pinned FILE [ direct-action ]\n");
+ fprintf(stderr, "\n");
+ fprintf(stderr, "Common remaining options:\n");
+ fprintf(stderr, " [ action ACTION_SPEC ]\n");
+ fprintf(stderr, " [ classid CLASSID ]\n");
fprintf(stderr, "\n");
fprintf(stderr, "Where BPF_BYTECODE := \'s,c t f k,c t f k,c t f k,...\'\n");
- fprintf(stderr, " c,t,f,k and s are decimals; s denotes number of 4-tuples\n");
- fprintf(stderr, "Where FILE points to a file containing the BPF_BYTECODE string\n");
- fprintf(stderr, "\nACTION_SPEC := ... look at individual actions\n");
+ fprintf(stderr, "c,t,f,k and s are decimals; s denotes number of 4-tuples\n");
+ fprintf(stderr, "\n");
+ fprintf(stderr, "Where FILE points to a file containing the BPF_BYTECODE string,\n");
+ fprintf(stderr, "an ELF file containing eBPF map definitions and bytecode, or a\n");
+ fprintf(stderr, "pinned eBPF program.\n");
+ fprintf(stderr, "\n");
+ fprintf(stderr, "Where CLS_NAME refers to the section name containing the\n");
+ fprintf(stderr, "classifier (default \'%s\').\n", bpf_default_section(bpf_type));
+ fprintf(stderr, "\n");
+ fprintf(stderr, "Where UDS_FILE points to a unix domain socket file in order\n");
+ fprintf(stderr, "to hand off control of all created eBPF maps to an agent.\n");
+ fprintf(stderr, "\n");
+ fprintf(stderr, "ACTION_SPEC := ... look at individual actions\n");
fprintf(stderr, "NOTE: CLASSID is parsed as hexadecimal input.\n");
}
static int bpf_parse_opt(struct filter_util *qu, char *handle,
int argc, char **argv, struct nlmsghdr *n)
{
+ const char *bpf_obj = NULL, *bpf_uds_name = NULL;
struct tcmsg *t = NLMSG_DATA(n);
+ unsigned int bpf_flags = 0;
+ bool seen_run = false;
struct rtattr *tail;
- long h = 0;
+ int ret = 0;
if (argc == 0)
return 0;
if (handle) {
- h = strtol(handle, NULL, 0);
- if (h == LONG_MIN || h == LONG_MAX) {
- fprintf(stderr, "Illegal handle \"%s\", must be "
- "numeric.\n", handle);
+ if (get_u32(&t->tcm_handle, handle, 0)) {
+ fprintf(stderr, "Illegal \"handle\"\n");
return -1;
}
}
- t->tcm_handle = h;
-
- tail = (struct rtattr*)(((void*)n)+NLMSG_ALIGN(n->nlmsg_len));
+ tail = (struct rtattr *)(((void *)n) + NLMSG_ALIGN(n->nlmsg_len));
addattr_l(n, MAX_MSG, TCA_OPTIONS, NULL, 0);
while (argc > 0) {
if (matches(*argv, "run") == 0) {
- bool from_file;
- struct sock_filter bpf_ops[BPF_MAXINSNS];
- __u16 bpf_len;
- int ret;
-
- NEXT_ARG();
- if (strcmp(*argv, "bytecode-file") == 0) {
- from_file = true;
- } else if (strcmp(*argv, "bytecode") == 0) {
- from_file = false;
- } else {
- fprintf(stderr, "What is \"%s\"?\n", *argv);
- explain();
- return -1;
- }
NEXT_ARG();
- ret = bpf_parse_ops(argc, argv, bpf_ops, from_file);
- if (ret < 0) {
- fprintf(stderr, "Illegal \"bytecode\"\n");
+opt_bpf:
+ seen_run = true;
+ if (bpf_parse_common(&argc, &argv, nla_tbl, bpf_type,
+ &bpf_obj, &bpf_uds_name, n)) {
+ fprintf(stderr, "Failed to retrieve (e)BPF data!\n");
return -1;
}
- bpf_len = ret;
- addattr16(n, MAX_MSG, TCA_BPF_OPS_LEN, bpf_len);
- addattr_l(n, MAX_MSG, TCA_BPF_OPS, &bpf_ops,
- bpf_len * sizeof(struct sock_filter));
} else if (matches(*argv, "classid") == 0 ||
- strcmp(*argv, "flowid") == 0) {
- unsigned handle;
+ matches(*argv, "flowid") == 0) {
+ unsigned int handle;
+
NEXT_ARG();
if (get_tc_classid(&handle, *argv)) {
fprintf(stderr, "Illegal \"classid\"\n");
return -1;
}
- addattr_l(n, MAX_MSG, TCA_BPF_CLASSID, &handle, 4);
+ addattr32(n, MAX_MSG, TCA_BPF_CLASSID, handle);
+ } else if (matches(*argv, "direct-action") == 0 ||
+ matches(*argv, "da") == 0) {
+ bpf_flags |= TCA_BPF_FLAG_ACT_DIRECT;
} else if (matches(*argv, "action") == 0) {
NEXT_ARG();
if (parse_action(&argc, &argv, TCA_BPF_ACT, n)) {
@@ -119,19 +121,30 @@ static int bpf_parse_opt(struct filter_util *qu, char *handle,
return -1;
}
continue;
- } else if (strcmp(*argv, "help") == 0) {
+ } else if (matches(*argv, "help") == 0) {
explain();
return -1;
} else {
+ if (!seen_run)
+ goto opt_bpf;
+
fprintf(stderr, "What is \"%s\"?\n", *argv);
explain();
return -1;
}
- argc--; argv++;
+
+ NEXT_ARG_FWD();
}
- tail->rta_len = (((void*)n)+n->nlmsg_len) - (void*)tail;
- return 0;
+ if (bpf_obj && bpf_flags)
+ addattr32(n, MAX_MSG, TCA_BPF_FLAGS, bpf_flags);
+
+ tail->rta_len = (((void *)n) + n->nlmsg_len) - (void *)tail;
+
+ if (bpf_uds_name)
+ ret = bpf_send_map_fds(bpf_uds_name, bpf_obj);
+
+ return ret;
}
static int bpf_print_opt(struct filter_util *qu, FILE *f,
@@ -153,9 +166,23 @@ static int bpf_print_opt(struct filter_util *qu, FILE *f,
sprint_tc_classid(rta_getattr_u32(tb[TCA_BPF_CLASSID]), b1));
}
- if (tb[TCA_BPF_OPS] && tb[TCA_BPF_OPS_LEN])
+ if (tb[TCA_BPF_NAME])
+ fprintf(f, "%s ", rta_getattr_str(tb[TCA_BPF_NAME]));
+ else if (tb[TCA_BPF_FD])
+ fprintf(f, "pfd %u ", rta_getattr_u32(tb[TCA_BPF_FD]));
+
+ if (tb[TCA_BPF_FLAGS]) {
+ unsigned int flags = rta_getattr_u32(tb[TCA_BPF_FLAGS]);
+
+ if (flags & TCA_BPF_FLAG_ACT_DIRECT)
+ fprintf(f, "direct-action ");
+ }
+
+ if (tb[TCA_BPF_OPS] && tb[TCA_BPF_OPS_LEN]) {
bpf_print_ops(f, tb[TCA_BPF_OPS],
rta_getattr_u16(tb[TCA_BPF_OPS_LEN]));
+ fprintf(f, "\n");
+ }
if (tb[TCA_BPF_POLICE]) {
fprintf(f, "\n");
@@ -170,7 +197,7 @@ static int bpf_print_opt(struct filter_util *qu, FILE *f,
}
struct filter_util bpf_filter_util = {
- .id = "bpf",
- .parse_fopt = bpf_parse_opt,
- .print_fopt = bpf_print_opt,
+ .id = "bpf",
+ .parse_fopt = bpf_parse_opt,
+ .print_fopt = bpf_print_opt,
};
diff --git a/tc/f_flower.c b/tc/f_flower.c
new file mode 100644
index 00000000..db9cc296
--- /dev/null
+++ b/tc/f_flower.c
@@ -0,0 +1,519 @@
+/*
+ * f_flower.c Flower Classifier
+ *
+ * This program is free software; you can distribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Jiri Pirko <jiri@resnulli.us>
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <syslog.h>
+#include <string.h>
+#include <net/if.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+
+#include "utils.h"
+#include "tc_util.h"
+#include "rt_names.h"
+
+static void explain(void)
+{
+ fprintf(stderr, "Usage: ... flower [ MATCH-LIST ]\n");
+ fprintf(stderr, " [ action ACTION-SPEC ] [ classid CLASSID ]\n");
+ fprintf(stderr, "\n");
+ fprintf(stderr, "Where: MATCH-LIST := [ MATCH-LIST ] MATCH\n");
+ fprintf(stderr, " MATCH := { indev DEV-NAME | \n");
+ fprintf(stderr, " dst_mac MAC-ADDR | \n");
+ fprintf(stderr, " src_mac MAC-ADDR | \n");
+ fprintf(stderr, " [ipv4 | ipv6 ] | \n");
+ fprintf(stderr, " ip_proto [tcp | udp | IP-PROTO ] | \n");
+ fprintf(stderr, " dst_ip [ IPV4-ADDR | IPV6-ADDR ] | \n");
+ fprintf(stderr, " src_ip [ IPV4-ADDR | IPV6-ADDR ] | \n");
+ fprintf(stderr, " dst_port PORT-NUMBER | \n");
+ fprintf(stderr, " src_port PORT-NUMBER }\n");
+ fprintf(stderr, " FILTERID := X:Y:Z\n");
+ fprintf(stderr, " ACTION-SPEC := ... look at individual actions\n");
+ fprintf(stderr, "\n");
+ fprintf(stderr, "NOTE: CLASSID, ETH-TYPE, IP-PROTO are parsed as hexadecimal input.\n");
+ fprintf(stderr, "NOTE: There can be only used one mask per one prio. If user needs\n");
+ fprintf(stderr, " to specify different mask, he has to use different prio.\n");
+}
+
+static int flower_parse_eth_addr(char *str, int addr_type, int mask_type,
+ struct nlmsghdr *n)
+{
+ int ret;
+ char addr[ETH_ALEN];
+
+ ret = ll_addr_a2n(addr, sizeof(addr), str);
+ if (ret < 0)
+ return -1;
+ addattr_l(n, MAX_MSG, addr_type, addr, sizeof(addr));
+ memset(addr, 0xff, ETH_ALEN);
+ addattr_l(n, MAX_MSG, mask_type, addr, sizeof(addr));
+ return 0;
+}
+
+static int flower_parse_ip_proto(char *str, __be16 eth_type, int type,
+ __u8 *p_ip_proto, struct nlmsghdr *n)
+{
+ int ret;
+ __u8 ip_proto;
+
+ if (eth_type != htons(ETH_P_IP) && eth_type != htons(ETH_P_IPV6)) {
+ fprintf(stderr, "Illegal \"eth_type\" for ip proto\n");
+ return -1;
+ }
+ if (matches(str, "tcp") == 0) {
+ ip_proto = IPPROTO_TCP;
+ } else if (matches(str, "udp") == 0) {
+ ip_proto = IPPROTO_UDP;
+ } else {
+ ret = get_u8(&ip_proto, str, 16);
+ if (ret)
+ return -1;
+ }
+ addattr8(n, MAX_MSG, type, ip_proto);
+ *p_ip_proto = ip_proto;
+ return 0;
+}
+
+static int flower_parse_ip_addr(char *str, __be16 eth_type,
+ int addr4_type, int mask4_type,
+ int addr6_type, int mask6_type,
+ struct nlmsghdr *n)
+{
+ int ret;
+ inet_prefix addr;
+ int family;
+ int bits;
+ int i;
+
+ if (eth_type == htons(ETH_P_IP)) {
+ family = AF_INET;
+ } else if (eth_type == htons(ETH_P_IPV6)) {
+ family = AF_INET6;
+ } else {
+ fprintf(stderr, "Illegal \"eth_type\" for ip address\n");
+ return -1;
+ }
+
+ ret = get_prefix(&addr, str, family);
+ if (ret)
+ return -1;
+
+ if (addr.family != family)
+ return -1;
+
+ addattr_l(n, MAX_MSG, addr.family == AF_INET ? addr4_type : addr6_type,
+ addr.data, addr.bytelen);
+
+ memset(addr.data, 0xff, addr.bytelen);
+ bits = addr.bitlen;
+ for (i = 0; i < addr.bytelen / 4; i++) {
+ if (!bits) {
+ addr.data[i] = 0;
+ } else if (bits / 32 >= 1) {
+ bits -= 32;
+ } else {
+ addr.data[i] <<= 32 - bits;
+ addr.data[i] = htonl(addr.data[i]);
+ bits = 0;
+ }
+ }
+
+ addattr_l(n, MAX_MSG, addr.family == AF_INET ? mask4_type : mask6_type,
+ addr.data, addr.bytelen);
+
+ return 0;
+}
+
+static int flower_parse_port(char *str, __u8 ip_port,
+ int tcp_type, int udp_type, struct nlmsghdr *n)
+{
+ int ret;
+ int type;
+ __be16 port;
+
+ if (ip_port == IPPROTO_TCP) {
+ type = tcp_type;
+ } else if (ip_port == IPPROTO_UDP) {
+ type = udp_type;
+ } else {
+ fprintf(stderr, "Illegal \"ip_proto\" for port\n");
+ return -1;
+ }
+
+ ret = get_u16(&port, str, 10);
+ if (ret)
+ return -1;
+
+ addattr16(n, MAX_MSG, type, htons(port));
+
+ return 0;
+}
+
+static int flower_parse_opt(struct filter_util *qu, char *handle,
+ int argc, char **argv, struct nlmsghdr *n)
+{
+ int ret;
+ struct tcmsg *t = NLMSG_DATA(n);
+ struct rtattr *tail;
+ __be16 eth_type = TC_H_MIN(t->tcm_info);
+ __u8 ip_proto = 0xff;
+
+ if (handle) {
+ ret = get_u32(&t->tcm_handle, handle, 0);
+ if (ret) {
+ fprintf(stderr, "Illegal \"handle\"\n");
+ return -1;
+ }
+ }
+
+ tail = (struct rtattr *) (((void *) n) + NLMSG_ALIGN(n->nlmsg_len));
+ addattr_l(n, MAX_MSG, TCA_OPTIONS, NULL, 0);
+
+ if (argc == 0) {
+ /*at minimal we will match all ethertype packets */
+ goto parse_done;
+ }
+
+ while (argc > 0) {
+ if (matches(*argv, "classid") == 0 ||
+ matches(*argv, "flowid") == 0) {
+ unsigned handle;
+
+ NEXT_ARG();
+ ret = get_tc_classid(&handle, *argv);
+ if (ret) {
+ fprintf(stderr, "Illegal \"classid\"\n");
+ return -1;
+ }
+ addattr_l(n, MAX_MSG, TCA_FLOWER_CLASSID, &handle, 4);
+ } else if (matches(*argv, "indev") == 0) {
+ char ifname[IFNAMSIZ];
+
+ NEXT_ARG();
+ memset(ifname, 0, sizeof(ifname));
+ strncpy(ifname, *argv, sizeof(ifname) - 1);
+ addattrstrz(n, MAX_MSG, TCA_FLOWER_INDEV, ifname);
+ } else if (matches(*argv, "dst_mac") == 0) {
+ NEXT_ARG();
+ ret = flower_parse_eth_addr(*argv,
+ TCA_FLOWER_KEY_ETH_DST,
+ TCA_FLOWER_KEY_ETH_DST_MASK,
+ n);
+ if (ret < 0) {
+ fprintf(stderr, "Illegal \"dst_mac\"\n");
+ return -1;
+ }
+ } else if (matches(*argv, "src_mac") == 0) {
+ NEXT_ARG();
+ ret = flower_parse_eth_addr(*argv,
+ TCA_FLOWER_KEY_ETH_SRC,
+ TCA_FLOWER_KEY_ETH_SRC_MASK,
+ n);
+ if (ret < 0) {
+ fprintf(stderr, "Illegal \"src_mac\"\n");
+ return -1;
+ }
+ } else if (matches(*argv, "ip_proto") == 0) {
+ NEXT_ARG();
+ ret = flower_parse_ip_proto(*argv, eth_type,
+ TCA_FLOWER_KEY_IP_PROTO,
+ &ip_proto, n);
+ if (ret < 0) {
+ fprintf(stderr, "Illegal \"ip_proto\"\n");
+ return -1;
+ }
+ } else if (matches(*argv, "dst_ip") == 0) {
+ NEXT_ARG();
+ ret = flower_parse_ip_addr(*argv, eth_type,
+ TCA_FLOWER_KEY_IPV4_DST,
+ TCA_FLOWER_KEY_IPV4_DST_MASK,
+ TCA_FLOWER_KEY_IPV6_DST,
+ TCA_FLOWER_KEY_IPV6_DST_MASK,
+ n);
+ if (ret < 0) {
+ fprintf(stderr, "Illegal \"dst_ip\"\n");
+ return -1;
+ }
+ } else if (matches(*argv, "src_ip") == 0) {
+ NEXT_ARG();
+ ret = flower_parse_ip_addr(*argv, eth_type,
+ TCA_FLOWER_KEY_IPV4_SRC,
+ TCA_FLOWER_KEY_IPV4_SRC_MASK,
+ TCA_FLOWER_KEY_IPV6_SRC,
+ TCA_FLOWER_KEY_IPV6_SRC_MASK,
+ n);
+ if (ret < 0) {
+ fprintf(stderr, "Illegal \"src_ip\"\n");
+ return -1;
+ }
+ } else if (matches(*argv, "dst_port") == 0) {
+ NEXT_ARG();
+ ret = flower_parse_port(*argv, ip_proto,
+ TCA_FLOWER_KEY_TCP_DST,
+ TCA_FLOWER_KEY_UDP_DST, n);
+ if (ret < 0) {
+ fprintf(stderr, "Illegal \"dst_port\"\n");
+ return -1;
+ }
+ } else if (matches(*argv, "src_port") == 0) {
+ NEXT_ARG();
+ ret = flower_parse_port(*argv, ip_proto,
+ TCA_FLOWER_KEY_TCP_SRC,
+ TCA_FLOWER_KEY_UDP_SRC, n);
+ if (ret < 0) {
+ fprintf(stderr, "Illegal \"src_port\"\n");
+ return -1;
+ }
+ } else if (matches(*argv, "action") == 0) {
+ NEXT_ARG();
+ ret = parse_action(&argc, &argv, TCA_FLOWER_ACT, n);
+ if (ret) {
+ fprintf(stderr, "Illegal \"action\"\n");
+ return -1;
+ }
+ continue;
+ } else if (strcmp(*argv, "help") == 0) {
+ explain();
+ return -1;
+ } else {
+ fprintf(stderr, "What is \"%s\"?\n", *argv);
+ explain();
+ return -1;
+ }
+ argc--; argv++;
+ }
+
+parse_done:
+ ret = addattr16(n, MAX_MSG, TCA_FLOWER_KEY_ETH_TYPE, eth_type);
+ if (ret) {
+ fprintf(stderr, "Illegal \"eth_type\"(0x%x)\n",
+ ntohs(eth_type));
+ return -1;
+ }
+
+ tail->rta_len = (((void*)n)+n->nlmsg_len) - (void*)tail;
+
+ return 0;
+}
+
+static int __mask_bits(char *addr, size_t len)
+{
+ int bits = 0;
+ bool hole = false;
+ int i;
+ int j;
+
+ for (i = 0; i < len; i++, addr++) {
+ for (j = 7; j >= 0; j--) {
+ if (((*addr) >> j) & 0x1) {
+ if (hole)
+ return -1;
+ bits++;
+ } else if (bits) {
+ hole = true;
+ } else{
+ return -1;
+ }
+ }
+ }
+ return bits;
+}
+
+static void flower_print_eth_addr(FILE *f, char *name,
+ struct rtattr *addr_attr,
+ struct rtattr *mask_attr)
+{
+ SPRINT_BUF(b1);
+ int bits;
+
+ if (!addr_attr || RTA_PAYLOAD(addr_attr) != ETH_ALEN)
+ return;
+ fprintf(f, "\n %s %s", name, ll_addr_n2a(RTA_DATA(addr_attr), ETH_ALEN,
+ 0, b1, sizeof(b1)));
+ if (!mask_attr || RTA_PAYLOAD(mask_attr) != ETH_ALEN)
+ return;
+ bits = __mask_bits(RTA_DATA(mask_attr), ETH_ALEN);
+ if (bits < 0)
+ fprintf(f, "/%s", ll_addr_n2a(RTA_DATA(mask_attr), ETH_ALEN,
+ 0, b1, sizeof(b1)));
+ else if (bits < ETH_ALEN * 8)
+ fprintf(f, "/%d", bits);
+}
+
+static void flower_print_eth_type(FILE *f, __be16 *p_eth_type,
+ struct rtattr *eth_type_attr)
+{
+ __be16 eth_type;
+
+ if (!eth_type_attr)
+ return;
+
+ eth_type = rta_getattr_u16(eth_type_attr);
+ fprintf(f, "\n eth_type ");
+ if (eth_type == htons(ETH_P_IP))
+ fprintf(f, "ipv4");
+ else if (eth_type == htons(ETH_P_IPV6))
+ fprintf(f, "ipv6");
+ else
+ fprintf(f, "%04x", ntohs(eth_type));
+ *p_eth_type = eth_type;
+}
+
+static void flower_print_ip_proto(FILE *f, __u8 *p_ip_proto,
+ struct rtattr *ip_proto_attr)
+{
+ __u8 ip_proto;
+
+ if (!ip_proto_attr)
+ return;
+
+ ip_proto = rta_getattr_u8(ip_proto_attr);
+ fprintf(f, "\n ip_proto ");
+ if (ip_proto == IPPROTO_TCP)
+ fprintf(f, "tcp");
+ else if (ip_proto == IPPROTO_UDP)
+ fprintf(f, "udp");
+ else
+ fprintf(f, "%02x", ip_proto);
+ *p_ip_proto = ip_proto;
+}
+
+static void flower_print_ip_addr(FILE *f, char *name, __be16 eth_type,
+ struct rtattr *addr4_attr,
+ struct rtattr *mask4_attr,
+ struct rtattr *addr6_attr,
+ struct rtattr *mask6_attr)
+{
+ SPRINT_BUF(b1);
+ struct rtattr *addr_attr;
+ struct rtattr *mask_attr;
+ int family;
+ size_t len;
+ int bits;
+
+ if (eth_type == htons(ETH_P_IP)) {
+ family = AF_INET;
+ addr_attr = addr4_attr;
+ mask_attr = mask4_attr;
+ len = 4;
+ } else if (eth_type == htons(ETH_P_IPV6)) {
+ family = AF_INET6;
+ addr_attr = addr6_attr;
+ mask_attr = mask6_attr;
+ len = 16;
+ } else {
+ return;
+ }
+ if (!addr_attr || RTA_PAYLOAD(addr_attr) != len)
+ return;
+ fprintf(f, "\n %s %s", name, rt_addr_n2a(family,
+ RTA_PAYLOAD(addr_attr),
+ RTA_DATA(addr_attr),
+ b1, sizeof(b1)));
+ if (!mask_attr || RTA_PAYLOAD(mask_attr) != len)
+ return;
+ bits = __mask_bits(RTA_DATA(mask_attr), len);
+ if (bits < 0)
+ fprintf(f, "/%s", rt_addr_n2a(family,
+ RTA_PAYLOAD(mask_attr),
+ RTA_DATA(mask_attr),
+ b1, sizeof(b1)));
+ else if (bits < len * 8)
+ fprintf(f, "/%d", bits);
+}
+
+static void flower_print_port(FILE *f, char *name, __u8 ip_proto,
+ struct rtattr *tcp_attr,
+ struct rtattr *udp_attr)
+{
+ struct rtattr *attr;
+
+ if (ip_proto == IPPROTO_TCP)
+ attr = tcp_attr;
+ else if (ip_proto == IPPROTO_UDP)
+ attr = udp_attr;
+ else
+ return;
+ if (!attr)
+ return;
+ fprintf(f, "\n %s %d", name, ntohs(rta_getattr_u16(attr)));
+}
+
+static int flower_print_opt(struct filter_util *qu, FILE *f,
+ struct rtattr *opt, __u32 handle)
+{
+ struct rtattr *tb[TCA_FLOWER_MAX + 1];
+ __be16 eth_type = 0;
+ __u8 ip_proto = 0xff;
+
+ if (!opt)
+ return 0;
+
+ parse_rtattr_nested(tb, TCA_FLOWER_MAX, opt);
+
+ if (handle)
+ fprintf(f, "handle 0x%x ", handle);
+
+ if (tb[TCA_FLOWER_CLASSID]) {
+ SPRINT_BUF(b1);
+ fprintf(f, "classid %s ",
+ sprint_tc_classid(rta_getattr_u32(tb[TCA_FLOWER_CLASSID]),
+ b1));
+ }
+
+ if (tb[TCA_FLOWER_INDEV]) {
+ struct rtattr *attr = tb[TCA_FLOWER_INDEV];
+
+ fprintf(f, "\n indev %s", rta_getattr_str(attr));
+ }
+
+ flower_print_eth_addr(f, "dst_mac", tb[TCA_FLOWER_KEY_ETH_DST],
+ tb[TCA_FLOWER_KEY_ETH_DST_MASK]);
+ flower_print_eth_addr(f, "src_mac", tb[TCA_FLOWER_KEY_ETH_SRC],
+ tb[TCA_FLOWER_KEY_ETH_SRC_MASK]);
+
+ flower_print_eth_type(f, &eth_type, tb[TCA_FLOWER_KEY_ETH_TYPE]);
+ flower_print_ip_proto(f, &ip_proto, tb[TCA_FLOWER_KEY_IP_PROTO]);
+
+ flower_print_ip_addr(f, "dst_ip", eth_type,
+ tb[TCA_FLOWER_KEY_IPV4_DST],
+ tb[TCA_FLOWER_KEY_IPV4_DST_MASK],
+ tb[TCA_FLOWER_KEY_IPV6_DST],
+ tb[TCA_FLOWER_KEY_IPV6_DST_MASK]);
+
+ flower_print_ip_addr(f, "src_ip", eth_type,
+ tb[TCA_FLOWER_KEY_IPV4_SRC],
+ tb[TCA_FLOWER_KEY_IPV4_SRC_MASK],
+ tb[TCA_FLOWER_KEY_IPV6_SRC],
+ tb[TCA_FLOWER_KEY_IPV6_SRC_MASK]);
+
+ flower_print_port(f, "dst_port", ip_proto,
+ tb[TCA_FLOWER_KEY_TCP_DST],
+ tb[TCA_FLOWER_KEY_UDP_DST]);
+
+ flower_print_port(f, "src_port", ip_proto,
+ tb[TCA_FLOWER_KEY_TCP_SRC],
+ tb[TCA_FLOWER_KEY_UDP_SRC]);
+
+ if (tb[TCA_FLOWER_ACT]) {
+ tc_print_action(f, tb[TCA_FLOWER_ACT]);
+ }
+
+ return 0;
+}
+
+struct filter_util flower_filter_util = {
+ .id = "flower",
+ .parse_fopt = flower_parse_opt,
+ .print_fopt = flower_print_opt,
+};
diff --git a/tc/f_route.c b/tc/f_route.c
index 23c4ecc7..4e9032c5 100644
--- a/tc/f_route.c
+++ b/tc/f_route.c
@@ -28,7 +28,7 @@
static void explain(void)
{
fprintf(stderr, "Usage: ... route [ from REALM | fromif TAG ] [ to REALM ]\n");
- fprintf(stderr, " [ flowid CLASSID ] [ action ACTION_SPEC ]]\n");
+ fprintf(stderr, " [ classid CLASSID ] [ action ACTION_SPEC ]\n");
fprintf(stderr, " ACTION_SPEC := ... look at individual actions\n");
fprintf(stderr, " CLASSID := X:Y\n");
fprintf(stderr, "\nNOTE: CLASSID is parsed as hexadecimal input.\n");
diff --git a/tc/f_rsvp.c b/tc/f_rsvp.c
index cb7b8fba..1fe9b15f 100644
--- a/tc/f_rsvp.c
+++ b/tc/f_rsvp.c
@@ -27,7 +27,7 @@
static void explain(void)
{
fprintf(stderr, "Usage: ... rsvp ipproto PROTOCOL session DST[/PORT | GPI ]\n");
- fprintf(stderr, " [ sender SRC[/PORT | GPI ]\n");
+ fprintf(stderr, " [ sender SRC[/PORT | GPI ] ]\n");
fprintf(stderr, " [ classid CLASSID ] [ action ACTION_SPEC ]\n");
fprintf(stderr, " [ tunnelid ID ] [ tunnel ID skip NUMBER ]\n");
fprintf(stderr, "Where: GPI := { flowlabel NUMBER | spi/ah SPI | spi/esp SPI |\n");
diff --git a/tc/f_u32.c b/tc/f_u32.c
index cb63869d..0b976789 100644
--- a/tc/f_u32.c
+++ b/tc/f_u32.c
@@ -61,14 +61,14 @@ static int get_u32_handle(__u32 *handle, const char *str)
if (htid>=0x1000)
return -1;
if (*tmp) {
- str = tmp+1;
+ str = tmp + 1;
hash = strtoul(str, &tmp, 16);
if (tmp == str && *str != ':' && *str != 0)
return -1;
if (hash>=0x100)
return -1;
if (*tmp) {
- str = tmp+1;
+ str = tmp + 1;
nodeid = strtoul(str, &tmp, 16);
if (tmp == str && *str != 0)
return -1;
@@ -124,9 +124,9 @@ static int pack_key(struct tc_u32_sel *sel, __u32 key, __u32 mask,
for (i=0; i<hwm; i++) {
if (sel->keys[i].off == off && sel->keys[i].offmask == offmask) {
- __u32 intersect = mask&sel->keys[i].mask;
+ __u32 intersect = mask & sel->keys[i].mask;
- if ((key^sel->keys[i].val) & intersect)
+ if ((key ^ sel->keys[i].val) & intersect)
return -1;
sel->keys[i].val |= key;
sel->keys[i].mask |= mask;
@@ -346,7 +346,7 @@ static int parse_ip_addr(int *argc_p, char ***argv_p, struct tc_u32_sel *sel,
mask = 0;
if (addr.bitlen)
- mask = htonl(0xFFFFFFFF<<(32-addr.bitlen));
+ mask = htonl(0xFFFFFFFF << (32 - addr.bitlen));
if (pack_key(sel, addr.data[0], mask, off, offmask) < 0)
return -1;
res = 0;
@@ -381,17 +381,17 @@ static int parse_ip6_addr(int *argc_p, char ***argv_p,
}
plen = addr.bitlen;
- for (i=0; i<plen; i+=32) {
-// if (((i+31)&~0x1F)<=plen) {
+ for (i = 0; i < plen; i += 32) {
+// if (((i + 31) & ~0x1F) <= plen) {
if (i + 31 <= plen) {
- res = pack_key(sel, addr.data[i/32],
- 0xFFFFFFFF, off+4*(i/32), offmask);
+ res = pack_key(sel, addr.data[i / 32],
+ 0xFFFFFFFF, off + 4 * (i / 32), offmask);
if (res < 0)
return -1;
} else if (i < plen) {
- __u32 mask = htonl(0xFFFFFFFF << (32 - (plen -i )));
- res = pack_key(sel, addr.data[i/32],
- mask, off+4*(i/32), offmask);
+ __u32 mask = htonl(0xFFFFFFFF << (32 - (plen - i)));
+ res = pack_key(sel, addr.data[i / 32],
+ mask, off + 4 * (i / 32), offmask);
if (res < 0)
return -1;
}
@@ -496,7 +496,8 @@ static int parse_ip(int *argc_p, char ***argv_p, struct tc_u32_sel *sel)
NEXT_ARG();
res = parse_ip_addr(&argc, &argv, sel, 16);
} else if (strcmp(*argv, "tos") == 0 ||
- matches(*argv, "dsfield") == 0) {
+ matches(*argv, "dsfield") == 0 ||
+ matches(*argv, "precedence") == 0) {
NEXT_ARG();
res = parse_u8(&argc, &argv, sel, 1, 0);
} else if (strcmp(*argv, "ihl") == 0) {
@@ -505,9 +506,6 @@ static int parse_ip(int *argc_p, char ***argv_p, struct tc_u32_sel *sel)
} else if (strcmp(*argv, "protocol") == 0) {
NEXT_ARG();
res = parse_u8(&argc, &argv, sel, 9, 0);
- } else if (matches(*argv, "precedence") == 0) {
- NEXT_ARG();
- res = parse_u8(&argc, &argv, sel, 1, 0);
} else if (strcmp(*argv, "nofrag") == 0) {
argc--; argv++;
res = pack_key16(sel, 0, 0x3FFF, 6, 0);
@@ -1072,9 +1070,9 @@ static int u32_parse_opt(struct filter_util *qu, char *handle,
return -1;
}
if (sample_ok)
- htid = (htid&0xFF000)|(handle&0xFFF00000);
+ htid = (htid & 0xFF000) | (handle & 0xFFF00000);
else
- htid = (handle&0xFFFFF000);
+ htid = (handle & 0xFFFFF000);
} else if (strcmp(*argv, "sample") == 0) {
__u32 hash;
unsigned divisor = 0x100;
@@ -1103,10 +1101,10 @@ static int u32_parse_opt(struct filter_util *qu, char *handle,
}
NEXT_ARG();
}
- hash = sel2.sel.keys[0].val&sel2.sel.keys[0].mask;
- hash ^= hash>>16;
- hash ^= hash>>8;
- htid = ((hash%divisor)<<12)|(htid&0xFFF00000);
+ hash = sel2.sel.keys[0].val & sel2.sel.keys[0].mask;
+ hash ^= hash >> 16;
+ hash ^= hash >> 8;
+ htid = ((hash % divisor) << 12) | (htid & 0xFFF00000);
sample_ok = 1;
continue;
} else if (strcmp(*argv, "indev") == 0) {
@@ -1165,7 +1163,7 @@ static int u32_parse_opt(struct filter_util *qu, char *handle,
addattr_l(n, MAX_MSG, TCA_U32_HASH, &htid, 4);
if (sel_ok)
addattr_l(n, MAX_MSG, TCA_U32_SEL, &sel,
- sizeof(sel.sel)+sel.sel.nkeys*sizeof(struct tc_u32_key));
+ sizeof(sel.sel) + sel.sel.nkeys * sizeof(struct tc_u32_key));
tail->rta_len = (void *) NLMSG_TAIL(n) - (void *) tail;
return 0;
}
@@ -1173,7 +1171,7 @@ static int u32_parse_opt(struct filter_util *qu, char *handle,
static int u32_print_opt(struct filter_util *qu, FILE *f, struct rtattr *opt,
__u32 handle)
{
- struct rtattr *tb[TCA_U32_MAX+1];
+ struct rtattr *tb[TCA_U32_MAX + 1];
struct tc_u32_sel *sel = NULL;
struct tc_u32_pcnt *pf = NULL;
@@ -1209,9 +1207,9 @@ static int u32_print_opt(struct filter_util *qu, FILE *f, struct rtattr *opt,
if (tb[TCA_U32_CLASSID]) {
SPRINT_BUF(b1);
fprintf(f, "%sflowid %s ",
- !sel || !(sel->flags&TC_U32_TERMINAL) ? "*" : "",
+ !sel || !(sel->flags & TC_U32_TERMINAL) ? "*" : "",
sprint_tc_classid(rta_getattr_u32(tb[TCA_U32_CLASSID]), b1));
- } else if (sel && sel->flags&TC_U32_TERMINAL) {
+ } else if (sel && sel->flags & TC_U32_TERMINAL) {
fprintf(f, "terminal flowid ??? ");
}
if (tb[TCA_U32_LINK]) {
@@ -1254,16 +1252,16 @@ static int u32_print_opt(struct filter_util *qu, FILE *f, struct rtattr *opt,
}
}
- if (sel->flags&(TC_U32_VAROFFSET|TC_U32_OFFSET)) {
+ if (sel->flags & (TC_U32_VAROFFSET | TC_U32_OFFSET)) {
fprintf(f, "\n offset ");
- if (sel->flags&TC_U32_VAROFFSET)
+ if (sel->flags & TC_U32_VAROFFSET)
fprintf(f, "%04x>>%d at %d ",
ntohs(sel->offmask),
sel->offshift, sel->offoff);
if (sel->off)
fprintf(f, "plus %d ", sel->off);
}
- if (sel->flags&TC_U32_EAT)
+ if (sel->flags & TC_U32_EAT)
fprintf(f, " eat ");
if (sel->hmask) {
diff --git a/tc/m_action.c b/tc/m_action.c
index 63db8fea..8d3d51e9 100644
--- a/tc/m_action.c
+++ b/tc/m_action.c
@@ -59,7 +59,7 @@ static void act_usage(void)
"\tACTSPEC := action <ACTDETAIL> [INDEXSPEC]\n"
"\tINDEXSPEC := index <32 bit indexvalue>\n"
"\tACTDETAIL := <ACTNAME> <ACTPARAMS>\n"
- "\t\tExample ACTNAME is gact, mirred etc\n"
+ "\t\tExample ACTNAME is gact, mirred, bpf, etc\n"
"\t\tEach action has its own parameters (ACTPARAMS)\n"
"\n");
@@ -485,7 +485,7 @@ static int tc_action_gd(int cmd, unsigned flags, int *argc_p, char ***argv_p)
if (cmd == RTM_GETACTION)
ans = &req.n;
- if (rtnl_talk(&rth, &req.n, 0, 0, ans) < 0) {
+ if (rtnl_talk(&rth, &req.n, ans, MAX_MSG) < 0) {
fprintf(stderr, "We have an error talking to the kernel\n");
return 1;
}
@@ -530,7 +530,7 @@ static int tc_action_modify(int cmd, unsigned flags, int *argc_p, char ***argv_p
}
tail->rta_len = (void *) NLMSG_TAIL(&req.n) - (void *) tail;
- if (rtnl_talk(&rth, &req.n, 0, 0, NULL) < 0) {
+ if (rtnl_talk(&rth, &req.n, NULL, 0) < 0) {
fprintf(stderr, "We have an error talking to the kernel\n");
ret = -1;
}
@@ -600,7 +600,7 @@ static int tc_act_list_or_flush(int argc, char **argv, int event)
req.n.nlmsg_type = RTM_DELACTION;
req.n.nlmsg_flags |= NLM_F_ROOT;
req.n.nlmsg_flags |= NLM_F_REQUEST;
- if (rtnl_talk(&rth, &req.n, 0, 0, NULL) < 0) {
+ if (rtnl_talk(&rth, &req.n, NULL, 0) < 0) {
fprintf(stderr, "We have an error flushing\n");
return 1;
}
@@ -661,4 +661,3 @@ int do_action(int argc, char **argv)
return 0;
}
-
diff --git a/tc/m_bpf.c b/tc/m_bpf.c
index bc6cc47a..c5e2fa5b 100644
--- a/tc/m_bpf.c
+++ b/tc/m_bpf.c
@@ -1,5 +1,5 @@
/*
- * m_bpf.c BFP based action module
+ * m_bpf.c BPF based action module
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -7,107 +7,124 @@
* 2 of the License, or (at your option) any later version.
*
* Authors: Jiri Pirko <jiri@resnulli.us>
+ * Daniel Borkmann <daniel@iogearbox.net>
*/
#include <stdio.h>
#include <stdlib.h>
-#include <unistd.h>
-#include <string.h>
-#include <stdbool.h>
+
+#include <linux/bpf.h>
#include <linux/tc_act/tc_bpf.h>
#include "utils.h"
-#include "rt_names.h"
#include "tc_util.h"
#include "tc_bpf.h"
+static const enum bpf_prog_type bpf_type = BPF_PROG_TYPE_SCHED_ACT;
+
+static const int nla_tbl[BPF_NLA_MAX] = {
+ [BPF_NLA_OPS_LEN] = TCA_ACT_BPF_OPS_LEN,
+ [BPF_NLA_OPS] = TCA_ACT_BPF_OPS,
+ [BPF_NLA_FD] = TCA_ACT_BPF_FD,
+ [BPF_NLA_NAME] = TCA_ACT_BPF_NAME,
+};
+
static void explain(void)
{
- fprintf(stderr, "Usage: ... bpf ...\n");
+ fprintf(stderr, "Usage: ... bpf ... [ index INDEX ]\n");
+ fprintf(stderr, "\n");
+ fprintf(stderr, "BPF use case:\n");
+ fprintf(stderr, " bytecode BPF_BYTECODE\n");
+ fprintf(stderr, " bytecode-file FILE\n");
fprintf(stderr, "\n");
- fprintf(stderr, " [inline]: run bytecode BPF_BYTECODE\n");
- fprintf(stderr, " [from file]: run bytecode-file FILE\n");
+ fprintf(stderr, "eBPF use case:\n");
+ fprintf(stderr, " object-file FILE [ section ACT_NAME ] [ export UDS_FILE ]");
+ fprintf(stderr, " [ verbose ]\n");
+ fprintf(stderr, " object-pinned FILE\n");
fprintf(stderr, "\n");
fprintf(stderr, "Where BPF_BYTECODE := \'s,c t f k,c t f k,c t f k,...\'\n");
- fprintf(stderr, " c,t,f,k and s are decimals; s denotes number of 4-tuples\n");
- fprintf(stderr, "Where FILE points to a file containing the BPF_BYTECODE string\n");
-}
-
-static void usage(void)
-{
- explain();
- exit(-1);
+ fprintf(stderr, "c,t,f,k and s are decimals; s denotes number of 4-tuples\n");
+ fprintf(stderr, "\n");
+ fprintf(stderr, "Where FILE points to a file containing the BPF_BYTECODE string,\n");
+ fprintf(stderr, "an ELF file containing eBPF map definitions and bytecode, or a\n");
+ fprintf(stderr, "pinned eBPF program.\n");
+ fprintf(stderr, "\n");
+ fprintf(stderr, "Where ACT_NAME refers to the section name containing the\n");
+ fprintf(stderr, "action (default \'%s\').\n", bpf_default_section(bpf_type));
+ fprintf(stderr, "\n");
+ fprintf(stderr, "Where UDS_FILE points to a unix domain socket file in order\n");
+ fprintf(stderr, "to hand off control of all created eBPF maps to an agent.\n");
+ fprintf(stderr, "\n");
+ fprintf(stderr, "Where optionally INDEX points to an existing action, or\n");
+ fprintf(stderr, "explicitly specifies an action index upon creation.\n");
}
-static int parse_bpf(struct action_util *a, int *argc_p, char ***argv_p,
- int tca_id, struct nlmsghdr *n)
+static int bpf_parse_opt(struct action_util *a, int *ptr_argc, char ***ptr_argv,
+ int tca_id, struct nlmsghdr *n)
{
- int argc = *argc_p;
- char **argv = *argv_p;
+ const char *bpf_obj = NULL, *bpf_uds_name = NULL;
+ struct tc_act_bpf parm;
+ bool seen_run = false;
struct rtattr *tail;
- struct tc_act_bpf parm = { 0 };
- struct sock_filter bpf_ops[BPF_MAXINSNS];
- __u16 bpf_len = 0;
+ int argc, ret = 0;
+ char **argv;
+
+ argv = *ptr_argv;
+ argc = *ptr_argc;
if (matches(*argv, "bpf") != 0)
return -1;
NEXT_ARG();
+ tail = NLMSG_TAIL(n);
+ addattr_l(n, MAX_MSG, tca_id, NULL, 0);
+
while (argc > 0) {
if (matches(*argv, "run") == 0) {
- bool from_file;
- int ret;
-
NEXT_ARG();
- if (strcmp(*argv, "bytecode-file") == 0) {
- from_file = true;
- } else if (strcmp(*argv, "bytecode") == 0) {
- from_file = false;
- } else {
- fprintf(stderr, "unexpected \"%s\"\n", *argv);
- explain();
+opt_bpf:
+ seen_run = true;
+ if (bpf_parse_common(&argc, &argv, nla_tbl, bpf_type,
+ &bpf_obj, &bpf_uds_name, n)) {
+ fprintf(stderr, "Failed to retrieve (e)BPF data!\n");
return -1;
}
- NEXT_ARG();
- ret = bpf_parse_ops(argc, argv, bpf_ops, from_file);
- if (ret < 0) {
- fprintf(stderr, "Illegal \"bytecode\"\n");
- return -1;
- }
- bpf_len = ret;
} else if (matches(*argv, "help") == 0) {
- usage();
+ explain();
+ return -1;
+ } else if (matches(*argv, "index") == 0) {
+ break;
} else {
+ if (!seen_run)
+ goto opt_bpf;
break;
}
- argc--;
- argv++;
+
+ NEXT_ARG_FWD();
}
+ memset(&parm, 0, sizeof(parm));
parm.action = TC_ACT_PIPE;
+
if (argc) {
if (matches(*argv, "reclassify") == 0) {
parm.action = TC_ACT_RECLASSIFY;
- argc--;
- argv++;
+ NEXT_ARG_FWD();
} else if (matches(*argv, "pipe") == 0) {
parm.action = TC_ACT_PIPE;
- argc--;
- argv++;
+ NEXT_ARG_FWD();
} else if (matches(*argv, "drop") == 0 ||
matches(*argv, "shot") == 0) {
parm.action = TC_ACT_SHOT;
- argc--;
- argv++;
+ NEXT_ARG_FWD();
} else if (matches(*argv, "continue") == 0) {
parm.action = TC_ACT_UNSPEC;
- argc--;
- argv++;
- } else if (matches(*argv, "pass") == 0) {
+ NEXT_ARG_FWD();
+ } else if (matches(*argv, "pass") == 0 ||
+ matches(*argv, "ok") == 0) {
parm.action = TC_ACT_OK;
- argc--;
- argv++;
+ NEXT_ARG_FWD();
}
}
@@ -118,34 +135,28 @@ static int parse_bpf(struct action_util *a, int *argc_p, char ***argv_p,
fprintf(stderr, "bpf: Illegal \"index\"\n");
return -1;
}
- argc--;
- argv++;
- }
- }
- if (!bpf_len) {
- fprintf(stderr, "bpf: Bytecode needs to be passed\n");
- explain();
- return -1;
+ NEXT_ARG_FWD();
+ }
}
- tail = NLMSG_TAIL(n);
- addattr_l(n, MAX_MSG, tca_id, NULL, 0);
addattr_l(n, MAX_MSG, TCA_ACT_BPF_PARMS, &parm, sizeof(parm));
- addattr16(n, MAX_MSG, TCA_ACT_BPF_OPS_LEN, bpf_len);
- addattr_l(n, MAX_MSG, TCA_ACT_BPF_OPS, &bpf_ops,
- bpf_len * sizeof(struct sock_filter));
tail->rta_len = (char *)NLMSG_TAIL(n) - (char *)tail;
- *argc_p = argc;
- *argv_p = argv;
- return 0;
+ if (bpf_uds_name)
+ ret = bpf_send_map_fds(bpf_uds_name, bpf_obj);
+
+ *ptr_argc = argc;
+ *ptr_argv = argv;
+
+ return ret;
}
-static int print_bpf(struct action_util *au, FILE *f, struct rtattr *arg)
+static int bpf_print_opt(struct action_util *au, FILE *f, struct rtattr *arg)
{
struct rtattr *tb[TCA_ACT_BPF_MAX + 1];
struct tc_act_bpf *parm;
+ SPRINT_BUF(action_buf);
if (arg == NULL)
return -1;
@@ -156,15 +167,24 @@ static int print_bpf(struct action_util *au, FILE *f, struct rtattr *arg)
fprintf(f, "[NULL bpf parameters]");
return -1;
}
+
parm = RTA_DATA(tb[TCA_ACT_BPF_PARMS]);
+ fprintf(f, "bpf ");
- fprintf(f, " bpf ");
+ if (tb[TCA_ACT_BPF_NAME])
+ fprintf(f, "%s ", rta_getattr_str(tb[TCA_ACT_BPF_NAME]));
+ else if (tb[TCA_ACT_BPF_FD])
+ fprintf(f, "pfd %u ", rta_getattr_u32(tb[TCA_ACT_BPF_FD]));
- if (tb[TCA_ACT_BPF_OPS] && tb[TCA_ACT_BPF_OPS_LEN])
+ if (tb[TCA_ACT_BPF_OPS] && tb[TCA_ACT_BPF_OPS_LEN]) {
bpf_print_ops(f, tb[TCA_ACT_BPF_OPS],
rta_getattr_u16(tb[TCA_ACT_BPF_OPS_LEN]));
+ fprintf(f, " ");
+ }
- fprintf(f, "\n\tindex %d ref %d bind %d", parm->index, parm->refcnt,
+ fprintf(f, "default-action %s\n", action_n2a(parm->action, action_buf,
+ sizeof(action_buf)));
+ fprintf(f, "\tindex %d ref %d bind %d", parm->index, parm->refcnt,
parm->bindcnt);
if (show_stats) {
@@ -175,12 +195,11 @@ static int print_bpf(struct action_util *au, FILE *f, struct rtattr *arg)
}
fprintf(f, "\n ");
-
return 0;
}
struct action_util bpf_action_util = {
- .id = "bpf",
- .parse_aopt = parse_bpf,
- .print_aopt = print_bpf,
+ .id = "bpf",
+ .parse_aopt = bpf_parse_opt,
+ .print_aopt = bpf_print_opt,
};
diff --git a/tc/m_connmark.c b/tc/m_connmark.c
new file mode 100644
index 00000000..6974c9ba
--- /dev/null
+++ b/tc/m_connmark.c
@@ -0,0 +1,166 @@
+/*
+ * m_connmark.c Connection tracking marking import
+ *
+ * Copyright (c) 2011 Felix Fietkau <nbd@openwrt.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, see <http://www.gnu.org/licenses>.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include "utils.h"
+#include "tc_util.h"
+#include <linux/tc_act/tc_connmark.h>
+
+/* Print the connmark action synopsis to stderr. */
+static void explain(void)
+{
+	fputs("Usage: ... connmark [zone ZONE] [BRANCH] [index <INDEX>]\n"
+	      "where :\n"
+	      "\tZONE is the conntrack zone\n"
+	      "\tBRANCH := reclassify|pipe|drop|continue|ok\n", stderr);
+}
+
+/* Show the synopsis via explain(), then end the process with exit(-1). */
+static void usage(void)
+{
+	explain();
+	exit(-1);
+}
+
+/*
+ * Parse "connmark [zone ZONE] [BRANCH] [index INDEX]" and append it to @n as
+ * a nested @tca_id attribute carrying TCA_CONNMARK_PARMS.  @a is unused.
+ * *argc_p / *argv_p are read on entry and, on success, advanced past every
+ * consumed word.  Returns 0 on success, -1 after a diagnostic on stderr;
+ * "help" prints the usage text and exits the process.
+ */
+static int parse_connmark(struct action_util *a, int *argc_p, char ***argv_p,
+			  int tca_id, struct nlmsghdr *n)
+{
+	struct tc_connmark sel = {};
+	char **argv = *argv_p;
+	int argc = *argc_p;
+	int ok = 0;
+	struct rtattr *tail;
+
+	while (argc > 0) {
+		if (matches(*argv, "connmark") == 0) {
+			ok = 1;
+			argc--;
+			argv++;
+		} else if (matches(*argv, "help") == 0) {
+			usage();
+		} else {
+			break;
+		}
+	}
+
+	if (!ok) {
+		explain();
+		return -1;
+	}
+
+	if (argc && matches(*argv, "zone") == 0) {
+		NEXT_ARG();
+		if (get_u16(&sel.zone, *argv, 10)) {
+			fprintf(stderr, "connmark: Illegal \"zone\"\n");
+			return -1;
+		}
+		argc--;
+		argv++;
+	}
+
+	sel.action = TC_ACT_PIPE;	/* used when no BRANCH keyword follows */
+	if (argc) {
+		int branch = 1;
+
+		if (matches(*argv, "reclassify") == 0)
+			sel.action = TC_ACT_RECLASSIFY;
+		else if (matches(*argv, "pipe") == 0)
+			sel.action = TC_ACT_PIPE;
+		else if (matches(*argv, "drop") == 0 ||
+			 matches(*argv, "shot") == 0)
+			sel.action = TC_ACT_SHOT;
+		else if (matches(*argv, "continue") == 0)
+			sel.action = TC_ACT_UNSPEC;
+		else if (matches(*argv, "pass") == 0 ||
+			 matches(*argv, "ok") == 0)	/* "ok" is the spelling explain() prints */
+			sel.action = TC_ACT_OK;
+		else
+			branch = 0;
+		if (branch) {
+			argc--;
+			argv++;
+		}
+	}
+
+	if (argc && matches(*argv, "index") == 0) {
+		NEXT_ARG();
+		if (get_u32(&sel.index, *argv, 10)) {
+			fprintf(stderr, "connmark: Illegal \"index\"\n");
+			return -1;
+		}
+		argc--;
+		argv++;
+	}
+
+	/* Open the nest with an empty attribute, then patch in its length. */
+	tail = NLMSG_TAIL(n);
+	addattr_l(n, MAX_MSG, tca_id, NULL, 0);
+	addattr_l(n, MAX_MSG, TCA_CONNMARK_PARMS, &sel, sizeof(sel));
+	tail->rta_len = (char *)NLMSG_TAIL(n) - (char *)tail;
+
+	*argc_p = argc;
+	*argv_p = argv;
+	return 0;
+}
+
+/*
+ * Write the connmark action found in nested attribute @arg to @f: the zone,
+ * then the index/ref/bind fields, and, when show_stats is set and
+ * TCA_CONNMARK_TM is present, that attribute via print_tm().  Returns 0, or
+ * -1 when @arg is NULL or TCA_CONNMARK_PARMS is missing.  @au is unused.
+ */
+static int print_connmark(struct action_util *au, FILE *f, struct rtattr *arg)
+{
+	struct rtattr *attrs[TCA_CONNMARK_MAX + 1];
+	struct tc_connmark *parm;
+
+	if (!arg)
+		return -1;
+
+	parse_rtattr_nested(attrs, TCA_CONNMARK_MAX, arg);
+	if (!attrs[TCA_CONNMARK_PARMS]) {
+		fprintf(f, "[NULL connmark parameters]");
+		return -1;
+	}
+	parm = RTA_DATA(attrs[TCA_CONNMARK_PARMS]);
+
+	fprintf(f, " connmark zone %d\n", parm->zone);
+	fprintf(f, "\t index %d ref %d bind %d",
+		parm->index, parm->refcnt, parm->bindcnt);
+	if (show_stats && attrs[TCA_CONNMARK_TM])
+		print_tm(f, RTA_DATA(attrs[TCA_CONNMARK_TM]));
+	fprintf(f, "\n");
+
+	return 0;
+}
+
+struct action_util connmark_action_util = {
+ .id = "connmark",
+ .parse_aopt = parse_connmark, /* builds TCA_CONNMARK_PARMS from argv */
+ .print_aopt = print_connmark, /* writes a connmark action to a FILE */
+};
diff --git a/tc/m_ipt.c b/tc/m_ipt.c
index e5c48977..948becbc 100644
--- a/tc/m_ipt.c
+++ b/tc/m_ipt.c
@@ -618,4 +618,3 @@ struct action_util ipt_action_util = {
.parse_aopt = parse_ipt,
.print_aopt = print_ipt,
};
-
diff --git a/tc/m_pedit.c b/tc/m_pedit.c
index dfe9b2eb..4fdd189d 100644
--- a/tc/m_pedit.c
+++ b/tc/m_pedit.c
@@ -160,17 +160,9 @@ pack_key32(__u32 retain,struct tc_pedit_sel *sel,struct tc_pedit_key *tkey)
int
pack_key16(__u32 retain,struct tc_pedit_sel *sel,struct tc_pedit_key *tkey)
{
- int ind = 0, stride = 0;
+ int ind, stride;
__u32 m[4] = {0xFFFF0000,0xFF0000FF,0x0000FFFF};
- if (0 > tkey->off) {
- ind = tkey->off + 1;
- if (0 > ind)
- ind = -1*ind;
- } else {
- ind = tkey->off;
- }
-
if (tkey->val > 0xFFFF || tkey->mask > 0xFFFF) {
fprintf(stderr, "pack_key16 bad value\n");
return -1;
@@ -178,18 +170,16 @@ pack_key16(__u32 retain,struct tc_pedit_sel *sel,struct tc_pedit_key *tkey)
ind = tkey->off & 3;
- if (0 > ind || 2 < ind) {
+ if (ind == 3) {
fprintf(stderr, "pack_key16 bad index value %d\n",ind);
return -1;
}
stride = 8 * ind;
tkey->val = htons(tkey->val);
- if (stride > 0) {
- tkey->val <<= stride;
- tkey->mask <<= stride;
- retain <<= stride;
- }
+ tkey->val <<= stride;
+ tkey->mask <<= stride;
+ retain <<= stride;
tkey->mask = retain|m[ind];
tkey->off &= ~3;
@@ -203,28 +193,22 @@ pack_key16(__u32 retain,struct tc_pedit_sel *sel,struct tc_pedit_key *tkey)
int
pack_key8(__u32 retain,struct tc_pedit_sel *sel,struct tc_pedit_key *tkey)
{
- int ind = 0, stride = 0;
+ int ind, stride;
__u32 m[4] = {0xFFFFFF00,0xFFFF00FF,0xFF00FFFF,0x00FFFFFF};
- if (0 > tkey->off) {
- ind = tkey->off + 1;
- if (0 > ind)
- ind = -1*ind;
- } else {
- ind = tkey->off;
- }
-
if (tkey->val > 0xFF || tkey->mask > 0xFF) {
fprintf(stderr, "pack_key8 bad value (val %x mask %x\n", tkey->val, tkey->mask);
return -1;
}
ind = tkey->off & 3;
+
stride = 8 * ind;
tkey->val <<= stride;
tkey->mask <<= stride;
retain <<= stride;
tkey->mask = retain|m[ind];
+
tkey->off &= ~3;
if (pedit_debug)
diff --git a/tc/m_simple.c b/tc/m_simple.c
index 866552f5..1ad55268 100644
--- a/tc/m_simple.c
+++ b/tc/m_simple.c
@@ -138,7 +138,7 @@ parse_simple(struct action_util *a, int *argc_p, char ***argv_p, int tca_id,
}
if (strlen(simpdata) > (SIMP_MAX_DATA - 1)) {
- fprintf(stderr, "simple: Illegal string len %ld <%s> \n",
+ fprintf(stderr, "simple: Illegal string len %zu <%s> \n",
strlen(simpdata), simpdata);
return -1;
}
diff --git a/tc/m_xt_old.c b/tc/m_xt_old.c
index 0ea0b4a9..6e643088 100644
--- a/tc/m_xt_old.c
+++ b/tc/m_xt_old.c
@@ -433,4 +433,3 @@ struct action_util ipt_action_util = {
.parse_aopt = parse_ipt,
.print_aopt = print_ipt,
};
-
diff --git a/tc/p_tcp.c b/tc/p_tcp.c
index 7f4b6f4a..32ffc027 100644
--- a/tc/p_tcp.c
+++ b/tc/p_tcp.c
@@ -34,5 +34,3 @@ struct m_pedit_util p_pedit_tcp = {
"tcp",
parse_tcp,
};
-
-
diff --git a/tc/p_udp.c b/tc/p_udp.c
index 17762896..2b9b88fc 100644
--- a/tc/p_udp.c
+++ b/tc/p_udp.c
@@ -35,4 +35,3 @@ struct m_pedit_util p_pedit_udp = {
"udp",
parse_udp,
};
-
diff --git a/tc/q_cbq.c b/tc/q_cbq.c
index d76600cc..38a61630 100644
--- a/tc/q_cbq.c
+++ b/tc/q_cbq.c
@@ -582,4 +582,3 @@ struct qdisc_util cbq_qdisc_util = {
.parse_copt = cbq_parse_class_opt,
.print_copt = cbq_print_opt,
};
-
diff --git a/tc/q_clsact.c b/tc/q_clsact.c
new file mode 100644
index 00000000..0c05dbd3
--- /dev/null
+++ b/tc/q_clsact.c
@@ -0,0 +1,34 @@
+#include <stdio.h>
+#include <string.h>
+
+#include "utils.h"
+#include "tc_util.h"
+
+static void explain(void)	/* one-line clsact synopsis on stderr */
+{
+	fputs("Usage: ... clsact\n", stderr);
+}
+
+/* clsact has no options: reject any argument, else add an empty TCA_OPTIONS. */
+static int clsact_parse_opt(struct qdisc_util *qu, int argc, char **argv,
+			    struct nlmsghdr *n)
+{
+	if (argc <= 0) {
+		addattr_l(n, 1024, TCA_OPTIONS, NULL, 0);
+		return 0;
+	}
+	fprintf(stderr, "What is \"%s\"?\n", *argv);
+	explain();
+	return -1;
+}
+
+static int clsact_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt)
+{
+ return 0; /* nothing is written to f; always returns 0 */
+}
+
+struct qdisc_util clsact_qdisc_util = {
+ .id = "clsact",
+ .parse_qopt = clsact_parse_opt, /* rejects any argument */
+ .print_qopt = clsact_print_opt, /* prints nothing */
+};
diff --git a/tc/q_codel.c b/tc/q_codel.c
index dc4b3f6c..c24246c5 100644
--- a/tc/q_codel.c
+++ b/tc/q_codel.c
@@ -4,7 +4,7 @@
* Copyright (C) 2011-2012 Kathleen Nichols <nichols@pollere.com>
* Copyright (C) 2011-2012 Van Jacobson <van@pollere.com>
* Copyright (C) 2012 Michael D. Taht <dave.taht@bufferbloat.net>
- * Copyright (C) 2012 Eric Dumazet <edumazet@google.com>
+ * Copyright (C) 2012,2015 Eric Dumazet <edumazet@google.com>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -55,6 +55,7 @@ static void explain(void)
{
fprintf(stderr, "Usage: ... codel [ limit PACKETS ] [ target TIME]\n");
fprintf(stderr, " [ interval TIME ] [ ecn | noecn ]\n");
+ fprintf(stderr, " [ ce_threshold TIME ]\n");
}
static int codel_parse_opt(struct qdisc_util *qu, int argc, char **argv,
@@ -63,6 +64,7 @@ static int codel_parse_opt(struct qdisc_util *qu, int argc, char **argv,
unsigned limit = 0;
unsigned target = 0;
unsigned interval = 0;
+ unsigned ce_threshold = ~0U;
int ecn = -1;
struct rtattr *tail;
@@ -79,6 +81,12 @@ static int codel_parse_opt(struct qdisc_util *qu, int argc, char **argv,
fprintf(stderr, "Illegal \"target\"\n");
return -1;
}
+ } else if (strcmp(*argv, "ce_threshold") == 0) {
+ NEXT_ARG();
+ if (get_time(&ce_threshold, *argv)) {
+ fprintf(stderr, "Illegal \"ce_threshold\"\n");
+ return -1;
+ }
} else if (strcmp(*argv, "interval") == 0) {
NEXT_ARG();
if (get_time(&interval, *argv)) {
@@ -110,6 +118,10 @@ static int codel_parse_opt(struct qdisc_util *qu, int argc, char **argv,
addattr_l(n, 1024, TCA_CODEL_TARGET, &target, sizeof(target));
if (ecn != -1)
addattr_l(n, 1024, TCA_CODEL_ECN, &ecn, sizeof(ecn));
+ if (ce_threshold != ~0U)
+ addattr_l(n, 1024, TCA_CODEL_CE_THRESHOLD,
+ &ce_threshold, sizeof(ce_threshold));
+
tail->rta_len = (void *) NLMSG_TAIL(n) - (void *) tail;
return 0;
}
@@ -121,6 +133,7 @@ static int codel_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt)
unsigned interval;
unsigned target;
unsigned ecn;
+ unsigned ce_threshold;
SPRINT_BUF(b1);
if (opt == NULL)
@@ -138,6 +151,11 @@ static int codel_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt)
target = rta_getattr_u32(tb[TCA_CODEL_TARGET]);
fprintf(f, "target %s ", sprint_time(target, b1));
}
+ if (tb[TCA_CODEL_CE_THRESHOLD] &&
+ RTA_PAYLOAD(tb[TCA_CODEL_CE_THRESHOLD]) >= sizeof(__u32)) {
+ ce_threshold = rta_getattr_u32(tb[TCA_CODEL_CE_THRESHOLD]);
+ fprintf(f, "ce_threshold %s ", sprint_time(ce_threshold, b1));
+ }
if (tb[TCA_CODEL_INTERVAL] &&
RTA_PAYLOAD(tb[TCA_CODEL_INTERVAL]) >= sizeof(__u32)) {
interval = rta_getattr_u32(tb[TCA_CODEL_INTERVAL]);
@@ -156,16 +174,19 @@ static int codel_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt)
static int codel_print_xstats(struct qdisc_util *qu, FILE *f,
struct rtattr *xstats)
{
- struct tc_codel_xstats *st;
+ struct tc_codel_xstats _st, *st;
SPRINT_BUF(b1);
if (xstats == NULL)
return 0;
- if (RTA_PAYLOAD(xstats) < sizeof(*st))
- return -1;
-
st = RTA_DATA(xstats);
+ if (RTA_PAYLOAD(xstats) < sizeof(*st)) {
+ memset(&_st, 0, sizeof(_st));
+ memcpy(&_st, st, RTA_PAYLOAD(xstats));
+ st = &_st;
+ }
+
fprintf(f, " count %u lastcount %u ldelay %s",
st->count, st->lastcount, sprint_time(st->ldelay, b1));
if (st->dropping)
@@ -176,6 +197,8 @@ static int codel_print_xstats(struct qdisc_util *qu, FILE *f,
fprintf(f, " drop_next %s", sprint_time(st->drop_next, b1));
fprintf(f, "\n maxpacket %u ecn_mark %u drop_overlimit %u",
st->maxpacket, st->ecn_mark, st->drop_overlimit);
+ if (st->ce_mark)
+ fprintf(f, " ce_mark %u", st->ce_mark);
return 0;
}
diff --git a/tc/q_fq.c b/tc/q_fq.c
index e7288c20..2a370b36 100644
--- a/tc/q_fq.c
+++ b/tc/q_fq.c
@@ -1,7 +1,7 @@
/*
* Fair Queue
*
- * Copyright (C) 2013 Eric Dumazet <edumazet@google.com>
+ * Copyright (C) 2013-2015 Eric Dumazet <edumazet@google.com>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -54,7 +54,8 @@ static void explain(void)
fprintf(stderr, "Usage: ... fq [ limit PACKETS ] [ flow_limit PACKETS ]\n");
fprintf(stderr, " [ quantum BYTES ] [ initial_quantum BYTES ]\n");
fprintf(stderr, " [ maxrate RATE ] [ buckets NUMBER ]\n");
- fprintf(stderr, " [ [no]pacing ]\n");
+ fprintf(stderr, " [ [no]pacing ] [ refill_delay TIME ]\n");
+ fprintf(stderr, " [ orphan_mask MASK]\n");
}
static unsigned int ilog2(unsigned int val)
@@ -79,12 +80,16 @@ static int fq_parse_opt(struct qdisc_util *qu, int argc, char **argv,
unsigned int buckets = 0;
unsigned int maxrate;
unsigned int defrate;
+ unsigned int refill_delay;
+ unsigned int orphan_mask;
bool set_plimit = false;
bool set_flow_plimit = false;
bool set_quantum = false;
bool set_initial_quantum = false;
bool set_maxrate = false;
bool set_defrate = false;
+ bool set_refill_delay = false;
+ bool set_orphan_mask = false;
int pacing = -1;
struct rtattr *tail;
@@ -137,6 +142,20 @@ static int fq_parse_opt(struct qdisc_util *qu, int argc, char **argv,
return -1;
}
set_initial_quantum = true;
+ } else if (strcmp(*argv, "orphan_mask") == 0) {
+ NEXT_ARG();
+ if (get_unsigned(&orphan_mask, *argv, 0)) {
+ fprintf(stderr, "Illegal \"orphan_mask\"\n"); /* name the keyword that failed to parse */
+ return -1;
+ }
+ set_orphan_mask = true;
+ } else if (strcmp(*argv, "refill_delay") == 0) {
+ NEXT_ARG();
+ if (get_time(&refill_delay, *argv)) {
+ fprintf(stderr, "Illegal \"refill_delay\"\n");
+ return -1;
+ }
+ set_refill_delay = true;
} else if (strcmp(*argv, "pacing") == 0) {
pacing = 1;
} else if (strcmp(*argv, "nopacing") == 0) {
@@ -180,6 +199,12 @@ static int fq_parse_opt(struct qdisc_util *qu, int argc, char **argv,
if (set_defrate)
addattr_l(n, 1024, TCA_FQ_FLOW_DEFAULT_RATE,
&defrate, sizeof(defrate));
+ if (set_refill_delay)
+ addattr_l(n, 1024, TCA_FQ_FLOW_REFILL_DELAY,
+ &refill_delay, sizeof(refill_delay));
+ if (set_orphan_mask)
+ addattr_l(n, 1024, TCA_FQ_ORPHAN_MASK,
+ &orphan_mask, sizeof(orphan_mask)); /* length taken from the value being sent */
tail->rta_len = (void *) NLMSG_TAIL(n) - (void *) tail;
return 0;
}
@@ -191,6 +216,8 @@ static int fq_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt)
unsigned int buckets_log;
int pacing;
unsigned int rate, quantum;
+ unsigned int refill_delay;
+ unsigned int orphan_mask;
SPRINT_BUF(b1);
if (opt == NULL)
@@ -213,6 +240,11 @@ static int fq_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt)
buckets_log = rta_getattr_u32(tb[TCA_FQ_BUCKETS_LOG]);
fprintf(f, "buckets %u ", 1U << buckets_log);
}
+ if (tb[TCA_FQ_ORPHAN_MASK] &&
+ RTA_PAYLOAD(tb[TCA_FQ_ORPHAN_MASK]) >= sizeof(__u32)) {
+ orphan_mask = rta_getattr_u32(tb[TCA_FQ_ORPHAN_MASK]);
+ fprintf(f, "orphan_mask %u ", orphan_mask);
+ }
if (tb[TCA_FQ_RATE_ENABLE] &&
RTA_PAYLOAD(tb[TCA_FQ_RATE_ENABLE]) >= sizeof(int)) {
pacing = rta_getattr_u32(tb[TCA_FQ_RATE_ENABLE]);
@@ -243,6 +275,11 @@ static int fq_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt)
if (rate != 0)
fprintf(f, "defrate %s ", sprint_rate(rate, b1));
}
+ if (tb[TCA_FQ_FLOW_REFILL_DELAY] &&
+ RTA_PAYLOAD(tb[TCA_FQ_FLOW_REFILL_DELAY]) >= sizeof(__u32)) {
+ refill_delay = rta_getattr_u32(tb[TCA_FQ_FLOW_REFILL_DELAY]);
+ fprintf(f, "refill_delay %s ", sprint_time(refill_delay, b1));
+ }
return 0;
}
diff --git a/tc/q_fq_codel.c b/tc/q_fq_codel.c
index 1d3bfa2a..4f747ebd 100644
--- a/tc/q_fq_codel.c
+++ b/tc/q_fq_codel.c
@@ -1,7 +1,7 @@
/*
* Fair Queue Codel
*
- * Copyright (C) 2012 Eric Dumazet <edumazet@google.com>
+ * Copyright (C) 2012,2015 Eric Dumazet <edumazet@google.com>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -53,6 +53,7 @@ static void explain(void)
fprintf(stderr, "Usage: ... fq_codel [ limit PACKETS ] [ flows NUMBER ]\n");
fprintf(stderr, " [ target TIME] [ interval TIME ]\n");
fprintf(stderr, " [ quantum BYTES ] [ [no]ecn ]\n");
+ fprintf(stderr, " [ ce_threshold TIME ]\n");
}
static int fq_codel_parse_opt(struct qdisc_util *qu, int argc, char **argv,
@@ -63,6 +64,7 @@ static int fq_codel_parse_opt(struct qdisc_util *qu, int argc, char **argv,
unsigned target = 0;
unsigned interval = 0;
unsigned quantum = 0;
+ unsigned ce_threshold = ~0U;
int ecn = -1;
struct rtattr *tail;
@@ -91,6 +93,12 @@ static int fq_codel_parse_opt(struct qdisc_util *qu, int argc, char **argv,
fprintf(stderr, "Illegal \"target\"\n");
return -1;
}
+ } else if (strcmp(*argv, "ce_threshold") == 0) {
+ NEXT_ARG();
+ if (get_time(&ce_threshold, *argv)) {
+ fprintf(stderr, "Illegal \"ce_threshold\"\n");
+ return -1;
+ }
} else if (strcmp(*argv, "interval") == 0) {
NEXT_ARG();
if (get_time(&interval, *argv)) {
@@ -126,6 +134,9 @@ static int fq_codel_parse_opt(struct qdisc_util *qu, int argc, char **argv,
addattr_l(n, 1024, TCA_FQ_CODEL_TARGET, &target, sizeof(target));
if (ecn != -1)
addattr_l(n, 1024, TCA_FQ_CODEL_ECN, &ecn, sizeof(ecn));
+ if (ce_threshold != ~0U)
+ addattr_l(n, 1024, TCA_FQ_CODEL_CE_THRESHOLD,
+ &ce_threshold, sizeof(ce_threshold));
tail->rta_len = (void *) NLMSG_TAIL(n) - (void *) tail;
return 0;
}
@@ -139,6 +150,7 @@ static int fq_codel_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt
unsigned target;
unsigned ecn;
unsigned quantum;
+ unsigned ce_threshold;
SPRINT_BUF(b1);
if (opt == NULL)
@@ -166,6 +178,11 @@ static int fq_codel_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt
target = rta_getattr_u32(tb[TCA_FQ_CODEL_TARGET]);
fprintf(f, "target %s ", sprint_time(target, b1));
}
+ if (tb[TCA_FQ_CODEL_CE_THRESHOLD] &&
+ RTA_PAYLOAD(tb[TCA_FQ_CODEL_CE_THRESHOLD]) >= sizeof(__u32)) {
+ ce_threshold = rta_getattr_u32(tb[TCA_FQ_CODEL_CE_THRESHOLD]);
+ fprintf(f, "ce_threshold %s ", sprint_time(ce_threshold, b1));
+ }
if (tb[TCA_FQ_CODEL_INTERVAL] &&
RTA_PAYLOAD(tb[TCA_FQ_CODEL_INTERVAL]) >= sizeof(__u32)) {
interval = rta_getattr_u32(tb[TCA_FQ_CODEL_INTERVAL]);
@@ -184,22 +201,26 @@ static int fq_codel_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt
static int fq_codel_print_xstats(struct qdisc_util *qu, FILE *f,
struct rtattr *xstats)
{
- struct tc_fq_codel_xstats *st;
+ struct tc_fq_codel_xstats _st, *st;
SPRINT_BUF(b1);
if (xstats == NULL)
return 0;
- if (RTA_PAYLOAD(xstats) < sizeof(*st))
- return -1;
-
st = RTA_DATA(xstats);
+ if (RTA_PAYLOAD(xstats) < sizeof(*st)) {
+ memset(&_st, 0, sizeof(_st));
+ memcpy(&_st, st, RTA_PAYLOAD(xstats));
+ st = &_st;
+ }
if (st->type == TCA_FQ_CODEL_XSTATS_QDISC) {
fprintf(f, " maxpacket %u drop_overlimit %u new_flow_count %u ecn_mark %u",
st->qdisc_stats.maxpacket,
st->qdisc_stats.drop_overlimit,
st->qdisc_stats.new_flow_count,
st->qdisc_stats.ecn_mark);
+ if (st->qdisc_stats.ce_mark)
+ fprintf(f, " ce_mark %u", st->qdisc_stats.ce_mark);
fprintf(f, "\n new_flows_len %u old_flows_len %u",
st->qdisc_stats.new_flows_len,
st->qdisc_stats.old_flows_len);
diff --git a/tc/q_gred.c b/tc/q_gred.c
index 88bd0947..f31daa37 100644
--- a/tc/q_gred.c
+++ b/tc/q_gred.c
@@ -37,14 +37,11 @@
static void explain(void)
{
- fprintf(stderr, "Usage: ... gred DP drop-probability limit BYTES "
- "min BYTES max BYTES\n");
- fprintf(stderr, " avpkt BYTES burst PACKETS probability PROBABILITY "
- "bandwidth KBPS\n");
- fprintf(stderr, " [prio value]\n");
- fprintf(stderr," OR ...\n");
- fprintf(stderr," gred setup DPs <num of DPs> default <default DP> "
- "[grio]\n");
+ fprintf(stderr, "Usage: tc qdisc { add | replace | change } ... gred setup vqs NUMBER\n");
+ fprintf(stderr, " default DEFAULT_VQ [ grio ] [ limit BYTES ]\n");
+ fprintf(stderr, " tc qdisc change ... gred vq VQ [ prio VALUE ] limit BYTES\n");
+ fprintf(stderr, " min BYTES max BYTES avpkt BYTES [ burst PACKETS ]\n");
+ fprintf(stderr, " [ probability PROBABILITY ] [ bandwidth KBPS ]\n");
}
static int init_gred(struct qdisc_util *qu, int argc, char **argv,
@@ -53,38 +50,46 @@ static int init_gred(struct qdisc_util *qu, int argc, char **argv,
struct rtattr *tail;
struct tc_gred_sopt opt = { 0 };
- int dps = 0;
- int def_dp = -1;
+ __u32 limit = 0;
+
+ opt.def_DP = MAX_DPs;
while (argc > 0) {
DPRINTF(stderr,"init_gred: invoked with %s\n",*argv);
- if (strcmp(*argv, "DPs") == 0) {
+ if (strcmp(*argv, "vqs") == 0 ||
+ strcmp(*argv, "DPs") == 0) {
NEXT_ARG();
- DPRINTF(stderr,"init_gred: next_arg with %s\n",*argv);
- dps = strtol(*argv, (char **)NULL, 10);
- if (dps < 0 || dps >MAX_DPs) {
- fprintf(stderr, "DPs =%d\n", dps);
- fprintf(stderr, "Illegal \"DPs\"\n");
- fprintf(stderr, "GRED: only %d DPs are "
- "currently supported\n",MAX_DPs);
+ if (get_unsigned(&opt.DPs, *argv, 10)) {
+ fprintf(stderr, "Illegal \"vqs\"\n");
+ return -1;
+ } else if (opt.DPs > MAX_DPs) {
+ fprintf(stderr, "GRED: only %u VQs are "
+ "currently supported\n", MAX_DPs);
return -1;
}
} else if (strcmp(*argv, "default") == 0) {
- NEXT_ARG();
- def_dp = strtol(*argv, (char **)NULL, 10);
- if (dps == 0) {
- fprintf(stderr, "\"default DP\" must be "
- "defined after DPs\n");
+ if (opt.DPs == 0) {
+ fprintf(stderr, "\"default\" must be defined "
+ "after \"vqs\"\n");
return -1;
}
- if (def_dp < 0 || def_dp > dps) {
- fprintf(stderr,
- "\"default DP\" must be less than %d\n",
- opt.DPs);
+ NEXT_ARG();
+ if (get_unsigned(&opt.def_DP, *argv, 10)) {
+ fprintf(stderr, "Illegal \"default\"\n");
+ return -1;
+ } else if (opt.def_DP >= opt.DPs) {
+ fprintf(stderr, "\"default\" must be less than "
+ "\"vqs\"\n");
return -1;
}
} else if (strcmp(*argv, "grio") == 0) {
opt.grio = 1;
+ } else if (strcmp(*argv, "limit") == 0) {
+ NEXT_ARG();
+ if (get_size(&limit, *argv)) {
+ fprintf(stderr, "Illegal \"limit\"\n");
+ return -1;
+ }
} else if (strcmp(*argv, "help") == 0) {
explain();
return -1;
@@ -96,19 +101,18 @@ static int init_gred(struct qdisc_util *qu, int argc, char **argv,
argc--; argv++;
}
- if (!dps || def_dp == -1) {
+ if (!opt.DPs || opt.def_DP == MAX_DPs) {
fprintf(stderr, "Illegal gred setup parameters \n");
return -1;
}
- opt.DPs = dps;
- opt.def_DP = def_dp;
-
- DPRINTF("TC_GRED: sending DPs=%d default=%d\n",opt.DPs,opt.def_DP);
+ DPRINTF("TC_GRED: sending DPs=%u def_DP=%u\n",opt.DPs,opt.def_DP);
n->nlmsg_flags|=NLM_F_CREATE;
tail = NLMSG_TAIL(n);
addattr_l(n, 1024, TCA_OPTIONS, NULL, 0);
addattr_l(n, 1024, TCA_GRED_DPS, &opt, sizeof(struct tc_gred_sopt));
+ if (limit)
+ addattr32(n, 1024, TCA_GRED_LIMIT, limit);
tail->rta_len = (void *) NLMSG_TAIL(n) - (void *) tail;
return 0;
}
@@ -118,17 +122,17 @@ static int init_gred(struct qdisc_util *qu, int argc, char **argv,
static int gred_parse_opt(struct qdisc_util *qu, int argc, char **argv, struct nlmsghdr *n)
{
int ok=0;
- struct tc_gred_qopt opt;
+ struct tc_gred_qopt opt = { 0 };
unsigned burst = 0;
unsigned avpkt = 0;
double probability = 0.02;
unsigned rate = 0;
- int wlog;
+ int parm;
__u8 sbuf[256];
struct rtattr *tail;
__u32 max_P;
- memset(&opt, 0, sizeof(opt));
+ opt.DP = MAX_DPs;
while (argc > 0) {
if (strcmp(*argv, "limit") == 0) {
@@ -143,8 +147,7 @@ static int gred_parse_opt(struct qdisc_util *qu, int argc, char **argv, struct n
fprintf(stderr, "Illegal \"setup\"\n");
return -1;
}
- return init_gred(qu,argc-1, argv+1,n);
-
+ return init_gred(qu, argc-1, argv+1, n);
} else if (strcmp(*argv, "min") == 0) {
NEXT_ARG();
if (get_size(&opt.qth_min, *argv)) {
@@ -159,20 +162,21 @@ static int gred_parse_opt(struct qdisc_util *qu, int argc, char **argv, struct n
return -1;
}
ok++;
- } else if (strcmp(*argv, "DP") == 0) {
+ } else if (strcmp(*argv, "vq") == 0 ||
+ strcmp(*argv, "DP") == 0) {
NEXT_ARG();
- opt.DP=strtol(*argv, (char **)NULL, 10);
- DPRINTF ("\n ******* DP =%u\n",opt.DP);
- if (opt.DP >MAX_DPs) { /* need a better error check */
- fprintf(stderr, "DP =%u \n",opt.DP);
- fprintf(stderr, "Illegal \"DP\"\n");
- fprintf(stderr, "GRED: only %d DPs are currently supported\n",MAX_DPs);
+ if (get_unsigned(&opt.DP, *argv, 10)) {
+ fprintf(stderr, "Illegal \"vq\"\n");
return -1;
- }
+ } else if (opt.DP >= MAX_DPs) {
+ fprintf(stderr, "GRED: only %u VQs are "
+ "currently supported\n", MAX_DPs);
+ return -1;
+ } /* need a better error check */
ok++;
} else if (strcmp(*argv, "burst") == 0) {
NEXT_ARG();
- if (get_unsigned(&burst, *argv, 0)) {
+ if (get_unsigned(&burst, *argv, 0)) {
fprintf(stderr, "Illegal \"burst\"\n");
return -1;
}
@@ -214,40 +218,44 @@ static int gred_parse_opt(struct qdisc_util *qu, int argc, char **argv, struct n
argc--; argv++;
}
- if (rate == 0)
- get_rate(&rate, "10Mbit");
-
- if (!opt.qth_min || !opt.qth_max || !opt.limit || !avpkt ||
- (opt.DP<0)) {
- fprintf(stderr, "Required parameter (min, max, limit, "
- "avpkt, DP) is missing\n");
+ if (!ok) {
+ explain();
+ return -1;
+ }
+ if (opt.DP == MAX_DPs || !opt.limit || !opt.qth_min || !opt.qth_max ||
+ !avpkt) {
+ fprintf(stderr, "Required parameter (vq, limit, min, max, "
+ "avpkt) is missing\n");
return -1;
}
if (!burst) {
burst = (2 * opt.qth_min + opt.qth_max) / (3 * avpkt);
fprintf(stderr, "GRED: set burst to %u\n", burst);
}
-
- if ((wlog = tc_red_eval_ewma(opt.qth_min, burst, avpkt)) < 0) {
+ if (!rate) {
+ get_rate(&rate, "10Mbit");
+ fprintf(stderr, "GRED: set bandwidth to 10Mbit\n");
+ }
+ if ((parm = tc_red_eval_ewma(opt.qth_min, burst, avpkt)) < 0) {
fprintf(stderr, "GRED: failed to calculate EWMA constant.\n");
return -1;
}
- if (wlog >= 10)
- fprintf(stderr, "GRED: WARNING. Burst %d seems to be too "
+ if (parm >= 10)
+ fprintf(stderr, "GRED: WARNING. Burst %u seems to be too "
"large.\n", burst);
- opt.Wlog = wlog;
- if ((wlog = tc_red_eval_P(opt.qth_min, opt.qth_max, probability)) < 0) {
+ opt.Wlog = parm;
+ if ((parm = tc_red_eval_P(opt.qth_min, opt.qth_max, probability)) < 0) {
fprintf(stderr, "GRED: failed to calculate probability.\n");
return -1;
}
- opt.Plog = wlog;
- if ((wlog = tc_red_eval_idle_damping(opt.Wlog, avpkt, rate, sbuf)) < 0)
+ opt.Plog = parm;
+ if ((parm = tc_red_eval_idle_damping(opt.Wlog, avpkt, rate, sbuf)) < 0)
{
fprintf(stderr, "GRED: failed to calculate idle damping "
"table.\n");
return -1;
}
- opt.Scell_log = wlog;
+ opt.Scell_log = parm;
tail = NLMSG_TAIL(n);
addattr_l(n, 1024, TCA_OPTIONS, NULL, 0);
@@ -262,14 +270,14 @@ static int gred_parse_opt(struct qdisc_util *qu, int argc, char **argv, struct n
static int gred_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt)
{
struct rtattr *tb[TCA_GRED_MAX + 1];
+ struct tc_gred_sopt *sopt;
struct tc_gred_qopt *qopt;
__u32 *max_p = NULL;
- int i;
+ __u32 *limit = NULL;
+ unsigned i;
SPRINT_BUF(b1);
SPRINT_BUF(b2);
SPRINT_BUF(b3);
- SPRINT_BUF(b4);
- SPRINT_BUF(b5);
if (opt == NULL)
return 0;
@@ -283,40 +291,58 @@ static int gred_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt)
RTA_PAYLOAD(tb[TCA_GRED_MAX_P]) >= sizeof(__u32) * MAX_DPs)
max_p = RTA_DATA(tb[TCA_GRED_MAX_P]);
+ if (tb[TCA_GRED_LIMIT] &&
+ RTA_PAYLOAD(tb[TCA_GRED_LIMIT]) == sizeof(__u32))
+ limit = RTA_DATA(tb[TCA_GRED_LIMIT]);
+
+ sopt = RTA_DATA(tb[TCA_GRED_DPS]);
qopt = RTA_DATA(tb[TCA_GRED_PARMS]);
- if (RTA_PAYLOAD(tb[TCA_GRED_PARMS]) < sizeof(*qopt)*MAX_DPs) {
+ if (RTA_PAYLOAD(tb[TCA_GRED_DPS]) < sizeof(*sopt) ||
+ RTA_PAYLOAD(tb[TCA_GRED_PARMS]) < sizeof(*qopt)*MAX_DPs) {
fprintf(f,"\n GRED received message smaller than expected\n");
return -1;
- }
+ }
/* Bad hack! should really return a proper message as shown above*/
+ fprintf(f, "vqs %u default %u %s",
+ sopt->DPs,
+ sopt->def_DP,
+ sopt->grio ? "grio " : "");
+
+ if (limit)
+ fprintf(f, "limit %s ",
+ sprint_size(*limit, b1));
+
for (i=0;i<MAX_DPs;i++, qopt++) {
if (qopt->DP >= MAX_DPs) continue;
- fprintf(f, "\n DP:%d (prio %d) Average Queue %s Measured "
- "Queue %s ",
+ fprintf(f, "\n vq %u prio %hhu limit %s min %s max %s ",
qopt->DP,
qopt->prio,
- sprint_size(qopt->qave, b4),
- sprint_size(qopt->backlog, b5));
- fprintf(f, "\n\t Packet drops: %d (forced %d early %d) ",
- qopt->forced+qopt->early,
- qopt->forced,
- qopt->early);
- fprintf(f, "\n\t Packet totals: %u (bytes %u) ",
- qopt->packets,
- qopt->bytesin);
- if (show_details)
- fprintf(f, "\n limit %s min %s max %s ",
- sprint_size(qopt->limit, b1),
- sprint_size(qopt->qth_min, b2),
- sprint_size(qopt->qth_max, b3));
- fprintf(f, "ewma %u ", qopt->Wlog);
- if (max_p)
- fprintf(f, "probability %lg ", max_p[i] / pow(2, 32));
- else
- fprintf(f, "Plog %u ", qopt->Plog);
- fprintf(f, "Scell_log %u", qopt->Scell_log);
+ sprint_size(qopt->limit, b1),
+ sprint_size(qopt->qth_min, b2),
+ sprint_size(qopt->qth_max, b3));
+ if (show_details) {
+ fprintf(f, "ewma %u ", qopt->Wlog);
+ if (max_p)
+ fprintf(f, "probability %lg ", max_p[i] / pow(2, 32));
+ else
+ fprintf(f, "Plog %u ", qopt->Plog);
+ fprintf(f, "Scell_log %u ", qopt->Scell_log);
+ }
+ if (show_stats) {
+ fprintf(f, "\n Queue size: average %s current %s ",
+ sprint_size(qopt->qave, b1),
+ sprint_size(qopt->backlog, b2));
+ fprintf(f, "\n Dropped packets: forced %u early %u pdrop %u other %u ",
+ qopt->forced,
+ qopt->early,
+ qopt->pdrop,
+ qopt->other);
+ fprintf(f, "\n Total packets: %u (%s) ",
+ qopt->packets,
+ sprint_size(qopt->bytesin, b1));
+ }
}
return 0;
}
diff --git a/tc/q_ingress.c b/tc/q_ingress.c
index ba58e722..c3c9b403 100644
--- a/tc/q_ingress.c
+++ b/tc/q_ingress.c
@@ -1,5 +1,4 @@
/*
- *
* q_ingress.c INGRESS.
*
* This program is free software; you can redistribute it and/or
@@ -8,20 +7,9 @@
* 2 of the License, or (at your option) any later version.
*
* Authors: J Hadi Salim
- *
- * This is here just in case it is needed
- * useless right now; might be useful in the future
- *
*/
#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <syslog.h>
-#include <fcntl.h>
-#include <sys/socket.h>
-#include <netinet/in.h>
-#include <arpa/inet.h>
#include <string.h>
#include "utils.h"
@@ -29,23 +17,20 @@
static void explain(void)
{
- fprintf(stderr, "Usage: ... ingress \n");
+ fprintf(stderr, "Usage: ... ingress\n");
}
-static int ingress_parse_opt(struct qdisc_util *qu, int argc, char **argv, struct nlmsghdr *n)
+static int ingress_parse_opt(struct qdisc_util *qu, int argc, char **argv,
+ struct nlmsghdr *n)
{
-
- if (argc > 0) {
- while (argc > 0) {
-
- if (strcmp(*argv, "handle") == 0) {
- NEXT_ARG();
- argc--; argv++;
- } else {
- fprintf(stderr, "What is \"%s\"?\n", *argv);
- explain();
- return -1;
- }
+ while (argc > 0) {
+ if (strcmp(*argv, "handle") == 0) {
+ NEXT_ARG();
+ argc--; argv++;
+ } else {
+ fprintf(stderr, "What is \"%s\"?\n", *argv);
+ explain();
+ return -1;
}
}
@@ -53,10 +38,10 @@ static int ingress_parse_opt(struct qdisc_util *qu, int argc, char **argv, struc
return 0;
}
-static int ingress_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt)
+static int ingress_print_opt(struct qdisc_util *qu, FILE *f,
+ struct rtattr *opt)
{
-
- fprintf(f, "---------------- ");
+ fprintf(f, "---------------- ");
return 0;
}
diff --git a/tc/q_netem.c b/tc/q_netem.c
index cd990a0a..7bc8c6a5 100644
--- a/tc/q_netem.c
+++ b/tc/q_netem.c
@@ -688,4 +688,3 @@ struct qdisc_util netem_qdisc_util = {
.parse_qopt = netem_parse_opt,
.print_qopt = netem_print_opt,
};
-
diff --git a/tc/q_prio.c b/tc/q_prio.c
index bacc7024..3236bec1 100644
--- a/tc/q_prio.c
+++ b/tc/q_prio.c
@@ -122,4 +122,3 @@ struct qdisc_util prio_qdisc_util = {
.parse_qopt = prio_parse_opt,
.print_qopt = prio_print_opt,
};
-
diff --git a/tc/q_red.c b/tc/q_red.c
index 89e73201..abd86c7b 100644
--- a/tc/q_red.c
+++ b/tc/q_red.c
@@ -29,7 +29,7 @@
static void explain(void)
{
fprintf(stderr, "Usage: ... red limit BYTES [min BYTES] [max BYTES] avpkt BYTES [burst PACKETS]\n");
- fprintf(stderr, " [adaptive] [probability PROBABILITY] bandwidth KBPS\n");
+ fprintf(stderr, " [adaptive] [probability PROBABILITY] [bandwidth KBPS]\n");
fprintf(stderr, " [ecn] [harddrop]\n");
}
@@ -40,7 +40,7 @@ static int red_parse_opt(struct qdisc_util *qu, int argc, char **argv, struct nl
unsigned avpkt = 0;
double probability = 0.02;
unsigned rate = 0;
- int wlog;
+ int parm;
__u8 sbuf[256];
__u32 max_P;
struct rtattr *tail;
@@ -109,9 +109,6 @@ static int red_parse_opt(struct qdisc_util *qu, int argc, char **argv, struct nl
argc--; argv++;
}
- if (rate == 0)
- get_rate(&rate, "10Mbit");
-
if (!opt.limit || !avpkt) {
fprintf(stderr, "RED: Required parameter (limit, avpkt) is missing\n");
return -1;
@@ -126,23 +123,27 @@ static int red_parse_opt(struct qdisc_util *qu, int argc, char **argv, struct nl
opt.qth_min = opt.qth_max / 3;
if (!burst)
burst = (2 * opt.qth_min + opt.qth_max) / (3 * avpkt);
- if ((wlog = tc_red_eval_ewma(opt.qth_min, burst, avpkt)) < 0) {
+ if (!rate) {
+ get_rate(&rate, "10Mbit");
+ fprintf(stderr, "RED: set bandwidth to 10Mbit\n");
+ }
+ if ((parm = tc_red_eval_ewma(opt.qth_min, burst, avpkt)) < 0) {
fprintf(stderr, "RED: failed to calculate EWMA constant.\n");
return -1;
}
- if (wlog >= 10)
- fprintf(stderr, "RED: WARNING. Burst %d seems to be too large.\n", burst);
- opt.Wlog = wlog;
- if ((wlog = tc_red_eval_P(opt.qth_min, opt.qth_max, probability)) < 0) {
+ if (parm >= 10)
+ fprintf(stderr, "RED: WARNING. Burst %u seems to be too large.\n", burst);
+ opt.Wlog = parm;
+ if ((parm = tc_red_eval_P(opt.qth_min, opt.qth_max, probability)) < 0) {
fprintf(stderr, "RED: failed to calculate probability.\n");
return -1;
}
- opt.Plog = wlog;
- if ((wlog = tc_red_eval_idle_damping(opt.Wlog, avpkt, rate, sbuf)) < 0) {
+ opt.Plog = parm;
+ if ((parm = tc_red_eval_idle_damping(opt.Wlog, avpkt, rate, sbuf)) < 0) {
fprintf(stderr, "RED: failed to calculate idle damping table.\n");
return -1;
}
- opt.Scell_log = wlog;
+ opt.Scell_log = parm;
tail = NLMSG_TAIL(n);
addattr_l(n, 1024, TCA_OPTIONS, NULL, 0);
diff --git a/tc/q_tbf.c b/tc/q_tbf.c
index 2d563311..0981e6f7 100644
--- a/tc/q_tbf.c
+++ b/tc/q_tbf.c
@@ -328,4 +328,3 @@ struct qdisc_util tbf_qdisc_util = {
.parse_qopt = tbf_parse_opt,
.print_qopt = tbf_print_opt,
};
-
diff --git a/tc/tc.c b/tc/tc.c
index e2e75afd..e1d4bc32 100644
--- a/tc/tc.c
+++ b/tc/tc.c
@@ -36,6 +36,7 @@ int show_details = 0;
int show_raw = 0;
int show_pretty = 0;
int show_graph = 0;
+int timestamp;
int batch_mode = 0;
int resolve_hosts = 0;
@@ -221,7 +222,7 @@ static void usage(void)
#else
" tc [-force] -batch filename\n"
#endif
- "where OBJECT := { qdisc | class | filter | action | monitor }\n"
+ "where OBJECT := { qdisc | class | filter | action | monitor | exec }\n"
" OPTIONS := { -s[tatistics] | -d[etails] | -r[aw] | -p[retty] | -b[atch] [filename] | "
"-n[etns] name |\n"
" -nm | -nam[es] | { -cf | -conf } path }\n");
@@ -231,19 +232,16 @@ static int do_cmd(int argc, char **argv)
{
if (matches(*argv, "qdisc") == 0)
return do_qdisc(argc-1, argv+1);
-
if (matches(*argv, "class") == 0)
return do_class(argc-1, argv+1);
-
if (matches(*argv, "filter") == 0)
return do_filter(argc-1, argv+1);
-
if (matches(*argv, "actions") == 0)
return do_action(argc-1, argv+1);
-
if (matches(*argv, "monitor") == 0)
return do_tcmonitor(argc-1, argv+1);
-
+ if (matches(*argv, "exec") == 0)
+ return do_exec(argc-1, argv+1);
if (matches(*argv, "help") == 0) {
usage();
return 0;
@@ -351,6 +349,11 @@ int main(int argc, char **argv)
matches(argv[1], "-conf") == 0) {
NEXT_ARG();
conf_file = argv[1];
+ } else if (matches(argv[1], "-timestamp") == 0) {
+ timestamp++;
+ } else if (matches(argv[1], "-tshort") == 0) {
+ ++timestamp;
+ ++timestamp_short;
} else {
fprintf(stderr, "Option \"%s\" is unknown, try \"tc -help\".\n", argv[1]);
return -1;
diff --git a/tc/tc_bpf.c b/tc/tc_bpf.c
index c6901d6c..42c88418 100644
--- a/tc/tc_bpf.c
+++ b/tc/tc_bpf.c
@@ -8,6 +8,7 @@
*
* Authors: Daniel Borkmann <dborkman@redhat.com>
* Jiri Pirko <jiri@resnulli.us>
+ * Alexei Starovoitov <ast@plumgrid.com>
*/
#include <stdio.h>
@@ -15,18 +16,84 @@
#include <unistd.h>
#include <string.h>
#include <stdbool.h>
+#include <stdint.h>
#include <errno.h>
+#include <fcntl.h>
+#include <stdarg.h>
+
+#ifdef HAVE_ELF
+#include <libelf.h>
+#include <gelf.h>
+#endif
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/un.h>
+#include <sys/vfs.h>
+#include <sys/mount.h>
+#include <sys/syscall.h>
+#include <sys/sendfile.h>
+#include <sys/resource.h>
+
+#include <linux/bpf.h>
#include <linux/filter.h>
-#include <linux/netlink.h>
-#include <linux/rtnetlink.h>
+#include <linux/if_alg.h>
+
+#include <arpa/inet.h>
#include "utils.h"
+
+#include "bpf_elf.h"
+#include "bpf_scm.h"
+
#include "tc_util.h"
#include "tc_bpf.h"
-int bpf_parse_string(char *arg, bool from_file, __u16 *bpf_len,
- char **bpf_string, bool *need_release,
- const char separator)
+#ifdef HAVE_ELF
+static int bpf_obj_open(const char *path, enum bpf_prog_type type,
+ const char *sec, bool verbose);
+#else
+static int bpf_obj_open(const char *path, enum bpf_prog_type type,
+ const char *sec, bool verbose)
+{
+ fprintf(stderr, "No ELF library support compiled in.\n");
+ errno = ENOSYS;
+ return -1;
+}
+#endif
+
+static inline __u64 bpf_ptr_to_u64(const void *ptr)
+{
+ return (__u64)(unsigned long)ptr;
+}
+
+static int bpf(int cmd, union bpf_attr *attr, unsigned int size)
+{
+#ifdef __NR_bpf
+ return syscall(__NR_bpf, cmd, attr, size);
+#else
+ fprintf(stderr, "No bpf syscall, kernel headers too old?\n");
+ errno = ENOSYS;
+ return -1;
+#endif
+}
+
+static int bpf_map_update(int fd, const void *key, const void *value,
+ uint64_t flags)
+{
+ union bpf_attr attr = {
+ .map_fd = fd,
+ .key = bpf_ptr_to_u64(key),
+ .value = bpf_ptr_to_u64(value),
+ .flags = flags,
+ };
+
+ return bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
+}
+
+static int bpf_parse_string(char *arg, bool from_file, __u16 *bpf_len,
+ char **bpf_string, bool *need_release,
+ const char separator)
{
char sp;
@@ -74,8 +141,8 @@ int bpf_parse_string(char *arg, bool from_file, __u16 *bpf_len,
return 0;
}
-int bpf_parse_ops(int argc, char **argv, struct sock_filter *bpf_ops,
- bool from_file)
+static int bpf_ops_parse(int argc, char **argv, struct sock_filter *bpf_ops,
+ bool from_file)
{
char *bpf_string, *token, separator = ',';
int ret = 0, i = 0;
@@ -119,7 +186,6 @@ int bpf_parse_ops(int argc, char **argv, struct sock_filter *bpf_ops,
goto out;
}
ret = bpf_len;
-
out:
if (need_release)
free(bpf_string);
@@ -141,6 +207,1686 @@ void bpf_print_ops(FILE *f, struct rtattr *bpf_ops, __u16 len)
fprintf(f, "%hu %hhu %hhu %u,", ops[i].code, ops[i].jt,
ops[i].jf, ops[i].k);
- fprintf(f, "%hu %hhu %hhu %u\'\n", ops[i].code, ops[i].jt,
+ fprintf(f, "%hu %hhu %hhu %u\'", ops[i].code, ops[i].jt,
ops[i].jf, ops[i].k);
}
+
+static int bpf_map_selfcheck_pinned(int fd, const struct bpf_elf_map *map,
+ int length)
+{
+ char file[PATH_MAX], buff[4096];
+ struct bpf_elf_map tmp, zero;
+ unsigned int val;
+ FILE *fp;
+
+ snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd);
+
+ fp = fopen(file, "r");
+ if (!fp) {
+ fprintf(stderr, "No procfs support?!\n");
+ return -EIO;
+ }
+
+ memset(&tmp, 0, sizeof(tmp));
+ while (fgets(buff, sizeof(buff), fp)) {
+ if (sscanf(buff, "map_type:\t%u", &val) == 1)
+ tmp.type = val;
+ else if (sscanf(buff, "key_size:\t%u", &val) == 1)
+ tmp.size_key = val;
+ else if (sscanf(buff, "value_size:\t%u", &val) == 1)
+ tmp.size_value = val;
+ else if (sscanf(buff, "max_entries:\t%u", &val) == 1)
+ tmp.max_elem = val;
+ }
+
+ fclose(fp);
+
+ if (!memcmp(&tmp, map, length)) {
+ return 0;
+ } else {
+ memset(&zero, 0, sizeof(zero));
+ /* If kernel doesn't have eBPF-related fdinfo, we cannot do much,
+ * so just accept it. We know we do have an eBPF fd and in this
+ * case, everything is 0. It is guaranteed that no such map exists
+ * since map type of 0 is unloadable BPF_MAP_TYPE_UNSPEC.
+ */
+ if (!memcmp(&tmp, &zero, length))
+ return 0;
+
+ fprintf(stderr, "Map specs from pinned file differ!\n");
+ return -EINVAL;
+ }
+}
+
+static int bpf_mnt_fs(const char *target)
+{
+ bool bind_done = false;
+
+ while (mount("", target, "none", MS_PRIVATE | MS_REC, NULL)) {
+ if (errno != EINVAL || bind_done) {
+ fprintf(stderr, "mount --make-private %s failed: %s\n",
+ target, strerror(errno));
+ return -1;
+ }
+
+ if (mount(target, target, "none", MS_BIND, NULL)) {
+ fprintf(stderr, "mount --bind %s %s failed: %s\n",
+ target, target, strerror(errno));
+ return -1;
+ }
+
+ bind_done = true;
+ }
+
+ if (mount("bpf", target, "bpf", 0, NULL)) {
+ fprintf(stderr, "mount -t bpf bpf %s failed: %s\n",
+ target, strerror(errno));
+ return -1;
+ }
+
+ return 0;
+}
+
+static int bpf_valid_mntpt(const char *mnt, unsigned long magic)
+{
+ struct statfs st_fs;
+
+ if (statfs(mnt, &st_fs) < 0)
+ return -ENOENT;
+ if ((unsigned long)st_fs.f_type != magic)
+ return -ENOENT;
+
+ return 0;
+}
+
+static const char *bpf_find_mntpt(const char *fstype, unsigned long magic,
+ char *mnt, int len,
+ const char * const *known_mnts)
+{
+ const char * const *ptr;
+ char type[100];
+ FILE *fp;
+
+ if (known_mnts) {
+ ptr = known_mnts;
+ while (*ptr) {
+ if (bpf_valid_mntpt(*ptr, magic) == 0) {
+ strncpy(mnt, *ptr, len - 1);
+ mnt[len - 1] = 0;
+ return mnt;
+ }
+ ptr++;
+ }
+ }
+
+ fp = fopen("/proc/mounts", "r");
+ if (fp == NULL || len != PATH_MAX)
+ return NULL;
+
+ while (fscanf(fp, "%*s %" textify(PATH_MAX) "s %99s %*s %*d %*d\n",
+ mnt, type) == 2) {
+ if (strcmp(type, fstype) == 0)
+ break;
+ }
+
+ fclose(fp);
+ if (strcmp(type, fstype) != 0)
+ return NULL;
+
+ return mnt;
+}
+
+int bpf_trace_pipe(void)
+{
+ char tracefs_mnt[PATH_MAX] = TRACE_DIR_MNT;
+ static const char * const tracefs_known_mnts[] = {
+ TRACE_DIR_MNT,
+ "/sys/kernel/debug/tracing",
+ "/tracing",
+ "/trace",
+ 0,
+ };
+ char tpipe[PATH_MAX];
+ const char *mnt;
+ int fd;
+
+ mnt = bpf_find_mntpt("tracefs", TRACEFS_MAGIC, tracefs_mnt,
+ sizeof(tracefs_mnt), tracefs_known_mnts);
+ if (!mnt) {
+ fprintf(stderr, "tracefs not mounted?\n");
+ return -1;
+ }
+
+ snprintf(tpipe, sizeof(tpipe), "%s/trace_pipe", mnt);
+
+ fd = open(tpipe, O_RDONLY);
+ if (fd < 0)
+ return -1;
+
+ fprintf(stderr, "Running! Hang up with ^C!\n\n");
+ while (1) {
+ static char buff[4096];
+ ssize_t ret;
+
+ ret = read(fd, buff, sizeof(buff) - 1);
+ if (ret > 0) {
+ write(2, buff, ret);
+ fflush(stderr);
+ }
+ }
+
+ return 0;
+}
+
+static const char *bpf_get_tc_dir(void)
+{
+ static bool bpf_mnt_cached = false;
+ static char bpf_tc_dir[PATH_MAX];
+ static const char *mnt;
+ static const char * const bpf_known_mnts[] = {
+ BPF_DIR_MNT,
+ 0,
+ };
+ char bpf_mnt[PATH_MAX] = BPF_DIR_MNT;
+ char bpf_glo_dir[PATH_MAX];
+ int ret;
+
+ if (bpf_mnt_cached)
+ goto done;
+
+ mnt = bpf_find_mntpt("bpf", BPF_FS_MAGIC, bpf_mnt, sizeof(bpf_mnt),
+ bpf_known_mnts);
+ if (!mnt) {
+ mnt = getenv(BPF_ENV_MNT);
+ if (!mnt)
+ mnt = BPF_DIR_MNT;
+ ret = bpf_mnt_fs(mnt);
+ if (ret) {
+ mnt = NULL;
+ goto out;
+ }
+ }
+
+ snprintf(bpf_tc_dir, sizeof(bpf_tc_dir), "%s/%s", mnt, BPF_DIR_TC);
+ ret = mkdir(bpf_tc_dir, S_IRWXU);
+ if (ret && errno != EEXIST) {
+ fprintf(stderr, "mkdir %s failed: %s\n", bpf_tc_dir,
+ strerror(errno));
+ mnt = NULL;
+ goto out;
+ }
+
+ snprintf(bpf_glo_dir, sizeof(bpf_glo_dir), "%s/%s",
+ bpf_tc_dir, BPF_DIR_GLOBALS);
+ ret = mkdir(bpf_glo_dir, S_IRWXU);
+ if (ret && errno != EEXIST) {
+ fprintf(stderr, "mkdir %s failed: %s\n", bpf_glo_dir,
+ strerror(errno));
+ mnt = NULL;
+ goto out;
+ }
+
+ mnt = bpf_tc_dir;
+out:
+ bpf_mnt_cached = true;
+done:
+ return mnt;
+}
+
+static int bpf_obj_get(const char *pathname)
+{
+ union bpf_attr attr;
+ char tmp[PATH_MAX];
+
+ if (strlen(pathname) > 2 && pathname[0] == 'm' &&
+ pathname[1] == ':' && bpf_get_tc_dir()) {
+ snprintf(tmp, sizeof(tmp), "%s/%s",
+ bpf_get_tc_dir(), pathname + 2);
+ pathname = tmp;
+ }
+
+ memset(&attr, 0, sizeof(attr));
+ attr.pathname = bpf_ptr_to_u64(pathname);
+
+ return bpf(BPF_OBJ_GET, &attr, sizeof(attr));
+}
+
+const char *bpf_default_section(const enum bpf_prog_type type)
+{
+ switch (type) {
+ case BPF_PROG_TYPE_SCHED_CLS:
+ return ELF_SECTION_CLASSIFIER;
+ case BPF_PROG_TYPE_SCHED_ACT:
+ return ELF_SECTION_ACTION;
+ default:
+ return NULL;
+ }
+}
+
+enum bpf_mode {
+ CBPF_BYTECODE = 0,
+ CBPF_FILE,
+ EBPF_OBJECT,
+ EBPF_PINNED,
+ __BPF_MODE_MAX,
+#define BPF_MODE_MAX __BPF_MODE_MAX
+};
+
+static int bpf_parse(int *ptr_argc, char ***ptr_argv, const bool *opt_tbl,
+ enum bpf_prog_type *type, enum bpf_mode *mode,
+ const char **ptr_object, const char **ptr_section,
+ const char **ptr_uds_name, struct sock_filter *opcodes)
+{
+ const char *file, *section, *uds_name;
+ bool verbose = false;
+ int ret, argc;
+ char **argv;
+
+ argv = *ptr_argv;
+ argc = *ptr_argc;
+
+ if (opt_tbl[CBPF_BYTECODE] &&
+ (matches(*argv, "bytecode") == 0 ||
+ strcmp(*argv, "bc") == 0)) {
+ *mode = CBPF_BYTECODE;
+ } else if (opt_tbl[CBPF_FILE] &&
+ (matches(*argv, "bytecode-file") == 0 ||
+ strcmp(*argv, "bcf") == 0)) {
+ *mode = CBPF_FILE;
+ } else if (opt_tbl[EBPF_OBJECT] &&
+ (matches(*argv, "object-file") == 0 ||
+ strcmp(*argv, "obj") == 0)) {
+ *mode = EBPF_OBJECT;
+ } else if (opt_tbl[EBPF_PINNED] &&
+ (matches(*argv, "object-pinned") == 0 ||
+ matches(*argv, "pinned") == 0 ||
+ matches(*argv, "fd") == 0)) {
+ *mode = EBPF_PINNED;
+ } else {
+ fprintf(stderr, "What mode is \"%s\"?\n", *argv);
+ return -1;
+ }
+
+ NEXT_ARG();
+ file = section = uds_name = NULL;
+ if (*mode == EBPF_OBJECT || *mode == EBPF_PINNED) {
+ file = *argv;
+ NEXT_ARG_FWD();
+
+ if (*type == BPF_PROG_TYPE_UNSPEC) {
+ if (argc > 0 && matches(*argv, "type") == 0) {
+ NEXT_ARG();
+ if (matches(*argv, "cls") == 0) {
+ *type = BPF_PROG_TYPE_SCHED_CLS;
+ } else if (matches(*argv, "act") == 0) {
+ *type = BPF_PROG_TYPE_SCHED_ACT;
+ } else {
+ fprintf(stderr, "What type is \"%s\"?\n",
+ *argv);
+ return -1;
+ }
+ NEXT_ARG_FWD();
+ } else {
+ *type = BPF_PROG_TYPE_SCHED_CLS;
+ }
+ }
+
+ section = bpf_default_section(*type);
+ if (argc > 0 && matches(*argv, "section") == 0) {
+ NEXT_ARG();
+ section = *argv;
+ NEXT_ARG_FWD();
+ }
+
+ uds_name = getenv(BPF_ENV_UDS);
+ if (argc > 0 && !uds_name &&
+ matches(*argv, "export") == 0) {
+ NEXT_ARG();
+ uds_name = *argv;
+ NEXT_ARG_FWD();
+ }
+
+ if (argc > 0 && matches(*argv, "verbose") == 0) {
+ verbose = true;
+ NEXT_ARG_FWD();
+ }
+
+ PREV_ARG();
+ }
+
+ if (*mode == CBPF_BYTECODE || *mode == CBPF_FILE)
+ ret = bpf_ops_parse(argc, argv, opcodes, *mode == CBPF_FILE);
+ else if (*mode == EBPF_OBJECT)
+ ret = bpf_obj_open(file, *type, section, verbose);
+ else if (*mode == EBPF_PINNED)
+ ret = bpf_obj_get(file);
+ else
+ return -1;
+
+ if (ptr_object)
+ *ptr_object = file;
+ if (ptr_section)
+ *ptr_section = section;
+ if (ptr_uds_name)
+ *ptr_uds_name = uds_name;
+
+ *ptr_argc = argc;
+ *ptr_argv = argv;
+
+ return ret;
+}
+
+int bpf_parse_common(int *ptr_argc, char ***ptr_argv, const int *nla_tbl,
+ enum bpf_prog_type type, const char **ptr_object,
+ const char **ptr_uds_name, struct nlmsghdr *n)
+{
+ struct sock_filter opcodes[BPF_MAXINSNS];
+ const bool opt_tbl[BPF_MODE_MAX] = {
+ [CBPF_BYTECODE] = true,
+ [CBPF_FILE] = true,
+ [EBPF_OBJECT] = true,
+ [EBPF_PINNED] = true,
+ };
+ char annotation[256];
+ const char *section;
+ enum bpf_mode mode;
+ int ret;
+
+ ret = bpf_parse(ptr_argc, ptr_argv, opt_tbl, &type, &mode,
+ ptr_object, &section, ptr_uds_name, opcodes);
+ if (ret < 0)
+ return ret;
+
+ if (mode == CBPF_BYTECODE || mode == CBPF_FILE) {
+ addattr16(n, MAX_MSG, nla_tbl[BPF_NLA_OPS_LEN], ret);
+ addattr_l(n, MAX_MSG, nla_tbl[BPF_NLA_OPS], opcodes,
+ ret * sizeof(struct sock_filter));
+ }
+
+ if (mode == EBPF_OBJECT || mode == EBPF_PINNED) {
+ snprintf(annotation, sizeof(annotation), "%s:[%s]",
+ basename(*ptr_object), mode == EBPF_PINNED ?
+ "*fsobj" : section);
+
+ addattr32(n, MAX_MSG, nla_tbl[BPF_NLA_FD], ret);
+ addattrstrz(n, MAX_MSG, nla_tbl[BPF_NLA_NAME], annotation);
+ }
+
+ return 0;
+}
+
+int bpf_graft_map(const char *map_path, uint32_t *key, int argc, char **argv)
+{
+ enum bpf_prog_type type = BPF_PROG_TYPE_UNSPEC;
+ const bool opt_tbl[BPF_MODE_MAX] = {
+ [CBPF_BYTECODE] = false,
+ [CBPF_FILE] = false,
+ [EBPF_OBJECT] = true,
+ [EBPF_PINNED] = true,
+ };
+ const struct bpf_elf_map test = {
+ .type = BPF_MAP_TYPE_PROG_ARRAY,
+ .size_key = sizeof(int),
+ .size_value = sizeof(int),
+ };
+ int ret, prog_fd, map_fd;
+ const char *section;
+ enum bpf_mode mode;
+ uint32_t map_key;
+
+ prog_fd = bpf_parse(&argc, &argv, opt_tbl, &type, &mode,
+ NULL, &section, NULL, NULL);
+ if (prog_fd < 0)
+ return prog_fd;
+ if (key) {
+ map_key = *key;
+ } else {
+ ret = sscanf(section, "%*i/%i", &map_key);
+ if (ret != 1) {
+ fprintf(stderr, "Couldn\'t infer map key from section "
+ "name! Please provide \'key\' argument!\n");
+ ret = -EINVAL;
+ goto out_prog;
+ }
+ }
+
+ map_fd = bpf_obj_get(map_path);
+ if (map_fd < 0) {
+ fprintf(stderr, "Couldn\'t retrieve pinned map \'%s\': %s\n",
+ map_path, strerror(errno));
+ ret = map_fd;
+ goto out_prog;
+ }
+
+ ret = bpf_map_selfcheck_pinned(map_fd, &test,
+ offsetof(struct bpf_elf_map, max_elem));
+ if (ret < 0) {
+ fprintf(stderr, "Map \'%s\' self-check failed!\n", map_path);
+ goto out_map;
+ }
+
+ ret = bpf_map_update(map_fd, &map_key, &prog_fd, BPF_ANY);
+ if (ret < 0)
+ fprintf(stderr, "Map update failed: %s\n", strerror(errno));
+out_map:
+ close(map_fd);
+out_prog:
+ close(prog_fd);
+ return ret;
+}
+
+#ifdef HAVE_ELF
+struct bpf_elf_prog {
+ enum bpf_prog_type type;
+ const struct bpf_insn *insns;
+ size_t size;
+ const char *license;
+};
+
+struct bpf_hash_entry {
+ unsigned int pinning;
+ const char *subpath;
+ struct bpf_hash_entry *next;
+};
+
+struct bpf_elf_ctx {
+ Elf *elf_fd;
+ GElf_Ehdr elf_hdr;
+ Elf_Data *sym_tab;
+ Elf_Data *str_tab;
+ int obj_fd;
+ int map_fds[ELF_MAX_MAPS];
+ struct bpf_elf_map maps[ELF_MAX_MAPS];
+ int sym_num;
+ int map_num;
+ bool *sec_done;
+ int sec_maps;
+ char license[ELF_MAX_LICENSE_LEN];
+ enum bpf_prog_type type;
+ bool verbose;
+ struct bpf_elf_st stat;
+ struct bpf_hash_entry *ht[256];
+};
+
+struct bpf_elf_sec_data {
+ GElf_Shdr sec_hdr;
+ Elf_Data *sec_data;
+ const char *sec_name;
+};
+
+struct bpf_map_data {
+ int *fds;
+ const char *obj;
+ struct bpf_elf_st *st;
+ struct bpf_elf_map *ent;
+};
+
+/* If we provide a small buffer with log level enabled, the kernel
+ * could fail program load as no buffer space is available for the
+ * log and thus verifier fails. In case something doesn't pass the
+ * verifier we still want to hand something descriptive to the user.
+ */
+static char bpf_log_buf[65536];
+
+static __check_format_string(1, 2) void bpf_dump_error(const char *format, ...)
+{
+ va_list vl;
+
+ va_start(vl, format);
+ vfprintf(stderr, format, vl);
+ va_end(vl);
+
+ if (bpf_log_buf[0]) {
+ fprintf(stderr, "%s\n", bpf_log_buf);
+ memset(bpf_log_buf, 0, sizeof(bpf_log_buf));
+ }
+}
+
+static int bpf_map_create(enum bpf_map_type type, unsigned int size_key,
+ unsigned int size_value, unsigned int max_elem)
+{
+ union bpf_attr attr = {
+ .map_type = type,
+ .key_size = size_key,
+ .value_size = size_value,
+ .max_entries = max_elem,
+ };
+
+ return bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
+}
+
+static int bpf_prog_load(enum bpf_prog_type type, const struct bpf_insn *insns,
+ size_t size, const char *license)
+{
+ union bpf_attr attr = {
+ .prog_type = type,
+ .insns = bpf_ptr_to_u64(insns),
+ .insn_cnt = size / sizeof(struct bpf_insn),
+ .license = bpf_ptr_to_u64(license),
+ .log_buf = bpf_ptr_to_u64(bpf_log_buf),
+ .log_size = sizeof(bpf_log_buf),
+ .log_level = 1,
+ };
+
+ if (getenv(BPF_ENV_NOLOG)) {
+ attr.log_buf = 0;
+ attr.log_size = 0;
+ attr.log_level = 0;
+ }
+
+ return bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
+}
+
+static int bpf_obj_pin(int fd, const char *pathname)
+{
+ union bpf_attr attr = {
+ .pathname = bpf_ptr_to_u64(pathname),
+ .bpf_fd = fd,
+ };
+
+ return bpf(BPF_OBJ_PIN, &attr, sizeof(attr));
+}
+
+static int bpf_obj_hash(const char *object, uint8_t *out, size_t len)
+{
+ struct sockaddr_alg alg = {
+ .salg_family = AF_ALG,
+ .salg_type = "hash",
+ .salg_name = "sha1",
+ };
+ int ret, cfd, ofd, ffd;
+ struct stat stbuff;
+ ssize_t size;
+
+ if (!object || len != 20)
+ return -EINVAL;
+
+ cfd = socket(AF_ALG, SOCK_SEQPACKET, 0);
+ if (cfd < 0) {
+ fprintf(stderr, "Cannot get AF_ALG socket: %s\n",
+ strerror(errno));
+ return cfd;
+ }
+
+ ret = bind(cfd, (struct sockaddr *)&alg, sizeof(alg));
+ if (ret < 0) {
+ fprintf(stderr, "Error binding socket: %s\n", strerror(errno));
+ goto out_cfd;
+ }
+
+ ofd = accept(cfd, NULL, 0);
+ if (ofd < 0) {
+ fprintf(stderr, "Error accepting socket: %s\n",
+ strerror(errno));
+ ret = ofd;
+ goto out_cfd;
+ }
+
+ ffd = open(object, O_RDONLY);
+ if (ffd < 0) {
+ fprintf(stderr, "Error opening object %s: %s\n",
+ object, strerror(errno));
+ ret = ffd;
+ goto out_ofd;
+ }
+
+ ret = fstat(ffd, &stbuff);
+ if (ret < 0) {
+ fprintf(stderr, "Error doing fstat: %s\n",
+ strerror(errno));
+ goto out_ffd;
+ }
+
+ size = sendfile(ofd, ffd, NULL, stbuff.st_size);
+ if (size != stbuff.st_size) {
+ fprintf(stderr, "Error from sendfile (%zd vs %zu bytes): %s\n",
+ size, stbuff.st_size, strerror(errno));
+ ret = -1;
+ goto out_ffd;
+ }
+
+ size = read(ofd, out, len);
+ if (size != len) {
+ fprintf(stderr, "Error from read (%zd vs %zu bytes): %s\n",
+ size, len, strerror(errno));
+ ret = -1;
+ } else {
+ ret = 0;
+ }
+out_ffd:
+ close(ffd);
+out_ofd:
+ close(ofd);
+out_cfd:
+ close(cfd);
+ return ret;
+}
+
+static const char *bpf_get_obj_uid(const char *pathname)
+{
+ static bool bpf_uid_cached = false;
+ static char bpf_uid[64];
+ uint8_t tmp[20];
+ int ret;
+
+ if (bpf_uid_cached)
+ goto done;
+
+ ret = bpf_obj_hash(pathname, tmp, sizeof(tmp));
+ if (ret) {
+ fprintf(stderr, "Object hashing failed!\n");
+ return NULL;
+ }
+
+ hexstring_n2a(tmp, sizeof(tmp), bpf_uid, sizeof(bpf_uid));
+ bpf_uid_cached = true;
+done:
+ return bpf_uid;
+}
+
+static int bpf_init_env(const char *pathname)
+{
+ struct rlimit limit = {
+ .rlim_cur = RLIM_INFINITY,
+ .rlim_max = RLIM_INFINITY,
+ };
+
+ /* Don't bother in case we fail! */
+ setrlimit(RLIMIT_MEMLOCK, &limit);
+
+ if (!bpf_get_tc_dir()) {
+ fprintf(stderr, "Continuing without mounted eBPF fs. "
+ "Too old kernel?\n");
+ return 0;
+ }
+
+ if (!bpf_get_obj_uid(pathname))
+ return -1;
+
+ return 0;
+}
+
+static const char *bpf_custom_pinning(const struct bpf_elf_ctx *ctx,
+ uint32_t pinning)
+{
+ struct bpf_hash_entry *entry;
+
+ entry = ctx->ht[pinning & (ARRAY_SIZE(ctx->ht) - 1)];
+ while (entry && entry->pinning != pinning)
+ entry = entry->next;
+
+ return entry ? entry->subpath : NULL;
+}
+
+static bool bpf_no_pinning(const struct bpf_elf_ctx *ctx,
+ uint32_t pinning)
+{
+ switch (pinning) {
+ case PIN_OBJECT_NS:
+ case PIN_GLOBAL_NS:
+ return false;
+ case PIN_NONE:
+ return true;
+ default:
+ return !bpf_custom_pinning(ctx, pinning);
+ }
+}
+
+static void bpf_make_pathname(char *pathname, size_t len, const char *name,
+ const struct bpf_elf_ctx *ctx, uint32_t pinning)
+{
+ switch (pinning) {
+ case PIN_OBJECT_NS:
+ snprintf(pathname, len, "%s/%s/%s", bpf_get_tc_dir(),
+ bpf_get_obj_uid(NULL), name);
+ break;
+ case PIN_GLOBAL_NS:
+ snprintf(pathname, len, "%s/%s/%s", bpf_get_tc_dir(),
+ BPF_DIR_GLOBALS, name);
+ break;
+ default:
+ snprintf(pathname, len, "%s/../%s/%s", bpf_get_tc_dir(),
+ bpf_custom_pinning(ctx, pinning), name);
+ break;
+ }
+}
+
+static int bpf_probe_pinned(const char *name, const struct bpf_elf_ctx *ctx,
+ uint32_t pinning)
+{
+ char pathname[PATH_MAX];
+
+ if (bpf_no_pinning(ctx, pinning) || !bpf_get_tc_dir())
+ return 0;
+
+ bpf_make_pathname(pathname, sizeof(pathname), name, ctx, pinning);
+ return bpf_obj_get(pathname);
+}
+
+static int bpf_make_obj_path(void)
+{
+ char tmp[PATH_MAX];
+ int ret;
+
+ snprintf(tmp, sizeof(tmp), "%s/%s", bpf_get_tc_dir(),
+ bpf_get_obj_uid(NULL));
+
+ ret = mkdir(tmp, S_IRWXU);
+ if (ret && errno != EEXIST) {
+ fprintf(stderr, "mkdir %s failed: %s\n", tmp, strerror(errno));
+ return ret;
+ }
+
+ return 0;
+}
+
+static int bpf_make_custom_path(const char *todo)
+{
+ char tmp[PATH_MAX], rem[PATH_MAX], *sub;
+ int ret;
+
+ snprintf(tmp, sizeof(tmp), "%s/../", bpf_get_tc_dir());
+ snprintf(rem, sizeof(rem), "%s/", todo);
+ sub = strtok(rem, "/");
+
+ while (sub) {
+ if (strlen(tmp) + strlen(sub) + 2 > PATH_MAX)
+ return -EINVAL;
+
+ strcat(tmp, sub);
+ strcat(tmp, "/");
+
+ ret = mkdir(tmp, S_IRWXU);
+ if (ret && errno != EEXIST) {
+ fprintf(stderr, "mkdir %s failed: %s\n", tmp,
+ strerror(errno));
+ return ret;
+ }
+
+ sub = strtok(NULL, "/");
+ }
+
+ return 0;
+}
+
+static int bpf_place_pinned(int fd, const char *name,
+ const struct bpf_elf_ctx *ctx, uint32_t pinning)
+{
+ char pathname[PATH_MAX];
+ const char *tmp;
+ int ret = 0;
+
+ if (bpf_no_pinning(ctx, pinning) || !bpf_get_tc_dir())
+ return 0;
+
+ if (pinning == PIN_OBJECT_NS)
+ ret = bpf_make_obj_path();
+ else if ((tmp = bpf_custom_pinning(ctx, pinning)))
+ ret = bpf_make_custom_path(tmp);
+ if (ret < 0)
+ return ret;
+
+ bpf_make_pathname(pathname, sizeof(pathname), name, ctx, pinning);
+ return bpf_obj_pin(fd, pathname);
+}
+
+static int bpf_prog_attach(const char *section,
+ const struct bpf_elf_prog *prog, bool verbose)
+{
+ int fd;
+
+ /* We can add pinning here later as well, same as bpf_map_attach(). */
+ errno = 0;
+ fd = bpf_prog_load(prog->type, prog->insns, prog->size,
+ prog->license);
+ if (fd < 0 || verbose) {
+ bpf_dump_error("Prog section \'%s\' (type:%u insns:%zu "
+ "license:\'%s\') %s%s (%d)!\n\n",
+ section, prog->type,
+ prog->size / sizeof(struct bpf_insn),
+ prog->license, fd < 0 ? "rejected: " :
+ "loaded", fd < 0 ? strerror(errno) : "",
+ fd < 0 ? errno : fd);
+ }
+
+ return fd;
+}
+
+static int bpf_map_attach(const char *name, const struct bpf_elf_map *map,
+ const struct bpf_elf_ctx *ctx, bool verbose)
+{
+ int fd, ret;
+
+ fd = bpf_probe_pinned(name, ctx, map->pinning);
+ if (fd > 0) {
+ ret = bpf_map_selfcheck_pinned(fd, map,
+ offsetof(struct bpf_elf_map,
+ id));
+ if (ret < 0) {
+ close(fd);
+ fprintf(stderr, "Map \'%s\' self-check failed!\n",
+ name);
+ return ret;
+ }
+ if (verbose)
+ fprintf(stderr, "Map \'%s\' loaded as pinned!\n",
+ name);
+ return fd;
+ }
+
+ errno = 0;
+ fd = bpf_map_create(map->type, map->size_key, map->size_value,
+ map->max_elem);
+ if (fd < 0 || verbose) {
+ bpf_dump_error("Map \'%s\' (type:%u id:%u pinning:%u "
+ "ksize:%u vsize:%u max-elems:%u) %s%s (%d)!\n",
+ name, map->type, map->id, map->pinning,
+ map->size_key, map->size_value, map->max_elem,
+ fd < 0 ? "rejected: " : "loaded", fd < 0 ?
+ strerror(errno) : "", fd < 0 ? errno : fd);
+ if (fd < 0)
+ return fd;
+ }
+
+ ret = bpf_place_pinned(fd, name, ctx, map->pinning);
+ if (ret < 0 && errno != EEXIST) {
+ fprintf(stderr, "Could not pin %s map: %s\n", name,
+ strerror(errno));
+ close(fd);
+ return ret;
+ }
+
+ return fd;
+}
+
+#define __ELF_ST_BIND(x) ((x) >> 4)
+#define __ELF_ST_TYPE(x) (((unsigned int) x) & 0xf)
+
+static const char *bpf_str_tab_name(const struct bpf_elf_ctx *ctx,
+ const GElf_Sym *sym)
+{
+ return ctx->str_tab->d_buf + sym->st_name;
+}
+
+static const char *bpf_map_fetch_name(struct bpf_elf_ctx *ctx, int which)
+{
+ GElf_Sym sym;
+ int i;
+
+ for (i = 0; i < ctx->sym_num; i++) {
+ if (gelf_getsym(ctx->sym_tab, i, &sym) != &sym)
+ continue;
+
+ if (__ELF_ST_BIND(sym.st_info) != STB_GLOBAL ||
+ __ELF_ST_TYPE(sym.st_info) != STT_NOTYPE ||
+ sym.st_shndx != ctx->sec_maps ||
+ sym.st_value / sizeof(struct bpf_elf_map) != which)
+ continue;
+
+ return bpf_str_tab_name(ctx, &sym);
+ }
+
+ return NULL;
+}
+
+static int bpf_maps_attach_all(struct bpf_elf_ctx *ctx)
+{
+ const char *map_name;
+ int i, fd;
+
+ for (i = 0; i < ctx->map_num; i++) {
+ map_name = bpf_map_fetch_name(ctx, i);
+ if (!map_name)
+ return -EIO;
+
+ fd = bpf_map_attach(map_name, &ctx->maps[i], ctx,
+ ctx->verbose);
+ if (fd < 0)
+ return fd;
+
+ ctx->map_fds[i] = fd;
+ }
+
+ return 0;
+}
+
+static int bpf_fill_section_data(struct bpf_elf_ctx *ctx, int section,
+ struct bpf_elf_sec_data *data)
+{
+ Elf_Data *sec_edata;
+ GElf_Shdr sec_hdr;
+ Elf_Scn *sec_fd;
+ char *sec_name;
+
+ memset(data, 0, sizeof(*data));
+
+ sec_fd = elf_getscn(ctx->elf_fd, section);
+ if (!sec_fd)
+ return -EINVAL;
+ if (gelf_getshdr(sec_fd, &sec_hdr) != &sec_hdr)
+ return -EIO;
+
+ sec_name = elf_strptr(ctx->elf_fd, ctx->elf_hdr.e_shstrndx,
+ sec_hdr.sh_name);
+ if (!sec_name || !sec_hdr.sh_size)
+ return -ENOENT;
+
+ sec_edata = elf_getdata(sec_fd, NULL);
+ if (!sec_edata || elf_getdata(sec_fd, sec_edata))
+ return -EIO;
+
+ memcpy(&data->sec_hdr, &sec_hdr, sizeof(sec_hdr));
+
+ data->sec_name = sec_name;
+ data->sec_data = sec_edata;
+ return 0;
+}
+
+static int bpf_fetch_maps(struct bpf_elf_ctx *ctx, int section,
+ struct bpf_elf_sec_data *data)
+{
+ if (data->sec_data->d_size % sizeof(struct bpf_elf_map) != 0)
+ return -EINVAL;
+
+ ctx->map_num = data->sec_data->d_size / sizeof(struct bpf_elf_map);
+ ctx->sec_maps = section;
+ ctx->sec_done[section] = true;
+
+ if (ctx->map_num > ARRAY_SIZE(ctx->map_fds)) {
+ fprintf(stderr, "Too many BPF maps in ELF section!\n");
+ return -ENOMEM;
+ }
+
+ memcpy(ctx->maps, data->sec_data->d_buf, data->sec_data->d_size);
+ return 0;
+}
+
+static int bpf_fetch_license(struct bpf_elf_ctx *ctx, int section,
+ struct bpf_elf_sec_data *data)
+{
+ if (data->sec_data->d_size > sizeof(ctx->license))
+ return -ENOMEM;
+
+ memcpy(ctx->license, data->sec_data->d_buf, data->sec_data->d_size);
+ ctx->sec_done[section] = true;
+ return 0;
+}
+
+/*
+ * Remember the symbol table section and compute the number of symbols.
+ *
+ * Returns 0 on success.  A symbol table whose header reports an entry size
+ * of zero is rejected with -EINVAL, since the symbol count could not be
+ * computed without dividing by zero.
+ */
+static int bpf_fetch_symtab(struct bpf_elf_ctx *ctx, int section,
+			    struct bpf_elf_sec_data *data)
+{
+	if (!data->sec_hdr.sh_entsize)
+		return -EINVAL;
+
+	ctx->sym_tab = data->sec_data;
+	ctx->sym_num = data->sec_hdr.sh_size / data->sec_hdr.sh_entsize;
+	ctx->sec_done[section] = true;
+	return 0;
+}
+
+/*
+ * Remember the string table data descriptor and mark its section as
+ * processed.  Always returns 0.
+ */
+static int bpf_fetch_strtab(struct bpf_elf_ctx *ctx, int section,
+			    struct bpf_elf_sec_data *data)
+{
+	Elf_Data *strings = data->sec_data;
+
+	ctx->sec_done[section] = true;
+	ctx->str_tab = strings;
+
+	return 0;
+}
+
+/*
+ * Walk all ELF sections and pick up the ancillary ones: maps, license,
+ * symbol table and string table.  Once symbol table, string table and a
+ * maps section have all been seen, the maps are attached.
+ *
+ * Sections that cannot be loaded by bpf_fill_section_data() are skipped.
+ * A section that is recognised but fails to parse aborts the walk and its
+ * error is returned right away, so that a later successful map attach
+ * cannot overwrite it with 0.
+ *
+ * Returns a negative value on failure; otherwise the result of the last
+ * step performed.
+ */
+static int bpf_fetch_ancillary(struct bpf_elf_ctx *ctx)
+{
+	struct bpf_elf_sec_data data;
+	int i, ret = -1;
+
+	for (i = 1; i < ctx->elf_hdr.e_shnum; i++) {
+		ret = bpf_fill_section_data(ctx, i, &data);
+		if (ret < 0)
+			continue;
+
+		if (data.sec_hdr.sh_type == SHT_PROGBITS &&
+		    !strcmp(data.sec_name, ELF_SECTION_MAPS))
+			ret = bpf_fetch_maps(ctx, i, &data);
+		else if (data.sec_hdr.sh_type == SHT_PROGBITS &&
+			 !strcmp(data.sec_name, ELF_SECTION_LICENSE))
+			ret = bpf_fetch_license(ctx, i, &data);
+		else if (data.sec_hdr.sh_type == SHT_SYMTAB &&
+			 !strcmp(data.sec_name, ".symtab"))
+			ret = bpf_fetch_symtab(ctx, i, &data);
+		else if (data.sec_hdr.sh_type == SHT_STRTAB &&
+			 !strcmp(data.sec_name, ".strtab"))
+			ret = bpf_fetch_strtab(ctx, i, &data);
+		if (ret < 0) {
+			fprintf(stderr, "Error parsing section %d! Perhaps "
+				"check with readelf -a?\n", i);
+			return ret;
+		}
+	}
+
+	if (ctx->sym_tab && ctx->str_tab && ctx->sec_maps) {
+		ret = bpf_maps_attach_all(ctx);
+		if (ret < 0) {
+			fprintf(stderr, "Error loading maps into kernel!\n");
+			return ret;
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * Find a not yet processed, executable SHT_PROGBITS section named `section`
+ * and hand its instructions to bpf_prog_attach() without applying any
+ * relocations.
+ *
+ * Returns the descriptor from bpf_prog_attach() for the first section that
+ * attaches successfully, or a negative value if none did.
+ */
+static int bpf_fetch_prog(struct bpf_elf_ctx *ctx, const char *section)
+{
+	struct bpf_elf_sec_data data;
+	struct bpf_elf_prog prog;
+	int sec, fd = -1;
+
+	for (sec = 1; sec < ctx->elf_hdr.e_shnum; sec++) {
+		if (ctx->sec_done[sec])
+			continue;
+		if (bpf_fill_section_data(ctx, sec, &data) < 0)
+			continue;
+		if (data.sec_hdr.sh_type != SHT_PROGBITS)
+			continue;
+		if (!(data.sec_hdr.sh_flags & SHF_EXECINSTR))
+			continue;
+		if (strcmp(data.sec_name, section) != 0)
+			continue;
+
+		memset(&prog, 0, sizeof(prog));
+		prog.type = ctx->type;
+		prog.insns = data.sec_data->d_buf;
+		prog.size = data.sec_data->d_size;
+		prog.license = ctx->license;
+
+		fd = bpf_prog_attach(section, &prog, ctx->verbose);
+		if (fd >= 0) {
+			ctx->sec_done[sec] = true;
+			break;
+		}
+	}
+
+	return fd;
+}
+
+/*
+ * Apply the relocation entries of data_relo to the instructions held in
+ * data_insn: each relocated instruction must be a BPF_LD | BPF_IMM | BPF_DW
+ * load and gets the descriptor of the referenced map written into its imm
+ * field, with src_reg set to BPF_PSEUDO_MAP_FD.
+ *
+ * Returns 0 on success, -EIO when libelf cannot deliver a relocation or
+ * symbol, and -EINVAL when the relocation section reports a zero entry
+ * size, the instruction offset is out of range, the instruction is not the
+ * expected load, or the symbol does not refer to an attached map.
+ */
+static int bpf_apply_relo_data(struct bpf_elf_ctx *ctx,
+			       struct bpf_elf_sec_data *data_relo,
+			       struct bpf_elf_sec_data *data_insn)
+{
+	Elf_Data *idata = data_insn->sec_data;
+	GElf_Shdr *rhdr = &data_relo->sec_hdr;
+	struct bpf_insn *insns = idata->d_buf;
+	unsigned int num_insns = idata->d_size / sizeof(*insns);
+	int relo_ent, relo_num;
+
+	/* Guard the division below against a malformed section header. */
+	if (!rhdr->sh_entsize)
+		return -EINVAL;
+
+	relo_num = rhdr->sh_size / rhdr->sh_entsize;
+
+	for (relo_ent = 0; relo_ent < relo_num; relo_ent++) {
+		unsigned int ioff, rmap;
+		GElf_Rel relo;
+		GElf_Sym sym;
+
+		if (gelf_getrel(data_relo->sec_data, relo_ent, &relo) != &relo)
+			return -EIO;
+
+		ioff = relo.r_offset / sizeof(struct bpf_insn);
+		if (ioff >= num_insns ||
+		    insns[ioff].code != (BPF_LD | BPF_IMM | BPF_DW))
+			return -EINVAL;
+
+		if (gelf_getsym(ctx->sym_tab, GELF_R_SYM(relo.r_info), &sym) != &sym)
+			return -EIO;
+
+		/* The symbol value is the byte offset into the maps section. */
+		rmap = sym.st_value / sizeof(struct bpf_elf_map);
+		if (rmap >= ARRAY_SIZE(ctx->map_fds))
+			return -EINVAL;
+		if (!ctx->map_fds[rmap])
+			return -EINVAL;
+
+		if (ctx->verbose)
+			fprintf(stderr, "Map \'%s\' (%d) injected into prog "
+				"section \'%s\' at offset %u!\n",
+				bpf_str_tab_name(ctx, &sym), ctx->map_fds[rmap],
+				data_insn->sec_name, ioff);
+
+		insns[ioff].src_reg = BPF_PSEUDO_MAP_FD;
+		insns[ioff].imm = ctx->map_fds[rmap];
+	}
+
+	return 0;
+}
+
+/*
+ * Find a SHT_REL section whose target (sh_info) is an executable
+ * SHT_PROGBITS section named `section`, apply its relocations and hand the
+ * patched instructions to bpf_prog_attach().
+ *
+ * On success both the relocation section and the program section are
+ * marked as processed and the descriptor is returned; otherwise a negative
+ * value is returned.
+ */
+static int bpf_fetch_prog_relo(struct bpf_elf_ctx *ctx, const char *section)
+{
+	struct bpf_elf_sec_data data_relo, data_insn;
+	struct bpf_elf_prog prog;
+	int sec, target, fd = -1;
+
+	for (sec = 1; sec < ctx->elf_hdr.e_shnum; sec++) {
+		if (bpf_fill_section_data(ctx, sec, &data_relo) < 0)
+			continue;
+		if (data_relo.sec_hdr.sh_type != SHT_REL)
+			continue;
+
+		target = data_relo.sec_hdr.sh_info;
+		if (bpf_fill_section_data(ctx, target, &data_insn) < 0)
+			continue;
+		if (data_insn.sec_hdr.sh_type != SHT_PROGBITS ||
+		    !(data_insn.sec_hdr.sh_flags & SHF_EXECINSTR) ||
+		    strcmp(data_insn.sec_name, section) != 0)
+			continue;
+
+		if (bpf_apply_relo_data(ctx, &data_relo, &data_insn) < 0)
+			continue;
+
+		memset(&prog, 0, sizeof(prog));
+		prog.type = ctx->type;
+		prog.insns = data_insn.sec_data->d_buf;
+		prog.size = data_insn.sec_data->d_size;
+		prog.license = ctx->license;
+
+		fd = bpf_prog_attach(section, &prog, ctx->verbose);
+		if (fd < 0)
+			continue;
+
+		ctx->sec_done[sec] = true;
+		ctx->sec_done[target] = true;
+		break;
+	}
+
+	return fd;
+}
+
+/*
+ * Load the program in `section`: when a symbol table is available the
+ * relocating loader is tried first, and the plain loader is used if that
+ * was skipped or failed.  Returns the program descriptor or a negative
+ * value.
+ */
+static int bpf_fetch_prog_sec(struct bpf_elf_ctx *ctx, const char *section)
+{
+	int fd = -1;
+
+	if (ctx->sym_tab != NULL)
+		fd = bpf_fetch_prog_relo(ctx, section);
+	if (fd >= 0)
+		return fd;
+
+	return bpf_fetch_prog(ctx, section);
+}
+
+/*
+ * Return the index of the first attached map (non-zero descriptor) that is
+ * of type BPF_MAP_TYPE_PROG_ARRAY and carries the given id, or -1 if there
+ * is none.
+ */
+static int bpf_find_map_by_id(struct bpf_elf_ctx *ctx, uint32_t id)
+{
+	int slot;
+
+	for (slot = 0; slot < ARRAY_SIZE(ctx->map_fds); slot++) {
+		if (!ctx->map_fds[slot])
+			continue;
+		if (ctx->maps[slot].type != BPF_MAP_TYPE_PROG_ARRAY)
+			continue;
+		if (ctx->maps[slot].id == id)
+			return slot;
+	}
+
+	return -1;
+}
+
+/*
+ * Populate program array maps from the remaining sections.  A section
+ * whose name parses as "<map id>/<key>" and whose map id matches an
+ * attached program array is loaded as a program, and the resulting
+ * descriptor is stored in that map under the given key.
+ *
+ * Returns 0 on success, -EIO if such a program cannot be loaded and
+ * -ENOENT if the map update fails.
+ */
+static int bpf_fill_prog_arrays(struct bpf_elf_ctx *ctx)
+{
+	struct bpf_elf_sec_data data;
+	uint32_t map_id, key_id;
+	int sec, map_idx, prog_fd;
+
+	for (sec = 1; sec < ctx->elf_hdr.e_shnum; sec++) {
+		if (ctx->sec_done[sec])
+			continue;
+		if (bpf_fill_section_data(ctx, sec, &data) < 0)
+			continue;
+		if (sscanf(data.sec_name, "%i/%i", &map_id, &key_id) != 2)
+			continue;
+
+		map_idx = bpf_find_map_by_id(ctx, map_id);
+		if (map_idx < 0)
+			continue;
+
+		prog_fd = bpf_fetch_prog_sec(ctx, data.sec_name);
+		if (prog_fd < 0)
+			return -EIO;
+
+		if (bpf_map_update(ctx->map_fds[map_idx], &key_id,
+				   &prog_fd, BPF_ANY) < 0)
+			return -ENOENT;
+
+		ctx->sec_done[sec] = true;
+	}
+
+	return 0;
+}
+
+/*
+ * Record device and inode number of the opened object file in ctx->stat.
+ * If fstat() fails, a message is printed and ctx->stat stays zeroed.
+ */
+static void bpf_save_finfo(struct bpf_elf_ctx *ctx)
+{
+	struct stat info;
+
+	memset(&ctx->stat, 0, sizeof(ctx->stat));
+
+	if (fstat(ctx->obj_fd, &info) < 0) {
+		fprintf(stderr, "Stat of elf file failed: %s\n",
+			strerror(errno));
+		return;
+	}
+
+	ctx->stat.st_dev = info.st_dev;
+	ctx->stat.st_ino = info.st_ino;
+}
+
+/*
+ * Read the next "<id> <path>" mapping from fp.
+ *
+ * Lines are read with fgets(); leading blanks and tabs are skipped, and
+ * lines that are empty or start with '#' are ignored.
+ *
+ * Returns 1 when a mapping was parsed into *id and path, 0 when the end of
+ * the file was reached, and -1 when a line could not be parsed, in which
+ * case the offending line is copied into path for error reporting.
+ *
+ * NOTE(review): path is written by an unbounded %s / strcpy(); this looks
+ * safe only if the caller's buffer is at least PATH_MAX bytes like buff -
+ * confirm against callers.
+ */
+static int bpf_read_pin_mapping(FILE *fp, uint32_t *id, char *path)
+{
+ char buff[PATH_MAX];
+
+ while (fgets(buff, sizeof(buff), fp)) {
+ char *ptr = buff;
+
+ /* Skip leading whitespace. */
+ while (*ptr == ' ' || *ptr == '\t')
+ ptr++;
+
+ /* Ignore comments and blank lines. */
+ if (*ptr == '#' || *ptr == '\n' || *ptr == 0)
+ continue;
+
+ /* Both formats must yield exactly two converted fields. */
+ if (sscanf(ptr, "%i %s\n", id, path) != 2 &&
+ sscanf(ptr, "%i %s #", id, path) != 2) {
+ strcpy(path, ptr);
+ return -1;
+ }
+
+ return 1;
+ }
+
+ return 0;
+}
+
+/*
+ * Tell whether a pinning id is one of the built-in values PIN_NONE,
+ * PIN_OBJECT_NS or PIN_GLOBAL_NS.
+ */
+static bool bpf_pinning_reserved(uint32_t pinning)
+{
+	return pinning == PIN_NONE ||
+	       pinning == PIN_OBJECT_NS ||
+	       pinning == PIN_GLOBAL_NS;
+}
+
+/*
+ * Load the pinning database db_file into the hash table ctx->ht.
+ *
+ * A missing or unreadable file is silently ignored.  Reading stops at the
+ * first line that cannot be parsed; entries with a reserved id and entries
+ * for which memory cannot be allocated are skipped with a message.  New
+ * entries are pushed to the front of their bucket.
+ */
+static void bpf_hash_init(struct bpf_elf_ctx *ctx, const char *db_file)
+{
+	struct bpf_hash_entry *entry, **bucket;
+	char subpath[PATH_MAX];
+	uint32_t pinning;
+	FILE *fp;
+	int ret;
+
+	fp = fopen(db_file, "r");
+	if (fp == NULL)
+		return;
+
+	memset(subpath, 0, sizeof(subpath));
+	for (;;) {
+		ret = bpf_read_pin_mapping(fp, &pinning, subpath);
+		if (ret == 0)
+			break;
+
+		if (ret == -1) {
+			fprintf(stderr, "Database %s is corrupted at: %s\n",
+				db_file, subpath);
+			break;
+		}
+
+		if (bpf_pinning_reserved(pinning)) {
+			fprintf(stderr, "Database %s, id %u is reserved - "
+				"ignoring!\n", db_file, pinning);
+			continue;
+		}
+
+		entry = malloc(sizeof(*entry));
+		if (entry == NULL) {
+			fprintf(stderr, "No memory left for db entry!\n");
+			continue;
+		}
+
+		entry->pinning = pinning;
+		entry->subpath = strdup(subpath);
+		if (entry->subpath == NULL) {
+			fprintf(stderr, "No memory left for db entry!\n");
+			free(entry);
+			continue;
+		}
+
+		bucket = &ctx->ht[pinning & (ARRAY_SIZE(ctx->ht) - 1)];
+		entry->next = *bucket;
+		*bucket = entry;
+	}
+
+	fclose(fp);
+}
+
+/*
+ * Free every entry of the pinning hash table together with its subpath
+ * string, leaving all buckets empty.
+ */
+static void bpf_hash_destroy(struct bpf_elf_ctx *ctx)
+{
+	int slot;
+
+	for (slot = 0; slot < ARRAY_SIZE(ctx->ht); slot++) {
+		struct bpf_hash_entry *cur = ctx->ht[slot];
+
+		while (cur != NULL) {
+			struct bpf_hash_entry *next = cur->next;
+
+			free((char *)cur->subpath);
+			free(cur);
+			cur = next;
+		}
+
+		ctx->ht[slot] = NULL;
+	}
+}
+
+/*
+ * Validate the ELF header: the object must be relocatable (ET_REL), have
+ * machine type 0 and the current ELF version, and its data encoding must
+ * match the byte order of the host.
+ *
+ * Returns 0 if acceptable, -EINVAL on a format error and -EIO on an
+ * endianness mismatch.
+ */
+static int bpf_elf_check_ehdr(const struct bpf_elf_ctx *ctx)
+{
+	const bool host_big_endian = htons(1) == 1;
+	const unsigned char encoding = ctx->elf_hdr.e_ident[EI_DATA];
+
+	if (ctx->elf_hdr.e_type != ET_REL ||
+	    ctx->elf_hdr.e_machine != 0 ||
+	    ctx->elf_hdr.e_version != EV_CURRENT) {
+		fprintf(stderr, "ELF format error, ELF file not for eBPF?\n");
+		return -EINVAL;
+	}
+
+	if (encoding != ELFDATA2LSB && encoding != ELFDATA2MSB) {
+		fprintf(stderr, "ELF format error, wrong endianness info?\n");
+		return -EINVAL;
+	}
+
+	if (encoding == ELFDATA2LSB && host_big_endian) {
+		fprintf(stderr,
+			"We are big endian, eBPF object is little endian!\n");
+		return -EIO;
+	}
+
+	if (encoding == ELFDATA2MSB && !host_big_endian) {
+		fprintf(stderr,
+			"We are little endian, eBPF object is big endian!\n");
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/*
+ * Initialise the ELF loader context for the object file at pathname.
+ *
+ * Opens the file, creates the libelf handle, validates the ELF header,
+ * allocates the per-section "done" flags, records the file identity and
+ * loads the pinning database.
+ *
+ * Returns 0 on success.  On failure a negative value is returned and
+ * everything acquired so far is released again; in particular the libelf
+ * handle is ended on every path taken after elf_begin() succeeded.
+ */
+static int bpf_elf_ctx_init(struct bpf_elf_ctx *ctx, const char *pathname,
+			    enum bpf_prog_type type, bool verbose)
+{
+	int ret = -EINVAL;
+
+	if (elf_version(EV_CURRENT) == EV_NONE ||
+	    bpf_init_env(pathname))
+		return ret;
+
+	memset(ctx, 0, sizeof(*ctx));
+	ctx->verbose = verbose;
+	ctx->type = type;
+
+	ctx->obj_fd = open(pathname, O_RDONLY);
+	if (ctx->obj_fd < 0)
+		return ctx->obj_fd;
+
+	ctx->elf_fd = elf_begin(ctx->obj_fd, ELF_C_READ, NULL);
+	if (!ctx->elf_fd) {
+		ret = -EINVAL;
+		goto out_fd;
+	}
+
+	/* From here on the libelf handle exists and must be ended. */
+	if (elf_kind(ctx->elf_fd) != ELF_K_ELF) {
+		ret = -EINVAL;
+		goto out_elf;
+	}
+
+	if (gelf_getehdr(ctx->elf_fd, &ctx->elf_hdr) !=
+	    &ctx->elf_hdr) {
+		ret = -EIO;
+		goto out_elf;
+	}
+
+	ret = bpf_elf_check_ehdr(ctx);
+	if (ret < 0)
+		goto out_elf;
+
+	ctx->sec_done = calloc(ctx->elf_hdr.e_shnum,
+			       sizeof(*(ctx->sec_done)));
+	if (!ctx->sec_done) {
+		ret = -ENOMEM;
+		goto out_elf;
+	}
+
+	bpf_save_finfo(ctx);
+	bpf_hash_init(ctx, CONFDIR "/bpf_pinning");
+
+	return 0;
+out_elf:
+	elf_end(ctx->elf_fd);
+out_fd:
+	close(ctx->obj_fd);
+	return ret;
+}
+
+/*
+ * Count the leading non-zero descriptors in ctx->map_fds; counting stops
+ * at the first zero slot or at the end of the array.
+ */
+static int bpf_maps_count(struct bpf_elf_ctx *ctx)
+{
+	int count = 0;
+
+	while (count < ARRAY_SIZE(ctx->map_fds) && ctx->map_fds[count])
+		count++;
+
+	return count;
+}
+
+/* Close every non-zero descriptor stored in ctx->map_fds. */
+static void bpf_maps_teardown(struct bpf_elf_ctx *ctx)
+{
+	int slot;
+
+	for (slot = 0; slot < ARRAY_SIZE(ctx->map_fds); slot++) {
+		if (!ctx->map_fds[slot])
+			continue;
+
+		close(ctx->map_fds[slot]);
+	}
+}
+
+/*
+ * Release the resources held by the ELF context: the pinning hash table,
+ * the section flags, the libelf handle and the object file descriptor.
+ * The map descriptors are closed as well, but only when `failure` is set.
+ */
+static void bpf_elf_ctx_destroy(struct bpf_elf_ctx *ctx, bool failure)
+{
+	if (failure) {
+		/* Maps are only torn down on the error path. */
+		bpf_maps_teardown(ctx);
+	}
+
+	bpf_hash_destroy(ctx);
+	free(ctx->sec_done);
+
+	elf_end(ctx->elf_fd);
+	close(ctx->obj_fd);
+}
+
+/* Single loader context, shared by bpf_obj_open() and bpf_send_map_fds(). */
+static struct bpf_elf_ctx __ctx;
+
+/*
+ * Load the program found in `section` of the ELF object at pathname.
+ *
+ * The ancillary sections are fetched first, then the requested program is
+ * loaded and finally the program arrays are filled.
+ *
+ * Returns the program descriptor on success or a negative value on
+ * failure.  On failure the map descriptors are torn down and a program
+ * descriptor that had already been obtained is closed.
+ */
+static int bpf_obj_open(const char *pathname, enum bpf_prog_type type,
+			const char *section, bool verbose)
+{
+	struct bpf_elf_ctx *ctx = &__ctx;
+	int fd = -1, ret;
+
+	ret = bpf_elf_ctx_init(ctx, pathname, type, verbose);
+	if (ret < 0) {
+		fprintf(stderr, "Cannot initialize ELF context!\n");
+		return ret;
+	}
+
+	ret = bpf_fetch_ancillary(ctx);
+	if (ret < 0) {
+		fprintf(stderr, "Error fetching ELF ancillary data!\n");
+		goto out;
+	}
+
+	fd = bpf_fetch_prog_sec(ctx, section);
+	if (fd < 0) {
+		fprintf(stderr, "Error fetching program/map!\n");
+		ret = fd;
+		goto out;
+	}
+
+	ret = bpf_fill_prog_arrays(ctx);
+	if (ret < 0)
+		fprintf(stderr, "Error filling program arrays!\n");
+out:
+	bpf_elf_ctx_destroy(ctx, ret < 0);
+	if (ret < 0) {
+		/* Only close a descriptor that was actually obtained. */
+		if (fd >= 0)
+			close(fd);
+		return ret;
+	}
+
+	return fd;
+}
+
+/*
+ * Send `entries` map descriptors and their descriptions from aux over the
+ * socket fd, in chunks of at most BPF_SCM_MAX_FDS per message.
+ *
+ * Every message carries the auxiliary header (version, total number of
+ * entries, object name and stat data) plus the descriptors and entries of
+ * the current chunk.
+ *
+ * Returns 0 on success.  If sendmsg() returns a negative value that value
+ * is returned; if it returns 0, -1 is returned.
+ */
+static int
+bpf_map_set_send(int fd, struct sockaddr_un *addr, unsigned int addr_len,
+ const struct bpf_map_data *aux, unsigned int entries)
+{
+ struct bpf_map_set_msg msg;
+ int *cmsg_buf, min_fd;
+ char *amsg_buf;
+ int i;
+
+ memset(&msg, 0, sizeof(msg));
+
+ msg.aux.uds_ver = BPF_SCM_AUX_VER;
+ msg.aux.num_ent = entries;
+
+ /*
+ * NOTE(review): strncpy() with the full buffer size leaves obj_name
+ * without a terminating NUL if aux->obj is that long - confirm the
+ * receiver copes with that.
+ */
+ strncpy(msg.aux.obj_name, aux->obj, sizeof(msg.aux.obj_name));
+ memcpy(&msg.aux.obj_st, aux->st, sizeof(msg.aux.obj_st));
+
+ cmsg_buf = bpf_map_set_init(&msg, addr, addr_len);
+ amsg_buf = (char *)msg.aux.ent;
+
+ /* i advances by the size of the chunk that was just sent. */
+ for (i = 0; i < entries; i += min_fd) {
+ int ret;
+
+ min_fd = min(BPF_SCM_MAX_FDS * 1U, entries - i);
+ bpf_map_set_init_single(&msg, min_fd);
+
+ memcpy(cmsg_buf, &aux->fds[i], sizeof(aux->fds[0]) * min_fd);
+ memcpy(amsg_buf, &aux->ent[i], sizeof(aux->ent[0]) * min_fd);
+
+ ret = sendmsg(fd, &msg.hdr, 0);
+ if (ret <= 0)
+ return ret ? : -1;
+ }
+
+ return 0;
+}
+
+/*
+ * Receive map descriptors and their descriptions from the socket fd into
+ * fds[] and aux, accepting at most `entries` of them.
+ *
+ * The number of entries the sender announces is not known up front, so
+ * `needed` starts at 1 and is replaced by aux->num_ent after each message.
+ *
+ * Returns 0 on success.  If recvmsg() returns a negative value that value
+ * is returned, and -1 if it returns 0.  Further errors: -EINVAL for a
+ * missing or non-SCM_RIGHTS control message or an unacceptable descriptor
+ * count, -EIO if the control data was truncated, and -ENOSYS on a protocol
+ * version mismatch.
+ */
+static int
+bpf_map_set_recv(int fd, int *fds, struct bpf_map_aux *aux,
+ unsigned int entries)
+{
+ struct bpf_map_set_msg msg;
+ int *cmsg_buf, min_fd;
+ char *amsg_buf, *mmsg_buf;
+ unsigned int needed = 1;
+ int i;
+
+ cmsg_buf = bpf_map_set_init(&msg, NULL, 0);
+ amsg_buf = (char *)msg.aux.ent;
+ mmsg_buf = (char *)&msg.aux;
+
+ for (i = 0; i < min(entries, needed); i += min_fd) {
+ struct cmsghdr *cmsg;
+ int ret;
+
+ /* Room offered to the sender for this message. */
+ min_fd = min(entries, entries - i);
+ bpf_map_set_init_single(&msg, min_fd);
+
+ ret = recvmsg(fd, &msg.hdr, 0);
+ if (ret <= 0)
+ return ret ? : -1;
+
+ cmsg = CMSG_FIRSTHDR(&msg.hdr);
+ if (!cmsg || cmsg->cmsg_type != SCM_RIGHTS)
+ return -EINVAL;
+ if (msg.hdr.msg_flags & MSG_CTRUNC)
+ return -EIO;
+ if (msg.aux.uds_ver != BPF_SCM_AUX_VER)
+ return -ENOSYS;
+
+ /* Number of descriptors actually delivered in this message. */
+ min_fd = (cmsg->cmsg_len - sizeof(*cmsg)) / sizeof(fd);
+ /*
+ * NOTE(review): the bound is checked against entries, not against
+ * entries - i; confirm later chunks cannot overrun fds[].
+ */
+ if (min_fd > entries || min_fd <= 0)
+ return -EINVAL;
+
+ memcpy(&fds[i], cmsg_buf, sizeof(fds[0]) * min_fd);
+ memcpy(&aux->ent[i], amsg_buf, sizeof(aux->ent[0]) * min_fd);
+ /* Copy the header part of the aux data that precedes ent[]. */
+ memcpy(aux, mmsg_buf, offsetof(struct bpf_map_aux, ent));
+
+ needed = aux->num_ent;
+ }
+
+ return 0;
+}
+
+/*
+ * Send the map descriptors of the loader context to the Unix datagram
+ * socket bound at `path`, tagged with the object name `obj`.
+ *
+ * Returns -1 if the socket cannot be created or connected, otherwise the
+ * result of bpf_map_set_send().  The socket is closed on every path once
+ * it has been created.  After a send attempt the map descriptors held by
+ * the context are closed, whether or not the send succeeded.
+ */
+int bpf_send_map_fds(const char *path, const char *obj)
+{
+	struct bpf_elf_ctx *ctx = &__ctx;
+	struct sockaddr_un addr;
+	struct bpf_map_data bpf_aux;
+	int fd, ret;
+
+	fd = socket(AF_UNIX, SOCK_DGRAM, 0);
+	if (fd < 0) {
+		fprintf(stderr, "Cannot open socket: %s\n",
+			strerror(errno));
+		return -1;
+	}
+
+	memset(&addr, 0, sizeof(addr));
+	addr.sun_family = AF_UNIX;
+	strncpy(addr.sun_path, path, sizeof(addr.sun_path));
+
+	ret = connect(fd, (struct sockaddr *)&addr, sizeof(addr));
+	if (ret < 0) {
+		fprintf(stderr, "Cannot connect to %s: %s\n",
+			path, strerror(errno));
+		/* Do not leak the socket when the peer is unreachable. */
+		close(fd);
+		return -1;
+	}
+
+	memset(&bpf_aux, 0, sizeof(bpf_aux));
+
+	bpf_aux.fds = ctx->map_fds;
+	bpf_aux.ent = ctx->maps;
+	bpf_aux.st = &ctx->stat;
+	bpf_aux.obj = obj;
+
+	ret = bpf_map_set_send(fd, &addr, sizeof(addr), &bpf_aux,
+			       bpf_maps_count(ctx));
+	if (ret < 0)
+		fprintf(stderr, "Cannot send fds to %s: %s\n",
+			path, strerror(errno));
+
+	bpf_maps_teardown(ctx);
+	close(fd);
+	return ret;
+}
+
+/*
+ * Bind a Unix datagram socket at `path` and receive up to `entries` map
+ * descriptors plus their descriptions into fds[] and aux.
+ *
+ * Returns -1 if the socket cannot be created or bound, otherwise the
+ * result of bpf_map_set_recv().  The socket is closed on every path once
+ * it has been created, and the socket file is unlinked after a receive
+ * attempt.
+ *
+ * NOTE(review): sun_path is filled with strncpy() using the full buffer
+ * size, so a path of that length is not NUL-terminated before it is passed
+ * to unlink() - confirm callers never pass such a path.
+ */
+int bpf_recv_map_fds(const char *path, int *fds, struct bpf_map_aux *aux,
+		     unsigned int entries)
+{
+	struct sockaddr_un addr;
+	int fd, ret;
+
+	fd = socket(AF_UNIX, SOCK_DGRAM, 0);
+	if (fd < 0) {
+		fprintf(stderr, "Cannot open socket: %s\n",
+			strerror(errno));
+		return -1;
+	}
+
+	memset(&addr, 0, sizeof(addr));
+	addr.sun_family = AF_UNIX;
+	strncpy(addr.sun_path, path, sizeof(addr.sun_path));
+
+	ret = bind(fd, (struct sockaddr *)&addr, sizeof(addr));
+	if (ret < 0) {
+		fprintf(stderr, "Cannot bind to socket: %s\n",
+			strerror(errno));
+		/* Do not leak the socket when binding fails. */
+		close(fd);
+		return -1;
+	}
+
+	ret = bpf_map_set_recv(fd, fds, aux, entries);
+	if (ret < 0)
+		fprintf(stderr, "Cannot recv fds from %s: %s\n",
+			path, strerror(errno));
+
+	unlink(addr.sun_path);
+	close(fd);
+	return ret;
+}
+#endif /* HAVE_ELF */
diff --git a/tc/tc_bpf.h b/tc/tc_bpf.h
index 08cca927..526d0b12 100644
--- a/tc/tc_bpf.h
+++ b/tc/tc_bpf.h
@@ -13,16 +13,67 @@
#ifndef _TC_BPF_H_
#define _TC_BPF_H_ 1
-#include <stdio.h>
-#include <linux/filter.h>
#include <linux/netlink.h>
-#include <linux/rtnetlink.h>
+#include <linux/bpf.h>
+#include <linux/magic.h>
-int bpf_parse_string(char *arg, bool from_file, __u16 *bpf_len,
- char **bpf_string, bool *need_release,
- const char separator);
-int bpf_parse_ops(int argc, char **argv, struct sock_filter *bpf_ops,
- bool from_file);
-void bpf_print_ops(FILE *f, struct rtattr *bpf_ops, __u16 len);
+#include "utils.h"
+#include "bpf_scm.h"
+
+enum {
+ BPF_NLA_OPS_LEN = 0,
+ BPF_NLA_OPS,
+ BPF_NLA_FD,
+ BPF_NLA_NAME,
+ __BPF_NLA_MAX,
+};
+
+#define BPF_NLA_MAX __BPF_NLA_MAX
+
+#define BPF_ENV_UDS "TC_BPF_UDS"
+#define BPF_ENV_MNT "TC_BPF_MNT"
+#define BPF_ENV_NOLOG "TC_BPF_NOLOG"
+
+#ifndef BPF_FS_MAGIC
+# define BPF_FS_MAGIC 0xcafe4a11
+#endif
+
+#define BPF_DIR_MNT "/sys/fs/bpf"
+
+#define BPF_DIR_TC "tc"
+#define BPF_DIR_GLOBALS "globals"
+#ifndef TRACEFS_MAGIC
+# define TRACEFS_MAGIC 0x74726163
#endif
+
+#define TRACE_DIR_MNT "/sys/kernel/tracing"
+
+int bpf_trace_pipe(void);
+const char *bpf_default_section(const enum bpf_prog_type type);
+
+int bpf_parse_common(int *ptr_argc, char ***ptr_argv, const int *nla_tbl,
+ enum bpf_prog_type type, const char **ptr_object,
+ const char **ptr_uds_name, struct nlmsghdr *n);
+int bpf_graft_map(const char *map_path, uint32_t *key, int argc, char **argv);
+
+void bpf_print_ops(FILE *f, struct rtattr *bpf_ops, __u16 len);
+
+/*
+ * Map descriptor passing over a Unix domain socket.  The real functions
+ * are only declared when HAVE_ELF is defined; otherwise the inline stubs
+ * below are used.
+ */
+#ifdef HAVE_ELF
+int bpf_send_map_fds(const char *path, const char *obj);
+int bpf_recv_map_fds(const char *path, int *fds, struct bpf_map_aux *aux,
+ unsigned int entries);
+#else
+/* Stub without HAVE_ELF: does nothing and returns 0. */
+static inline int bpf_send_map_fds(const char *path, const char *obj)
+{
+ return 0;
+}
+
+/* Stub without HAVE_ELF: receives nothing and always returns -1. */
+static inline int bpf_recv_map_fds(const char *path, int *fds,
+ struct bpf_map_aux *aux,
+ unsigned int entries)
+{
+ return -1;
+}
+#endif /* HAVE_ELF */
+#endif /* _TC_BPF_H_ */
diff --git a/tc/tc_class.c b/tc/tc_class.c
index 877048aa..3acd030f 100644
--- a/tc/tc_class.c
+++ b/tc/tc_class.c
@@ -153,7 +153,7 @@ static int tc_class_modify(int cmd, unsigned flags, int argc, char **argv)
}
}
- if (rtnl_talk(&rth, &req.n, 0, 0, NULL) < 0)
+ if (rtnl_talk(&rth, &req.n, NULL, 0) < 0)
return 2;
return 0;
diff --git a/tc/tc_common.h b/tc/tc_common.h
index 96a0e20f..a2f38984 100644
--- a/tc/tc_common.h
+++ b/tc/tc_common.h
@@ -2,11 +2,14 @@
#define TCA_BUF_MAX (64*1024)
extern struct rtnl_handle rth;
+
extern int do_qdisc(int argc, char **argv);
extern int do_class(int argc, char **argv);
extern int do_filter(int argc, char **argv);
extern int do_action(int argc, char **argv);
extern int do_tcmonitor(int argc, char **argv);
+extern int do_exec(int argc, char **argv);
+
extern int print_action(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg);
extern int print_filter(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg);
extern int print_qdisc(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg);
diff --git a/tc/tc_exec.c b/tc/tc_exec.c
new file mode 100644
index 00000000..61be6721
--- /dev/null
+++ b/tc/tc_exec.c
@@ -0,0 +1,109 @@
+/*
+ * tc_exec.c "tc exec".
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Daniel Borkmann <daniel@iogearbox.net>
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <dlfcn.h>
+
+#include "utils.h"
+
+#include "tc_util.h"
+#include "tc_common.h"
+
+static struct exec_util *exec_list;
+static void *BODY = NULL;
+
+/* Print the "tc exec" usage summary to stderr. */
+static void usage(void)
+{
+	fprintf(stderr,
+		"Usage: tc exec [ EXEC_TYPE ] [ help | OPTIONS ]\n"
+		"Where:\n"
+		"EXEC_TYPE := { bpf | etc. }\n"
+		"OPTIONS := ... try tc exec <desired EXEC_KIND> help\n");
+}
+
+/*
+ * Option parser installed for exec kinds without a real handler: succeeds
+ * only when no options are left, otherwise reports the first option as
+ * unparsable and returns -1.
+ */
+static int parse_noeopt(struct exec_util *eu, int argc, char **argv)
+{
+	if (argc == 0)
+		return 0;
+
+	fprintf(stderr, "Unknown exec \"%s\", hence option \"%s\" "
+		"is unparsable\n", eu->id, *argv);
+	return -1;
+}
+
+/*
+ * Return the exec_util handler for `name`, registering it on exec_list if
+ * it is not there yet.
+ *
+ * Lookup order: the already registered handlers, then the symbol
+ * "<name>_exec_util" in the shared object "<tc lib dir>/e_<name>.so", then
+ * the same symbol in the running program itself.  If no such symbol is
+ * found, a placeholder handler is allocated whose option parser is
+ * parse_noeopt().
+ *
+ * Returns NULL only if the placeholder cannot be allocated.
+ */
+static struct exec_util *get_exec_kind(const char *name)
+{
+ struct exec_util *eu;
+ char buf[256];
+ void *dlh;
+
+ /* Already registered? */
+ for (eu = exec_list; eu; eu = eu->next)
+ if (strcmp(eu->id, name) == 0)
+ return eu;
+
+ snprintf(buf, sizeof(buf), "%s/e_%s.so", get_tc_lib(), name);
+ dlh = dlopen(buf, RTLD_LAZY);
+ if (dlh == NULL) {
+ /* Fall back to the program's own handle, opened once and cached in BODY. */
+ dlh = BODY;
+ if (dlh == NULL) {
+ dlh = BODY = dlopen(NULL, RTLD_LAZY);
+ if (dlh == NULL)
+ goto noexist;
+ }
+ }
+
+ snprintf(buf, sizeof(buf), "%s_exec_util", name);
+ eu = dlsym(dlh, buf);
+ if (eu == NULL)
+ goto noexist;
+reg:
+ /* Push the handler to the front of the registered list. */
+ eu->next = exec_list;
+ exec_list = eu;
+
+ return eu;
+noexist:
+ /* Unknown kind: register a placeholder that rejects any option. */
+ eu = malloc(sizeof(*eu));
+ if (eu) {
+ memset(eu, 0, sizeof(*eu));
+ strncpy(eu->id, name, sizeof(eu->id) - 1);
+ eu->parse_eopt = parse_noeopt;
+ goto reg;
+ }
+
+ return eu;
+}
+
+/*
+ * Entry point of "tc exec": the first argument selects the exec kind
+ * (truncated to 15 characters), the remaining arguments are handed to that
+ * kind's option parser.
+ *
+ * Returns 0 after printing the help text, -1 if no command was given or no
+ * handler could be obtained, otherwise the result of the handler's
+ * parse_eopt() callback.
+ */
+int do_exec(int argc, char **argv)
+{
+	struct exec_util *eu;
+	char kind[16];
+
+	if (argc < 1) {
+		fprintf(stderr, "No command given, try \"tc exec help\".\n");
+		return -1;
+	}
+
+	if (matches(*argv, "help") == 0) {
+		usage();
+		return 0;
+	}
+
+	memset(kind, 0, sizeof(kind));
+	strncpy(kind, *argv, sizeof(kind) - 1);
+
+	/* get_exec_kind() returns NULL when its fallback allocation fails. */
+	eu = get_exec_kind(kind);
+	if (eu == NULL) {
+		fprintf(stderr, "Allocation failed finding exec\n");
+		return -1;
+	}
+
+	argc--;
+	argv++;
+
+	return eu->parse_eopt(eu, argc, argv);
+}
diff --git a/tc/tc_filter.c b/tc/tc_filter.c
index 609fbe9b..1a1082b4 100644
--- a/tc/tc_filter.c
+++ b/tc/tc_filter.c
@@ -26,25 +26,21 @@
#include "tc_util.h"
#include "tc_common.h"
-static void usage(void);
-
static void usage(void)
{
fprintf(stderr, "Usage: tc filter [ add | del | change | replace | show ] dev STRING\n");
fprintf(stderr, " [ pref PRIO ] protocol PROTO\n");
fprintf(stderr, " [ estimator INTERVAL TIME_CONSTANT ]\n");
- fprintf(stderr, " [ root | classid CLASSID ] [ handle FILTERID ]\n");
- fprintf(stderr, " [ [ FILTER_TYPE ] [ help | OPTIONS ] ]\n");
+ fprintf(stderr, " [ root | ingress | egress | parent CLASSID ]\n");
+ fprintf(stderr, " [ handle FILTERID ] [ [ FILTER_TYPE ] [ help | OPTIONS ] ]\n");
fprintf(stderr, "\n");
- fprintf(stderr, " tc filter show [ dev STRING ] [ root | parent CLASSID ]\n");
+ fprintf(stderr, " tc filter show [ dev STRING ] [ root | ingress | egress | parent CLASSID ]\n");
fprintf(stderr, "Where:\n");
- fprintf(stderr, "FILTER_TYPE := { rsvp | u32 | fw | route | etc. }\n");
+ fprintf(stderr, "FILTER_TYPE := { rsvp | u32 | bpf | fw | route | etc. }\n");
fprintf(stderr, "FILTERID := ... format depends on classifier, see there\n");
fprintf(stderr, "OPTIONS := ... try tc filter add <desired FILTER_KIND> help\n");
- return;
}
-
static int tc_filter_modify(int cmd, unsigned flags, int argc, char **argv)
{
struct {
@@ -87,6 +83,20 @@ static int tc_filter_modify(int cmd, unsigned flags, int argc, char **argv)
return -1;
}
req.t.tcm_parent = TC_H_ROOT;
+ } else if (strcmp(*argv, "ingress") == 0) {
+ if (req.t.tcm_parent) {
+ fprintf(stderr, "Error: \"ingress\" is duplicate parent ID\n");
+ return -1;
+ }
+ req.t.tcm_parent = TC_H_MAKE(TC_H_CLSACT,
+ TC_H_MIN_INGRESS);
+ } else if (strcmp(*argv, "egress") == 0) {
+ if (req.t.tcm_parent) {
+ fprintf(stderr, "Error: \"egress\" is duplicate parent ID\n");
+ return -1;
+ }
+ req.t.tcm_parent = TC_H_MAKE(TC_H_CLSACT,
+ TC_H_MIN_EGRESS);
} else if (strcmp(*argv, "parent") == 0) {
__u32 handle;
NEXT_ARG();
@@ -167,7 +177,7 @@ static int tc_filter_modify(int cmd, unsigned flags, int argc, char **argv)
}
}
- if (rtnl_talk(&rth, &req.n, 0, 0, NULL) < 0) {
+ if (rtnl_talk(&rth, &req.n, NULL, 0) < 0) {
fprintf(stderr, "We have an error talking to the kernel\n");
return 2;
}
@@ -220,11 +230,16 @@ int print_filter(const struct sockaddr_nl *who,
if (!filter_parent || filter_parent != t->tcm_parent) {
if (t->tcm_parent == TC_H_ROOT)
fprintf(fp, "root ");
+ else if (t->tcm_parent == TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS))
+ fprintf(fp, "ingress ");
+ else if (t->tcm_parent == TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_EGRESS))
+ fprintf(fp, "egress ");
else {
print_tc_classid(abuf, sizeof(abuf), t->tcm_parent);
fprintf(fp, "parent %s ", abuf);
}
}
+
if (t->tcm_info) {
f_proto = TC_H_MIN(t->tcm_info);
__u32 prio = TC_H_MAJ(t->tcm_info)>>16;
@@ -259,7 +274,6 @@ int print_filter(const struct sockaddr_nl *who,
return 0;
}
-
static int tc_filter_list(int argc, char **argv)
{
struct tcmsg t;
@@ -284,6 +298,22 @@ static int tc_filter_list(int argc, char **argv)
return -1;
}
filter_parent = t.tcm_parent = TC_H_ROOT;
+ } else if (strcmp(*argv, "ingress") == 0) {
+ if (t.tcm_parent) {
+ fprintf(stderr, "Error: \"ingress\" is duplicate parent ID\n");
+ return -1;
+ }
+ filter_parent = TC_H_MAKE(TC_H_CLSACT,
+ TC_H_MIN_INGRESS);
+ t.tcm_parent = filter_parent;
+ } else if (strcmp(*argv, "egress") == 0) {
+ if (t.tcm_parent) {
+ fprintf(stderr, "Error: \"egress\" is duplicate parent ID\n");
+ return -1;
+ }
+ filter_parent = TC_H_MAKE(TC_H_CLSACT,
+ TC_H_MIN_EGRESS);
+ t.tcm_parent = filter_parent;
} else if (strcmp(*argv, "parent") == 0) {
__u32 handle;
NEXT_ARG();
@@ -375,4 +405,3 @@ int do_filter(int argc, char **argv)
fprintf(stderr, "Command \"%s\" is unknown, try \"tc filter help\".\n", *argv);
return -1;
}
-
diff --git a/tc/tc_monitor.c b/tc/tc_monitor.c
index 0efe0343..ebb94320 100644
--- a/tc/tc_monitor.c
+++ b/tc/tc_monitor.c
@@ -30,16 +30,20 @@ static void usage(void) __attribute__((noreturn));
static void usage(void)
{
- fprintf(stderr, "Usage: tc monitor\n");
+ fprintf(stderr, "Usage: tc [-timestamp [-tshort]] monitor\n");
exit(-1);
}
static int accept_tcmsg(const struct sockaddr_nl *who,
+ struct rtnl_ctrl_data *ctrl,
struct nlmsghdr *n, void *arg)
{
FILE *fp = (FILE*)arg;
+ if (timestamp)
+ print_timestamp(fp);
+
if (n->nlmsg_type == RTM_NEWTFILTER || n->nlmsg_type == RTM_DELTFILTER) {
print_filter(who, n, arg);
return 0;
@@ -87,13 +91,17 @@ int do_tcmonitor(int argc, char **argv)
}
if (file) {
- FILE *fp;
- fp = fopen(file, "r");
+ FILE *fp = fopen(file, "r");
+ int ret;
+
if (fp == NULL) {
perror("Cannot fopen");
exit(-1);
}
- return rtnl_from_file(fp, accept_tcmsg, (void*)stdout);
+
+ ret = rtnl_from_file(fp, accept_tcmsg, stdout);
+ fclose(fp);
+ return ret;
}
if (rtnl_open(&rth, groups) < 0)
diff --git a/tc/tc_qdisc.c b/tc/tc_qdisc.c
index c71937d0..cb861e08 100644
--- a/tc/tc_qdisc.c
+++ b/tc/tc_qdisc.c
@@ -26,17 +26,15 @@
#include "tc_util.h"
#include "tc_common.h"
-static int usage(void);
-
static int usage(void)
{
fprintf(stderr, "Usage: tc qdisc [ add | del | replace | change | show ] dev STRING\n");
- fprintf(stderr, " [ handle QHANDLE ] [ root | ingress | parent CLASSID ]\n");
+ fprintf(stderr, " [ handle QHANDLE ] [ root | ingress | clsact | parent CLASSID ]\n");
fprintf(stderr, " [ estimator INTERVAL TIME_CONSTANT ]\n");
fprintf(stderr, " [ stab [ help | STAB_OPTIONS] ]\n");
fprintf(stderr, " [ [ QDISC_KIND ] [ help | OPTIONS ] ]\n");
fprintf(stderr, "\n");
- fprintf(stderr, " tc qdisc show [ dev STRING ] [ingress]\n");
+ fprintf(stderr, " tc qdisc show [ dev STRING ] [ ingress | clsact ]\n");
fprintf(stderr, "Where:\n");
fprintf(stderr, "QDISC_KIND := { [p|b]fifo | tbf | prio | cbq | red | etc. }\n");
fprintf(stderr, "OPTIONS := ... try tc qdisc add <desired QDISC_KIND> help\n");
@@ -91,20 +89,28 @@ static int tc_qdisc_modify(int cmd, unsigned flags, int argc, char **argv)
return -1;
}
req.t.tcm_parent = TC_H_ROOT;
-#ifdef TC_H_INGRESS
+ } else if (strcmp(*argv, "clsact") == 0) {
+ if (req.t.tcm_parent) {
+ fprintf(stderr, "Error: \"clsact\" is a duplicate parent ID\n");
+ return -1;
+ }
+ req.t.tcm_parent = TC_H_CLSACT;
+ strncpy(k, "clsact", sizeof(k) - 1);
+ q = get_qdisc_kind(k);
+ req.t.tcm_handle = TC_H_MAKE(TC_H_CLSACT, 0);
+ NEXT_ARG_FWD();
+ break;
} else if (strcmp(*argv, "ingress") == 0) {
if (req.t.tcm_parent) {
fprintf(stderr, "Error: \"ingress\" is a duplicate parent ID\n");
return -1;
}
req.t.tcm_parent = TC_H_INGRESS;
- strncpy(k, "ingress", sizeof(k)-1);
+ strncpy(k, "ingress", sizeof(k) - 1);
q = get_qdisc_kind(k);
- req.t.tcm_handle = 0xffff0000;
-
- argc--; argv++;
+ req.t.tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
+ NEXT_ARG_FWD();
break;
-#endif
} else if (strcmp(*argv, "parent") == 0) {
__u32 handle;
NEXT_ARG();
@@ -187,7 +193,7 @@ static int tc_qdisc_modify(int cmd, unsigned flags, int argc, char **argv)
req.t.tcm_ifindex = idx;
}
- if (rtnl_talk(&rth, &req.n, 0, 0, NULL) < 0)
+ if (rtnl_talk(&rth, &req.n, NULL, 0) < 0)
return 2;
return 0;
@@ -277,7 +283,6 @@ int print_qdisc(const struct sockaddr_nl *who,
return 0;
}
-
static int tc_qdisc_list(int argc, char **argv)
{
struct tcmsg t;
@@ -291,14 +296,13 @@ static int tc_qdisc_list(int argc, char **argv)
if (strcmp(*argv, "dev") == 0) {
NEXT_ARG();
strncpy(d, *argv, sizeof(d)-1);
-#ifdef TC_H_INGRESS
- } else if (strcmp(*argv, "ingress") == 0) {
+ } else if (strcmp(*argv, "ingress") == 0 ||
+ strcmp(*argv, "clsact") == 0) {
if (t.tcm_parent) {
fprintf(stderr, "Duplicate parent ID\n");
usage();
}
t.tcm_parent = TC_H_INGRESS;
-#endif
} else if (matches(*argv, "help") == 0) {
usage();
} else {
diff --git a/tc/tc_stab.c b/tc/tc_stab.c
index a8404f8e..aba8ae87 100644
--- a/tc/tc_stab.c
+++ b/tc/tc_stab.c
@@ -67,42 +67,32 @@ int parse_size_table(int *argcp, char ***argvp, struct tc_sizespec *sp)
NEXT_ARG();
if (s.mtu)
duparg("mtu", *argv);
- if (get_u32(&s.mtu, *argv, 10)) {
+ if (get_u32(&s.mtu, *argv, 10))
invarg("mtu", "invalid mtu");
- return -1;
- }
} else if (matches(*argv, "mpu") == 0) {
NEXT_ARG();
if (s.mpu)
duparg("mpu", *argv);
- if (get_u32(&s.mpu, *argv, 10)) {
+ if (get_u32(&s.mpu, *argv, 10))
invarg("mpu", "invalid mpu");
- return -1;
- }
} else if (matches(*argv, "overhead") == 0) {
NEXT_ARG();
if (s.overhead)
duparg("overhead", *argv);
- if (get_integer(&s.overhead, *argv, 10)) {
+ if (get_integer(&s.overhead, *argv, 10))
invarg("overhead", "invalid overhead");
- return -1;
- }
} else if (matches(*argv, "tsize") == 0) {
NEXT_ARG();
if (s.tsize)
duparg("tsize", *argv);
- if (get_u32(&s.tsize, *argv, 10)) {
+ if (get_u32(&s.tsize, *argv, 10))
invarg("tsize", "invalid table size");
- return -1;
- }
} else if (matches(*argv, "linklayer") == 0) {
NEXT_ARG();
if (s.linklayer != LINKLAYER_UNSPEC)
duparg("linklayer", *argv);
- if (get_linklayer(&s.linklayer, *argv)) {
+ if (get_linklayer(&s.linklayer, *argv))
invarg("linklayer", "invalid linklayer");
- return -1;
- }
} else
break;
argc--; argv++;
@@ -158,4 +148,3 @@ void print_size_table(FILE *fp, const char *prefix, struct rtattr *rta)
}
#endif
}
-
diff --git a/tc/tc_util.c b/tc/tc_util.c
index 1d3153df..4764ecce 100644
--- a/tc/tc_util.c
+++ b/tc/tc_util.c
@@ -128,30 +128,31 @@ ok:
return 0;
}
-int print_tc_classid(char *buf, int len, __u32 h)
+int print_tc_classid(char *buf, int blen, __u32 h)
{
- char handle[40] = {};
+ SPRINT_BUF(handle) = {};
+ int hlen = SPRINT_BSIZE - 1;
if (h == TC_H_ROOT)
sprintf(handle, "root");
else if (h == TC_H_UNSPEC)
- snprintf(handle, len, "none");
+ snprintf(handle, hlen, "none");
else if (TC_H_MAJ(h) == 0)
- snprintf(handle, len, ":%x", TC_H_MIN(h));
+ snprintf(handle, hlen, ":%x", TC_H_MIN(h));
else if (TC_H_MIN(h) == 0)
- snprintf(handle, len, "%x:", TC_H_MAJ(h) >> 16);
+ snprintf(handle, hlen, "%x:", TC_H_MAJ(h) >> 16);
else
- snprintf(handle, len, "%x:%x", TC_H_MAJ(h) >> 16, TC_H_MIN(h));
+ snprintf(handle, hlen, "%x:%x", TC_H_MAJ(h) >> 16, TC_H_MIN(h));
if (use_names) {
char clname[IDNAME_MAX] = {};
if (id_to_name(cls_names, h, clname))
- snprintf(buf, len, "%s#%s", clname, handle);
+ snprintf(buf, blen, "%s#%s", clname, handle);
else
- snprintf(buf, len, "%s", handle);
+ snprintf(buf, blen, "%s", handle);
} else {
- snprintf(buf, len, "%s", handle);
+ snprintf(buf, blen, "%s", handle);
}
return 0;
@@ -249,18 +250,19 @@ void print_rate(char *buf, int len, __u64 rate)
extern int use_iec;
unsigned long kilo = use_iec ? 1024 : 1000;
const char *str = use_iec ? "i" : "";
- int i = 0;
static char *units[5] = {"", "K", "M", "G", "T"};
+ int i;
rate <<= 3; /* bytes/sec -> bits/sec */
- for (i = 0; i < ARRAY_SIZE(units); i++) {
+ for (i = 0; i < ARRAY_SIZE(units) - 1; i++) {
if (rate < kilo)
break;
if (((rate % kilo) != 0) && rate < 1000*kilo)
break;
rate /= kilo;
}
+
snprintf(buf, len, "%.0f%s%sbit", (double)rate, units[i], str);
}
@@ -606,4 +608,3 @@ compat_xstats:
if (tb[TCA_XSTATS] && xstats)
*xstats = tb[TCA_XSTATS];
}
-
diff --git a/tc/tc_util.h b/tc/tc_util.h
index 1be1b501..61e60b1c 100644
--- a/tc/tc_util.h
+++ b/tc/tc_util.h
@@ -19,8 +19,7 @@ enum
#define TCA_PRIO_MAX (__TCA_PRIO_MAX - 1)
#endif
-struct qdisc_util
-{
+struct qdisc_util {
struct qdisc_util *next;
const char *id;
int (*parse_qopt)(struct qdisc_util *qu, int argc, char **argv, struct nlmsghdr *n);
@@ -32,8 +31,7 @@ struct qdisc_util
};
extern __u16 f_proto;
-struct filter_util
-{
+struct filter_util {
struct filter_util *next;
char id[16];
int (*parse_fopt)(struct filter_util *qu, char *fhandle, int argc,
@@ -41,8 +39,7 @@ struct filter_util
int (*print_fopt)(struct filter_util *qu, FILE *f, struct rtattr *opt, __u32 fhandle);
};
-struct action_util
-{
+struct action_util {
struct action_util *next;
char id[16];
int (*parse_aopt)(struct action_util *a, int *argc, char ***argv,
@@ -51,6 +48,12 @@ struct action_util
int (*print_xstats)(struct action_util *au, FILE *f, struct rtattr *xstats);
};
+/*
+ * NOTE(review): same shape as qdisc_util/filter_util/action_util above (a
+ * ->next link, an id and a parse callback); presumably one entry per
+ * "tc exec" handler -- confirm against the code that registers these.
+ */
+struct exec_util {
+ struct exec_util *next;
+ char id[16];
+ int (*parse_eopt)(struct exec_util *eu, int argc, char **argv);
+};
+
extern const char *get_tc_lib(void);
extern struct qdisc_util *get_qdisc_kind(const char *str);
@@ -69,6 +72,7 @@ extern void print_size(char *buf, int len, __u32 size);
extern void print_qdisc_handle(char *buf, int len, __u32 h);
extern void print_time(char *buf, int len, __u32 time);
extern void print_linklayer(char *buf, int len, unsigned linklayer);
+
extern char * sprint_rate(__u64 rate, char *buf);
extern char * sprint_size(__u32 size, char *buf);
extern char * sprint_qdisc_handle(__u32 h, char *buf);
diff --git a/testsuite/Makefile b/testsuite/Makefile
index a2c8a2d9..20276500 100644
--- a/testsuite/Makefile
+++ b/testsuite/Makefile
@@ -1,9 +1,11 @@
## -- Config --
DEV := lo
-PREFIX := sudo -E
+PREFIX := sudo -E unshare -n
RESULTS_DIR := results
## -- End Config --
+HAVE_UNSHARED_UTIL := $(shell unshare --version 2> /dev/null)
+
rwildcard=$(wildcard $1$2) $(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2))
TESTS := $(patsubst tests/%,%,$(call rwildcard,tests/,*.t))
@@ -38,6 +40,9 @@ distclean: clean
echo "Entering iproute2" && cd iproute2 && $(MAKE) distclean && cd ..;
$(TESTS): clean
+ifeq (,$(HAVE_UNSHARED_UTIL))
+ $(error Please install util-linux tools to run tests in separated network namespace)
+endif
@mkdir -p $(RESULTS_DIR)
@for d in $(TESTS_DIR); do \
@@ -47,6 +52,9 @@ $(TESTS): clean
@for i in $(IPVERS); do \
o=`echo $$i | sed -e 's/iproute2\///'`; \
echo -n "Running $@ [$$o/`uname -r`]: "; \
+ TMP_ERR=`mktemp /tmp/tc_testsuite.XXXXXX`; \
+ TMP_OUT=`mktemp /tmp/tc_testsuite.XXXXXX`; \
+ STD_ERR="$$TMP_ERR" STD_OUT="$$TMP_OUT" \
TC="$$i/tc/tc" IP="$$i/ip/ip" DEV="$(DEV)" IPVER="$@" SNAME="$$i" \
ERRF="$(RESULTS_DIR)/$@.$$o.err" $(KENV) $(PREFIX) tests/$@ > $(RESULTS_DIR)/$@.$$o.out; \
if [ "$$?" = "127" ]; then \
@@ -56,5 +64,6 @@ $(TESTS): clean
else \
echo "PASS"; \
fi; \
+ rm "$$TMP_ERR" "$$TMP_OUT"; \
dmesg > $(RESULTS_DIR)/$@.$$o.dmesg; \
done
diff --git a/testsuite/lib/generic.sh b/testsuite/lib/generic.sh
index 3473cc13..b7de7044 100644
--- a/testsuite/lib/generic.sh
+++ b/testsuite/lib/generic.sh
@@ -30,57 +30,49 @@ ts_tc()
{
SCRIPT=$1; shift
DESC=$1; shift
- TMP_ERR=`mktemp /tmp/tc_testsuite.XXXXXX` || exit
- TMP_OUT=`mktemp /tmp/tc_testsuite.XXXXXX` || exit
- $TC $@ 2> $TMP_ERR > $TMP_OUT
+ $TC $@ 2> $STD_ERR > $STD_OUT
- if [ -s $TMP_ERR ]; then
+ if [ -s $STD_ERR ]; then
ts_err "${SCRIPT}: ${DESC} failed:"
ts_err "command: $TC $@"
ts_err "stderr output:"
- ts_err_cat $TMP_ERR
- if [ -s $TMP_OUT ]; then
+ ts_err_cat $STD_ERR
+ if [ -s $STD_OUT ]; then
ts_err "stdout output:"
- ts_err_cat $TMP_OUT
+ ts_err_cat $STD_OUT
fi
- elif [ -s $TMP_OUT ]; then
+ elif [ -s $STD_OUT ]; then
echo "${SCRIPT}: ${DESC} succeeded with output:"
- cat $TMP_OUT
+ cat $STD_OUT
else
echo "${SCRIPT}: ${DESC} succeeded"
fi
-
- rm $TMP_ERR $TMP_OUT
}
ts_ip()
{
SCRIPT=$1; shift
DESC=$1; shift
- TMP_ERR=`mktemp /tmp/tc_testsuite.XXXXXX` || exit
- TMP_OUT=`mktemp /tmp/tc_testsuite.XXXXXX` || exit
- $IP $@ 2> $TMP_ERR > $TMP_OUT
+ $IP $@ 2> $STD_ERR > $STD_OUT
RET=$?
- if [ -s $TMP_ERR ] || [ "$RET" != "0" ]; then
+ if [ -s $STD_ERR ] || [ "$RET" != "0" ]; then
ts_err "${SCRIPT}: ${DESC} failed:"
ts_err "command: $IP $@"
ts_err "stderr output:"
- ts_err_cat $TMP_ERR
- if [ -s $TMP_OUT ]; then
+ ts_err_cat $STD_ERR
+ if [ -s $STD_OUT ]; then
ts_err "stdout output:"
- ts_err_cat $TMP_OUT
+ ts_err_cat $STD_OUT
fi
- elif [ -s $TMP_OUT ]; then
+ elif [ -s $STD_OUT ]; then
echo "${SCRIPT}: ${DESC} succeeded with output:"
- cat $TMP_OUT
+ cat $STD_OUT
else
echo "${SCRIPT}: ${DESC} succeeded"
fi
-
- rm $TMP_ERR $TMP_OUT
}
ts_qdisc_available()
@@ -97,3 +89,47 @@ rand_dev()
{
echo "dev-$(tr -dc "[:alpha:]" < /dev/urandom | head -c 6)"
}
+
+# Finish the pending "test on ..." line with a failure tag and record the
+# failure through ts_err.
+pr_failed()
+{
+	printf '%s\n' " [FAILED]"
+	ts_err "matching failed"
+}
+
+# Finish the pending "test on ..." line with a success tag.
+pr_success()
+{
+	printf '%s\n' " [SUCCESS]"
+}
+
+# Check that the stdout captured in $STD_OUT contains a line matching the
+# extended regular expression $1, and print the verdict.
+test_on()
+{
+	echo -n "test on: \"$1\""
+	cat "$STD_OUT" | grep -qE "$1" || { pr_failed; return; }
+	pr_success
+}
+
+# Check that NO line of the stdout captured in $STD_OUT matches the extended
+# regular expression $1, and print the verdict.
+#
+# "grep -v -q" is not a negated match: it succeeds as soon as any single line
+# fails to match, and fails on empty output. Invert the plain match instead.
+test_on_not()
+{
+	echo -n "test on: \"$1\""
+	if grep -qE "$1" "$STD_OUT"
+	then
+		pr_failed
+	else
+		pr_success
+	fi
+}
+
+# Check that the stdout captured in $STD_OUT has exactly $1 lines, and print
+# the verdict.
+#
+# The count is compared numerically; grepping for "$1" in the wc output is a
+# substring match, so an expected count of 1 would also accept 10, 11, 21...
+test_lines_count()
+{
+	echo -n "test on lines count ($1): "
+	if [ "$(wc -l < "$STD_OUT")" -eq "$1" ]
+	then
+		pr_success
+	else
+		pr_failed
+	fi
+}
diff --git a/testsuite/tests/ip/link/new_link.t b/testsuite/tests/ip/link/new_link.t
index 549ff256..699adbcd 100755
--- a/testsuite/tests/ip/link/new_link.t
+++ b/testsuite/tests/ip/link/new_link.t
@@ -7,5 +7,9 @@ ts_log "[Testing add/del virtual links]"
NEW_DEV="$(rand_dev)"
ts_ip "$0" "Add $NEW_DEV dummy interface" link add dev $NEW_DEV type dummy
+
ts_ip "$0" "Show $NEW_DEV dummy interface" link show dev $NEW_DEV
+test_on "$NEW_DEV"
+test_lines_count 2
+
ts_ip "$0" "Del $NEW_DEV dummy interface" link del dev $NEW_DEV
diff --git a/testsuite/tests/ip/route/add_default_route.t b/testsuite/tests/ip/route/add_default_route.t
new file mode 100755
index 00000000..e5ea6473
--- /dev/null
+++ b/testsuite/tests/ip/route/add_default_route.t
@@ -0,0 +1,33 @@
+#!/bin/sh
+
+# "source" is a bash extension; "." is the portable spelling and also works
+# when /bin/sh is dash or another strict POSIX shell.
+. lib/generic.sh
+
+ts_log "[Testing add default route]"
+
+DEV=dummy0
+
+ts_ip "$0" "Add new interface $DEV" link add $DEV type dummy
+ts_ip "$0" "Set $DEV into UP state" link set up dev $DEV
+
+ts_ip "$0" "Add 1.1.1.1/24 addr on $DEV" addr add 1.1.1.1/24 dev $DEV
+ts_ip "$0" "Add default route via 1.1.1.2" route add default via 1.1.1.2
+
+ts_ip "$0" "Show IPv4 default route" -4 route show default
+test_on "default via 1.1.1.2 dev $DEV"
+test_lines_count 1
+
+ts_ip "$0" "Add another IPv4 route dst 2.2.2.0/24" -4 route add 2.2.2.0/24 dev $DEV
+ts_ip "$0" "Show IPv4 default route" -4 route show default
+test_on "default via 1.1.1.2 dev $DEV"
+test_lines_count 1
+
+ts_ip "$0" "Add dead:beef::1/64 addr on $DEV" -6 addr add dead:beef::1/64 dev $DEV
+ts_ip "$0" "Add default route via dead:beef::2" route add default via dead:beef::2
+ts_ip "$0" "Show IPv6 default route" -6 route show default
+test_on "default via dead:beef::2 dev $DEV"
+test_lines_count 1
+
+ts_ip "$0" "Add another IPv6 route dst cafe:babe::/64" -6 route add cafe:babe::/64 dev $DEV
+ts_ip "$0" "Show IPv6 default route" -6 route show default
+test_on "default via dead:beef::2 dev $DEV"
+test_lines_count 1
diff --git a/testsuite/tests/ip/tunnel/add_tunnel.t b/testsuite/tests/ip/tunnel/add_tunnel.t
new file mode 100755
index 00000000..18f6e370
--- /dev/null
+++ b/testsuite/tests/ip/tunnel/add_tunnel.t
@@ -0,0 +1,14 @@
+#!/bin/sh
+
+# "source" is a bash extension; "." is the portable spelling and also works
+# when /bin/sh is dash or another strict POSIX shell.
+. lib/generic.sh
+
+TUNNEL_NAME="tunnel_test_ip"
+
+ts_log "[Testing add/del tunnels]"
+
+ts_ip "$0" "Add GRE tunnel over IPv4" tunnel add name $TUNNEL_NAME mode gre local 1.1.1.1 remote 2.2.2.2
+ts_ip "$0" "Del GRE tunnel over IPv4" tunnel del $TUNNEL_NAME
+
+ts_ip "$0" "Add GRE tunnel over IPv6" tunnel add name $TUNNEL_NAME mode ip6gre local dead:beef::1 remote dead:beef::2
+ts_ip "$0" "Del GRE tunnel over IPv6" tunnel del $TUNNEL_NAME
+
diff --git a/tipc/.gitignore b/tipc/.gitignore
new file mode 100644
index 00000000..39ed83d6
--- /dev/null
+++ b/tipc/.gitignore
@@ -0,0 +1 @@
+tipc
diff --git a/tipc/Makefile b/tipc/Makefile
new file mode 100644
index 00000000..f06dcb11
--- /dev/null
+++ b/tipc/Makefile
@@ -0,0 +1,27 @@
+include ../Config
+
+# tipc is only built when libmnl was detected; otherwise TARGETS stays empty.
+ifeq ($(HAVE_MNL),y)
+
+TIPCOBJ=bearer.o \
+    cmdl.o link.o \
+    media.o misc.o \
+    msg.o nametable.o \
+    node.o socket.o \
+    peer.o tipc.o
+
+TARGETS=tipc
+CFLAGS += $(shell $(PKG_CONFIG) libmnl --cflags)
+LDLIBS += $(shell $(PKG_CONFIG) libmnl --libs)
+
+endif
+
+all: $(TARGETS) $(LIBS)
+
+tipc: $(TIPCOBJ)
+
+# Install target by target so that an empty TARGETS list is a no-op instead
+# of an "install" invocation without any source file.
+install: all
+	for i in $(TARGETS); \
+	do install -m 0755 $$i $(DESTDIR)$(SBINDIR); \
+	done
+
+clean:
+	rm -f $(TIPCOBJ) $(TARGETS)
diff --git a/tipc/README b/tipc/README
new file mode 100644
index 00000000..578a0b7b
--- /dev/null
+++ b/tipc/README
@@ -0,0 +1,63 @@
+DESIGN DECISIONS
+----------------
+
+HELP
+~~~~
+--help or -h is used for help. We do not reserve the bare word "help", which
+for example the ip command does. Reserving a bare word like help quickly
+becomes cumbersome to handle in the code. It might be simple to handle
+when it's passed early in the command chain like "ip addr help". But when
+the user tries to pass "help" further down this requires manual checks and
+special treatment. For example, at the time of writing this tool, it's
+possible to create a vlan named "help" with the ip tool, but it's impossible
+to remove it, the command just shows help. This is an effect of treating
+bare words specially.
+
+Help texts are not dynamically generated. That is, we do not pass data structures
+like command list or option lists and print them dynamically. This is
+intentional. There is always that exception and when it comes to help texts
+these exceptions are normally neglected at the expense of usability.
+
+KEY-VALUE
+~~~~~~~~~
+All options are key-values. There are both drawbacks and benefits to this.
+The main drawback is that it becomes more to write for the user and
+information might seem redundant. The main benefits are scalability and code
+simplification. Consistency is important.
+
+Consider this.
+1. tipc link set priority PRIO link LINK
+2. tipc link set LINK priority PRIO
+
+Link might seem redundant in (1). However, if the command should live for many
+years and be able to evolve example (2) limits the set command to only work on a
+single link with no ability to extend. As an example, let's say we introduce
+grouping on the kernel side.
+
+1. tipc link set priority PRIO group GROUP
+2. tipc link set ??? priority PRIO group GROUP
+
+2. breaks, we can't extend the command to cover a group.
+
+PARSING
+~~~~~~~
+Commands are single words. As an example, all words in "tipc link list" are
+commands. Options are key-values that can be given in any order. In
+"tipc link set priority PRIO link LINK" "tipc link set" are commands while
+priority and link are options. Meaning that they can be given like
+"tipc link set link LINK priority PRIO".
+
+Abbreviation matching works for both command and options. Meaning that
+"tipc link set priority PRIO link LINK" could be given as
+"tipc l s p PRIO l LINK" and "tipc link list" as "tipc l l".
+
+MEMORY
+~~~~~~
+The tool strives to avoid allocating memory on the heap. Most (if not all)
+memory allocations are on the stack.
+
+RETURNING
+~~~~~~~~~
+The tool could throw exit() deep down in functions but doing so always seems
+to limit the program in the long run. So we output the error and return an
+appropriate error code upon failure.
diff --git a/tipc/bearer.c b/tipc/bearer.c
new file mode 100644
index 00000000..30b54d9f
--- /dev/null
+++ b/tipc/bearer.c
@@ -0,0 +1,725 @@
+/*
+ * bearer.c TIPC bearer functionality.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Richard Alpe <richard.alpe@ericsson.com>
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <netdb.h>
+#include <errno.h>
+
+#include <linux/tipc_netlink.h>
+#include <linux/tipc.h>
+#include <linux/genetlink.h>
+
+#include <libmnl/libmnl.h>
+#include <sys/socket.h>
+
+#include "cmdl.h"
+#include "msg.h"
+#include "bearer.h"
+
+/* Write the OPTIONS section used by the bearer set/get usage texts to stderr. */
+static void _print_bearer_opts(void)
+{
+	static const char section[] =
+		"\nOPTIONS\n"
+		" priority - Bearer link priority\n"
+		" tolerance - Bearer link tolerance\n"
+		" window - Bearer link window\n";
+
+	fputs(section, stderr);
+}
+
+/* Write the MEDIA section listing the media keywords to stderr. */
+static void _print_bearer_media(void)
+{
+	static const char section[] =
+		"\nMEDIA\n"
+		" udp - User Datagram Protocol\n"
+		" ib - Infiniband\n"
+		" eth - Ethernet\n";
+
+	fputs(section, stderr);
+}
+
+/* Usage text for enabling an L2 bearer; argv[0] supplies the program name. */
+static void cmd_bearer_enable_l2_help(struct cmdl *cmdl)
+{
+	const char *prog = cmdl->argv[0];
+
+	fprintf(stderr,
+		"Usage: %s bearer enable media MEDIA device DEVICE [OPTIONS]\n"
+		"\nOPTIONS\n"
+		" domain DOMAIN - Discovery domain\n"
+		" priority PRIORITY - Bearer priority\n",
+		prog);
+}
+
+/*
+ * Usage text for enabling a UDP bearer; argv[0] supplies the program name.
+ * The "remoteport" placeholder is PORT (it was mislabelled IP).
+ */
+static void cmd_bearer_enable_udp_help(struct cmdl *cmdl)
+{
+	fprintf(stderr,
+		"Usage: %s bearer enable media udp name NAME localip IP [OPTIONS]\n"
+		"\nOPTIONS\n"
+		" domain DOMAIN - Discovery domain\n"
+		" priority PRIORITY - Bearer priority\n"
+		" localport PORT - Local UDP port (default 6118)\n"
+		" remoteip IP - Remote IP address\n"
+		" remoteport PORT - Remote UDP port (default 6118)\n",
+		cmdl->argv[0]);
+}
+
+/*
+ * Add the TIPC_NLA_BEARER_NAME attribute for an L2 bearer to nlh.
+ *
+ * The name has the form "<media>:<device>". The media prefix is taken from
+ * the parsed "media" option and falls back to "eth" when that option is not
+ * set, so a "media eth" invocation yields the same name as the former
+ * hard-coded "eth:" prefix.
+ *
+ * Returns 0 on success, or -EINVAL when the "device" option is missing or
+ * the name does not fit into TIPC_MAX_BEARER_NAME bytes.
+ */
+static int enable_l2_bearer(struct nlmsghdr *nlh, struct opt *opts,
+			    struct cmdl *cmdl)
+{
+	struct opt *dev = get_opt(opts, "device");
+	struct opt *med = get_opt(opts, "media");
+	const char *media = med ? med->val : "eth";
+	char id[TIPC_MAX_BEARER_NAME];
+	int len;
+
+	if (!dev) {
+		fprintf(stderr, "error: missing bearer device\n");
+		return -EINVAL;
+	}
+
+	len = snprintf(id, sizeof(id), "%s:%s", media, dev->val);
+	if (len < 0 || (size_t)len >= sizeof(id)) {
+		/* A truncated name would no longer be the one the user asked for. */
+		fprintf(stderr, "error: bearer name too long\n");
+		return -EINVAL;
+	}
+	mnl_attr_put_strz(nlh, TIPC_NLA_BEARER_NAME, id);
+
+	return 0;
+}
+
+/*
+ * Netlink callback: pull TIPC_NLA_NET_ID out of the nested TIPC_NLA_NET
+ * attribute and store it in the int that data points to. Returns
+ * MNL_CB_ERROR when either attribute is absent, MNL_CB_OK otherwise.
+ */
+static int get_netid_cb(const struct nlmsghdr *nlh, void *data)
+{
+	struct nlattr *top[TIPC_NLA_MAX + 1] = {};
+	struct nlattr *net[TIPC_NLA_NET_MAX + 1] = {};
+	int *out = data;
+
+	/* Attributes start right after the generic netlink header. */
+	mnl_attr_parse(nlh, sizeof(struct genlmsghdr), parse_attrs, top);
+	if (!top[TIPC_NLA_NET])
+		return MNL_CB_ERROR;
+
+	mnl_attr_parse_nested(top[TIPC_NLA_NET], parse_attrs, net);
+	if (!net[TIPC_NLA_NET_ID])
+		return MNL_CB_ERROR;
+
+	*out = mnl_attr_get_u32(net[TIPC_NLA_NET_ID]);
+	return MNL_CB_OK;
+}
+
+/*
+ * Write a default multicast address for address family af into buf
+ * (bufsize bytes), derived from the TIPC network id reported by the kernel:
+ * "228.0.<hi>.<lo>" for AF_INET, "ff02::<id>" otherwise.
+ *
+ * Returns 0 on success, -1 if the request could not be initialised and
+ * -EINVAL if no network id could be obtained.
+ */
+static int generate_multicast(short af, char *buf, int bufsize)
+{
+	/*
+	 * Start from a sentinel: if the dump succeeds without get_netid_cb ever
+	 * running, netid would otherwise be read uninitialised.
+	 * NOTE(review): assumes valid network ids are never negative as int.
+	 */
+	int netid = -1;
+	char mnl_msg[MNL_SOCKET_BUFFER_SIZE];
+	struct nlmsghdr *nlh;
+
+	nlh = msg_init(mnl_msg, TIPC_NL_NET_GET);
+	if (!nlh) {
+		fprintf(stderr, "error, message initialization failed\n");
+		return -1;
+	}
+	if (msg_dumpit(nlh, get_netid_cb, &netid) || netid < 0) {
+		fprintf(stderr, "error, failed to fetch TIPC network id from kernel\n");
+		return -EINVAL;
+	}
+
+	/* %u expects unsigned int, so convert explicitly. */
+	if (af == AF_INET)
+		snprintf(buf, bufsize, "228.0.%u.%u",
+			 ((unsigned int)netid >> 8) & 0xFF,
+			 (unsigned int)netid & 0xFF);
+	else
+		snprintf(buf, bufsize, "ff02::%u", (unsigned int)netid);
+
+	return 0;
+}
+
+/*
+ * Add the bearer name and the UDP local/remote socket addresses for a UDP
+ * bearer to nlh.
+ *
+ * Requires the "name" and "localip" options. "localport" and "remoteport"
+ * default to "6118"; when "remoteip" is absent a multicast address from
+ * generate_multicast() is used. Local and remote address must resolve to
+ * the same address family.
+ *
+ * Returns 0 on success, -EINVAL on usage errors (or when help was requested)
+ * and the getaddrinfo() error code when address resolution fails. Both
+ * addrinfo lists are released on every path once they have been obtained.
+ */
+static int enable_udp_bearer(struct nlmsghdr *nlh, struct opt *opts,
+			     struct cmdl *cmdl)
+{
+	int err;
+	struct opt *opt;
+	struct nlattr *nest;
+	char buf[INET6_ADDRSTRLEN];
+	char *locport = "6118";
+	char *remport = "6118";
+	char *locip = NULL;
+	char *remip = NULL;
+	char name[TIPC_MAX_BEARER_NAME];
+	struct addrinfo *loc = NULL;
+	struct addrinfo *rem = NULL;
+	struct addrinfo hints = {
+		.ai_family = AF_UNSPEC,
+		.ai_socktype = SOCK_DGRAM
+	};
+
+	if (help_flag) {
+		cmd_bearer_enable_udp_help(cmdl);
+		/* TODO find a better error code? */
+		return -EINVAL;
+	}
+
+	opt = get_opt(opts, "name");
+	if (!opt) {
+		fprintf(stderr, "error, udp bearer name missing\n");
+		cmd_bearer_enable_udp_help(cmdl);
+		return -EINVAL;
+	}
+	snprintf(name, sizeof(name), "udp:%s", opt->val);
+
+	opt = get_opt(opts, "localip");
+	if (!opt) {
+		fprintf(stderr, "error, udp bearer localip missing\n");
+		cmd_bearer_enable_udp_help(cmdl);
+		return -EINVAL;
+	}
+	locip = opt->val;
+
+	if ((opt = get_opt(opts, "remoteip")))
+		remip = opt->val;
+
+	if ((opt = get_opt(opts, "localport")))
+		locport = opt->val;
+
+	if ((opt = get_opt(opts, "remoteport")))
+		remport = opt->val;
+
+	err = getaddrinfo(locip, locport, &hints, &loc);
+	if (err) {
+		fprintf(stderr, "UDP local address error: %s\n",
+			gai_strerror(err));
+		return err;
+	}
+
+	/* From here on "loc" is owned by this function: leave via "out". */
+	if (!remip) {
+		if (generate_multicast(loc->ai_family, buf, sizeof(buf))) {
+			fprintf(stderr, "Failed to generate multicast address\n");
+			err = -EINVAL;
+			goto out;
+		}
+		remip = buf;
+	}
+
+	err = getaddrinfo(remip, remport, &hints, &rem);
+	if (err) {
+		fprintf(stderr, "UDP remote address error: %s\n",
+			gai_strerror(err));
+		rem = NULL;	/* nothing to release for a failed lookup */
+		goto out;
+	}
+
+	if (rem->ai_family != loc->ai_family) {
+		fprintf(stderr, "UDP local and remote AF mismatch\n");
+		err = -EINVAL;
+		goto out;
+	}
+
+	mnl_attr_put_strz(nlh, TIPC_NLA_BEARER_NAME, name);
+
+	nest = mnl_attr_nest_start(nlh, TIPC_NLA_BEARER_UDP_OPTS);
+	mnl_attr_put(nlh, TIPC_NLA_UDP_LOCAL, loc->ai_addrlen, loc->ai_addr);
+	mnl_attr_put(nlh, TIPC_NLA_UDP_REMOTE, rem->ai_addrlen, rem->ai_addr);
+	mnl_attr_nest_end(nlh, nest);
+
+out:
+	/* freeaddrinfo(NULL) is not guaranteed to be safe, so guard "rem". */
+	if (rem)
+		freeaddrinfo(rem);
+	freeaddrinfo(loc);
+
+	return err;
+}
+
+/* Generic "bearer enable" usage text followed by the MEDIA section. */
+static void cmd_bearer_enable_help(struct cmdl *cmdl)
+{
+	const char *prog = cmdl->argv[0];
+
+	fprintf(stderr,
+		"Usage: %s bearer enable [OPTIONS] media MEDIA ARGS...\n\n"
+		"OPTIONS\n"
+		" domain DOMAIN - Discovery domain\n"
+		" priority PRIORITY - Bearer priority\n",
+		prog);
+	_print_bearer_media();
+}
+
+/*
+ * "bearer enable": parse the key/value options, build a
+ * TIPC_NL_BEARER_ENABLE request and send it with msg_doit().
+ *
+ * The incoming nlh is ignored; a fresh message is initialised in a local
+ * buffer. Returns the msg_doit() result, -1 if the message could not be
+ * initialised, or a negative errno-style code for usage errors.
+ *
+ * NOTE(review): the usage text advertises "ib", but only "udp" and "eth"
+ * are accepted here. The original second comparison against "udp" could
+ * never be true and presumably meant "ib"; accepting it is only correct if
+ * the L2 name helper builds the name from the media type rather than from a
+ * fixed "eth:" prefix.
+ */
+static int cmd_bearer_enable(struct nlmsghdr *nlh, const struct cmd *cmd,
+			     struct cmdl *cmdl, void *data)
+{
+	char msg[MNL_SOCKET_BUFFER_SIZE];
+	struct nlattr *bearer;
+	struct opt *o;
+	char *media;
+	int rc;
+	struct opt opts[] = {
+		{ "device",		NULL },
+		{ "domain",		NULL },
+		{ "localip",		NULL },
+		{ "localport",		NULL },
+		{ "media",		NULL },
+		{ "name",		NULL },
+		{ "priority",		NULL },
+		{ "remoteip",		NULL },
+		{ "remoteport",		NULL },
+		{ NULL }
+	};
+
+	if (parse_opts(opts, cmdl) < 0) {
+		if (help_flag)
+			(cmd->help)(cmdl);
+		return -EINVAL;
+	}
+
+	o = get_opt(opts, "media");
+	if (!o) {
+		if (help_flag)
+			(cmd->help)(cmdl);
+		else
+			fprintf(stderr, "error, missing bearer media\n");
+		return -EINVAL;
+	}
+	media = o->val;
+
+	nlh = msg_init(msg, TIPC_NL_BEARER_ENABLE);
+	if (!nlh) {
+		fprintf(stderr, "error: message initialisation failed\n");
+		return -1;
+	}
+	bearer = mnl_attr_nest_start(nlh, TIPC_NLA_BEARER);
+
+	o = get_opt(opts, "domain");
+	if (o)
+		mnl_attr_put_u32(nlh, TIPC_NLA_BEARER_DOMAIN, atoi(o->val));
+
+	o = get_opt(opts, "priority");
+	if (o) {
+		struct nlattr *props;
+
+		props = mnl_attr_nest_start(nlh, TIPC_NLA_BEARER_PROP);
+		mnl_attr_put_u32(nlh, TIPC_NLA_PROP_PRIO, atoi(o->val));
+		mnl_attr_nest_end(nlh, props);
+	}
+
+	/* Media-specific part: contextual help, or the bearer name/addresses. */
+	if (strcmp(media, "udp") == 0) {
+		if (help_flag) {
+			cmd_bearer_enable_udp_help(cmdl);
+			return -EINVAL;
+		}
+		rc = enable_udp_bearer(nlh, opts, cmdl);
+	} else if (strcmp(media, "eth") == 0) {
+		if (help_flag) {
+			cmd_bearer_enable_l2_help(cmdl);
+			return -EINVAL;
+		}
+		rc = enable_l2_bearer(nlh, opts, cmdl);
+	} else {
+		fprintf(stderr, "error, invalid media type \"%s\"\n", media);
+		return -EINVAL;
+	}
+	if (rc)
+		return rc;
+
+	mnl_attr_nest_end(nlh, bearer);
+
+	return msg_doit(nlh, NULL, NULL);
+}
+
+/*
+ * Add the TIPC_NLA_BEARER_NAME attribute identifying an L2 bearer to nlh.
+ *
+ * The name has the form "<media>:<device>". The media prefix is taken from
+ * the parsed "media" option and falls back to "eth" when that option is not
+ * set, so a "media eth" invocation yields the same name as the former
+ * hard-coded "eth:" prefix.
+ *
+ * Returns 0 on success, or -EINVAL when the "device" option is missing or
+ * the name does not fit into TIPC_MAX_BEARER_NAME bytes.
+ */
+static int add_l2_bearer(struct nlmsghdr *nlh, struct opt *opts)
+{
+	struct opt *dev = get_opt(opts, "device");
+	struct opt *med = get_opt(opts, "media");
+	const char *media = med ? med->val : "eth";
+	char id[TIPC_MAX_BEARER_NAME];
+	int len;
+
+	if (!dev) {
+		fprintf(stderr, "error: missing bearer device\n");
+		return -EINVAL;
+	}
+
+	len = snprintf(id, sizeof(id), "%s:%s", media, dev->val);
+	if (len < 0 || (size_t)len >= sizeof(id)) {
+		/* A truncated name would no longer be the one the user asked for. */
+		fprintf(stderr, "error: bearer name too long\n");
+		return -EINVAL;
+	}
+	mnl_attr_put_strz(nlh, TIPC_NLA_BEARER_NAME, id);
+
+	return 0;
+}
+
+/*
+ * Add the TIPC_NLA_BEARER_NAME attribute "udp:<name>" to nlh, where <name>
+ * is the value of the "name" option. Returns -EINVAL if that option is
+ * missing, 0 otherwise.
+ */
+static int add_udp_bearer(struct nlmsghdr *nlh, struct opt *opts)
+{
+	char bearer[TIPC_MAX_BEARER_NAME];
+	struct opt *name = get_opt(opts, "name");
+
+	if (name == NULL) {
+		fprintf(stderr, "error: missing bearer name\n");
+		return -EINVAL;
+	}
+
+	snprintf(bearer, sizeof(bearer), "udp:%s", name->val);
+	mnl_attr_put_strz(nlh, TIPC_NLA_BEARER_NAME, bearer);
+	return 0;
+}
+
+/*
+ * Usage text for disabling an L2 bearer. L2 bearers are addressed by
+ * "media MEDIA device DEVICE"; the text used to say "media udp", which is
+ * the syntax of the UDP variant.
+ */
+static void cmd_bearer_disable_l2_help(struct cmdl *cmdl)
+{
+	fprintf(stderr, "Usage: %s bearer disable media MEDIA device DEVICE\n",
+		cmdl->argv[0]);
+}
+
+/* Usage text for disabling a UDP bearer; argv[0] supplies the program name. */
+static void cmd_bearer_disable_udp_help(struct cmdl *cmdl)
+{
+	const char *prog = cmdl->argv[0];
+
+	fprintf(stderr, "Usage: %s bearer disable media udp name NAME\n", prog);
+}
+
+/* Generic "bearer disable" usage text followed by the MEDIA section. */
+static void cmd_bearer_disable_help(struct cmdl *cmdl)
+{
+	const char *prog = cmdl->argv[0];
+
+	fprintf(stderr, "Usage: %s bearer disable media MEDIA ARGS...\n", prog);
+	_print_bearer_media();
+}
+
+/*
+ * "bearer disable": parse the key/value options, build a
+ * TIPC_NL_BEARER_DISABLE request naming the bearer and send it with
+ * msg_doit().
+ *
+ * The incoming nlh is ignored; a fresh message is initialised in a local
+ * buffer. Returns the msg_doit() result, -1 if the message could not be
+ * initialised, or a negative errno-style code for usage errors.
+ *
+ * NOTE(review): the usage text advertises "ib", but only "udp" and "eth"
+ * are accepted here. The original second comparison against "udp" could
+ * never be true and presumably meant "ib"; accepting it is only correct if
+ * the L2 name helper builds the name from the media type rather than from a
+ * fixed "eth:" prefix.
+ */
+static int cmd_bearer_disable(struct nlmsghdr *nlh, const struct cmd *cmd,
+			      struct cmdl *cmdl, void *data)
+{
+	char msg[MNL_SOCKET_BUFFER_SIZE];
+	struct nlattr *bearer;
+	struct opt *o;
+	char *media;
+	int rc;
+	struct opt opts[] = {
+		{ "device",		NULL },
+		{ "name",		NULL },
+		{ "media",		NULL },
+		{ NULL }
+	};
+
+	if (parse_opts(opts, cmdl) < 0) {
+		if (help_flag)
+			(cmd->help)(cmdl);
+		return -EINVAL;
+	}
+
+	o = get_opt(opts, "media");
+	if (!o) {
+		if (help_flag)
+			(cmd->help)(cmdl);
+		else
+			fprintf(stderr, "error, missing bearer media\n");
+		return -EINVAL;
+	}
+	media = o->val;
+
+	nlh = msg_init(msg, TIPC_NL_BEARER_DISABLE);
+	if (!nlh) {
+		fprintf(stderr, "error, message initialisation failed\n");
+		return -1;
+	}
+
+	bearer = mnl_attr_nest_start(nlh, TIPC_NLA_BEARER);
+
+	/* Media-specific part: contextual help, or the bearer name. */
+	if (strcmp(media, "udp") == 0) {
+		if (help_flag) {
+			cmd_bearer_disable_udp_help(cmdl);
+			return -EINVAL;
+		}
+		rc = add_udp_bearer(nlh, opts);
+	} else if (strcmp(media, "eth") == 0) {
+		if (help_flag) {
+			cmd_bearer_disable_l2_help(cmdl);
+			return -EINVAL;
+		}
+		rc = add_l2_bearer(nlh, opts);
+	} else {
+		fprintf(stderr, "error, invalid media type \"%s\"\n", media);
+		return -EINVAL;
+	}
+	if (rc)
+		return rc;
+
+	mnl_attr_nest_end(nlh, bearer);
+
+	return msg_doit(nlh, NULL, NULL);
+}
+
+/* Generic "bearer set" usage text plus the OPTIONS and MEDIA sections. */
+static void cmd_bearer_set_help(struct cmdl *cmdl)
+{
+	const char *prog = cmdl->argv[0];
+
+	fprintf(stderr, "Usage: %s bearer set OPTION media MEDIA ARGS...\n", prog);
+	_print_bearer_opts();
+	_print_bearer_media();
+}
+
+/* "bearer set" usage text for UDP bearers plus the OPTIONS section. */
+static void cmd_bearer_set_udp_help(struct cmdl *cmdl)
+{
+	const char *prog = cmdl->argv[0];
+
+	fprintf(stderr, "Usage: %s bearer set OPTION media udp name NAME\n\n", prog);
+	_print_bearer_opts();
+}
+
+/*
+ * "bearer set" usage text for an L2 bearer of the given media type, plus
+ * the OPTIONS section.
+ */
+static void cmd_bearer_set_l2_help(struct cmdl *cmdl, char *media)
+{
+	const char *prog = cmdl->argv[0];
+
+	fprintf(stderr,
+		"Usage: %s bearer set [OPTION]... media %s device DEVICE\n",
+		prog, media);
+	_print_bearer_opts();
+}
+
+/*
+ * "bearer set priority|tolerance|window VALUE media ...": set one bearer
+ * property.
+ *
+ * cmd->cmd selects the property attribute, the next command-line word is
+ * the value (converted with atoi()), and the remaining key/value options
+ * identify the bearer. The incoming nlh is ignored; a fresh
+ * TIPC_NL_BEARER_SET message is built in a local buffer and sent with
+ * msg_doit().
+ *
+ * NOTE(review): help_flag is already handled before option parsing, so the
+ * per-media help branches further down look unreachable unless something
+ * in between changes help_flag -- confirm.
+ * NOTE(review): only "udp" and "eth" are accepted; the original second
+ * comparison against "udp" could never be true and presumably meant "ib".
+ */
+static int cmd_bearer_set_prop(struct nlmsghdr *nlh, const struct cmd *cmd,
+			       struct cmdl *cmdl, void *data)
+{
+	char msg[MNL_SOCKET_BUFFER_SIZE];
+	const char *which = cmd->cmd;
+	struct nlattr *bearer;
+	struct nlattr *props;
+	struct opt *o;
+	char *media;
+	int prop;
+	int val;
+	int rc;
+	struct opt opts[] = {
+		{ "device",		NULL },
+		{ "media",		NULL },
+		{ "name",		NULL },
+		{ NULL }
+	};
+
+	if (!strcmp(which, "priority"))
+		prop = TIPC_NLA_PROP_PRIO;
+	else if (!strcmp(which, "tolerance"))
+		prop = TIPC_NLA_PROP_TOL;
+	else if (!strcmp(which, "window"))
+		prop = TIPC_NLA_PROP_WIN;
+	else
+		return -EINVAL;
+
+	if (help_flag) {
+		(cmd->help)(cmdl);
+		return -EINVAL;
+	}
+
+	if (cmdl->optind >= cmdl->argc) {
+		fprintf(stderr, "error, missing value\n");
+		return -EINVAL;
+	}
+	val = atoi(shift_cmdl(cmdl));
+
+	if (parse_opts(opts, cmdl) < 0)
+		return -EINVAL;
+
+	nlh = msg_init(msg, TIPC_NL_BEARER_SET);
+	if (!nlh) {
+		fprintf(stderr, "error, message initialisation failed\n");
+		return -1;
+	}
+	bearer = mnl_attr_nest_start(nlh, TIPC_NLA_BEARER);
+
+	props = mnl_attr_nest_start(nlh, TIPC_NLA_BEARER_PROP);
+	mnl_attr_put_u32(nlh, prop, val);
+	mnl_attr_nest_end(nlh, props);
+
+	o = get_opt(opts, "media");
+	if (!o) {
+		fprintf(stderr, "error, missing media\n");
+		return -EINVAL;
+	}
+	media = o->val;
+
+	if (strcmp(media, "udp") == 0) {
+		if (help_flag) {
+			cmd_bearer_set_udp_help(cmdl);
+			return -EINVAL;
+		}
+		rc = add_udp_bearer(nlh, opts);
+	} else if (strcmp(media, "eth") == 0) {
+		if (help_flag) {
+			cmd_bearer_set_l2_help(cmdl, media);
+			return -EINVAL;
+		}
+		rc = add_l2_bearer(nlh, opts);
+	} else {
+		fprintf(stderr, "error, invalid media type \"%s\"\n", media);
+		return -EINVAL;
+	}
+	if (rc)
+		return rc;
+
+	mnl_attr_nest_end(nlh, bearer);
+
+	return msg_doit(nlh, NULL, NULL);
+}
+
+/* "bearer set": dispatch to the handler of the named property. */
+static int cmd_bearer_set(struct nlmsghdr *nlh, const struct cmd *cmd,
+			  struct cmdl *cmdl, void *data)
+{
+	const struct cmd props[] = {
+		{ .cmd = "priority",  .func = cmd_bearer_set_prop, .help = cmd_bearer_set_help },
+		{ .cmd = "tolerance", .func = cmd_bearer_set_prop, .help = cmd_bearer_set_help },
+		{ .cmd = "window",    .func = cmd_bearer_set_prop, .help = cmd_bearer_set_help },
+		{ .cmd = NULL }
+	};
+
+	return run_cmd(nlh, cmd, props, cmdl, NULL);
+}
+
+/* Generic "bearer get" usage text plus the OPTIONS and MEDIA sections. */
+static void cmd_bearer_get_help(struct cmdl *cmdl)
+{
+	const char *prog = cmdl->argv[0];
+
+	fprintf(stderr, "Usage: %s bearer get OPTION media MEDIA ARGS...\n", prog);
+	_print_bearer_opts();
+	_print_bearer_media();
+}
+
+/* "bearer get" usage text for UDP bearers plus the OPTIONS section. */
+static void cmd_bearer_get_udp_help(struct cmdl *cmdl)
+{
+	const char *prog = cmdl->argv[0];
+
+	fprintf(stderr, "Usage: %s bearer get OPTION media udp name NAME\n\n", prog);
+	_print_bearer_opts();
+}
+
+/*
+ * "bearer get" usage text for an L2 bearer of the given media type, plus
+ * the OPTIONS section.
+ */
+static void cmd_bearer_get_l2_help(struct cmdl *cmdl, char *media)
+{
+	const char *prog = cmdl->argv[0];
+
+	fprintf(stderr,
+		"Usage: %s bearer get [OPTION]... media %s device DEVICE\n",
+		prog, media);
+	_print_bearer_opts();
+}
+
+/*
+ * Netlink callback for "bearer get": data points to the property attribute
+ * type to look up. Walks TIPC_NLA_BEARER -> TIPC_NLA_BEARER_PROP and
+ * prints the requested property as an unsigned number on stdout. Returns
+ * MNL_CB_ERROR if any level is missing, MNL_CB_OK otherwise.
+ */
+static int bearer_get_cb(const struct nlmsghdr *nlh, void *data)
+{
+	struct nlattr *top[TIPC_NLA_MAX + 1] = {};
+	struct nlattr *bearer[TIPC_NLA_BEARER_MAX + 1] = {};
+	struct nlattr *props[TIPC_NLA_PROP_MAX + 1] = {};
+	const int *wanted = data;
+
+	/* Attributes start right after the generic netlink header. */
+	mnl_attr_parse(nlh, sizeof(struct genlmsghdr), parse_attrs, top);
+	if (!top[TIPC_NLA_BEARER])
+		return MNL_CB_ERROR;
+
+	mnl_attr_parse_nested(top[TIPC_NLA_BEARER], parse_attrs, bearer);
+	if (!bearer[TIPC_NLA_BEARER_PROP])
+		return MNL_CB_ERROR;
+
+	mnl_attr_parse_nested(bearer[TIPC_NLA_BEARER_PROP], parse_attrs, props);
+	if (!props[*wanted])
+		return MNL_CB_ERROR;
+
+	printf("%u\n", mnl_attr_get_u32(props[*wanted]));
+	return MNL_CB_OK;
+}
+
+/*
+ * "bearer get priority|tolerance|window media ...": query one bearer
+ * property.
+ *
+ * cmd->cmd selects the property attribute and the key/value options
+ * identify the bearer. The incoming nlh is ignored; a fresh
+ * TIPC_NL_BEARER_GET message is built in a local buffer and sent with
+ * msg_doit(), with bearer_get_cb printing the value from the reply.
+ *
+ * NOTE(review): help_flag is already handled before option parsing, so the
+ * per-media help branches further down look unreachable unless something
+ * in between changes help_flag -- confirm.
+ * NOTE(review): only "udp" and "eth" are accepted; the original second
+ * comparison against "udp" could never be true and presumably meant "ib".
+ */
+static int cmd_bearer_get_prop(struct nlmsghdr *nlh, const struct cmd *cmd,
+			       struct cmdl *cmdl, void *data)
+{
+	char msg[MNL_SOCKET_BUFFER_SIZE];
+	const char *which = cmd->cmd;
+	struct nlattr *bearer;
+	struct opt *o;
+	char *media;
+	int prop;
+	int rc;
+	struct opt opts[] = {
+		{ "device",		NULL },
+		{ "media",		NULL },
+		{ "name",		NULL },
+		{ NULL }
+	};
+
+	if (!strcmp(which, "priority"))
+		prop = TIPC_NLA_PROP_PRIO;
+	else if (!strcmp(which, "tolerance"))
+		prop = TIPC_NLA_PROP_TOL;
+	else if (!strcmp(which, "window"))
+		prop = TIPC_NLA_PROP_WIN;
+	else
+		return -EINVAL;
+
+	if (help_flag) {
+		(cmd->help)(cmdl);
+		return -EINVAL;
+	}
+
+	if (parse_opts(opts, cmdl) < 0)
+		return -EINVAL;
+
+	nlh = msg_init(msg, TIPC_NL_BEARER_GET);
+	if (!nlh) {
+		fprintf(stderr, "error, message initialisation failed\n");
+		return -1;
+	}
+
+	o = get_opt(opts, "media");
+	if (!o) {
+		fprintf(stderr, "error, missing media\n");
+		return -EINVAL;
+	}
+	media = o->val;
+
+	bearer = mnl_attr_nest_start(nlh, TIPC_NLA_BEARER);
+	if (strcmp(media, "udp") == 0) {
+		if (help_flag) {
+			cmd_bearer_get_udp_help(cmdl);
+			return -EINVAL;
+		}
+		rc = add_udp_bearer(nlh, opts);
+	} else if (strcmp(media, "eth") == 0) {
+		if (help_flag) {
+			cmd_bearer_get_l2_help(cmdl, media);
+			return -EINVAL;
+		}
+		rc = add_l2_bearer(nlh, opts);
+	} else {
+		fprintf(stderr, "error, invalid media type \"%s\"\n", media);
+		return -EINVAL;
+	}
+	if (rc)
+		return rc;
+
+	mnl_attr_nest_end(nlh, bearer);
+
+	return msg_doit(nlh, bearer_get_cb, &prop);
+}
+
+/* "bearer get": dispatch to the handler of the named property. */
+static int cmd_bearer_get(struct nlmsghdr *nlh, const struct cmd *cmd,
+			  struct cmdl *cmdl, void *data)
+{
+	const struct cmd props[] = {
+		{ .cmd = "priority",  .func = cmd_bearer_get_prop, .help = cmd_bearer_get_help },
+		{ .cmd = "tolerance", .func = cmd_bearer_get_prop, .help = cmd_bearer_get_help },
+		{ .cmd = "window",    .func = cmd_bearer_get_prop, .help = cmd_bearer_get_help },
+		{ .cmd = NULL }
+	};
+
+	return run_cmd(nlh, cmd, props, cmdl, NULL);
+}
+
+/*
+ * Netlink callback for "bearer list": print the TIPC_NLA_BEARER_NAME of
+ * the bearer carried in this message on stdout. Complains on stderr and
+ * returns MNL_CB_ERROR when the bearer or its name is missing.
+ */
+static int bearer_list_cb(const struct nlmsghdr *nlh, void *data)
+{
+	struct nlattr *top[TIPC_NLA_MAX + 1] = {};
+	struct nlattr *bearer[TIPC_NLA_BEARER_MAX + 1] = {};
+	const struct nlattr *name;
+
+	/* Attributes start right after the generic netlink header. */
+	mnl_attr_parse(nlh, sizeof(struct genlmsghdr), parse_attrs, top);
+	if (!top[TIPC_NLA_BEARER]) {
+		fprintf(stderr, "No bearer in netlink response\n");
+		return MNL_CB_ERROR;
+	}
+
+	mnl_attr_parse_nested(top[TIPC_NLA_BEARER], parse_attrs, bearer);
+	name = bearer[TIPC_NLA_BEARER_NAME];
+	if (!name) {
+		fprintf(stderr, "Bearer name missing in netlink response\n");
+		return MNL_CB_ERROR;
+	}
+
+	printf("%s\n", mnl_attr_get_str(name));
+	return MNL_CB_OK;
+}
+
+/*
+ * "bearer list": dump all bearers with a TIPC_NL_BEARER_GET request and
+ * let bearer_list_cb print each name. With help requested, only the usage
+ * line is printed and -EINVAL returned.
+ */
+static int cmd_bearer_list(struct nlmsghdr *nlh, const struct cmd *cmd,
+			   struct cmdl *cmdl, void *data)
+{
+	char msg[MNL_SOCKET_BUFFER_SIZE];
+
+	if (help_flag) {
+		fprintf(stderr, "Usage: %s bearer list\n", cmdl->argv[0]);
+		return -EINVAL;
+	}
+
+	nlh = msg_init(msg, TIPC_NL_BEARER_GET);
+	if (nlh)
+		return msg_dumpit(nlh, bearer_list_cb, NULL);
+
+	fprintf(stderr, "error, message initialisation failed\n");
+	return -1;
+}
+
+/* Top-level "bearer" usage text listing the available sub-commands. */
+void cmd_bearer_help(struct cmdl *cmdl)
+{
+	const char *prog = cmdl->argv[0];
+
+	fprintf(stderr,
+		"Usage: %s bearer COMMAND [ARGS] ...\n"
+		"\n"
+		"COMMANDS\n"
+		" enable - Enable a bearer\n"
+		" disable - Disable a bearer\n"
+		" set - Set various bearer properties\n"
+		" get - Get various bearer properties\n"
+		" list - List bearers\n",
+		prog);
+}
+
+/* Entry point of the "bearer" command: dispatch to the named sub-command. */
+int cmd_bearer(struct nlmsghdr *nlh, const struct cmd *cmd, struct cmdl *cmdl,
+	       void *data)
+{
+	const struct cmd subcmds[] = {
+		{ .cmd = "disable", .func = cmd_bearer_disable, .help = cmd_bearer_disable_help },
+		{ .cmd = "enable",  .func = cmd_bearer_enable,  .help = cmd_bearer_enable_help },
+		{ .cmd = "get",     .func = cmd_bearer_get,     .help = cmd_bearer_get_help },
+		{ .cmd = "list",    .func = cmd_bearer_list,    .help = NULL },
+		{ .cmd = "set",     .func = cmd_bearer_set,     .help = cmd_bearer_set_help },
+		{ .cmd = NULL }
+	};
+
+	return run_cmd(nlh, cmd, subcmds, cmdl, NULL);
+}
diff --git a/tipc/bearer.h b/tipc/bearer.h
new file mode 100644
index 00000000..9459d65e
--- /dev/null
+++ b/tipc/bearer.h
@@ -0,0 +1,22 @@
+/*
+ * bearer.h TIPC bearer functionality.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Richard Alpe <richard.alpe@ericsson.com>
+ */
+
+#ifndef _TIPC_BEARER_H
+#define _TIPC_BEARER_H
+
+#include "cmdl.h"
+
+extern int help_flag;
+
+int cmd_bearer(struct nlmsghdr *nlh, const struct cmd *cmd, struct cmdl *cmdl, void *data);
+void cmd_bearer_help(struct cmdl *cmdl);
+
+#endif
diff --git a/tipc/cmdl.c b/tipc/cmdl.c
new file mode 100644
index 00000000..b816f7d4
--- /dev/null
+++ b/tipc/cmdl.c
@@ -0,0 +1,127 @@
+/*
+ * cmdl.c Framework for handling command line options.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Richard Alpe <richard.alpe@ericsson.com>
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+
+#include <libmnl/libmnl.h>
+
+#include "cmdl.h"
+
+/*
+ * Look up str in the table cmds (terminated by an entry whose cmd is NULL),
+ * accepting abbreviations: an entry matches when str is a prefix of its
+ * name. Returns the only matching entry, or NULL when no entry or more
+ * than one entry matches.
+ */
+const struct cmd *find_cmd(const struct cmd *cmds, char *str)
+{
+	const struct cmd *found = NULL;
+	const struct cmd *cur;
+	size_t n = strlen(str);
+
+	for (cur = cmds; cur->cmd; cur++) {
+		if (strncmp(cur->cmd, str, n) == 0) {
+			if (found)
+				return NULL;	/* ambiguous abbreviation */
+			found = cur;
+		}
+	}
+
+	return found;
+}
+
+/*
+ * Look up str in the table opts (terminated by an entry whose key is NULL),
+ * accepting abbreviations: an entry matches when str is a prefix of its
+ * key. Returns the only matching entry, or NULL when no entry or more than
+ * one entry matches.
+ */
+static struct opt *find_opt(struct opt *opts, char *str)
+{
+	struct opt *found = NULL;
+	struct opt *cur;
+	size_t n = strlen(str);
+
+	for (cur = opts; cur->key; cur++) {
+		if (strncmp(cur->key, str, n) == 0) {
+			if (found)
+				return NULL;	/* ambiguous abbreviation */
+			found = cur;
+		}
+	}
+
+	return found;
+}
+
+/*
+ * Return the entry of opts whose key equals key exactly and which has a
+ * value set, or NULL if there is no such entry.
+ */
+struct opt *get_opt(struct opt *opts, char *key)
+{
+	struct opt *cur = opts;
+
+	while (cur->key) {
+		if (cur->val && strcmp(cur->key, key) == 0)
+			return cur;
+		cur++;
+	}
+
+	return NULL;
+}
+
+/*
+ * Return the next unconsumed command-line word and advance optind past it.
+ * When all words are consumed, optind is left alone and argv[argc] is
+ * returned.
+ */
+char *shift_cmdl(struct cmdl *cmdl)
+{
+	if (cmdl->optind >= cmdl->argc)
+		return cmdl->argv[cmdl->argc];
+
+	return cmdl->argv[cmdl->optind++];
+}
+
+/*
+ * Consume the remaining command-line words as "key value" pairs and store
+ * each value in the matching entry of opts (keys may be abbreviated, see
+ * find_opt). optind is advanced by two for every pair.
+ *
+ * Returns the number of options parsed, or -EINVAL (after printing an
+ * error) for an unknown or ambiguous key.
+ *
+ * NOTE(review): a trailing key without a value gets argv[argc] as its
+ * value and moves optind past argc; this presumably relies on argv being
+ * main()'s NULL-terminated vector -- confirm.
+ */
+int parse_opts(struct opt *opts, struct cmdl *cmdl)
+{
+	int parsed = 0;
+	int pos = cmdl->optind;
+
+	while (pos < cmdl->argc) {
+		char *key = cmdl->argv[pos];
+		struct opt *hit = find_opt(opts, key);
+
+		if (!hit) {
+			fprintf(stderr, "error, invalid option \"%s\"\n", key);
+			return -EINVAL;
+		}
+
+		hit->val = cmdl->argv[pos + 1];
+		parsed++;
+		pos += 2;
+		cmdl->optind += 2;
+	}
+
+	return parsed;
+}
+
+/*
+ * Take the next command-line word, look it up in cmds (abbreviations
+ * allowed) and run its handler with nlh, cmdl and data.
+ *
+ * If no word is left, the caller's help (when it has one) is shown. If the
+ * word matches no command, or more than one, the caller's help is shown
+ * when help was requested, otherwise an error is printed. Both cases
+ * return -EINVAL; otherwise the handler's return value is passed through.
+ */
+int run_cmd(struct nlmsghdr *nlh, const struct cmd *caller,
+	    const struct cmd *cmds, struct cmdl *cmdl, void *data)
+{
+	const struct cmd *sub;
+	char *word;
+
+	if (cmdl->optind >= cmdl->argc) {
+		if (caller->help)
+			(caller->help)(cmdl);
+		return -EINVAL;
+	}
+	word = cmdl->argv[cmdl->optind++];
+
+	sub = find_cmd(cmds, word);
+	if (sub)
+		return (sub->func)(nlh, sub, cmdl, data);
+
+	/* Show help about last command if we don't find this one */
+	if (help_flag && caller->help) {
+		(caller->help)(cmdl);
+	} else {
+		fprintf(stderr, "error, invalid command \"%s\"\n", word);
+		fprintf(stderr, "use --help for command help\n");
+	}
+
+	return -EINVAL;
+}
diff --git a/tipc/cmdl.h b/tipc/cmdl.h
new file mode 100644
index 00000000..9f2666f8
--- /dev/null
+++ b/tipc/cmdl.h
@@ -0,0 +1,46 @@
+/*
+ * cmdl.h Framework for handling command line options.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Richard Alpe <richard.alpe@ericsson.com>
+ */
+
+#ifndef _TIPC_CMDL_H
+#define _TIPC_CMDL_H
+
+#include <libmnl/libmnl.h>
+
+/* Set elsewhere; command handlers test it to print usage instead of running */
+extern int help_flag;
+
+/* Command line state shared by all handlers */
+struct cmdl {
+ /* Index into argv of the next word that has not been consumed yet */
+ int optind;
+ /* Number of entries in argv */
+ int argc;
+ /* Argument vector; argv[0] is printed as the program name in usage text */
+ char **argv;
+};
+
+/* One entry of a command table; tables end with an entry whose cmd is NULL */
+struct cmd {
+ /* Command word, matched by prefix in find_cmd() */
+ const char *cmd;
+ /* Handler invoked by run_cmd() with the matching table entry as cmd */
+ int (*func)(struct nlmsghdr *nlh, const struct cmd *cmd,
+ struct cmdl *cmdl, void *data);
+ /* Usage printer; may be NULL */
+ void (*help)(struct cmdl *cmdl);
+};
+
+/* One "KEY VALUE" option; tables end with an entry whose key is NULL */
+struct opt {
+ /* Option keyword */
+ const char *key;
+ /* Value filled in by parse_opts(); NULL while the option is unset */
+ char *val;
+};
+
+/* Return the option with exactly this key if it has a value, else NULL */
+struct opt *get_opt(struct opt *opts, char *key);
+/* Parse remaining "KEY VALUE" pairs; returns the count or a negative error */
+int parse_opts(struct opt *opts, struct cmdl *cmdl);
+/* Return the next unconsumed argument and advance cmdl->optind */
+char *shift_cmdl(struct cmdl *cmdl);
+
+/* Dispatch the next argument to the matching entry of cmds */
+int run_cmd(struct nlmsghdr *nlh, const struct cmd *caller,
+ const struct cmd *cmds, struct cmdl *cmdl, void *data);
+
+/* Look up a command by unambiguous prefix; NULL if none or ambiguous */
+const struct cmd *find_cmd(const struct cmd *cmds, char *str);
+
+#endif
diff --git a/tipc/link.c b/tipc/link.c
new file mode 100644
index 00000000..89fb4ff4
--- /dev/null
+++ b/tipc/link.c
@@ -0,0 +1,520 @@
+/*
+ * link.c TIPC link functionality.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Richard Alpe <richard.alpe@ericsson.com>
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include <linux/tipc_netlink.h>
+#include <linux/tipc.h>
+#include <linux/genetlink.h>
+#include <libmnl/libmnl.h>
+
+#include "cmdl.h"
+#include "msg.h"
+#include "link.h"
+
+/*
+ * Netlink callback for "link list": prints "<name>: up" or
+ * "<name>: down" for the link carried in one reply message.
+ * Returns MNL_CB_ERROR when the link or its name attribute is missing.
+ */
+static int link_list_cb(const struct nlmsghdr *nlh, void *data)
+{
+	struct nlattr *top[TIPC_NLA_MAX + 1] = {};
+	struct nlattr *link[TIPC_NLA_LINK_MAX + 1] = {};
+	struct genlmsghdr *genl = mnl_nlmsg_get_payload(nlh);
+
+	mnl_attr_parse(nlh, sizeof(*genl), parse_attrs, top);
+	if (!top[TIPC_NLA_LINK])
+		return MNL_CB_ERROR;
+
+	mnl_attr_parse_nested(top[TIPC_NLA_LINK], parse_attrs, link);
+	if (!link[TIPC_NLA_LINK_NAME])
+		return MNL_CB_ERROR;
+
+	printf("%s: ", mnl_attr_get_str(link[TIPC_NLA_LINK_NAME]));
+
+	/* Presence of the UP attribute is what marks the link as up */
+	if (!link[TIPC_NLA_LINK_UP])
+		printf("down\n");
+	else
+		printf("up\n");
+
+	return MNL_CB_OK;
+}
+
+/*
+ * Handler for "link list": dumps all links and prints each one through
+ * link_list_cb(). With help requested, prints the usage line instead.
+ */
+static int cmd_link_list(struct nlmsghdr *nlh, const struct cmd *cmd,
+			 struct cmdl *cmdl, void *data)
+{
+	char buf[MNL_SOCKET_BUFFER_SIZE];
+
+	if (help_flag) {
+		fprintf(stderr, "Usage: %s link list\n", cmdl->argv[0]);
+		return -EINVAL;
+	}
+
+	nlh = msg_init(buf, TIPC_NL_LINK_GET);
+	if (nlh)
+		return msg_dumpit(nlh, link_list_cb, NULL);
+
+	fprintf(stderr, "error, message initialisation failed\n");
+	return -1;
+}
+
+/*
+ * Netlink callback for "link get": data points to the int id of the
+ * requested property, whose u32 value is printed on its own line.
+ * Returns MNL_CB_ERROR if any level of the attribute nesting is absent.
+ */
+static int link_get_cb(const struct nlmsghdr *nlh, void *data)
+{
+	struct nlattr *top[TIPC_NLA_MAX + 1] = {};
+	struct nlattr *link[TIPC_NLA_LINK_MAX + 1] = {};
+	struct nlattr *values[TIPC_NLA_PROP_MAX + 1] = {};
+	struct genlmsghdr *genl = mnl_nlmsg_get_payload(nlh);
+	int *wanted = data;
+
+	mnl_attr_parse(nlh, sizeof(*genl), parse_attrs, top);
+	if (!top[TIPC_NLA_LINK])
+		return MNL_CB_ERROR;
+
+	mnl_attr_parse_nested(top[TIPC_NLA_LINK], parse_attrs, link);
+	if (!link[TIPC_NLA_LINK_PROP])
+		return MNL_CB_ERROR;
+
+	mnl_attr_parse_nested(link[TIPC_NLA_LINK_PROP], parse_attrs, values);
+	if (!values[*wanted])
+		return MNL_CB_ERROR;
+
+	printf("%u\n", mnl_attr_get_u32(values[*wanted]));
+
+	return MNL_CB_OK;
+}
+
+
+
+/*
+ * Handler shared by "link get priority|tolerance|window link LINK".
+ * The property is chosen from the name of the table entry that
+ * dispatched here (cmd->cmd); the reply is printed by link_get_cb().
+ */
+static int cmd_link_get_prop(struct nlmsghdr *nlh, const struct cmd *cmd,
+			     struct cmdl *cmdl, void *data)
+{
+	struct opt opts[] = {
+		{ "link", NULL },
+		{ NULL }
+	};
+	struct opt *link_opt;
+	char buf[MNL_SOCKET_BUFFER_SIZE];
+	int prop;
+
+	if (!strcmp(cmd->cmd, "priority"))
+		prop = TIPC_NLA_PROP_PRIO;
+	else if (!strcmp(cmd->cmd, "tolerance"))
+		prop = TIPC_NLA_PROP_TOL;
+	else if (!strcmp(cmd->cmd, "window"))
+		prop = TIPC_NLA_PROP_WIN;
+	else
+		return -EINVAL;
+
+	if (help_flag) {
+		cmd->help(cmdl);
+		return -EINVAL;
+	}
+
+	if (parse_opts(opts, cmdl) < 0)
+		return -EINVAL;
+
+	nlh = msg_init(buf, TIPC_NL_LINK_GET);
+	if (!nlh) {
+		fprintf(stderr, "error, message initialisation failed\n");
+		return -1;
+	}
+
+	link_opt = get_opt(opts, "link");
+	if (!link_opt) {
+		fprintf(stderr, "error, missing link\n");
+		return -EINVAL;
+	}
+	/* The link name is added at the top level of the request */
+	mnl_attr_put_strz(nlh, TIPC_NLA_LINK_NAME, link_opt->val);
+
+	return msg_doit(nlh, link_get_cb, &prop);
+}
+
+/* Print usage for "link get" to stderr; argv[0] supplies the program name. */
+static void cmd_link_get_help(struct cmdl *cmdl)
+{
+	/* Placeholder spelled PROPERTY to match the PROPERTIES list below */
+	fprintf(stderr, "Usage: %s link get PROPERTY link LINK\n\n"
+		"PROPERTIES\n"
+		" tolerance - Get link tolerance\n"
+		" priority - Get link priority\n"
+		" window - Get link window\n",
+		cmdl->argv[0]);
+}
+
+/* Handler for "link get": dispatches on the property name that follows. */
+static int cmd_link_get(struct nlmsghdr *nlh, const struct cmd *cmd,
+			struct cmdl *cmdl, void *data)
+{
+	/* All three properties share one handler, which inspects its name */
+	const struct cmd cmds[] = {
+		{ .cmd = "priority", .func = cmd_link_get_prop,
+		  .help = cmd_link_get_help },
+		{ .cmd = "tolerance", .func = cmd_link_get_prop,
+		  .help = cmd_link_get_help },
+		{ .cmd = "window", .func = cmd_link_get_prop,
+		  .help = cmd_link_get_help },
+		{ .cmd = NULL }
+	};
+
+	return run_cmd(nlh, cmd, cmds, cmdl, NULL);
+}
+
+/* Print usage for "link stat reset" to stderr. */
+static void cmd_link_stat_reset_help(struct cmdl *cmdl)
+{
+	const char *prog = cmdl->argv[0];
+
+	fprintf(stderr, "Usage: %s link stat reset link LINK\n\n", prog);
+}
+
+/*
+ * Handler for "link stat reset link LINK": sends a
+ * TIPC_NL_LINK_RESET_STATS request naming the link. Exactly one option
+ * must be given, otherwise the usage text is shown.
+ */
+static int cmd_link_stat_reset(struct nlmsghdr *nlh, const struct cmd *cmd,
+			       struct cmdl *cmdl, void *data)
+{
+	struct opt opts[] = {
+		{ "link", NULL },
+		{ NULL }
+	};
+	struct opt *link_opt;
+	struct nlattr *nest;
+	char buf[MNL_SOCKET_BUFFER_SIZE];
+
+	/* Both an explicit help request and bad arguments show the usage */
+	if (help_flag || parse_opts(opts, cmdl) != 1) {
+		cmd->help(cmdl);
+		return -EINVAL;
+	}
+
+	nlh = msg_init(buf, TIPC_NL_LINK_RESET_STATS);
+	if (!nlh) {
+		fprintf(stderr, "error, message initialisation failed\n");
+		return -1;
+	}
+
+	link_opt = get_opt(opts, "link");
+	if (!link_opt) {
+		fprintf(stderr, "error, missing link\n");
+		return -EINVAL;
+	}
+
+	nest = mnl_attr_nest_start(nlh, TIPC_NLA_LINK);
+	mnl_attr_put_strz(nlh, TIPC_NLA_LINK_NAME, link_opt->val);
+	mnl_attr_nest_end(nlh, nest);
+
+	return msg_doit(nlh, NULL, NULL);
+}
+
+/*
+ * Return count as a percentage of total, rounded to the nearest integer.
+ *
+ * A total of zero yields 0 instead of dividing by zero, and the product
+ * is computed in 64 bits so large counters cannot wrap around.
+ */
+static uint32_t perc(uint32_t count, uint32_t total)
+{
+	if (!total)
+		return 0;
+
+	return (uint32_t)(((uint64_t)count * 100 + (total / 2)) / total);
+}
+
+/*
+ * Print the statistics of one non-broadcast link.
+ *
+ * attrs, prop and stats are the parsed link, property and statistics
+ * attribute tables of the reply. Every attribute read here is assumed
+ * to be present; none of them is checked before use.
+ * Always returns MNL_CB_OK.
+ */
+static int _show_link_stat(struct nlattr *attrs[], struct nlattr *prop[],
+			   struct nlattr *stats[])
+{
+	uint32_t proft;
+
+	if (attrs[TIPC_NLA_LINK_ACTIVE])
+		printf(" ACTIVE");
+	else if (attrs[TIPC_NLA_LINK_UP])
+		printf(" STANDBY");
+	else
+		printf(" DEFUNCT");
+
+	printf(" MTU:%u Priority:%u Tolerance:%u ms Window:%u packets\n",
+	       mnl_attr_get_u32(attrs[TIPC_NLA_LINK_MTU]),
+	       mnl_attr_get_u32(prop[TIPC_NLA_PROP_PRIO]),
+	       mnl_attr_get_u32(prop[TIPC_NLA_PROP_TOL]),
+	       mnl_attr_get_u32(prop[TIPC_NLA_PROP_WIN]));
+
+	/* Packet counts are shown net of the *_INFO counter */
+	printf(" RX packets:%u fragments:%u/%u bundles:%u/%u\n",
+	       mnl_attr_get_u32(attrs[TIPC_NLA_LINK_RX]) -
+	       mnl_attr_get_u32(stats[TIPC_NLA_STATS_RX_INFO]),
+	       mnl_attr_get_u32(stats[TIPC_NLA_STATS_RX_FRAGMENTS]),
+	       mnl_attr_get_u32(stats[TIPC_NLA_STATS_RX_FRAGMENTED]),
+	       mnl_attr_get_u32(stats[TIPC_NLA_STATS_RX_BUNDLES]),
+	       mnl_attr_get_u32(stats[TIPC_NLA_STATS_RX_BUNDLED]));
+
+	printf(" TX packets:%u fragments:%u/%u bundles:%u/%u\n",
+	       mnl_attr_get_u32(attrs[TIPC_NLA_LINK_TX]) -
+	       mnl_attr_get_u32(stats[TIPC_NLA_STATS_TX_INFO]),
+	       mnl_attr_get_u32(stats[TIPC_NLA_STATS_TX_FRAGMENTS]),
+	       mnl_attr_get_u32(stats[TIPC_NLA_STATS_TX_FRAGMENTED]),
+	       mnl_attr_get_u32(stats[TIPC_NLA_STATS_TX_BUNDLES]),
+	       mnl_attr_get_u32(stats[TIPC_NLA_STATS_TX_BUNDLED]));
+
+	/*
+	 * proft is the divisor for the average and for every percentage
+	 * below; clamp it to 1 so an empty profile cannot divide by zero.
+	 */
+	proft = mnl_attr_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT]);
+	if (!proft)
+		proft = 1;
+
+	printf(" TX profile sample:%u packets average:%u octets\n",
+	       mnl_attr_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_CNT]),
+	       mnl_attr_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_TOT]) / proft);
+
+	printf(" 0-64:%u%% -256:%u%% -1024:%u%% -4096:%u%% "
+	       "-16384:%u%% -32768:%u%% -66000:%u%%\n",
+	       perc(mnl_attr_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_P0]), proft),
+	       perc(mnl_attr_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_P1]), proft),
+	       perc(mnl_attr_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_P2]), proft),
+	       perc(mnl_attr_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_P3]), proft),
+	       perc(mnl_attr_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_P4]), proft),
+	       perc(mnl_attr_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_P5]), proft),
+	       perc(mnl_attr_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_P6]), proft));
+
+	printf(" RX states:%u probes:%u naks:%u defs:%u dups:%u\n",
+	       mnl_attr_get_u32(stats[TIPC_NLA_STATS_RX_STATES]),
+	       mnl_attr_get_u32(stats[TIPC_NLA_STATS_RX_PROBES]),
+	       mnl_attr_get_u32(stats[TIPC_NLA_STATS_RX_NACKS]),
+	       mnl_attr_get_u32(stats[TIPC_NLA_STATS_RX_DEFERRED]),
+	       mnl_attr_get_u32(stats[TIPC_NLA_STATS_DUPLICATES]));
+
+	printf(" TX states:%u probes:%u naks:%u acks:%u dups:%u\n",
+	       mnl_attr_get_u32(stats[TIPC_NLA_STATS_TX_STATES]),
+	       mnl_attr_get_u32(stats[TIPC_NLA_STATS_TX_PROBES]),
+	       mnl_attr_get_u32(stats[TIPC_NLA_STATS_TX_NACKS]),
+	       mnl_attr_get_u32(stats[TIPC_NLA_STATS_TX_ACKS]),
+	       mnl_attr_get_u32(stats[TIPC_NLA_STATS_RETRANSMITTED]));
+
+	printf(" Congestion link:%u Send queue max:%u avg:%u\n",
+	       mnl_attr_get_u32(stats[TIPC_NLA_STATS_LINK_CONGS]),
+	       mnl_attr_get_u32(stats[TIPC_NLA_STATS_MAX_QUEUE]),
+	       mnl_attr_get_u32(stats[TIPC_NLA_STATS_AVG_QUEUE]));
+
+	return MNL_CB_OK;
+}
+
+/*
+ * Print the statistics of the broadcast link from its parsed property
+ * and statistics attribute tables. The attributes are read without a
+ * presence check. Always returns MNL_CB_OK.
+ */
+static int _show_bc_link_stat(struct nlattr *prop[], struct nlattr *stats[])
+{
+	uint32_t window = mnl_attr_get_u32(prop[TIPC_NLA_PROP_WIN]);
+	uint32_t rx_pkts = mnl_attr_get_u32(stats[TIPC_NLA_STATS_RX_INFO]);
+	uint32_t rx_frags = mnl_attr_get_u32(stats[TIPC_NLA_STATS_RX_FRAGMENTS]);
+	uint32_t rx_fragd = mnl_attr_get_u32(stats[TIPC_NLA_STATS_RX_FRAGMENTED]);
+	uint32_t rx_bundles = mnl_attr_get_u32(stats[TIPC_NLA_STATS_RX_BUNDLES]);
+	uint32_t rx_bundled = mnl_attr_get_u32(stats[TIPC_NLA_STATS_RX_BUNDLED]);
+	uint32_t tx_pkts = mnl_attr_get_u32(stats[TIPC_NLA_STATS_TX_INFO]);
+	uint32_t tx_frags = mnl_attr_get_u32(stats[TIPC_NLA_STATS_TX_FRAGMENTS]);
+	uint32_t tx_fragd = mnl_attr_get_u32(stats[TIPC_NLA_STATS_TX_FRAGMENTED]);
+	uint32_t tx_bundles = mnl_attr_get_u32(stats[TIPC_NLA_STATS_TX_BUNDLES]);
+	uint32_t tx_bundled = mnl_attr_get_u32(stats[TIPC_NLA_STATS_TX_BUNDLED]);
+	uint32_t rx_nacks = mnl_attr_get_u32(stats[TIPC_NLA_STATS_RX_NACKS]);
+	uint32_t rx_defs = mnl_attr_get_u32(stats[TIPC_NLA_STATS_RX_DEFERRED]);
+	uint32_t rx_dups = mnl_attr_get_u32(stats[TIPC_NLA_STATS_DUPLICATES]);
+	uint32_t tx_nacks = mnl_attr_get_u32(stats[TIPC_NLA_STATS_TX_NACKS]);
+	uint32_t tx_acks = mnl_attr_get_u32(stats[TIPC_NLA_STATS_TX_ACKS]);
+	uint32_t tx_dups = mnl_attr_get_u32(stats[TIPC_NLA_STATS_RETRANSMITTED]);
+	uint32_t congs = mnl_attr_get_u32(stats[TIPC_NLA_STATS_LINK_CONGS]);
+	uint32_t q_max = mnl_attr_get_u32(stats[TIPC_NLA_STATS_MAX_QUEUE]);
+	uint32_t q_avg = mnl_attr_get_u32(stats[TIPC_NLA_STATS_AVG_QUEUE]);
+
+	printf(" Window:%u packets\n", window);
+
+	printf(" RX packets:%u fragments:%u/%u bundles:%u/%u\n",
+	       rx_pkts, rx_frags, rx_fragd, rx_bundles, rx_bundled);
+
+	printf(" TX packets:%u fragments:%u/%u bundles:%u/%u\n",
+	       tx_pkts, tx_frags, tx_fragd, tx_bundles, tx_bundled);
+
+	printf(" RX naks:%u defs:%u dups:%u\n", rx_nacks, rx_defs, rx_dups);
+
+	printf(" TX naks:%u acks:%u dups:%u\n", tx_nacks, tx_acks, tx_dups);
+
+	printf(" Congestion link:%u Send queue max:%u avg:%u\n",
+	       congs, q_max, q_avg);
+
+	return MNL_CB_OK;
+}
+
+/*
+ * Netlink callback for "link stat show".
+ *
+ * data is either NULL (show every link) or the name of the only link to
+ * show. Broadcast links are printed by _show_bc_link_stat(), all others
+ * by _show_link_stat(). Returns MNL_CB_ERROR when the link, name,
+ * property or statistics attribute is missing.
+ */
+static int link_stat_show_cb(const struct nlmsghdr *nlh, void *data)
+{
+	struct nlattr *top[TIPC_NLA_MAX + 1] = {};
+	struct nlattr *link_attrs[TIPC_NLA_LINK_MAX + 1] = {};
+	struct nlattr *props[TIPC_NLA_PROP_MAX + 1] = {};
+	struct nlattr *counters[TIPC_NLA_STATS_MAX + 1] = {};
+	struct genlmsghdr *genl = mnl_nlmsg_get_payload(nlh);
+	const char *filter = data;
+	const char *name;
+
+	mnl_attr_parse(nlh, sizeof(*genl), parse_attrs, top);
+	if (!top[TIPC_NLA_LINK])
+		return MNL_CB_ERROR;
+
+	mnl_attr_parse_nested(top[TIPC_NLA_LINK], parse_attrs, link_attrs);
+	if (!link_attrs[TIPC_NLA_LINK_NAME])
+		return MNL_CB_ERROR;
+	if (!link_attrs[TIPC_NLA_LINK_PROP])
+		return MNL_CB_ERROR;
+	if (!link_attrs[TIPC_NLA_LINK_STATS])
+		return MNL_CB_ERROR;
+
+	mnl_attr_parse_nested(link_attrs[TIPC_NLA_LINK_PROP], parse_attrs,
+			      props);
+	mnl_attr_parse_nested(link_attrs[TIPC_NLA_LINK_STATS], parse_attrs,
+			      counters);
+
+	name = mnl_attr_get_str(link_attrs[TIPC_NLA_LINK_NAME]);
+
+	/* If a link is passed, skip all but that link */
+	if (filter && strcmp(name, filter))
+		return MNL_CB_OK;
+
+	if (!link_attrs[TIPC_NLA_LINK_BROADCAST]) {
+		printf("\nLink <%s>\n", name);
+		return _show_link_stat(link_attrs, props, counters);
+	}
+
+	printf("Link <%s>\n", name);
+	return _show_bc_link_stat(props, counters);
+}
+
+/* Print usage for "link stat show" to stderr. */
+static void cmd_link_stat_show_help(struct cmdl *cmdl)
+{
+	const char *prog = cmdl->argv[0];
+
+	fprintf(stderr, "Usage: %s link stat show [ link LINK ]\n", prog);
+}
+
+/*
+ * Handler for "link stat show [ link LINK ]": dumps all links and lets
+ * link_stat_show_cb() print them, restricted to LINK when it is given.
+ */
+static int cmd_link_stat_show(struct nlmsghdr *nlh, const struct cmd *cmd,
+			      struct cmdl *cmdl, void *data)
+{
+	struct opt opts[] = {
+		{ "link", NULL },
+		{ NULL }
+	};
+	struct opt *link_opt;
+	char buf[MNL_SOCKET_BUFFER_SIZE];
+
+	if (help_flag) {
+		cmd->help(cmdl);
+		return -EINVAL;
+	}
+
+	nlh = msg_init(buf, TIPC_NL_LINK_GET);
+	if (!nlh) {
+		fprintf(stderr, "error, message initialisation failed\n");
+		return -1;
+	}
+
+	if (parse_opts(opts, cmdl) < 0)
+		return -EINVAL;
+
+	/* A NULL filter makes the callback print every link */
+	link_opt = get_opt(opts, "link");
+
+	return msg_dumpit(nlh, link_stat_show_cb,
+			  link_opt ? link_opt->val : NULL);
+}
+
+/* Print usage for "link stat" and its sub-commands to stderr. */
+static void cmd_link_stat_help(struct cmdl *cmdl)
+{
+	/* "show" prints statistics; its text used to describe "get priority" */
+	fprintf(stderr, "Usage: %s link stat COMMAND [ARGS]\n\n"
+		"COMMANDS:\n"
+		" reset - Reset link statistics for link\n"
+		" show - Show link statistics\n",
+		cmdl->argv[0]);
+}
+
+/* Handler for "link statistics": dispatches to "reset" or "show". */
+static int cmd_link_stat(struct nlmsghdr *nlh, const struct cmd *cmd,
+			 struct cmdl *cmdl, void *data)
+{
+	const struct cmd cmds[] = {
+		{ .cmd = "reset", .func = cmd_link_stat_reset,
+		  .help = cmd_link_stat_reset_help },
+		{ .cmd = "show", .func = cmd_link_stat_show,
+		  .help = cmd_link_stat_show_help },
+		{ .cmd = NULL }
+	};
+
+	return run_cmd(nlh, cmd, cmds, cmdl, NULL);
+}
+
+/* Print usage for "link set" to stderr; argv[0] supplies the program name. */
+static void cmd_link_set_help(struct cmdl *cmdl)
+{
+	/* Placeholder spelled PROPERTY to match the PROPERTIES list below */
+	fprintf(stderr, "Usage: %s link set PROPERTY link LINK\n\n"
+		"PROPERTIES\n"
+		" tolerance TOLERANCE - Set link tolerance\n"
+		" priority PRIORITY - Set link priority\n"
+		" window WINDOW - Set link window\n",
+		cmdl->argv[0]);
+}
+
+/*
+ * Handler shared by "link set priority|tolerance|window VALUE link LINK".
+ *
+ * The property is chosen from the name of the table entry that
+ * dispatched here (cmd->cmd). VALUE must be a plain decimal number that
+ * fits an unsigned 32-bit attribute; anything else is rejected with
+ * -EINVAL instead of being silently converted.
+ */
+static int cmd_link_set_prop(struct nlmsghdr *nlh, const struct cmd *cmd,
+			     struct cmdl *cmdl, void *data)
+{
+	unsigned int val;
+	long parsed;
+	int prop;
+	char *arg;
+	char *end;
+	char buf[MNL_SOCKET_BUFFER_SIZE];
+	struct nlattr *props;
+	struct nlattr *attrs;
+	struct opt *opt;
+	struct opt opts[] = {
+		{ "link", NULL },
+		{ NULL }
+	};
+
+	if (strcmp(cmd->cmd, "priority") == 0)
+		prop = TIPC_NLA_PROP_PRIO;
+	else if ((strcmp(cmd->cmd, "tolerance") == 0))
+		prop = TIPC_NLA_PROP_TOL;
+	else if ((strcmp(cmd->cmd, "window") == 0))
+		prop = TIPC_NLA_PROP_WIN;
+	else
+		return -EINVAL;
+
+	if (help_flag) {
+		(cmd->help)(cmdl);
+		return -EINVAL;
+	}
+
+	if (cmdl->optind >= cmdl->argc) {
+		fprintf(stderr, "error, missing value\n");
+		return -EINVAL;
+	}
+
+	/* Reject empty, non-numeric, negative or out-of-range input */
+	arg = shift_cmdl(cmdl);
+	errno = 0;
+	parsed = strtol(arg, &end, 10);
+	if (end == arg || *end != '\0' || errno == ERANGE ||
+	    parsed < 0 || (unsigned long)parsed > 0xffffffffUL) {
+		fprintf(stderr, "error, invalid value \"%s\"\n", arg);
+		return -EINVAL;
+	}
+	val = (unsigned int)parsed;
+
+	if (parse_opts(opts, cmdl) < 0)
+		return -EINVAL;
+
+	if (!(nlh = msg_init(buf, TIPC_NL_LINK_SET))) {
+		fprintf(stderr, "error, message initialisation failed\n");
+		return -1;
+	}
+	attrs = mnl_attr_nest_start(nlh, TIPC_NLA_LINK);
+
+	if (!(opt = get_opt(opts, "link"))) {
+		fprintf(stderr, "error, missing link\n");
+		return -EINVAL;
+	}
+	mnl_attr_put_strz(nlh, TIPC_NLA_LINK_NAME, opt->val);
+
+	props = mnl_attr_nest_start(nlh, TIPC_NLA_LINK_PROP);
+	mnl_attr_put_u32(nlh, prop, val);
+	mnl_attr_nest_end(nlh, props);
+
+	mnl_attr_nest_end(nlh, attrs);
+
+	/* NOTE(review): a "get" callback is passed for a set request - confirm */
+	return msg_doit(nlh, link_get_cb, &prop);
+}
+
+/* Handler for "link set": dispatches on the property name that follows. */
+static int cmd_link_set(struct nlmsghdr *nlh, const struct cmd *cmd,
+			struct cmdl *cmdl, void *data)
+{
+	/* All three properties share one handler, which inspects its name */
+	const struct cmd cmds[] = {
+		{ .cmd = "priority", .func = cmd_link_set_prop,
+		  .help = cmd_link_set_help },
+		{ .cmd = "tolerance", .func = cmd_link_set_prop,
+		  .help = cmd_link_set_help },
+		{ .cmd = "window", .func = cmd_link_set_prop,
+		  .help = cmd_link_set_help },
+		{ .cmd = NULL }
+	};
+
+	return run_cmd(nlh, cmd, cmds, cmdl, NULL);
+}
+
+/* Print the top-level usage text of the "link" command to stderr. */
+void cmd_link_help(struct cmdl *cmdl)
+{
+	const char *prog = cmdl->argv[0];
+
+	fprintf(stderr,
+		"Usage: %s link COMMAND [ARGS] ...\n"
+		"\n"
+		"COMMANDS\n"
+		" list - List links\n"
+		" get - Get various link properties\n"
+		" set - Set various link properties\n"
+		" statistics - Show or reset statistics\n",
+		prog);
+}
+
+/*
+ * Entry point of the "link" command: dispatches to the get, list, set
+ * and statistics sub-commands via run_cmd().
+ */
+int cmd_link(struct nlmsghdr *nlh, const struct cmd *cmd, struct cmdl *cmdl,
+	     void *data)
+{
+	const struct cmd cmds[] = {
+		{ .cmd = "get", .func = cmd_link_get,
+		  .help = cmd_link_get_help },
+		/* "list" prints its own usage, so it has no help callback */
+		{ .cmd = "list", .func = cmd_link_list, .help = NULL },
+		{ .cmd = "set", .func = cmd_link_set,
+		  .help = cmd_link_set_help },
+		{ .cmd = "statistics", .func = cmd_link_stat,
+		  .help = cmd_link_stat_help },
+		{ .cmd = NULL }
+	};
+
+	return run_cmd(nlh, cmd, cmds, cmdl, NULL);
+}
diff --git a/tipc/link.h b/tipc/link.h
new file mode 100644
index 00000000..6dc95e5b
--- /dev/null
+++ b/tipc/link.h
@@ -0,0 +1,21 @@
+/*
+ * link.h TIPC link functionality.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Richard Alpe <richard.alpe@ericsson.com>
+ */
+
+#ifndef _TIPC_LINK_H
+#define _TIPC_LINK_H
+
+/* Also declared in cmdl.h; tested by handlers to print usage only */
+extern int help_flag;
+
+/*
+ * Entry point of the "link" command. The struct types used here are not
+ * declared by this header; NOTE(review): it appears to rely on cmdl.h
+ * being included first - confirm.
+ */
+int cmd_link(struct nlmsghdr *nlh, const struct cmd *cmd, struct cmdl *cmdl,
+ void *data);
+/* Print the top-level usage text of the "link" command to stderr */
+void cmd_link_help(struct cmdl *cmdl);
+
+#endif
diff --git a/tipc/media.c b/tipc/media.c
new file mode 100644
index 00000000..a902ab78
--- /dev/null
+++ b/tipc/media.c
@@ -0,0 +1,260 @@
+/*
+ * media.c TIPC media functionality.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Richard Alpe <richard.alpe@ericsson.com>
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include <linux/tipc_netlink.h>
+#include <linux/tipc.h>
+#include <linux/genetlink.h>
+#include <libmnl/libmnl.h>
+
+#include "cmdl.h"
+#include "msg.h"
+#include "media.h"
+
+/*
+ * Netlink callback for "media list": prints the name of the media
+ * carried in one reply message. Returns MNL_CB_ERROR when the media or
+ * its name attribute is missing.
+ */
+static int media_list_cb(const struct nlmsghdr *nlh, void *data)
+{
+	struct nlattr *top[TIPC_NLA_MAX + 1] = {};
+	struct nlattr *media[TIPC_NLA_MEDIA_MAX + 1] = {};
+	struct genlmsghdr *genl = mnl_nlmsg_get_payload(nlh);
+	const struct nlattr *name_attr;
+
+	mnl_attr_parse(nlh, sizeof(*genl), parse_attrs, top);
+	if (!top[TIPC_NLA_MEDIA])
+		return MNL_CB_ERROR;
+
+	mnl_attr_parse_nested(top[TIPC_NLA_MEDIA], parse_attrs, media);
+	name_attr = media[TIPC_NLA_MEDIA_NAME];
+	if (!name_attr)
+		return MNL_CB_ERROR;
+
+	printf("%s\n", mnl_attr_get_str(name_attr));
+
+	return MNL_CB_OK;
+}
+
+/*
+ * Handler for "media list": dumps all media and prints each one through
+ * media_list_cb(). With help requested, prints the usage line instead.
+ */
+static int cmd_media_list(struct nlmsghdr *nlh, const struct cmd *cmd,
+			  struct cmdl *cmdl, void *data)
+{
+	char buf[MNL_SOCKET_BUFFER_SIZE];
+
+	if (help_flag) {
+		fprintf(stderr, "Usage: %s media list\n", cmdl->argv[0]);
+		return -EINVAL;
+	}
+
+	nlh = msg_init(buf, TIPC_NL_MEDIA_GET);
+	if (nlh)
+		return msg_dumpit(nlh, media_list_cb, NULL);
+
+	fprintf(stderr, "error, message initialisation failed\n");
+	return -1;
+}
+
+/*
+ * Netlink callback for "media get": data points to the int id of the
+ * requested property, whose u32 value is printed on its own line.
+ * Returns MNL_CB_ERROR if any level of the attribute nesting is absent.
+ */
+static int media_get_cb(const struct nlmsghdr *nlh, void *data)
+{
+	struct nlattr *top[TIPC_NLA_MAX + 1] = {};
+	struct nlattr *media[TIPC_NLA_MEDIA_MAX + 1] = {};
+	struct nlattr *values[TIPC_NLA_PROP_MAX + 1] = {};
+	struct genlmsghdr *genl = mnl_nlmsg_get_payload(nlh);
+	int *wanted = data;
+
+	mnl_attr_parse(nlh, sizeof(*genl), parse_attrs, top);
+	if (!top[TIPC_NLA_MEDIA])
+		return MNL_CB_ERROR;
+
+	mnl_attr_parse_nested(top[TIPC_NLA_MEDIA], parse_attrs, media);
+	if (!media[TIPC_NLA_MEDIA_PROP])
+		return MNL_CB_ERROR;
+
+	mnl_attr_parse_nested(media[TIPC_NLA_MEDIA_PROP], parse_attrs, values);
+	if (!values[*wanted])
+		return MNL_CB_ERROR;
+
+	printf("%u\n", mnl_attr_get_u32(values[*wanted]));
+
+	return MNL_CB_OK;
+}
+
+/*
+ * Handler shared by "media get priority|tolerance|window media MEDIA".
+ * The property is chosen from the name of the table entry that
+ * dispatched here (cmd->cmd); the reply is printed by media_get_cb().
+ */
+static int cmd_media_get_prop(struct nlmsghdr *nlh, const struct cmd *cmd,
+			      struct cmdl *cmdl, void *data)
+{
+	struct opt opts[] = {
+		{ "media", NULL },
+		{ NULL }
+	};
+	struct opt *media_opt;
+	struct nlattr *nest;
+	char buf[MNL_SOCKET_BUFFER_SIZE];
+	int prop;
+
+	if (!strcmp(cmd->cmd, "priority"))
+		prop = TIPC_NLA_PROP_PRIO;
+	else if (!strcmp(cmd->cmd, "tolerance"))
+		prop = TIPC_NLA_PROP_TOL;
+	else if (!strcmp(cmd->cmd, "window"))
+		prop = TIPC_NLA_PROP_WIN;
+	else
+		return -EINVAL;
+
+	if (help_flag) {
+		cmd->help(cmdl);
+		return -EINVAL;
+	}
+
+	if (parse_opts(opts, cmdl) < 0)
+		return -EINVAL;
+
+	nlh = msg_init(buf, TIPC_NL_MEDIA_GET);
+	if (!nlh) {
+		fprintf(stderr, "error, message initialisation failed\n");
+		return -1;
+	}
+
+	media_opt = get_opt(opts, "media");
+	if (!media_opt) {
+		fprintf(stderr, "error, missing media\n");
+		return -EINVAL;
+	}
+
+	/* The media name travels nested inside a TIPC_NLA_MEDIA attribute */
+	nest = mnl_attr_nest_start(nlh, TIPC_NLA_MEDIA);
+	mnl_attr_put_strz(nlh, TIPC_NLA_MEDIA_NAME, media_opt->val);
+	mnl_attr_nest_end(nlh, nest);
+
+	return msg_doit(nlh, media_get_cb, &prop);
+}
+
+/* Print usage for "media get" to stderr; argv[0] supplies the program name. */
+static void cmd_media_get_help(struct cmdl *cmdl)
+{
+	/* Placeholder spelled PROPERTY to match the PROPERTIES list below */
+	fprintf(stderr, "Usage: %s media get PROPERTY media MEDIA\n\n"
+		"PROPERTIES\n"
+		" tolerance - Get media tolerance\n"
+		" priority - Get media priority\n"
+		" window - Get media window\n",
+		cmdl->argv[0]);
+}
+
+/* Handler for "media get": dispatches on the property name that follows. */
+static int cmd_media_get(struct nlmsghdr *nlh, const struct cmd *cmd,
+			 struct cmdl *cmdl, void *data)
+{
+	/* All three properties share one handler, which inspects its name */
+	const struct cmd cmds[] = {
+		{ .cmd = "priority", .func = cmd_media_get_prop,
+		  .help = cmd_media_get_help },
+		{ .cmd = "tolerance", .func = cmd_media_get_prop,
+		  .help = cmd_media_get_help },
+		{ .cmd = "window", .func = cmd_media_get_prop,
+		  .help = cmd_media_get_help },
+		{ .cmd = NULL }
+	};
+
+	return run_cmd(nlh, cmd, cmds, cmdl, NULL);
+}
+
+/* Print usage for "media set" to stderr; argv[0] supplies the program name. */
+static void cmd_media_set_help(struct cmdl *cmdl)
+{
+	/* Placeholder spelled PROPERTY to match the PROPERTIES list below */
+	fprintf(stderr, "Usage: %s media set PROPERTY media MEDIA\n\n"
+		"PROPERTIES\n"
+		" tolerance TOLERANCE - Set media tolerance\n"
+		" priority PRIORITY - Set media priority\n"
+		" window WINDOW - Set media window\n",
+		cmdl->argv[0]);
+}
+
+/*
+ * Handler shared by "media set priority|tolerance|window VALUE media MEDIA".
+ *
+ * The property is chosen from the name of the table entry that
+ * dispatched here (cmd->cmd). VALUE must be a plain decimal number that
+ * fits an unsigned 32-bit attribute; anything else is rejected with
+ * -EINVAL instead of being silently converted.
+ */
+static int cmd_media_set_prop(struct nlmsghdr *nlh, const struct cmd *cmd,
+			      struct cmdl *cmdl, void *data)
+{
+	unsigned int val;
+	long parsed;
+	int prop;
+	char *arg;
+	char *end;
+	char buf[MNL_SOCKET_BUFFER_SIZE];
+	struct nlattr *props;
+	struct nlattr *attrs;
+	struct opt *opt;
+	struct opt opts[] = {
+		{ "media", NULL },
+		{ NULL }
+	};
+
+	if (strcmp(cmd->cmd, "priority") == 0)
+		prop = TIPC_NLA_PROP_PRIO;
+	else if ((strcmp(cmd->cmd, "tolerance") == 0))
+		prop = TIPC_NLA_PROP_TOL;
+	else if ((strcmp(cmd->cmd, "window") == 0))
+		prop = TIPC_NLA_PROP_WIN;
+	else
+		return -EINVAL;
+
+	if (help_flag) {
+		(cmd->help)(cmdl);
+		return -EINVAL;
+	}
+
+	if (cmdl->optind >= cmdl->argc) {
+		fprintf(stderr, "error, missing value\n");
+		return -EINVAL;
+	}
+
+	/* Reject empty, non-numeric, negative or out-of-range input */
+	arg = shift_cmdl(cmdl);
+	errno = 0;
+	parsed = strtol(arg, &end, 10);
+	if (end == arg || *end != '\0' || errno == ERANGE ||
+	    parsed < 0 || (unsigned long)parsed > 0xffffffffUL) {
+		fprintf(stderr, "error, invalid value \"%s\"\n", arg);
+		return -EINVAL;
+	}
+	val = (unsigned int)parsed;
+
+	if (parse_opts(opts, cmdl) < 0)
+		return -EINVAL;
+
+	if (!(nlh = msg_init(buf, TIPC_NL_MEDIA_SET))) {
+		fprintf(stderr, "error, message initialisation failed\n");
+		return -1;
+	}
+	attrs = mnl_attr_nest_start(nlh, TIPC_NLA_MEDIA);
+
+	if (!(opt = get_opt(opts, "media"))) {
+		fprintf(stderr, "error, missing media\n");
+		return -EINVAL;
+	}
+	mnl_attr_put_strz(nlh, TIPC_NLA_MEDIA_NAME, opt->val);
+
+	props = mnl_attr_nest_start(nlh, TIPC_NLA_MEDIA_PROP);
+	mnl_attr_put_u32(nlh, prop, val);
+	mnl_attr_nest_end(nlh, props);
+
+	mnl_attr_nest_end(nlh, attrs);
+
+	return msg_doit(nlh, NULL, NULL);
+}
+
+/* Handler for "media set": dispatches on the property name that follows. */
+static int cmd_media_set(struct nlmsghdr *nlh, const struct cmd *cmd,
+			 struct cmdl *cmdl, void *data)
+{
+	/* All three properties share one handler, which inspects its name */
+	const struct cmd cmds[] = {
+		{ .cmd = "priority", .func = cmd_media_set_prop,
+		  .help = cmd_media_set_help },
+		{ .cmd = "tolerance", .func = cmd_media_set_prop,
+		  .help = cmd_media_set_help },
+		{ .cmd = "window", .func = cmd_media_set_prop,
+		  .help = cmd_media_set_help },
+		{ .cmd = NULL }
+	};
+
+	return run_cmd(nlh, cmd, cmds, cmdl, NULL);
+}
+
+/* Print the top-level usage text of the "media" command to stderr. */
+void cmd_media_help(struct cmdl *cmdl)
+{
+	const char *prog = cmdl->argv[0];
+
+	fprintf(stderr,
+		"Usage: %s media COMMAND [ARGS] ...\n"
+		"\n"
+		"Commands:\n"
+		" list - List active media types\n"
+		" get - Get various media properties\n"
+		" set - Set various media properties\n",
+		prog);
+}
+
+/*
+ * Entry point of the "media" command: dispatches to the get, list and
+ * set sub-commands via run_cmd().
+ */
+int cmd_media(struct nlmsghdr *nlh, const struct cmd *cmd, struct cmdl *cmdl,
+	      void *data)
+{
+	const struct cmd cmds[] = {
+		{ .cmd = "get", .func = cmd_media_get,
+		  .help = cmd_media_get_help },
+		/* "list" prints its own usage, so it has no help callback */
+		{ .cmd = "list", .func = cmd_media_list, .help = NULL },
+		{ .cmd = "set", .func = cmd_media_set,
+		  .help = cmd_media_set_help },
+		{ .cmd = NULL }
+	};
+
+	return run_cmd(nlh, cmd, cmds, cmdl, NULL);
+}
diff --git a/tipc/media.h b/tipc/media.h
new file mode 100644
index 00000000..8584af74
--- /dev/null
+++ b/tipc/media.h
@@ -0,0 +1,21 @@
+/*
+ * media.h TIPC media functionality.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Richard Alpe <richard.alpe@ericsson.com>
+ */
+
+#ifndef _TIPC_MEDIA_H
+#define _TIPC_MEDIA_H
+
+/* Also declared in cmdl.h; tested by handlers to print usage only */
+extern int help_flag;
+
+/*
+ * Entry point of the "media" command. The struct types used here are not
+ * declared by this header; NOTE(review): it appears to rely on cmdl.h
+ * being included first - confirm.
+ */
+int cmd_media(struct nlmsghdr *nlh, const struct cmd *cmd, struct cmdl *cmdl,
+ void *data);
+/* Print the top-level usage text of the "media" command to stderr */
+void cmd_media_help(struct cmdl *cmdl);
+
+#endif
diff --git a/tipc/misc.c b/tipc/misc.c
new file mode 100644
index 00000000..80912228
--- /dev/null
+++ b/tipc/misc.c
@@ -0,0 +1,35 @@
+/*
+ * misc.c Miscellaneous TIPC helper functions.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Richard Alpe <richard.alpe@ericsson.com>
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <linux/tipc.h>
+
+#include "misc.h"
+
+#define IN_RANGE(val, low, high) ((val) <= (high) && (val) >= (low))
+
+/*
+ * Convert a "Z.C.N" string into a TIPC network address.
+ *
+ * The string must consist of exactly three unsigned decimal fields with
+ * nothing after them; zone must be at most 255, cluster and node at
+ * most 4095. On any violation a message is printed to stderr and 0 is
+ * returned.
+ */
+uint32_t str2addr(char *str)
+{
+	unsigned int zone, cluster, node;
+	char trailing;
+	int fields;
+
+	/* A fourth conversion succeeding means there is trailing garbage */
+	fields = sscanf(str, "%u.%u.%u%c", &zone, &cluster, &node, &trailing);
+	if (fields != 3) {
+		fprintf(stderr, "invalid network address, syntax: Z.C.N\n");
+		return 0;
+	}
+
+	if (!IN_RANGE(zone, 0, 255) || !IN_RANGE(cluster, 0, 4095) ||
+	    !IN_RANGE(node, 0, 4095)) {
+		fprintf(stderr, "invalid network address \"%s\"\n", str);
+		return 0;
+	}
+
+	return tipc_addr(zone, cluster, node);
+}
diff --git a/tipc/misc.h b/tipc/misc.h
new file mode 100644
index 00000000..585df745
--- /dev/null
+++ b/tipc/misc.h
@@ -0,0 +1,19 @@
+/*
+ * misc.h Miscellaneous TIPC helper functions.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Richard Alpe <richard.alpe@ericsson.com>
+ */
+
+#ifndef _TIPC_MISC_H
+#define _TIPC_MISC_H
+
+#include <stdint.h>
+
+/*
+ * Parse a "Z.C.N" string into a TIPC address. Returns 0 after printing
+ * an error to stderr when the string is malformed or out of range.
+ */
+uint32_t str2addr(char *str);
+
+#endif
diff --git a/tipc/msg.c b/tipc/msg.c
new file mode 100644
index 00000000..22c22226
--- /dev/null
+++ b/tipc/msg.c
@@ -0,0 +1,170 @@
+/*
+ * msg.c Messaging (netlink) helper functions.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Richard Alpe <richard.alpe@ericsson.com>
+ */
+
+#include <stdio.h>
+#include <time.h>
+#include <errno.h>
+
+#include <linux/tipc_netlink.h>
+#include <linux/tipc.h>
+#include <linux/genetlink.h>
+#include <libmnl/libmnl.h>
+
+#include "msg.h"
+
+/*
+ * Attribute callback for mnl_attr_parse()/mnl_attr_parse_nested():
+ * stores attr in the table passed as data, indexed by attribute type.
+ *
+ * NOTE(review): the type is not checked against the size of the table
+ * supplied by the caller - confirm every table is large enough.
+ */
+int parse_attrs(const struct nlattr *attr, void *data)
+{
+	const struct nlattr **table = data;
+
+	table[mnl_attr_get_type(attr)] = attr;
+
+	return MNL_CB_OK;
+}
+
+/*
+ * Netlink callback for the family lookup: stores the u16 family id of
+ * the reply in the int that data points to. Returns MNL_CB_ERROR when
+ * the reply carries no family id attribute.
+ */
+static int family_id_cb(const struct nlmsghdr *nlh, void *data)
+{
+	struct nlattr *ctrl[CTRL_ATTR_MAX + 1] = {};
+	struct genlmsghdr *genl = mnl_nlmsg_get_payload(nlh);
+	int *family = data;
+
+	mnl_attr_parse(nlh, sizeof(*genl), parse_attrs, ctrl);
+	if (ctrl[CTRL_ATTR_FAMILY_ID]) {
+		*family = mnl_attr_get_u16(ctrl[CTRL_ATTR_FAMILY_ID]);
+		return MNL_CB_OK;
+	}
+
+	return MNL_CB_ERROR;
+}
+
+/*
+ * Open a generic netlink socket, bind it and send nlh on it.
+ *
+ * Returns the open socket so that the caller can read the reply, or
+ * NULL after printing the failure with perror(). The socket is closed
+ * on every failure path so that it is not leaked.
+ */
+static struct mnl_socket *msg_send(struct nlmsghdr *nlh)
+{
+	int ret;
+	struct mnl_socket *nl;
+
+	nl = mnl_socket_open(NETLINK_GENERIC);
+	if (nl == NULL) {
+		perror("mnl_socket_open");
+		return NULL;
+	}
+
+	ret = mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID);
+	if (ret < 0) {
+		/* Report first: closing the socket may overwrite errno */
+		perror("mnl_socket_bind");
+		mnl_socket_close(nl);
+		return NULL;
+	}
+
+	ret = mnl_socket_sendto(nl, nlh, nlh->nlmsg_len);
+	if (ret < 0) {
+		perror("mnl_socket_send");
+		mnl_socket_close(nl);
+		return NULL;
+	}
+
+	return nl;
+}
+
+/*
+ * Read replies from nl and feed them to callback through mnl_cb_run()
+ * until either call returns a value <= 0, then close the socket.
+ *
+ * Returns that last value; -1 is additionally reported with perror().
+ */
+static int msg_recv(struct mnl_socket *nl, mnl_cb_t callback, void *data, int seq)
+{
+	char buf[MNL_SOCKET_BUFFER_SIZE];
+	unsigned int portid = mnl_socket_get_portid(nl);
+	int ret;
+
+	for (;;) {
+		ret = mnl_socket_recvfrom(nl, buf, sizeof(buf));
+		if (ret <= 0)
+			break;
+
+		ret = mnl_cb_run(buf, ret, seq, portid, callback, data);
+		if (ret <= 0)
+			break;
+	}
+
+	if (ret == -1)
+		perror("error");
+
+	mnl_socket_close(nl);
+
+	return ret;
+}
+
+/*
+ * Send nlh and process the replies with callback.
+ *
+ * The current time is used as the sequence number of the request.
+ * Returns -ENOTSUP when the request could not be sent, otherwise the
+ * result of msg_recv().
+ */
+static int msg_query(struct nlmsghdr *nlh, mnl_cb_t callback, void *data)
+{
+	struct mnl_socket *sock;
+	unsigned int seq = time(NULL);
+
+	nlh->nlmsg_seq = seq;
+
+	sock = msg_send(nlh);
+	if (sock)
+		return msg_recv(sock, callback, data, seq);
+
+	return -ENOTSUP;
+}
+
+/*
+ * Ask the generic netlink controller for the id of the TIPC family.
+ *
+ * Returns the family id, the non-zero result of msg_query() when the
+ * query failed, or 0 when the query finished without delivering a
+ * family id.
+ */
+static int get_family(void)
+{
+	int err;
+	/* Stays 0 unless family_id_cb() runs, so no garbage is returned */
+	int nl_family = 0;
+	struct nlmsghdr *nlh;
+	struct genlmsghdr *genl;
+	char buf[MNL_SOCKET_BUFFER_SIZE];
+
+	nlh = mnl_nlmsg_put_header(buf);
+	nlh->nlmsg_type = GENL_ID_CTRL;
+	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+
+	genl = mnl_nlmsg_put_extra_header(nlh, sizeof(struct genlmsghdr));
+	genl->cmd = CTRL_CMD_GETFAMILY;
+	genl->version = 1;
+
+	mnl_attr_put_u32(nlh, CTRL_ATTR_FAMILY_ID, GENL_ID_CTRL);
+	mnl_attr_put_strz(nlh, CTRL_ATTR_FAMILY_NAME, TIPC_GENL_V2_NAME);
+
+	if ((err = msg_query(nlh, family_id_cb, &nl_family)))
+		return err;
+
+	return nl_family;
+}
+
+/*
+ * Send nlh as a single acknowledged request (NLM_F_REQUEST | NLM_F_ACK)
+ * and pass the replies to callback. Returns the result of msg_query().
+ */
+int msg_doit(struct nlmsghdr *nlh, mnl_cb_t callback, void *data)
+{
+	int rc;
+
+	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+	rc = msg_query(nlh, callback, data);
+
+	return rc;
+}
+
+/*
+ * Send nlh as a dump request (NLM_F_REQUEST | NLM_F_DUMP) and pass the
+ * replies to callback. Returns the result of msg_query().
+ */
+int msg_dumpit(struct nlmsghdr *nlh, mnl_cb_t callback, void *data)
+{
+	int rc;
+
+	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
+	rc = msg_query(nlh, callback, data);
+
+	return rc;
+}
+
+/*
+ * Start a TIPC generic netlink request for command cmd in buf.
+ *
+ * The TIPC family id is looked up with get_family() on every call.
+ * Returns the message header placed in buf, or NULL (after printing an
+ * error) when the lookup yields a value <= 0.
+ */
+struct nlmsghdr *msg_init(char *buf, int cmd)
+{
+	struct genlmsghdr *genl;
+	struct nlmsghdr *nlh;
+	int family = get_family();
+
+	if (family > 0) {
+		nlh = mnl_nlmsg_put_header(buf);
+		nlh->nlmsg_type = family;
+
+		genl = mnl_nlmsg_put_extra_header(nlh,
+						  sizeof(struct genlmsghdr));
+		genl->cmd = cmd;
+		genl->version = 1;
+
+		return nlh;
+	}
+
+	fprintf(stderr,
+		"Unable to get TIPC nl family id (module loaded?)\n");
+	return NULL;
+}
diff --git a/tipc/msg.h b/tipc/msg.h
new file mode 100644
index 00000000..41fd1ad1
--- /dev/null
+++ b/tipc/msg.h
@@ -0,0 +1,20 @@
+/*
+ * msg.h Messaging (netlink) helper functions.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Richard Alpe <richard.alpe@ericsson.com>
+ */
+
+#ifndef _TIPC_MSG_H
+#define _TIPC_MSG_H
+
+/* Start a TIPC request for cmd in buf; NULL if the family id is unavailable */
+struct nlmsghdr *msg_init(char *buf, int cmd);
+/* Send nlh with NLM_F_REQUEST | NLM_F_ACK and run callback on the replies */
+int msg_doit(struct nlmsghdr *nlh, mnl_cb_t callback, void *data);
+/* Send nlh with NLM_F_REQUEST | NLM_F_DUMP and run callback on the replies */
+int msg_dumpit(struct nlmsghdr *nlh, mnl_cb_t callback, void *data);
+/* mnl attribute callback: stores attr in the table at data, by type */
+int parse_attrs(const struct nlattr *attr, void *data);
+
+#endif
diff --git a/tipc/nametable.c b/tipc/nametable.c
new file mode 100644
index 00000000..770a644c
--- /dev/null
+++ b/tipc/nametable.c
@@ -0,0 +1,109 @@
+/*
+ * nametable.c TIPC nametable functionality.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Richard Alpe <richard.alpe@ericsson.com>
+ */
+
+#include <stdio.h>
+#include <errno.h>
+
+#include <linux/tipc_netlink.h>
+#include <linux/tipc.h>
+#include <linux/genetlink.h>
+#include <libmnl/libmnl.h>
+
+#include "cmdl.h"
+#include "msg.h"
+#include "nametable.h"
+
+#define PORTID_STR_LEN 45 /* Four u32 and five delimiter chars */
+
+/*
+ * Dump callback for TIPC_NL_NAME_TABLE_GET: print one publication row.
+ *
+ * @data points to an int counting the messages handled so far; the column
+ * header is printed while it is still zero.
+ *
+ * Returns MNL_CB_OK after printing a row, or MNL_CB_ERROR when the message
+ * lacks any attribute needed to print it.
+ */
+static int nametable_show_cb(const struct nlmsghdr *nlh, void *data)
+{
+	int *iteration = data;
+	uint32_t node;
+	uint32_t scope_id;
+	char port_id[PORTID_STR_LEN];
+	struct genlmsghdr *genl = mnl_nlmsg_get_payload(nlh);
+	struct nlattr *info[TIPC_NLA_MAX + 1] = {};
+	struct nlattr *attrs[TIPC_NLA_NAME_TABLE_MAX + 1] = {};
+	struct nlattr *publ[TIPC_NLA_PUBL_MAX + 1] = {};
+	static const char * const scope[] = { "", "zone", "cluster", "node" };
+
+	mnl_attr_parse(nlh, sizeof(*genl), parse_attrs, info);
+	if (!info[TIPC_NLA_NAME_TABLE])
+		return MNL_CB_ERROR;
+
+	mnl_attr_parse_nested(info[TIPC_NLA_NAME_TABLE], parse_attrs, attrs);
+	if (!attrs[TIPC_NLA_NAME_TABLE_PUBL])
+		return MNL_CB_ERROR;
+
+	mnl_attr_parse_nested(attrs[TIPC_NLA_NAME_TABLE_PUBL], parse_attrs, publ);
+	/*
+	 * The publ table is indexed by TIPC_NLA_PUBL_*; every attribute read
+	 * below must be present before it is dereferenced.
+	 */
+	if (!publ[TIPC_NLA_PUBL_TYPE] || !publ[TIPC_NLA_PUBL_LOWER] ||
+	    !publ[TIPC_NLA_PUBL_UPPER] || !publ[TIPC_NLA_PUBL_SCOPE] ||
+	    !publ[TIPC_NLA_PUBL_NODE] || !publ[TIPC_NLA_PUBL_REF] ||
+	    !publ[TIPC_NLA_PUBL_KEY])
+		return MNL_CB_ERROR;
+
+	if (!*iteration)
+		printf("%-10s %-10s %-10s %-26s %-10s\n",
+		       "Type", "Lower", "Upper", "Port Identity",
+		       "Publication Scope");
+	(*iteration)++;
+
+	node = mnl_attr_get_u32(publ[TIPC_NLA_PUBL_NODE]);
+	snprintf(port_id, sizeof(port_id), "<%u.%u.%u:%u>",
+		 tipc_zone(node),
+		 tipc_cluster(node),
+		 tipc_node(node),
+		 mnl_attr_get_u32(publ[TIPC_NLA_PUBL_REF]));
+
+	printf("%-10u %-10u %-10u %-26s %-12u",
+	       mnl_attr_get_u32(publ[TIPC_NLA_PUBL_TYPE]),
+	       mnl_attr_get_u32(publ[TIPC_NLA_PUBL_LOWER]),
+	       mnl_attr_get_u32(publ[TIPC_NLA_PUBL_UPPER]),
+	       port_id,
+	       mnl_attr_get_u32(publ[TIPC_NLA_PUBL_KEY]));
+
+	/* Never index the name table with a value it does not cover. */
+	scope_id = mnl_attr_get_u32(publ[TIPC_NLA_PUBL_SCOPE]);
+	if (scope_id < sizeof(scope) / sizeof(scope[0]))
+		printf("%s\n", scope[scope_id]);
+	else
+		printf("unknown\n");
+
+	return MNL_CB_OK;
+}
+
+/*
+ * "nametable show": request a dump of the name table and print it through
+ * nametable_show_cb().
+ *
+ * Returns -EINVAL after printing usage when help was requested, -1 when the
+ * message could not be initialised, otherwise the result of msg_dumpit().
+ */
+static int cmd_nametable_show(struct nlmsghdr *nlh, const struct cmd *cmd,
+			      struct cmdl *cmdl, void *data)
+{
+	char buf[MNL_SOCKET_BUFFER_SIZE];
+	int rows = 0;
+
+	if (help_flag) {
+		fprintf(stderr, "Usage: %s nametable show\n", cmdl->argv[0]);
+		return -EINVAL;
+	}
+
+	nlh = msg_init(buf, TIPC_NL_NAME_TABLE_GET);
+	if (nlh == NULL) {
+		fprintf(stderr, "error, message initialisation failed\n");
+		return -1;
+	}
+
+	return msg_dumpit(nlh, nametable_show_cb, &rows);
+}
+
+/* Print the usage text of the "nametable" command to stderr. */
+void cmd_nametable_help(struct cmdl *cmdl)
+{
+	const char *prog = cmdl->argv[0];
+
+	fprintf(stderr,
+		"Usage: %s nametable COMMAND\n\n"
+		"COMMANDS\n"
+		" show - Show nametable\n",
+		prog);
+}
+
+/*
+ * Entry point of the "nametable" command: hands the sub-command table
+ * ("show") to run_cmd() and returns its result.
+ */
+int cmd_nametable(struct nlmsghdr *nlh, const struct cmd *cmd, struct cmdl *cmdl,
+		  void *data)
+{
+	const struct cmd subcmds[] = {
+		{ "show", cmd_nametable_show, NULL },
+		{ NULL }
+	};
+	int rc = run_cmd(nlh, cmd, subcmds, cmdl, NULL);
+
+	return rc;
+}
diff --git a/tipc/nametable.h b/tipc/nametable.h
new file mode 100644
index 00000000..e0473e18
--- /dev/null
+++ b/tipc/nametable.h
@@ -0,0 +1,21 @@
+/*
+ * nametable.h TIPC nametable functionality.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Richard Alpe <richard.alpe@ericsson.com>
+ */
+
+#ifndef _TIPC_NAMETABLE_H
+#define _TIPC_NAMETABLE_H
+
+/*
+ * Only pointers to these types are used below, so forward declarations are
+ * sufficient and make this header independent of include order.
+ */
+struct cmd;
+struct cmdl;
+struct nlmsghdr;
+
+extern int help_flag;
+
+/* Print the usage text of the "nametable" command to stderr. */
+void cmd_nametable_help(struct cmdl *cmdl);
+/* Entry point of the "nametable" command; dispatches to its sub-commands. */
+int cmd_nametable(struct nlmsghdr *nlh, const struct cmd *cmd, struct cmdl *cmdl,
+		  void *data);
+
+#endif
diff --git a/tipc/node.c b/tipc/node.c
new file mode 100644
index 00000000..201fe1a4
--- /dev/null
+++ b/tipc/node.c
@@ -0,0 +1,267 @@
+/*
+ * node.c TIPC node functionality.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Richard Alpe <richard.alpe@ericsson.com>
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include <linux/tipc_netlink.h>
+#include <linux/tipc.h>
+#include <linux/genetlink.h>
+#include <libmnl/libmnl.h>
+
+#include "cmdl.h"
+#include "msg.h"
+#include "misc.h"
+#include "node.h"
+
+/*
+ * Dump callback for TIPC_NL_NODE_GET: print the address of one node followed
+ * by "up" when TIPC_NLA_NODE_UP is present and "down" otherwise.
+ *
+ * Returns MNL_CB_ERROR when the node nest or its address is missing,
+ * MNL_CB_OK otherwise.
+ */
+static int node_list_cb(const struct nlmsghdr *nlh, void *data)
+{
+	struct nlattr *top[TIPC_NLA_MAX + 1] = {};
+	struct nlattr *node_attrs[TIPC_NLA_NODE_MAX + 1] = {};
+	struct genlmsghdr *genl = mnl_nlmsg_get_payload(nlh);
+	uint32_t addr;
+
+	mnl_attr_parse(nlh, sizeof(*genl), parse_attrs, top);
+	if (top[TIPC_NLA_NODE] == NULL)
+		return MNL_CB_ERROR;
+
+	mnl_attr_parse_nested(top[TIPC_NLA_NODE], parse_attrs, node_attrs);
+	if (node_attrs[TIPC_NLA_NODE_ADDR] == NULL)
+		return MNL_CB_ERROR;
+
+	addr = mnl_attr_get_u32(node_attrs[TIPC_NLA_NODE_ADDR]);
+	printf("<%u.%u.%u>: ",
+	       tipc_zone(addr),
+	       tipc_cluster(addr),
+	       tipc_node(addr));
+
+	fputs(node_attrs[TIPC_NLA_NODE_UP] ? "up\n" : "down\n", stdout);
+
+	return MNL_CB_OK;
+}
+
+/*
+ * "node list": request a node dump and print it through node_list_cb().
+ *
+ * Returns -EINVAL after printing usage when help was requested, -1 when the
+ * message could not be initialised, otherwise the result of msg_dumpit().
+ */
+static int cmd_node_list(struct nlmsghdr *nlh, const struct cmd *cmd,
+			 struct cmdl *cmdl, void *data)
+{
+	char buf[MNL_SOCKET_BUFFER_SIZE];
+
+	if (help_flag) {
+		fprintf(stderr, "Usage: %s node list\n", cmdl->argv[0]);
+		return -EINVAL;
+	}
+
+	nlh = msg_init(buf, TIPC_NL_NODE_GET);
+	if (nlh == NULL) {
+		fprintf(stderr, "error, message initialisation failed\n");
+		return -1;
+	}
+
+	return msg_dumpit(nlh, node_list_cb, NULL);
+}
+
+/*
+ * "node set address ADDRESS": send TIPC_NL_NET_SET carrying the address
+ * parsed from the single remaining argument.
+ *
+ * Usage is printed and -EINVAL returned when help was requested or when the
+ * argument count is wrong; help is honoured here so that a help request
+ * never modifies the node (same rule as "peer remove address").
+ * Returns -1 when the address does not parse or the message cannot be
+ * initialised, otherwise the result of msg_doit().
+ */
+static int cmd_node_set_addr(struct nlmsghdr *nlh, const struct cmd *cmd,
+			     struct cmdl *cmdl, void *data)
+{
+	char *str;
+	uint32_t addr;
+	struct nlattr *nest;
+	char buf[MNL_SOCKET_BUFFER_SIZE];
+
+	if ((cmdl->argc != cmdl->optind + 1) || help_flag) {
+		fprintf(stderr, "Usage: %s node set address ADDRESS\n",
+			cmdl->argv[0]);
+		return -EINVAL;
+	}
+
+	str = shift_cmdl(cmdl);
+	addr = str2addr(str);
+	if (!addr)
+		return -1;
+
+	if (!(nlh = msg_init(buf, TIPC_NL_NET_SET))) {
+		fprintf(stderr, "error, message initialisation failed\n");
+		return -1;
+	}
+
+	nest = mnl_attr_nest_start(nlh, TIPC_NLA_NET);
+	mnl_attr_put_u32(nlh, TIPC_NLA_NET_ADDR, addr);
+	mnl_attr_nest_end(nlh, nest);
+
+	return msg_doit(nlh, NULL, NULL);
+}
+
+/*
+ * "node get address": print the local TIPC address.
+ *
+ * A temporary AF_TIPC/SOCK_RDM socket is opened and its own address is read
+ * back with getsockname(); the node part is printed as <zone.cluster.node>.
+ *
+ * Returns 0 on success and -1 when the socket cannot be created or queried.
+ */
+static int cmd_node_get_addr(struct nlmsghdr *nlh, const struct cmd *cmd,
+			     struct cmdl *cmdl, void *data)
+{
+	int sk;
+	socklen_t sz = sizeof(struct sockaddr_tipc);
+	struct sockaddr_tipc addr;
+
+	/* socket() signals failure with -1; 0 is a valid descriptor. */
+	sk = socket(AF_TIPC, SOCK_RDM, 0);
+	if (sk < 0) {
+		fprintf(stderr, "opening TIPC socket: %s\n", strerror(errno));
+		return -1;
+	}
+
+	if (getsockname(sk, (struct sockaddr *)&addr, &sz) < 0) {
+		fprintf(stderr, "getting TIPC socket address: %s\n",
+			strerror(errno));
+		close(sk);
+		return -1;
+	}
+	close(sk);
+
+	printf("<%u.%u.%u>\n",
+	       tipc_zone(addr.addr.id.node),
+	       tipc_cluster(addr.addr.id.node),
+	       tipc_node(addr.addr.id.node));
+
+	return 0;
+}
+
+/*
+ * Dump callback for TIPC_NL_NET_GET: print the network id of the message.
+ *
+ * Returns MNL_CB_ERROR when the net nest or its id attribute is missing,
+ * MNL_CB_OK otherwise.
+ */
+static int netid_get_cb(const struct nlmsghdr *nlh, void *data)
+{
+	struct nlattr *top[TIPC_NLA_MAX + 1] = {};
+	struct nlattr *net[TIPC_NLA_NET_MAX + 1] = {};
+	struct genlmsghdr *genl = mnl_nlmsg_get_payload(nlh);
+	uint32_t netid;
+
+	mnl_attr_parse(nlh, sizeof(*genl), parse_attrs, top);
+	if (top[TIPC_NLA_NET] == NULL)
+		return MNL_CB_ERROR;
+
+	mnl_attr_parse_nested(top[TIPC_NLA_NET], parse_attrs, net);
+	if (net[TIPC_NLA_NET_ID] == NULL)
+		return MNL_CB_ERROR;
+
+	netid = mnl_attr_get_u32(net[TIPC_NLA_NET_ID]);
+	printf("%u\n", netid);
+
+	return MNL_CB_OK;
+}
+
+/*
+ * "node get netid": request the network id and print it via netid_get_cb().
+ *
+ * Returns -EINVAL after printing help when help was requested, -1 when the
+ * message could not be initialised, otherwise the result of msg_dumpit().
+ */
+static int cmd_node_get_netid(struct nlmsghdr *nlh, const struct cmd *cmd,
+			      struct cmdl *cmdl, void *data)
+{
+	char buf[MNL_SOCKET_BUFFER_SIZE];
+
+	if (help_flag) {
+		/*
+		 * The command table in cmd_node_get() registers no help
+		 * handler for this entry, so never call it unchecked.
+		 */
+		if (cmd->help)
+			(cmd->help)(cmdl);
+		else
+			fprintf(stderr, "Usage: %s node get netid\n",
+				cmdl->argv[0]);
+		return -EINVAL;
+	}
+
+	if (!(nlh = msg_init(buf, TIPC_NL_NET_GET))) {
+		fprintf(stderr, "error, message initialisation failed\n");
+		return -1;
+	}
+
+	return msg_dumpit(nlh, netid_get_cb, NULL);
+}
+
+/*
+ * "node set netid NETID": send TIPC_NL_NET_SET carrying the network id
+ * given as the single remaining argument.
+ *
+ * Arguments are validated before any netlink message is built. NETID must
+ * be a plain decimal number that fits in 32 bits.
+ *
+ * Returns -EINVAL on a help request, a wrong argument count or an invalid
+ * NETID, -1 when the message cannot be initialised, otherwise the result of
+ * msg_doit().
+ */
+static int cmd_node_set_netid(struct nlmsghdr *nlh, const struct cmd *cmd,
+			      struct cmdl *cmdl, void *data)
+{
+	char *str;
+	char *end;
+	unsigned long netid;
+	char buf[MNL_SOCKET_BUFFER_SIZE];
+	struct nlattr *nest;
+
+	if (help_flag) {
+		/*
+		 * The command table in cmd_node_set() registers no help
+		 * handler for this entry, so never call it unchecked.
+		 */
+		if (cmd->help)
+			(cmd->help)(cmdl);
+		else
+			fprintf(stderr, "Usage: %s node set netid NETID\n",
+				cmdl->argv[0]);
+		return -EINVAL;
+	}
+
+	if (cmdl->argc != cmdl->optind + 1) {
+		fprintf(stderr, "Usage: %s node set netid NETID\n",
+			cmdl->argv[0]);
+		return -EINVAL;
+	}
+
+	str = shift_cmdl(cmdl);
+	errno = 0;
+	netid = strtoul(str, &end, 10);
+	/* Require a leading digit: strtoul() would accept a sign or blanks. */
+	if (*str < '0' || *str > '9' || *end != '\0' ||
+	    errno == ERANGE || netid > UINT32_MAX) {
+		fprintf(stderr, "error, invalid netid \"%s\"\n", str);
+		return -EINVAL;
+	}
+
+	if (!(nlh = msg_init(buf, TIPC_NL_NET_SET))) {
+		fprintf(stderr, "error, message initialisation failed\n");
+		return -1;
+	}
+
+	nest = mnl_attr_nest_start(nlh, TIPC_NLA_NET);
+	mnl_attr_put_u32(nlh, TIPC_NLA_NET_ID, (uint32_t)netid);
+	mnl_attr_nest_end(nlh, nest);
+
+	return msg_doit(nlh, NULL, NULL);
+}
+
+/* Print the usage text of "node set" to stderr. */
+static void cmd_node_set_help(struct cmdl *cmdl)
+{
+	const char *prog = cmdl->argv[0];
+
+	fprintf(stderr,
+		"Usage: %s node set PROPERTY\n\n"
+		"PROPERTIES\n"
+		" address ADDRESS - Set local address\n"
+		" netid NETID - Set local netid\n",
+		prog);
+}
+
+/*
+ * "node set": hands the property table ("address", "netid") to run_cmd()
+ * and returns its result.
+ */
+static int cmd_node_set(struct nlmsghdr *nlh, const struct cmd *cmd,
+			struct cmdl *cmdl, void *data)
+{
+	const struct cmd props[] = {
+		{ "address", cmd_node_set_addr, NULL },
+		{ "netid", cmd_node_set_netid, NULL },
+		{ NULL }
+	};
+	int rc = run_cmd(nlh, cmd, props, cmdl, NULL);
+
+	return rc;
+}
+
+/* Print the usage text of "node get" to stderr. */
+static void cmd_node_get_help(struct cmdl *cmdl)
+{
+	const char *prog = cmdl->argv[0];
+
+	fprintf(stderr,
+		"Usage: %s node get PROPERTY\n\n"
+		"PROPERTIES\n"
+		" address - Get local address\n"
+		" netid - Get local netid\n",
+		prog);
+}
+
+/*
+ * "node get": hands the property table ("address", "netid") to run_cmd()
+ * and returns its result.
+ */
+static int cmd_node_get(struct nlmsghdr *nlh, const struct cmd *cmd,
+			struct cmdl *cmdl, void *data)
+{
+	const struct cmd props[] = {
+		{ "address", cmd_node_get_addr, NULL },
+		{ "netid", cmd_node_get_netid, NULL },
+		{ NULL }
+	};
+	int rc = run_cmd(nlh, cmd, props, cmdl, NULL);
+
+	return rc;
+}
+
+/* Print the usage text of the "node" command to stderr. */
+void cmd_node_help(struct cmdl *cmdl)
+{
+	const char *prog = cmdl->argv[0];
+
+	fprintf(stderr,
+		"Usage: %s node COMMAND [ARGS] ...\n\n"
+		"COMMANDS\n"
+		" list - List remote nodes\n"
+		" get - Get local node parameters\n"
+		" set - Set local node parameters\n",
+		prog);
+}
+
+/*
+ * Entry point of the "node" command: hands the sub-command table
+ * ("list", "get", "set") to run_cmd() and returns its result.
+ */
+int cmd_node(struct nlmsghdr *nlh, const struct cmd *cmd, struct cmdl *cmdl,
+	     void *data)
+{
+	const struct cmd subcmds[] = {
+		{ "list", cmd_node_list, NULL },
+		{ "get", cmd_node_get, cmd_node_get_help },
+		{ "set", cmd_node_set, cmd_node_set_help },
+		{ NULL }
+	};
+	int rc = run_cmd(nlh, cmd, subcmds, cmdl, NULL);
+
+	return rc;
+}
diff --git a/tipc/node.h b/tipc/node.h
new file mode 100644
index 00000000..afee1fd0
--- /dev/null
+++ b/tipc/node.h
@@ -0,0 +1,21 @@
+/*
+ * node.h TIPC node functionality.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Richard Alpe <richard.alpe@ericsson.com>
+ */
+
+#ifndef _TIPC_NODE_H
+#define _TIPC_NODE_H
+
+/*
+ * Only pointers to these types are used below, so forward declarations are
+ * sufficient and make this header independent of include order.
+ */
+struct cmd;
+struct cmdl;
+struct nlmsghdr;
+
+extern int help_flag;
+
+/* Entry point of the "node" command; dispatches to its sub-commands. */
+int cmd_node(struct nlmsghdr *nlh, const struct cmd *cmd, struct cmdl *cmdl,
+	     void *data);
+/* Print the usage text of the "node" command to stderr. */
+void cmd_node_help(struct cmdl *cmdl);
+
+#endif
diff --git a/tipc/peer.c b/tipc/peer.c
new file mode 100644
index 00000000..de0c73c3
--- /dev/null
+++ b/tipc/peer.c
@@ -0,0 +1,93 @@
+/*
+ * peer.c TIPC peer functionality.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Richard Alpe <richard.alpe@ericsson.com>
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include <linux/tipc_netlink.h>
+#include <linux/tipc.h>
+#include <linux/genetlink.h>
+#include <libmnl/libmnl.h>
+
+#include "cmdl.h"
+#include "msg.h"
+#include "misc.h"
+#include "peer.h"
+
+/*
+ * "peer remove address ADDRESS": send TIPC_NL_PEER_REMOVE carrying the
+ * address parsed from the single remaining argument.
+ *
+ * Returns -EINVAL after printing usage when help was requested or the
+ * argument count is wrong, -1 when the address does not parse or the
+ * message cannot be initialised, otherwise the result of msg_doit().
+ */
+static int cmd_peer_rm_addr(struct nlmsghdr *nlh, const struct cmd *cmd,
+			    struct cmdl *cmdl, void *data)
+{
+	char buf[MNL_SOCKET_BUFFER_SIZE];
+	struct nlattr *net_nest;
+	uint32_t peer;
+
+	if (help_flag || cmdl->argc != cmdl->optind + 1) {
+		fprintf(stderr, "Usage: %s peer remove address ADDRESS\n",
+			cmdl->argv[0]);
+		return -EINVAL;
+	}
+
+	peer = str2addr(shift_cmdl(cmdl));
+	if (peer == 0)
+		return -1;
+
+	nlh = msg_init(buf, TIPC_NL_PEER_REMOVE);
+	if (nlh == NULL) {
+		fprintf(stderr, "error, message initialisation failed\n");
+		return -1;
+	}
+
+	net_nest = mnl_attr_nest_start(nlh, TIPC_NLA_NET);
+	mnl_attr_put_u32(nlh, TIPC_NLA_NET_ADDR, peer);
+	mnl_attr_nest_end(nlh, net_nest);
+
+	return msg_doit(nlh, NULL, NULL);
+}
+
+/* Print the usage text of "peer remove" to stderr. */
+static void cmd_peer_rm_help(struct cmdl *cmdl)
+{
+	const char *prog = cmdl->argv[0];
+
+	fprintf(stderr, "Usage: %s peer remove address ADDRESS\n", prog);
+}
+
+/*
+ * "peer remove": hands the sub-command table ("address") to run_cmd() and
+ * returns its result.
+ */
+static int cmd_peer_rm(struct nlmsghdr *nlh, const struct cmd *cmd,
+		       struct cmdl *cmdl, void *data)
+{
+	const struct cmd subcmds[] = {
+		{ "address", cmd_peer_rm_addr, cmd_peer_rm_help },
+		{ NULL }
+	};
+	int rc = run_cmd(nlh, cmd, subcmds, cmdl, NULL);
+
+	return rc;
+}
+
+/* Print the usage text of the "peer" command to stderr. */
+void cmd_peer_help(struct cmdl *cmdl)
+{
+	const char *prog = cmdl->argv[0];
+
+	fprintf(stderr,
+		"Usage: %s peer COMMAND [ARGS] ...\n\n"
+		"COMMANDS\n"
+		" remove - Remove an offline peer node\n",
+		prog);
+}
+
+/*
+ * Entry point of the "peer" command: hands the sub-command table ("remove")
+ * to run_cmd() and returns its result.
+ */
+int cmd_peer(struct nlmsghdr *nlh, const struct cmd *cmd, struct cmdl *cmdl,
+	     void *data)
+{
+	const struct cmd subcmds[] = {
+		{ "remove", cmd_peer_rm, cmd_peer_rm_help },
+		{ NULL }
+	};
+	int rc = run_cmd(nlh, cmd, subcmds, cmdl, NULL);
+
+	return rc;
+}
diff --git a/tipc/peer.h b/tipc/peer.h
new file mode 100644
index 00000000..89722616
--- /dev/null
+++ b/tipc/peer.h
@@ -0,0 +1,21 @@
+/*
+ * peer.h TIPC peer functionality.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Richard Alpe <richard.alpe@ericsson.com>
+ */
+
+#ifndef _TIPC_PEER_H
+#define _TIPC_PEER_H
+
+/*
+ * Only pointers to these types are used below, so forward declarations are
+ * sufficient and make this header independent of include order.
+ */
+struct cmd;
+struct cmdl;
+struct nlmsghdr;
+
+extern int help_flag;
+
+/* Entry point of the "peer" command; dispatches to its sub-commands. */
+int cmd_peer(struct nlmsghdr *nlh, const struct cmd *cmd, struct cmdl *cmdl,
+	     void *data);
+/* Print the usage text of the "peer" command to stderr. */
+void cmd_peer_help(struct cmdl *cmdl);
+
+#endif
diff --git a/tipc/socket.c b/tipc/socket.c
new file mode 100644
index 00000000..48ba8215
--- /dev/null
+++ b/tipc/socket.c
@@ -0,0 +1,140 @@
+/*
+ * socket.c TIPC socket functionality.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Richard Alpe <richard.alpe@ericsson.com>
+ */
+
+#include <stdio.h>
+#include <errno.h>
+
+#include <linux/tipc.h>
+#include <linux/tipc_netlink.h>
+#include <linux/genetlink.h>
+#include <libmnl/libmnl.h>
+
+#include "cmdl.h"
+#include "msg.h"
+#include "socket.h"
+
+#define PORTID_STR_LEN 45 /* Four u32 and five delimiter chars */
+
+/*
+ * Dump callback for TIPC_NL_PUBL_GET: print the {type,lower,upper} name
+ * sequence one socket is bound to.
+ *
+ * Returns MNL_CB_ERROR when the publication nest or any of the three
+ * printed attributes is missing, MNL_CB_OK otherwise.
+ */
+static int publ_list_cb(const struct nlmsghdr *nlh, void *data)
+{
+	struct genlmsghdr *genl = mnl_nlmsg_get_payload(nlh);
+	struct nlattr *info[TIPC_NLA_MAX + 1] = {};
+	/*
+	 * This table receives publication attributes and is indexed with
+	 * TIPC_NLA_PUBL_*, so it has to be sized by TIPC_NLA_PUBL_MAX.
+	 */
+	struct nlattr *attrs[TIPC_NLA_PUBL_MAX + 1] = {};
+
+	mnl_attr_parse(nlh, sizeof(*genl), parse_attrs, info);
+	if (!info[TIPC_NLA_PUBL])
+		return MNL_CB_ERROR;
+
+	mnl_attr_parse_nested(info[TIPC_NLA_PUBL], parse_attrs, attrs);
+	if (!attrs[TIPC_NLA_PUBL_TYPE] || !attrs[TIPC_NLA_PUBL_LOWER] ||
+	    !attrs[TIPC_NLA_PUBL_UPPER])
+		return MNL_CB_ERROR;
+
+	printf(" bound to {%u,%u,%u}\n",
+	       mnl_attr_get_u32(attrs[TIPC_NLA_PUBL_TYPE]),
+	       mnl_attr_get_u32(attrs[TIPC_NLA_PUBL_LOWER]),
+	       mnl_attr_get_u32(attrs[TIPC_NLA_PUBL_UPPER]));
+
+	return MNL_CB_OK;
+}
+
+/*
+ * Request the publications of socket reference @sock and print them through
+ * publ_list_cb().
+ *
+ * Returns -1 when the message cannot be initialised, otherwise the result
+ * of msg_dumpit().
+ */
+static int publ_list(uint32_t sock)
+{
+	char buf[MNL_SOCKET_BUFFER_SIZE];
+	struct nlattr *sock_nest;
+	struct nlmsghdr *req = msg_init(buf, TIPC_NL_PUBL_GET);
+
+	if (req == NULL) {
+		fprintf(stderr, "error, message initialisation failed\n");
+		return -1;
+	}
+
+	sock_nest = mnl_attr_nest_start(req, TIPC_NLA_SOCK);
+	mnl_attr_put_u32(req, TIPC_NLA_SOCK_REF, sock);
+	mnl_attr_nest_end(req, sock_nest);
+
+	return msg_dumpit(req, publ_list_cb, NULL);
+}
+
+/*
+ * Dump callback for TIPC_NL_SOCK_GET: print one socket.
+ *
+ * The socket reference is always printed. A connected socket additionally
+ * shows its peer (and the name it connected through when the connection
+ * flag attribute is present); an unconnected socket with publications has
+ * them listed through publ_list().
+ *
+ * Returns MNL_CB_ERROR when a required attribute is missing, MNL_CB_OK
+ * otherwise.
+ */
+static int sock_list_cb(const struct nlmsghdr *nlh, void *data)
+{
+	struct genlmsghdr *genl = mnl_nlmsg_get_payload(nlh);
+	struct nlattr *info[TIPC_NLA_MAX + 1] = {};
+	struct nlattr *attrs[TIPC_NLA_SOCK_MAX + 1] = {};
+
+	mnl_attr_parse(nlh, sizeof(*genl), parse_attrs, info);
+	if (!info[TIPC_NLA_SOCK])
+		return MNL_CB_ERROR;
+
+	mnl_attr_parse_nested(info[TIPC_NLA_SOCK], parse_attrs, attrs);
+	if (!attrs[TIPC_NLA_SOCK_REF])
+		return MNL_CB_ERROR;
+
+	printf("socket %u\n", mnl_attr_get_u32(attrs[TIPC_NLA_SOCK_REF]));
+
+	if (attrs[TIPC_NLA_SOCK_CON]) {
+		uint32_t node;
+		struct nlattr *con[TIPC_NLA_CON_MAX + 1] = {};
+
+		mnl_attr_parse_nested(attrs[TIPC_NLA_SOCK_CON], parse_attrs, con);
+		/* Validate everything that is read before printing a partial line. */
+		if (!con[TIPC_NLA_CON_NODE] || !con[TIPC_NLA_CON_SOCK])
+			return MNL_CB_ERROR;
+		if (con[TIPC_NLA_CON_FLAG] &&
+		    (!con[TIPC_NLA_CON_TYPE] || !con[TIPC_NLA_CON_INST]))
+			return MNL_CB_ERROR;
+
+		node = mnl_attr_get_u32(con[TIPC_NLA_CON_NODE]);
+
+		printf(" connected to <%u.%u.%u:%u>", tipc_zone(node),
+		       tipc_cluster(node), tipc_node(node),
+		       mnl_attr_get_u32(con[TIPC_NLA_CON_SOCK]));
+
+		if (con[TIPC_NLA_CON_FLAG])
+			printf(" via {%u,%u}\n",
+			       mnl_attr_get_u32(con[TIPC_NLA_CON_TYPE]),
+			       mnl_attr_get_u32(con[TIPC_NLA_CON_INST]));
+		else
+			printf("\n");
+	} else if (attrs[TIPC_NLA_SOCK_HAS_PUBL]) {
+		/* Listing publications is best effort; its result is not propagated. */
+		publ_list(mnl_attr_get_u32(attrs[TIPC_NLA_SOCK_REF]));
+	}
+
+	return MNL_CB_OK;
+}
+
+/*
+ * "socket list": request a socket dump and print it through sock_list_cb().
+ *
+ * Returns -EINVAL after printing usage when help was requested, -1 when the
+ * message could not be initialised, otherwise the result of msg_dumpit().
+ */
+static int cmd_socket_list(struct nlmsghdr *nlh, const struct cmd *cmd,
+			   struct cmdl *cmdl, void *data)
+{
+	char buf[MNL_SOCKET_BUFFER_SIZE];
+
+	if (help_flag) {
+		fprintf(stderr, "Usage: %s socket list\n", cmdl->argv[0]);
+		return -EINVAL;
+	}
+
+	nlh = msg_init(buf, TIPC_NL_SOCK_GET);
+	if (nlh == NULL) {
+		fprintf(stderr, "error, message initialisation failed\n");
+		return -1;
+	}
+
+	return msg_dumpit(nlh, sock_list_cb, NULL);
+}
+
+/* Print the usage text of the "socket" command to stderr. */
+void cmd_socket_help(struct cmdl *cmdl)
+{
+	const char *prog = cmdl->argv[0];
+
+	fprintf(stderr,
+		"Usage: %s socket COMMAND\n\n"
+		"Commands:\n"
+		" list - List sockets (ports)\n",
+		prog);
+}
+
+/*
+ * Entry point of the "socket" command: hands the sub-command table ("list")
+ * to run_cmd() and returns its result.
+ */
+int cmd_socket(struct nlmsghdr *nlh, const struct cmd *cmd, struct cmdl *cmdl,
+	       void *data)
+{
+	const struct cmd subcmds[] = {
+		{ "list", cmd_socket_list, NULL },
+		{ NULL }
+	};
+	int rc = run_cmd(nlh, cmd, subcmds, cmdl, NULL);
+
+	return rc;
+}
diff --git a/tipc/socket.h b/tipc/socket.h
new file mode 100644
index 00000000..9d1b6487
--- /dev/null
+++ b/tipc/socket.h
@@ -0,0 +1,21 @@
+/*
+ * socket.h TIPC socket functionality.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Richard Alpe <richard.alpe@ericsson.com>
+ */
+
+#ifndef _TIPC_SOCKET_H
+#define _TIPC_SOCKET_H
+
+/*
+ * Only pointers to these types are used below, so forward declarations are
+ * sufficient and make this header independent of include order.
+ */
+struct cmd;
+struct cmdl;
+struct nlmsghdr;
+
+extern int help_flag;
+
+/* Print the usage text of the "socket" command to stderr. */
+void cmd_socket_help(struct cmdl *cmdl);
+/* Entry point of the "socket" command; dispatches to its sub-commands. */
+int cmd_socket(struct nlmsghdr *nlh, const struct cmd *cmd, struct cmdl *cmdl,
+	       void *data);
+
+#endif
diff --git a/tipc/tipc.c b/tipc/tipc.c
new file mode 100644
index 00000000..600d5e2a
--- /dev/null
+++ b/tipc/tipc.c
@@ -0,0 +1,99 @@
+/*
+ * tipc. TIPC utility frontend.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Richard Alpe <richard.alpe@ericsson.com>
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <getopt.h>
+#include <unistd.h>
+
+#include "bearer.h"
+#include "link.h"
+#include "nametable.h"
+#include "socket.h"
+#include "media.h"
+#include "node.h"
+#include "peer.h"
+#include "cmdl.h"
+
+int help_flag;
+
+/* Print the top-level usage text (options and commands) to stderr. */
+static void about(struct cmdl *cmdl)
+{
+	const char *prog = cmdl->argv[0];
+
+	fprintf(stderr,
+		"Transparent Inter-Process Communication Protocol\n"
+		"Usage: %s [OPTIONS] COMMAND [ARGS] ...\n"
+		"\n"
+		"Options:\n"
+		" -h, --help \t\tPrint help for last given command\n"
+		"\n"
+		"Commands:\n"
+		" bearer - Show or modify bearers\n"
+		" link - Show or modify links\n"
+		" media - Show or modify media\n"
+		" nametable - Show nametable\n"
+		" node - Show or modify node related parameters\n"
+		" peer - Peer related operations\n"
+		" socket - Show sockets\n",
+		prog);
+}
+
+/*
+ * Program entry: parse the global options, then dispatch the remaining
+ * arguments to the matching top-level command through run_cmd().
+ *
+ * Returns 1 on an invalid option or when run_cmd() reports a non-zero
+ * result, 0 otherwise.
+ */
+int main(int argc, char *argv[])
+{
+	static const struct option long_options[] = {
+		{"help", no_argument, 0, 'h'},
+		{0, 0, 0, 0}
+	};
+	const struct cmd root = {"tipc", NULL, about};
+	const struct cmd cmds[] = {
+		{ "bearer", cmd_bearer, cmd_bearer_help},
+		{ "link", cmd_link, cmd_link_help},
+		{ "media", cmd_media, cmd_media_help},
+		{ "nametable", cmd_nametable, cmd_nametable_help},
+		{ "node", cmd_node, cmd_node_help},
+		{ "peer", cmd_peer, cmd_peer_help},
+		{ "socket", cmd_socket, cmd_socket_help},
+		{ NULL }
+	};
+	struct cmdl cmdl;
+	int option_index = 0;
+	int opt;
+
+	while ((opt = getopt_long(argc, argv, "h", long_options,
+				  &option_index)) != -1) {
+		/* Invalid option, error msg is printed by getopts */
+		if (opt != 'h')
+			return 1;
+
+		/*
+		 * We want the help for the last command, so we flag
+		 * here in order to print later.
+		 */
+		help_flag = 1;
+	}
+
+	cmdl.optind = optind;
+	cmdl.argc = argc;
+	cmdl.argv = argv;
+
+	return run_cmd(NULL, &root, cmds, &cmdl, NULL) != 0 ? 1 : 0;
+}