aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Stephen Hemminger <sthemmin@microsoft.com> 2017-10-18 17:11:50 -0700
committer Stephen Hemminger <sthemmin@microsoft.com> 2017-10-18 17:11:50 -0700
commit 4b4dde0ae6bed45192a1056c57ef87dc2a31d2c1 (patch)
tree dbf39684e8d0fb942dc7350b172568778f1ed097
parent 70556c1632e6fdbc8489bf3c59e0588ece2e2f37 (diff)
parent 4b73d52f8a81919f511cd47d39251f74f6a37c7d (diff)
downloadplatform_external_iproute2-4b4dde0ae6bed45192a1056c57ef87dc2a31d2c1.tar.gz
platform_external_iproute2-4b4dde0ae6bed45192a1056c57ef87dc2a31d2c1.tar.bz2
platform_external_iproute2-4b4dde0ae6bed45192a1056c57ef87dc2a31d2c1.zip
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/shemminger/iproute2
-rw-r--r--.gitignore2
-rw-r--r--Makefile6
-rwxr-xr-xconfigure24
-rw-r--r--doc/Makefile73
-rw-r--r--doc/Plan16
-rw-r--r--doc/SNAPSHOT.tex1
-rw-r--r--doc/api-ip6-flowlabels.tex429
-rw-r--r--doc/arpd.sgml130
-rw-r--r--doc/do-psnup16
-rw-r--r--doc/ip-cref.tex3453
-rw-r--r--doc/ip-tunnels.tex469
-rw-r--r--doc/nstat.sgml110
-rw-r--r--doc/preamble.tex26
-rw-r--r--doc/rtstat.sgml52
-rw-r--r--doc/ss.sgml525
-rw-r--r--doc/tc-filters.tex514
-rw-r--r--include/color.h3
-rw-r--r--include/json_print.h2
-rw-r--r--include/uapi/linux/atm.h (renamed from include/linux/atm.h)0
-rw-r--r--include/uapi/linux/atmapi.h (renamed from include/linux/atmapi.h)0
-rw-r--r--include/uapi/linux/atmarp.h (renamed from include/linux/atmarp.h)0
-rw-r--r--include/uapi/linux/atmdev.h (renamed from include/linux/atmdev.h)0
-rw-r--r--include/uapi/linux/atmioc.h (renamed from include/linux/atmioc.h)0
-rw-r--r--include/uapi/linux/atmsap.h (renamed from include/linux/atmsap.h)0
-rw-r--r--include/uapi/linux/bpf.h (renamed from include/linux/bpf.h)0
-rw-r--r--include/uapi/linux/bpf_common.h (renamed from include/linux/bpf_common.h)0
-rw-r--r--include/uapi/linux/can.h (renamed from include/linux/can.h)0
-rw-r--r--include/uapi/linux/can/netlink.h (renamed from include/linux/can/netlink.h)0
-rw-r--r--include/uapi/linux/can/vxcan.h (renamed from include/linux/can/vxcan.h)0
-rw-r--r--include/uapi/linux/devlink.h (renamed from include/linux/devlink.h)0
-rw-r--r--include/uapi/linux/elf-em.h (renamed from include/linux/elf-em.h)0
-rw-r--r--include/uapi/linux/fib_rules.h (renamed from include/linux/fib_rules.h)0
-rw-r--r--include/uapi/linux/filter.h (renamed from include/linux/filter.h)0
-rw-r--r--include/uapi/linux/fou.h (renamed from include/linux/fou.h)0
-rw-r--r--include/uapi/linux/gen_stats.h (renamed from include/linux/gen_stats.h)0
-rw-r--r--include/uapi/linux/genetlink.h (renamed from include/linux/genetlink.h)0
-rw-r--r--include/uapi/linux/hdlc/ioctl.h (renamed from include/linux/hdlc/ioctl.h)0
-rw-r--r--include/uapi/linux/icmpv6.h (renamed from include/linux/icmpv6.h)0
-rw-r--r--include/uapi/linux/if.h (renamed from include/linux/if.h)0
-rw-r--r--include/uapi/linux/if_addr.h (renamed from include/linux/if_addr.h)0
-rw-r--r--include/uapi/linux/if_addrlabel.h (renamed from include/linux/if_addrlabel.h)0
-rw-r--r--include/uapi/linux/if_alg.h (renamed from include/linux/if_alg.h)0
-rw-r--r--include/uapi/linux/if_arp.h (renamed from include/linux/if_arp.h)0
-rw-r--r--include/uapi/linux/if_bonding.h (renamed from include/linux/if_bonding.h)0
-rw-r--r--include/uapi/linux/if_bridge.h (renamed from include/linux/if_bridge.h)0
-rw-r--r--include/uapi/linux/if_ether.h (renamed from include/linux/if_ether.h)0
-rw-r--r--include/uapi/linux/if_link.h (renamed from include/linux/if_link.h)0
-rw-r--r--include/uapi/linux/if_macsec.h (renamed from include/linux/if_macsec.h)0
-rw-r--r--include/uapi/linux/if_packet.h (renamed from include/linux/if_packet.h)0
-rw-r--r--include/uapi/linux/if_tun.h (renamed from include/linux/if_tun.h)0
-rw-r--r--include/uapi/linux/if_tunnel.h (renamed from include/linux/if_tunnel.h)0
-rw-r--r--include/uapi/linux/if_vlan.h (renamed from include/linux/if_vlan.h)0
-rw-r--r--include/uapi/linux/ife.h (renamed from include/linux/ife.h)0
-rw-r--r--include/uapi/linux/ila.h (renamed from include/linux/ila.h)0
-rw-r--r--include/uapi/linux/in.h (renamed from include/linux/in.h)0
-rw-r--r--include/uapi/linux/in6.h (renamed from include/linux/in6.h)0
-rw-r--r--include/uapi/linux/in_route.h (renamed from include/linux/in_route.h)0
-rw-r--r--include/uapi/linux/inet_diag.h (renamed from include/linux/inet_diag.h)0
-rw-r--r--include/uapi/linux/ip.h (renamed from include/linux/ip.h)0
-rw-r--r--include/uapi/linux/ip6_tunnel.h (renamed from include/linux/ip6_tunnel.h)0
-rw-r--r--include/uapi/linux/ipsec.h (renamed from include/linux/ipsec.h)0
-rw-r--r--include/uapi/linux/kernel.h (renamed from include/linux/kernel.h)0
-rw-r--r--include/uapi/linux/l2tp.h (renamed from include/linux/l2tp.h)0
-rw-r--r--include/uapi/linux/libc-compat.h (renamed from include/linux/libc-compat.h)0
-rw-r--r--include/uapi/linux/limits.h (renamed from include/linux/limits.h)0
-rw-r--r--include/uapi/linux/lwtunnel.h (renamed from include/linux/lwtunnel.h)0
-rw-r--r--include/uapi/linux/magic.h (renamed from include/linux/magic.h)0
-rw-r--r--include/uapi/linux/mpls.h (renamed from include/linux/mpls.h)0
-rw-r--r--include/uapi/linux/mpls_iptunnel.h (renamed from include/linux/mpls_iptunnel.h)0
-rw-r--r--include/uapi/linux/neighbour.h (renamed from include/linux/neighbour.h)0
-rw-r--r--include/uapi/linux/net_namespace.h (renamed from include/linux/net_namespace.h)0
-rw-r--r--include/uapi/linux/netconf.h (renamed from include/linux/netconf.h)0
-rw-r--r--include/uapi/linux/netdevice.h (renamed from include/linux/netdevice.h)0
-rw-r--r--include/uapi/linux/netfilter.h (renamed from include/linux/netfilter.h)0
-rw-r--r--include/uapi/linux/netfilter/ipset/ip_set.h (renamed from include/linux/netfilter/ipset/ip_set.h)0
-rw-r--r--include/uapi/linux/netfilter/x_tables.h (renamed from include/linux/netfilter/x_tables.h)0
-rw-r--r--include/uapi/linux/netfilter/xt_set.h (renamed from include/linux/netfilter/xt_set.h)0
-rw-r--r--include/uapi/linux/netfilter/xt_tcpudp.h (renamed from include/linux/netfilter/xt_tcpudp.h)0
-rw-r--r--include/uapi/linux/netfilter_ipv4.h (renamed from include/linux/netfilter_ipv4.h)0
-rw-r--r--include/uapi/linux/netfilter_ipv4/ip_tables.h (renamed from include/linux/netfilter_ipv4/ip_tables.h)0
-rw-r--r--include/uapi/linux/netfilter_ipv6.h (renamed from include/linux/netfilter_ipv6.h)0
-rw-r--r--include/uapi/linux/netfilter_ipv6/ip6_tables.h (renamed from include/linux/netfilter_ipv6/ip6_tables.h)0
-rw-r--r--include/uapi/linux/netlink.h (renamed from include/linux/netlink.h)0
-rw-r--r--include/uapi/linux/netlink_diag.h (renamed from include/linux/netlink_diag.h)0
-rw-r--r--include/uapi/linux/packet_diag.h (renamed from include/linux/packet_diag.h)0
-rw-r--r--include/uapi/linux/param.h (renamed from include/linux/param.h)0
-rw-r--r--include/uapi/linux/pfkeyv2.h (renamed from include/linux/pfkeyv2.h)0
-rw-r--r--include/uapi/linux/pkt_cls.h (renamed from include/linux/pkt_cls.h)0
-rw-r--r--include/uapi/linux/pkt_sched.h (renamed from include/linux/pkt_sched.h)0
-rw-r--r--include/uapi/linux/posix_types.h (renamed from include/linux/posix_types.h)0
-rw-r--r--include/uapi/linux/rtnetlink.h (renamed from include/linux/rtnetlink.h)0
-rw-r--r--include/uapi/linux/sctp.h (renamed from include/linux/sctp.h)0
-rw-r--r--include/uapi/linux/seg6.h (renamed from include/linux/seg6.h)0
-rw-r--r--include/uapi/linux/seg6_genl.h (renamed from include/linux/seg6_genl.h)0
-rw-r--r--include/uapi/linux/seg6_hmac.h (renamed from include/linux/seg6_hmac.h)0
-rw-r--r--include/uapi/linux/seg6_iptunnel.h (renamed from include/linux/seg6_iptunnel.h)0
-rw-r--r--include/uapi/linux/seg6_local.h (renamed from include/linux/seg6_local.h)0
-rw-r--r--include/uapi/linux/sock_diag.h (renamed from include/linux/sock_diag.h)0
-rw-r--r--include/uapi/linux/socket.h (renamed from include/linux/socket.h)0
-rw-r--r--include/uapi/linux/sockios.h (renamed from include/linux/sockios.h)0
-rw-r--r--include/uapi/linux/stddef.h (renamed from include/linux/stddef.h)0
-rw-r--r--include/uapi/linux/sysinfo.h (renamed from include/linux/sysinfo.h)0
-rw-r--r--include/uapi/linux/tc_act/tc_bpf.h (renamed from include/linux/tc_act/tc_bpf.h)0
-rw-r--r--include/uapi/linux/tc_act/tc_connmark.h (renamed from include/linux/tc_act/tc_connmark.h)0
-rw-r--r--include/uapi/linux/tc_act/tc_csum.h (renamed from include/linux/tc_act/tc_csum.h)0
-rw-r--r--include/uapi/linux/tc_act/tc_defact.h (renamed from include/linux/tc_act/tc_defact.h)0
-rw-r--r--include/uapi/linux/tc_act/tc_gact.h (renamed from include/linux/tc_act/tc_gact.h)0
-rw-r--r--include/uapi/linux/tc_act/tc_ife.h (renamed from include/linux/tc_act/tc_ife.h)0
-rw-r--r--include/uapi/linux/tc_act/tc_ipt.h (renamed from include/linux/tc_act/tc_ipt.h)0
-rw-r--r--include/uapi/linux/tc_act/tc_mirred.h (renamed from include/linux/tc_act/tc_mirred.h)0
-rw-r--r--include/uapi/linux/tc_act/tc_nat.h (renamed from include/linux/tc_act/tc_nat.h)0
-rw-r--r--include/uapi/linux/tc_act/tc_pedit.h (renamed from include/linux/tc_act/tc_pedit.h)0
-rw-r--r--include/uapi/linux/tc_act/tc_sample.h (renamed from include/linux/tc_act/tc_sample.h)0
-rw-r--r--include/uapi/linux/tc_act/tc_skbedit.h (renamed from include/linux/tc_act/tc_skbedit.h)0
-rw-r--r--include/uapi/linux/tc_act/tc_skbmod.h (renamed from include/linux/tc_act/tc_skbmod.h)0
-rw-r--r--include/uapi/linux/tc_act/tc_tunnel_key.h (renamed from include/linux/tc_act/tc_tunnel_key.h)0
-rw-r--r--include/uapi/linux/tc_act/tc_vlan.h (renamed from include/linux/tc_act/tc_vlan.h)0
-rw-r--r--include/uapi/linux/tc_ematch/tc_em_cmp.h (renamed from include/linux/tc_ematch/tc_em_cmp.h)0
-rw-r--r--include/uapi/linux/tc_ematch/tc_em_meta.h (renamed from include/linux/tc_ematch/tc_em_meta.h)0
-rw-r--r--include/uapi/linux/tc_ematch/tc_em_nbyte.h (renamed from include/linux/tc_ematch/tc_em_nbyte.h)0
-rw-r--r--include/uapi/linux/tcp.h (renamed from include/linux/tcp.h)0
-rw-r--r--include/uapi/linux/tcp_metrics.h (renamed from include/linux/tcp_metrics.h)0
-rw-r--r--include/uapi/linux/tipc.h (renamed from include/linux/tipc.h)0
-rw-r--r--include/uapi/linux/tipc_netlink.h (renamed from include/linux/tipc_netlink.h)0
-rw-r--r--include/uapi/linux/types.h (renamed from include/linux/types.h)0
-rw-r--r--include/uapi/linux/unix_diag.h (renamed from include/linux/unix_diag.h)0
-rw-r--r--include/uapi/linux/veth.h (renamed from include/linux/veth.h)0
-rw-r--r--include/uapi/linux/xfrm.h (renamed from include/linux/xfrm.h)0
-rw-r--r--include/uapi/rdma/rdma_netlink.h (renamed from include/rdma/rdma_netlink.h)6
-rw-r--r--include/utils.h4
-rw-r--r--ip/ip6tunnel.c9
-rw-r--r--ip/ipl2tp.c4
-rw-r--r--ip/iplink.c31
-rw-r--r--ip/ipmaddr.c3
-rw-r--r--ip/iproute.c4
-rw-r--r--ip/iprule.c10
-rw-r--r--ip/iptunnel.c29
-rw-r--r--ip/iptuntap.c6
-rw-r--r--ip/xfrm_state.c2
-rw-r--r--lib/color.c17
-rw-r--r--lib/utils.c46
-rw-r--r--misc/arpd.c3
-rw-r--r--misc/ss.c21
-rw-r--r--tc/f_flower.c7
-rw-r--r--tc/f_u32.c3
-rw-r--r--tc/q_netem.c12
-rwxr-xr-xtestsuite/tests/ip/link/new_link.t2
-rwxr-xr-xtestsuite/tests/ip/link/show_dev_wo_vf_rate.t2
-rwxr-xr-xtestsuite/tests/ip/netns/set_nsid.t2
-rwxr-xr-xtestsuite/tests/ip/netns/set_nsid_batch.t2
-rwxr-xr-xtestsuite/tests/ip/route/add_default_route.t2
-rwxr-xr-xtestsuite/tests/ip/tunnel/add_tunnel.t2
-rwxr-xr-xtestsuite/tests/tc/cls-testbed.t2
-rwxr-xr-xtestsuite/tests/tc/dsmark.t2
-rwxr-xr-xtestsuite/tests/tc/pedit.t2
-rw-r--r--tipc/Makefile2
156 files changed, 188 insertions, 5900 deletions
diff --git a/.gitignore b/.gitignore
index 308aec6b..f8c3dfca 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,5 @@
+# locally generated
+Config
static-syms.h
config.*
*.o
diff --git a/Makefile b/Makefile
index 7a691dea..6ad96104 100644
--- a/Makefile
+++ b/Makefile
@@ -46,7 +46,7 @@ CCOPTS = -O2
WFLAGS := -Wall -Wstrict-prototypes -Wmissing-prototypes
WFLAGS += -Wmissing-declarations -Wold-style-definition -Wformat=2
-CFLAGS := $(WFLAGS) $(CCOPTS) -I../include $(DEFINES) $(CFLAGS)
+CFLAGS := $(WFLAGS) $(CCOPTS) -I../include -I../include/uapi $(DEFINES) $(CFLAGS)
YACCFLAGS = -d -t -v
SUBDIRS=lib ip tc bridge misc netem genl tipc devlink rdma man
@@ -73,7 +73,7 @@ install: all
$(DESTDIR)$(DOCDIR)/examples
install -m 0644 $(shell find examples/diffserv -maxdepth 1 -type f) \
$(DESTDIR)$(DOCDIR)/examples/diffserv
- @for i in $(SUBDIRS) doc; do $(MAKE) -C $$i install; done
+ @for i in $(SUBDIRS); do $(MAKE) -C $$i install; done
install -m 0644 $(shell find etc/iproute2 -maxdepth 1 -type f) $(DESTDIR)$(CONFDIR)
install -m 0755 -d $(DESTDIR)$(BASH_COMPDIR)
install -m 0644 bash-completion/tc $(DESTDIR)$(BASH_COMPDIR)
@@ -84,7 +84,7 @@ snapshot:
> include/SNAPSHOT.h
clean:
- @for i in $(SUBDIRS) doc; \
+ @for i in $(SUBDIRS); \
do $(MAKE) $(MFLAGS) -C $$i clean; done
clobber:
diff --git a/configure b/configure
index 7be8fb11..f0668ab3 100755
--- a/configure
+++ b/configure
@@ -326,6 +326,27 @@ EOF
rm -f $TMPDIR/dbtest.c $TMPDIR/dbtest
}
+check_strlcpy()
+{
+ cat >$TMPDIR/strtest.c <<EOF
+#include <string.h>
+int main(int argc, char **argv) {
+ char dst[10];
+ strlcpy(dst, "test", sizeof(dst));
+ return 0;
+}
+EOF
+ $CC -I$INCLUDE -o $TMPDIR/strtest $TMPDIR/strtest.c >/dev/null 2>&1
+ if [ $? -eq 0 ]
+ then
+ echo "no"
+ else
+ echo 'CFLAGS += -DNEED_STRLCPY' >>$CONFIG
+ echo "yes"
+ fi
+ rm -f $TMPDIR/strtest.c $TMPDIR/strtest
+}
+
quiet_config()
{
cat <<EOF
@@ -397,6 +418,9 @@ check_mnl
echo -n "Berkeley DB: "
check_berkeley_db
+echo -n "need for strlcpy: "
+check_strlcpy
+
echo
echo -n "docs:"
check_docs
diff --git a/doc/Makefile b/doc/Makefile
deleted file mode 100644
index 0c51872a..00000000
--- a/doc/Makefile
+++ /dev/null
@@ -1,73 +0,0 @@
-PSFILES=ip-cref.ps ip-tunnels.ps api-ip6-flowlabels.ps ss.ps nstat.ps arpd.ps rtstat.ps tc-filters.ps
-# tc-cref.ps
-# api-rtnl.tex api-pmtudisc.tex api-news.tex
-# iki-netdev.ps iki-neighdst.ps
-
-
-LATEX=latex
-DVIPS=dvips
-SGML2DVI=sgml2latex
-SGML2HTML=sgml2html -s 0
-LPR=lpr -Zsduplex
-SHELL=bash
-PAGESIZE=a4
-PAGESPERPAGE=2
-
-HTMLFILES=$(subst .sgml,.html,$(shell echo *.sgml))
-DVIFILES=$(subst .ps,.dvi,$(PSFILES))
-PDFFILES=$(subst .ps,.pdf,$(PSFILES))
-
-
-all: pstwocol
-
-pstwocol: $(PSFILES)
-
-html: $(HTMLFILES)
-
-dvi: $(DVIFILES)
-
-pdf: $(PDFFILES)
-
-print: $(PSFILES)
- $(LPR) $(PSFILES)
-
-%.tex: %.sgml
- $(SGML2DVI) --output=tex $<
-
-%.dvi: %.sgml
- $(SGML2DVI) --output=dvi $<
-
-%.dvi: %.tex
- @set -e; pass=2; echo "Running LaTeX $<"; \
- while [ `$(LATEX) $< </dev/null 2>&1 | \
- grep -c '^\(LaTeX Warning: Label(s) may\|No file \|! Emergency stop\)'` -ge 1 ]; do \
- if [ $$pass -gt 3 ]; then \
- echo "Seems, something is wrong. Try by hands." ; exit 1 ; \
- fi; \
- echo "Re-running LaTeX $<, $${pass}d pass"; pass=$$[$$pass + 1]; \
- done
-
-%.pdf: %.tex
- @set -e; pass=2; echo "Running pdfLaTeX $<"; \
- while [ `pdflatex $< </dev/null 2>&1 | \
- grep -c '^\(LaTeX Warning: Label(s) may\|No file \|! Emergency stop\)'` -ge 1 ]; do \
- if [ $$pass -gt 3 ]; then \
- echo "Seems, something is wrong. Try by hands." ; exit 1 ; \
- fi; \
- echo "Re-running pdfLaTeX $<, $${pass}d pass"; pass=$$[$$pass + 1]; \
- done
-#%.pdf: %.ps
-# ps2pdf $<
-
-%.ps: %.dvi
- $(DVIPS) $< -o $@
-
-%.html: %.sgml
- $(SGML2HTML) $<
-
-install:
- install -m 0644 $(shell echo *.tex) $(DESTDIR)$(DOCDIR)
- install -m 0644 $(shell echo *.sgml) $(DESTDIR)$(DOCDIR)
-
-clean:
- rm -f *.aux *.log *.toc $(PSFILES) $(DVIFILES) *.html *.pdf
diff --git a/doc/Plan b/doc/Plan
deleted file mode 100644
index 55f478ea..00000000
--- a/doc/Plan
+++ /dev/null
@@ -1,16 +0,0 @@
-Partially finished work.
-
-1. User Reference manuals.
-1.1 IP Command reference (ip-cref.tex, published)
-1.2 TC Command reference (tc-cref.tex)
-1.3 IP tunnels (ip-tunnels.tex, published)
-
-2. Linux-2.2 Networking API
-2.1 RTNETLINK (api-rtnl.tex)
-2.2 Path MTU Discovery (api-pmtudisc.tex)
-2.3 IPv6 Flow Labels (api-ip6-flowlabels.tex, published)
-2.4 Miscellaneous extensions (api-misc.tex)
-
-3. Linux-2.2 Networking Intra-Kernel Interfaces
-3.1 NetDev --- Networking Devices and netdev... (iki-netdev.tex)
-3.2 Neighbour cache and destination cache. (iki-neighdst.tex)
diff --git a/doc/SNAPSHOT.tex b/doc/SNAPSHOT.tex
deleted file mode 100644
index 7ed02984..00000000
--- a/doc/SNAPSHOT.tex
+++ /dev/null
@@ -1 +0,0 @@
-\def\Draft{020116}
diff --git a/doc/api-ip6-flowlabels.tex b/doc/api-ip6-flowlabels.tex
deleted file mode 100644
index aa34e947..00000000
--- a/doc/api-ip6-flowlabels.tex
+++ /dev/null
@@ -1,429 +0,0 @@
-\documentstyle[12pt,twoside]{article}
-\def\TITLE{IPv6 Flow Labels}
-\input preamble
-\begin{center}
-\Large\bf IPv6 Flow Labels in Linux-2.2.
-\end{center}
-
-
-\begin{center}
-{ \large Alexey~N.~Kuznetsov } \\
-\em Institute for Nuclear Research, Moscow \\
-\verb|kuznet@ms2.inr.ac.ru| \\
-\rm April 11, 1999
-\end{center}
-
-\vspace{5mm}
-
-\tableofcontents
-
-\section{Introduction.}
-
-Every IPv6 packet carries 28 bits of flow information. RFC2460 splits
-these bits to two fields: 8 bits of traffic class (or DS field, if you
-prefer this term) and 20 bits of flow label. Currently there exists
-no well-defined API to manage IPv6 flow information. In this document
-I describe an attempt to design the API for Linux-2.2 IPv6 stack.
-
-\vskip 1mm
-
-The API must solve the following tasks:
-
-\begin{enumerate}
-
-\item To allow user to set traffic class bits.
-
-\item To allow user to read traffic class bits of received packets.
-This feature is not so useful as the first one, however it will be
-necessary f.e.\ to implement ECN [RFC2481] for datagram oriented services
-or to implement receiver side of SRP or another end-to-end protocol
-using traffic class bits.
-
-\item To assign flow labels to packets sent by user.
-
-\item To get flow labels of received packets. I do not know
-any applications of this feature, but it is possible that receiver will
-want to use flow labels to distinguish sub-flows.
-
-\item To allocate flow labels in the way, compliant to RFC2460. Namely:
-
-\begin{itemize}
-\item
-Flow labels must be uniformly distributed (pseudo-)random numbers,
-so that any subset of 20 bits can be used as hash key.
-
-\item
-Flows with coinciding source address and flow label must have identical
-destination address and not-fragmentable extensions headers (i.e.\
-hop by hop options and all the headers up to and including routing header,
-if it is present.)
-
-\begin{NB}
-There is a hole in specs: some hop-by-hop options can be
-defined only on per-packet base (f.e.\ jumbo payload option).
-Essentially, it means that such options cannot be present in packets
-with flow labels.
-\end{NB}
-\begin{NB}
-NB notes here and below reflect only my personal opinion,
-they should be read with smile or should not be read at all :-).
-\end{NB}
-
-
-\item
-Flow labels have finite lifetime and source is not allowed to reuse
-flow label for another flow until the maximal lifetime has expired,
-so that intermediate nodes will be able to invalidate flow state before
-the label is taken over by another flow.
-Flow state, including lifetime, is propagated along datagram path
-by some application specific methods
-(f.e.\ in RSVP PATH messages or in some hop-by-hop option).
-
-
-\end{itemize}
-
-\end{enumerate}
-
-\section{Sending/receiving flow information.}
-
-\paragraph{Discussion.}
-\addcontentsline{toc}{subsection}{Discussion}
-It was proposed (Where? I do not remember any explicit statement)
-to solve the first four tasks using
-\verb|sin6_flowinfo| field added to \verb|struct| \verb|sockaddr_in6|
-(see RFC2553).
-
-\begin{NB}
- This method is difficult to consider as reasonable, because it
- puts additional overhead to all the services, despite of only
- very small subset of them (none, to be more exact) really use it.
- It contradicts both to IETF spirit and the letter. Before RFC2553
- one justification existed, IPv6 address alignment left 4 byte
- hole in \verb|sockaddr_in6| in any case. Now it has no justification.
-\end{NB}
-
-We have two problems with this method. The first one is common for all OSes:
-if \verb|recvmsg()| initializes \verb|sin6_flowinfo| to flow info
-of received packet, we lose one very important property of BSD socket API,
-namely, we are not allowed to use received address for reply directly
-and have to mangle it, even if we are not interested in flowinfo subtleties.
-
-\begin{NB}
- RFC2553 adds new requirement: to clear \verb|sin6_flowinfo|.
- Certainly, it is not solution but rather attempt to force applications
- to make unnecessary work. Well, as usually, one mistake in design
- is followed by attempts to patch the hole and more mistakes...
-\end{NB}
-
-Another problem is Linux specific. Historically Linux IPv6 did not
-initialize \verb|sin6_flowinfo| at all, so that, if kernel does not
-support flow labels, this field is not zero, but a random number.
-Some applications also did not take care about it.
-
-\begin{NB}
-Following RFC2553 such applications can be considered as broken,
-but I still think that they are right: clearing all the address
-before filling known fields is robust but stupid solution.
-Useless wasting CPU cycles and
-memory bandwidth is not a good idea. Such patches are acceptable
-as temporary hacks, but not as standard of the future.
-\end{NB}
-
-
-\paragraph{Implementation.}
-\addcontentsline{toc}{subsection}{Implementation}
-By default Linux IPv6 does not read \verb|sin6_flowinfo| field
-assuming that common applications are not obliged to initialize it
-and are permitted to consider it as pure alignment padding.
-In order to tell kernel that application
-is aware of this field, it is necessary to set socket option
-\verb|IPV6_FLOWINFO_SEND|.
-
-\begin{verbatim}
- int on = 1;
- setsockopt(sock, SOL_IPV6, IPV6_FLOWINFO_SEND,
- (void*)&on, sizeof(on));
-\end{verbatim}
-
-Linux kernel never fills \verb|sin6_flowinfo| field, when passing
-message to user space, though the kernels which support flow labels
-initialize it to zero. If user wants to get received flowinfo, he
-will set option \verb|IPV6_FLOWINFO| and after this he will receive
-flowinfo as ancillary data object of type \verb|IPV6_FLOWINFO|
-(cf.\ RFC2292).
-
-\begin{verbatim}
- int on = 1;
- setsockopt(sock, SOL_IPV6, IPV6_FLOWINFO, (void*)&on, sizeof(on));
-\end{verbatim}
-
-Flowinfo received and latched by a connected TCP socket also may be fetched
-with \verb|getsockopt()| \verb|IPV6_PKTOPTIONS| together with
-another optional information.
-
-Besides that, in the spirit of RFC2292 the option \verb|IPV6_FLOWINFO|
-may be used as alternative way to send flowinfo with \verb|sendmsg()| or
-to latch it with \verb|IPV6_PKTOPTIONS|.
-
-\paragraph{Note about IPv6 options and destination address.}
-\addcontentsline{toc}{subsection}{IPv6 options and destination address}
-If \verb|sin6_flowinfo| contains a nonzero flow label,
-destination address in \verb|sin6_addr| and non-fragmentable
-extension headers are ignored. Instead, kernel uses the values
-cached at flow setup (see below). However, for connected sockets
-kernel prefers the values set at connection time.
-
-\paragraph{Example.}
-\addcontentsline{toc}{subsection}{Example}
-After setting socket option \verb|IPV6_FLOWINFO|
-flowlabel and DS field are received as ancillary data object
-of type \verb|IPV6_FLOWINFO| and level \verb|SOL_IPV6|.
-In the cases when it is convenient to use \verb|recvfrom(2)|,
-it is possible to replace library variant with your own one,
-sort of:
-
-\begin{verbatim}
-#include <sys/socket.h>
-#include <netinet/in6.h>
-
-size_t recvfrom(int fd, char *buf, size_t len, int flags,
- struct sockaddr *addr, int *addrlen)
-{
- size_t cc;
- char cbuf[128];
- struct cmsghdr *c;
- struct iovec iov = { buf, len };
- struct msghdr msg = { addr, *addrlen,
- &iov, 1,
- cbuf, sizeof(cbuf),
- 0 };
-
- cc = recvmsg(fd, &msg, flags);
- if (cc < 0)
- return cc;
- ((struct sockaddr_in6*)addr)->sin6_flowinfo = 0;
- *addrlen = msg.msg_namelen;
- for (c=CMSG_FIRSTHDR(&msg); c; c = CMSG_NEXTHDR(&msg, c)) {
- if (c->cmsg_level != SOL_IPV6 ||
- c->cmsg_type != IPV6_FLOWINFO)
- continue;
- ((struct sockaddr_in6*)addr)->sin6_flowinfo = *(__u32*)CMSG_DATA(c);
- }
- return cc;
-}
-\end{verbatim}
-
-
-
-\section{Flow label management.}
-
-\paragraph{Discussion.}
-\addcontentsline{toc}{subsection}{Discussion}
-Requirements of RFC2460 are pretty tough. Particularly, lifetimes
-longer than boot time require to store allocated labels at stable
-storage, so that the full implementation necessarily includes user space flow
-label manager. There are at least three different approaches:
-
-\begin{enumerate}
-\item {\bf ``Cooperative''. } We could leave flow label allocation wholly
-to user space. When user needs label he requests manager directly. The approach
-is valid, but as any ``cooperative'' approach it suffers of security problems.
-
-\begin{NB}
-One idea is to disallow not privileged user to allocate flow
-labels, but instead to pass the socket to manager via \verb|SCM_RIGHTS|
-control message, so that it will allocate label and assign it to socket
-itself. Hmm... the idea is interesting.
-\end{NB}
-
-\item {\bf ``Indirect''.} Kernel redirects requests to user level daemon
-and does not install label until the daemon acknowledged the request.
-The approach is the most promising, it is especially pleasant to recognize
-parallel with IPsec API [RFC2367,Craig]. Actually, it may share API with
-IPsec.
-
-\item {\bf ``Stupid''.} To allocate labels in kernel space. It is the simplest
-method, but it suffers of two serious flaws: the first,
-we cannot lease labels with lifetimes longer than boot time, the second,
-it is sensitive to DoS attacks. The kernel has to remember all the obsolete
-labels until their expiration and a malicious user may quickly exhaust all the
-flow label space.
-
-\end{enumerate}
-
-Certainly, I choose the most ``stupid'' method. It is the cheapest one
-for implementor (i.e.\ me), and taking into account that flow labels
-still have no serious applications it is not useful to work on more
-advanced API, especially, taking into account that eventually we
-will get it for no fee together with IPsec.
-
-
-\paragraph{Implementation.}
-\addcontentsline{toc}{subsection}{Implementation}
-Socket option \verb|IPV6_FLOWLABEL_MGR| allows to
-request flow label manager to allocate new flow label, to reuse
-already allocated one or to delete old flow label.
-Its argument is \verb|struct| \verb|in6_flowlabel_req|:
-
-\begin{verbatim}
-struct in6_flowlabel_req
-{
- struct in6_addr flr_dst;
- __u32 flr_label;
- __u8 flr_action;
- __u8 flr_share;
- __u16 flr_flags;
- __u16 flr_expires;
- __u16 flr_linger;
- __u32 __flr_reserved;
- /* Options in format of IPV6_PKTOPTIONS */
-};
-\end{verbatim}
-
-\begin{itemize}
-
-\item \verb|dst| is IPv6 destination address associated with the label.
-
-\item \verb|label| is flow label value in network byte order. If it is zero,
-kernel will allocate new pseudo-random number. Otherwise, kernel will try
-to lease flow label ordered by user. In this case, it is user task to provide
-necessary flow label randomness.
-
-\item \verb|action| is requested operation. Currently, only three operations
-are defined:
-
-\begin{verbatim}
-#define IPV6_FL_A_GET 0 /* Get flow label */
-#define IPV6_FL_A_PUT 1 /* Release flow label */
-#define IPV6_FL_A_RENEW 2 /* Update expire time */
-\end{verbatim}
-
-\item \verb|flags| are optional modifiers. Currently
-only \verb|IPV6_FL_A_GET| has modifiers:
-
-\begin{verbatim}
-#define IPV6_FL_F_CREATE 1 /* Allowed to create new label */
-#define IPV6_FL_F_EXCL 2 /* Do not create new label */
-\end{verbatim}
-
-
-\item \verb|share| defines who is allowed to reuse the same flow label.
-
-\begin{verbatim}
-#define IPV6_FL_S_NONE 0 /* Not defined */
-#define IPV6_FL_S_EXCL 1 /* Label is private */
-#define IPV6_FL_S_PROCESS 2 /* May be reused by this process */
-#define IPV6_FL_S_USER 3 /* May be reused by this user */
-#define IPV6_FL_S_ANY 255 /* Anyone may reuse it */
-\end{verbatim}
-
-\item \verb|linger| is time in seconds. After the last user releases flow
-label, it will not be reused with different destination and options at least
-during this time. If \verb|share| is not \verb|IPV6_FL_S_EXCL| the label
-still can be shared by another sockets. Current implementation does not allow
-unprivileged user to set linger longer than 60 sec.
-
-\item \verb|expires| is time in seconds. Flow label will be kept at least
-for this time, but it will not be destroyed before user released it explicitly
-or closed all the sockets using it. Current implementation does not allow
-unprivileged user to set timeout longer than 60 sec. Privileged applications
-MAY set longer lifetimes, but in this case they MUST save allocated
-labels at stable storage and restore them back after reboot before the first
-application allocates new flow.
-
-\end{itemize}
-
-This structure is followed by optional extension headers associated
-with this flow label in format of \verb|IPV6_PKTOPTIONS|. Only
-\verb|IPV6_HOPOPTS|, \verb|IPV6_RTHDR| and, if \verb|IPV6_RTHDR| is present,
-\verb|IPV6_DSTOPTS| are allowed.
-
-\paragraph{Example.}
-\addcontentsline{toc}{subsection}{Example}
- The function \verb|get_flow_label| allocates
-private flow label.
-
-\begin{verbatim}
-int get_flow_label(int fd, struct sockaddr_in6 *dst, __u32 fl)
-{
- int on = 1;
- struct in6_flowlabel_req freq;
-
- memset(&freq, 0, sizeof(freq));
- freq.flr_label = htonl(fl);
- freq.flr_action = IPV6_FL_A_GET;
- freq.flr_flags = IPV6_FL_F_CREATE | IPV6_FL_F_EXCL;
- freq.flr_share = IPV6_FL_S_EXCL;
- memcpy(&freq.flr_dst, &dst->sin6_addr, 16);
- if (setsockopt(fd, SOL_IPV6, IPV6_FLOWLABEL_MGR,
- &freq, sizeof(freq)) == -1) {
- perror ("can't lease flowlabel");
- return -1;
- }
- dst->sin6_flowinfo |= freq.flr_label;
-
- if (setsockopt(fd, SOL_IPV6, IPV6_FLOWINFO_SEND,
- &on, sizeof(on)) == -1) {
- perror ("can't send flowinfo");
-
- freq.flr_action = IPV6_FL_A_PUT;
- setsockopt(fd, SOL_IPV6, IPV6_FLOWLABEL_MGR,
- &freq, sizeof(freq));
- return -1;
- }
- return 0;
-}
-\end{verbatim}
-
-A bit more complicated example using routing header can be found
-in \verb|ping6| utility (\verb|iputils| package). Linux rsvpd backend
-contains an example of using operation \verb|IPV6_FL_A_RENEW|.
-
-\paragraph{Listing flow labels.}
-\addcontentsline{toc}{subsection}{Listing flow labels}
-List of currently allocated
-flow labels may be read from \verb|/proc/net/ip6_flowlabel|.
-
-\begin{verbatim}
-Label S Owner Users Linger Expires Dst Opt
-A1BE5 1 0 0 6 3 3ffe2400000000010a0020fffe71fb30 0
-\end{verbatim}
-
-\begin{itemize}
-\item \verb|Label| is hexadecimal flow label value.
-\item \verb|S| is sharing style.
-\item \verb|Owner| is ID of creator, it is zero, pid or uid, depending on
- sharing style.
-\item \verb|Users| is number of applications using the label now.
-\item \verb|Linger| is \verb|linger| of this label in seconds.
-\item \verb|Expires| is time until expiration of the label in seconds. It may
- be negative, if the label is in use.
-\item \verb|Dst| is IPv6 destination address.
-\item \verb|Opt| is length of options, associated with the label. Option
- data are not accessible.
-\end{itemize}
-
-
-\paragraph{Flow labels and RSVP.}
-\addcontentsline{toc}{subsection}{Flow labels and RSVP}
-RSVP daemon supports IPv6 flow labels
-without any modifications to standard ISI RAPI. Sender must allocate
-flow label, fill corresponding sender template and submit it to local rsvp
-daemon. rsvpd will check the label and start to announce it in PATH
-messages. Rsvpd on sender node will renew the flow label, so that it will not
-be reused before path state expires and all the intermediate
-routers and receiver purge flow state.
-
-\verb|rtap| utility is modified to parse flow labels. F.e.\ if user allocated
-flow label \verb|0xA1234|, he may write:
-
-\begin{verbatim}
-RTAP> sender 3ffe:2400::1/FL0xA1234 <Tspec>
-\end{verbatim}
-
-Receiver makes reservation with command:
-\begin{verbatim}
-RTAP> reserve ff 3ffe:2400::1/FL0xA1234 <Flowspec>
-\end{verbatim}
-
-\end{document}
diff --git a/doc/arpd.sgml b/doc/arpd.sgml
deleted file mode 100644
index 0ab79c60..00000000
--- a/doc/arpd.sgml
+++ /dev/null
@@ -1,130 +0,0 @@
-<!doctype linuxdoc system>
-
-<article>
-
-<title>ARPD Daemon
-<author>Alexey Kuznetsov, <tt/kuznet@ms2.inr.ac.ru/
-<date>some_negative_number, 20 Sep 2001
-<abstract>
-<tt/arpd/ is a daemon collecting gratuitous ARP information, saving
-it on local disk and feeding it to kernel on demand to avoid
-redundant broadcasting due to limited size of kernel ARP cache.
-</abstract>
-
-
-<p><bf/Description/
-
-<p>The format of the command is:
-
-<tscreen><verb>
- arpd OPTIONS [ INTERFACE [ INTERFACE ... ] ]
-</verb></tscreen>
-
-<p> <tt/OPTIONS/ are:
-
-<itemize>
-
-<item><tt/-l/ - dump <tt/arpd/ database to stdout and exit. Output consists
-of three columns: interface index, IP address and MAC address.
-Negative entries for dead hosts are also shown, in this case MAC address
-is replaced by word <tt/FAILED/ followed by colon and time when the fact
-that host is dead was proven the last time.
-
-<item><tt/-f FILE/ - read and load <tt/arpd/ database from <tt/FILE/
-in a text format similar to that dumped by option <tt/-l/. Exit after load,
-probably listing resulting database, if option <tt/-l/ is also given.
-If <tt/FILE/ is <tt/-/, <tt/stdin/ is read to get ARP table.
-
-<item><tt/-b DATABASE/ - location of database file. Default location is
-<tt>/var/lib/arpd/arpd.db</tt>.
-
-<item><tt/-a NUMBER/ - <tt/arpd/ not only passively listens to ARP on the wire, but
-also sends broadcast queries itself. <tt/NUMBER/ is number of such queries
-to make before destination is considered as dead. When <tt/arpd/ is started
-as kernel helper (i.e. with <tt/app_solicit/ enabled in <tt/sysctl/
-or even with option <tt/-k/) without this option and still did not learn enough
-information, you can observe 1 second gaps in service. Not fatal, but
-not good.
-
-<item><tt/-k/ - suppress sending broadcast queries by kernel. It makes
-sense together with option <tt/-a/.
-
-<item><tt/-n TIME/ - timeout of negative cache. When resolution fails <tt/arpd/
-suppresses further attempts to resolve for this period. It makes sense
-only together with option <tt/-k/. This timeout should not be too much
-longer than boot time of a typical host not supporting gratuitous ARP.
-Default value is 60 seconds.
-
-<item><tt/-R RATE/ - maximal steady rate of broadcasts sent by <tt/arpd/
-in packets per second. Default value is 1.
-
-<item><tt/-B NUMBER/ - number of broadcasts sent by <tt/arpd/ back to back.
-Default value is 3. Together with option <tt/-R/ this option allows
-to police broadcasting not to exceed <tt/B+R*T/ over any interval
-of time <tt/T/.
-
-</itemize>
-
-<p><tt/INTERFACE/ is name of networking interface to watch.
-If no interfaces given, <tt/arpd/ monitors all the interfaces.
-In this case <tt/arpd/ does not adjust <tt/sysctl/ parameters,
-it is supposed user does this himself after <tt/arpd/ is started.
-
-
-<p> Signals
-
-<p> <tt/arpd/ exits gracefully syncing database and restoring adjusted
-<tt/sysctl/ parameters, when receives <tt/SIGINT/ or <tt/SIGTERM/.
-<tt/SIGHUP/ syncs database to disk. <tt/SIGUSR1/ sends some statistics
-to <tt/syslog/. The effect of other signals is undefined; they may corrupt
-database and leave <tt/sysctl/ parameters in an unpredictable state.
-
-<p> Note
-
-<p> In order for <tt/arpd/ to be able to serve as ARP resolver, kernel must be
-compiled with the option <tt/CONFIG_ARPD/ and, in the case when interface list
-is not given on command line, variable <tt/app_solicit/
-on interfaces of interest should be set in <tt>/proc/sys/net/ipv4/neigh/*</tt>.
-If this is not done, <tt/arpd/ still collects gratuitous ARP information
-in its database.
-
-<p> Examples
-
-<enum>
-<item> Start <tt/arpd/ to collect gratuitous ARP, but not messing
-with kernel functionality:
-
-<tscreen><verb>
- arpd -b /var/tmp/arpd.db
-</verb></tscreen>
-
-<item> Look at result after some time:
-
-<tscreen><verb>
- killall arpd
- arpd -l -b /var/tmp/arpd.db
-</verb></tscreen>
-
-<item> To enable kernel helper, leaving leading role to kernel:
-
-<tscreen><verb>
- arpd -b /var/tmp/arpd.db -a 1 eth0 eth1
-</verb></tscreen>
-
-<item> Completely replace kernel resolution on interfaces <tt/eth0/
-and <tt/eth1/. In this case kernel still does unicast probing to
-validate entries, but all the broadcast activity is suppressed
-and made under authority of <tt/arpd/:
-
-<tscreen><verb>
- arpd -b /var/tmp/arpd.db -a 3 -k eth0 eth1
-</verb></tscreen>
-
-This is the mode in which <tt/arpd/ is supposed to work normally.
-It is not the default, just to prevent accidental enabling of a too
-aggressive mode.
-
-</enum>
-
-</article>
-
diff --git a/doc/do-psnup b/doc/do-psnup
deleted file mode 100644
index 2dce848e..00000000
--- a/doc/do-psnup
+++ /dev/null
@@ -1,16 +0,0 @@
-#! /bin/bash
-# $1 = Temporary file . "string"
-# $2 = File to process . "string"
-# $3 = Page size . ie: a4 , letter ... "string"
-# $4 = Number of pages to fit on a single sheet . "numeric"
-
-if type psnup >&/dev/null; then
- echo "psnup -$4 -p$3 $1 $2"
- psnup -$4 -p$3 $1 $2
-elif type psmulti >&/dev/null; then
- echo "psmulti $1 > $2"
- psmulti $1 > $2
-else
- echo "cp $1 $2"
- cp $1 $2
-fi
diff --git a/doc/ip-cref.tex b/doc/ip-cref.tex
deleted file mode 100644
index 179baa2f..00000000
--- a/doc/ip-cref.tex
+++ /dev/null
@@ -1,3453 +0,0 @@
-\documentstyle[12pt,twoside]{article}
-\def\TITLE{IP Command Reference}
-\input preamble
-\begin{center}
-\Large\bf IP Command Reference.
-\end{center}
-
-
-\begin{center}
-{ \large Alexey~N.~Kuznetsov } \\
-\em Institute for Nuclear Research, Moscow \\
-\verb|kuznet@ms2.inr.ac.ru| \\
-\rm April 14, 1999
-\end{center}
-
-\vspace{5mm}
-
-\tableofcontents
-
-\newpage
-
-\section{About this document}
-
-This document presents a comprehensive description of the \verb|ip| utility
-from the \verb|iproute2| package. It is not a tutorial or user's guide.
-It is a {\em dictionary\/}, not explaining terms,
-but translating them into other terms, which may also be unknown to the reader.
-However, the document is self-contained and the reader, provided they have a
-basic networking background, will find enough information
-and examples to understand and configure Linux-2.2 IP and IPv6
-networking.
-
-This document is split into sections explaining \verb|ip| commands
-and options, decrypting \verb|ip| output and containing a few examples.
-More voluminous examples and some topics, which require more elaborate
-discussion, are in the appendix.
-
-The paragraphs beginning with NB contain side notes, warnings about
-bugs and design drawbacks. They may be skipped at the first reading.
-
-\section{{\tt ip} --- command syntax}
-
-The generic form of an \verb|ip| command is:
-\begin{verbatim}
-ip [ OPTIONS ] OBJECT [ COMMAND [ ARGUMENTS ]]
-\end{verbatim}
-where \verb|OPTIONS| is a set of optional modifiers affecting the
-general behaviour of the \verb|ip| utility or changing its output. All options
-begin with the character \verb|'-'| and may be used in either long or abbreviated
-forms. Currently, the following options are available:
-
-\begin{itemize}
-\item \verb|-V|, \verb|-Version|
-
---- print the version of the \verb|ip| utility and exit.
-
-
-\item \verb|-s|, \verb|-stats|, \verb|-statistics|
-
---- output more information. If the option
-appears twice or more, the amount of information increases.
-As a rule, the information is statistics or some time values.
-
-\item \verb|-d|, \verb|-details|
-
---- output more detailed information.
-
-\item \verb|-f|, \verb|-family| followed by a protocol family
-identifier: \verb|inet|, \verb|inet6| or \verb|link|.
-
---- enforce the protocol family to use. If the option is not present,
-the protocol family is guessed from other arguments. If the rest of the command
-line does not give enough information to guess the family, \verb|ip| falls back to the default
-one, usually \verb|inet| or \verb|any|. \verb|link| is a special family
-identifier meaning that no networking protocol is involved.
-
-\item \verb|-4|
-
---- shortcut for \verb|-family inet|.
-
-\item \verb|-6|
-
---- shortcut for \verb|-family inet6|.
-
-\item \verb|-0|
-
---- shortcut for \verb|-family link|.
-
-
-\item \verb|-o|, \verb|-oneline|
-
---- output each record on a single line, replacing line feeds
-with the \verb|'\'| character. This is convenient when you want to
-count records with \verb|wc| or to \verb|grep| the output. The trivial
-script \verb|rtpr| converts the output back into readable form.
-
-\item \verb|-r|, \verb|-resolve|
-
---- use the system's name resolver to print DNS names instead of
-host addresses.
-
-\begin{NB}
- Do not use this option when reporting bugs or asking for advice.
-\end{NB}
-\begin{NB}
- \verb|ip| never uses DNS to resolve names to addresses.
-\end{NB}
-
-\item \verb|-b|, \verb|-batch FILE|
-
---- read commands from provided file or standard input and invoke them.
-First failure will cause termination of \verb|ip|.
-In batch \verb|FILE| everything which begins with \verb|#| symbol is
-ignored and can be used for comments.
-\paragraph{Example:}
-\begin{verbatim}
-kuznet@kaiser $ cat /tmp/ip_batch.ip
-# This is a comment
-tuntap add mode tap tap1 # This is an another comment
-link set up dev tap1
-addr add 10.0.0.1/24 dev tap1
-kuznet@kaiser $ sudo ip -b /tmp/ip_batch.ip
-\end{verbatim}
-or from standard input:
-\begin{verbatim}
-kuznet@kaiser $ cat /tmp/ip_batch.ip | sudo ip -b -
-\end{verbatim}
-
-\item \verb|-force|
-
---- don't terminate ip on errors in batch mode.
-If there were any errors during execution of the commands,
-the application return code will be non zero.
-
-\item \verb|-l|, \verb|-loops COUNT|
-
---- specify maximum number of loops the 'ip addr flush' logic will attempt
-before giving up. The default is 10. Zero (0) means loop until all
-addresses are removed.
-
-\end{itemize}
-
-\verb|OBJECT| is the object to manage or to get information about.
-The object types currently understood by \verb|ip| are:
-
-\begin{itemize}
-\item \verb|link| --- network device
-\item \verb|address| --- protocol (IP or IPv6) address on a device
-\item \verb|neighbour| --- ARP or NDISC cache entry
-\item \verb|route| --- routing table entry
-\item \verb|rule| --- rule in routing policy database
-\item \verb|maddress| --- multicast address
-\item \verb|mroute| --- multicast routing cache entry
-\item \verb|tunnel| --- tunnel over IP
-\end{itemize}
-
-Again, the names of all objects may be written in full or
-abbreviated form, f.e.\ \verb|address| is abbreviated as \verb|addr|
-or just \verb|a|.
-
-\verb|COMMAND| specifies the action to perform on the object.
-The set of possible actions depends on the object type.
-As a rule, it is possible to \verb|add|, \verb|delete| and
-\verb|show| (or \verb|list|) objects, but some objects
-do not allow all of these operations or have some additional commands.
-The \verb|help| command is available for all objects. It prints
-out a list of available commands and argument syntax conventions.
-
-If no command is given, some default command is assumed.
-Usually it is \verb|list| or, if the objects of this class
-cannot be listed, \verb|help|.
-
-\verb|ARGUMENTS| is a list of arguments to the command.
-The arguments depend on the command and object. There are two types of arguments:
-{\em flags\/}, consisting of a single keyword, and {\em parameters\/},
-consisting of a keyword followed by a value. For convenience,
-each command has some {\em default parameter\/}
-which may be omitted. F.e.\ parameter \verb|dev| is the default
-for the {\tt ip link} command, so {\tt ip link ls eth0} is equivalent
-to {\tt ip link ls dev eth0}.
-In the command descriptions below such parameters
-are distinguished with the marker: ``(default)''.
-
-Almost all keywords may be abbreviated with several first (or even single)
-letters. The shortcuts are convenient when \verb|ip| is used interactively,
-but they are not recommended in scripts or when reporting bugs
-or asking for advice. ``Officially'' allowed abbreviations are listed
-in the document body.
-
-
-
-\section{{\tt ip} --- error messages}
-
-\verb|ip| may fail for one of the following reasons:
-
-\begin{itemize}
-\item
-A syntax error on the command line: an unknown keyword, incorrectly formatted
-IP address {\em et al\/}. In this case \verb|ip| prints an error message
-and exits. As a rule, the error message will contain information
-about the reason for the failure. Sometimes it also prints a help page.
-
-\item
-The arguments did not pass verification for self-consistency.
-
-\item
-\verb|ip| failed to compile a kernel request from the arguments
-because the user didn't give enough information.
-
-\item
-The kernel returned an error to some syscall. In this case \verb|ip|
-prints the error message, as it is output with \verb|perror(3)|,
-prefixed with a comment and a syscall identifier.
-
-\item
-The kernel returned an error to some RTNETLINK request.
-In this case \verb|ip| prints the error message, as it is output
-with \verb|perror(3)| prefixed with ``RTNETLINK answers:''.
-
-\end{itemize}
-
-All the operations are atomic, i.e.\
-if the \verb|ip| utility fails, it does not change anything
-in the system. One harmful exception is \verb|ip link| command
-(Sec.\ref{IP-LINK}, p.\pageref{IP-LINK}),
-which may change only some of the device parameters given
-on command line.
-
-It is difficult to list all the error messages (especially
-syntax errors). However, as a rule, their meaning is clear
-from the context of the command.
-
-The most common mistakes are:
-
-\begin{enumerate}
-\item Netlink is not configured in the kernel. The message is:
-\begin{verbatim}
-Cannot open netlink socket: Invalid value
-\end{verbatim}
-
-\item RTNETLINK is not configured in the kernel. In this case
-one of the following messages may be printed, depending on the command:
-\begin{verbatim}
-Cannot talk to rtnetlink: Connection refused
-Cannot send dump request: Connection refused
-\end{verbatim}
-
-\item The \verb|CONFIG_IP_MULTIPLE_TABLES| option was not selected
-when configuring the kernel. In this case any attempt to use the
-\verb|ip| \verb|rule| command will fail, f.e.
-\begin{verbatim}
-kuznet@kaiser $ ip rule list
-RTNETLINK error: Invalid argument
-dump terminated
-\end{verbatim}
-
-\end{enumerate}
-
-
-\section{{\tt ip link} --- network device configuration}
-\label{IP-LINK}
-
-\paragraph{Object:} A \verb|link| is a network device and the corresponding
-commands display and change the state of devices.
-
-\paragraph{Commands:} \verb|set| and \verb|show| (or \verb|list|).
-
-\subsection{{\tt ip link set} --- change device attributes}
-
-\paragraph{Abbreviations:} \verb|set|, \verb|s|.
-
-\paragraph{Arguments:}
-
-\begin{itemize}
-\item \verb|dev NAME| (default)
-
---- \verb|NAME| specifies the network device on which to operate.
-
-\item \verb|up| and \verb|down|
-
---- change the state of the device to \verb|UP| or \verb|DOWN|.
-
-\item \verb|arp on| or \verb|arp off|
-
---- change the \verb|NOARP| flag on the device.
-
-\begin{NB}
-This operation is {\em not allowed\/} if the device is in state \verb|UP|.
-However, neither the \verb|ip| utility nor the kernel checks for this condition.
-You can get unpredictable results changing this flag while the
-device is running.
-\end{NB}
-
-\item \verb|multicast on| or \verb|multicast off|
-
---- change the \verb|MULTICAST| flag on the device.
-
-\item \verb|dynamic on| or \verb|dynamic off|
-
---- change the \verb|DYNAMIC| flag on the device.
-
-\item \verb|name NAME|
-
---- change the name of the device. This operation is not
-recommended if the device is running or has some addresses
-already configured.
-
-\item \verb|txqueuelen NUMBER| or \verb|txqlen NUMBER|
-
---- change the transmit queue length of the device.
-
-\item \verb|mtu NUMBER|
-
---- change the MTU of the device.
-
-\item \verb|address LLADDRESS|
-
---- change the station address of the interface.
-
-\item \verb|broadcast LLADDRESS|, \verb|brd LLADDRESS| or \verb|peer LLADDRESS|
-
---- change the link layer broadcast address or the peer address when
-the interface is \verb|POINTOPOINT|.
-
-\vskip 1mm
-\begin{NB}
-For most devices (f.e.\ for Ethernet) changing the link layer
-broadcast address will break networking.
-Do not use it, if you do not understand what this operation really does.
-\end{NB}
-
-\item \verb|netns PID|
-
---- move the device to the network namespace associated with the process PID.
-
-\end{itemize}
-
-\vskip 1mm
-\begin{NB}
-The \verb|PROMISC| and \verb|ALLMULTI| flags are considered
-obsolete and should not be changed administratively, though
-the {\tt ip} utility will allow that.
-\end{NB}
-
-\paragraph{Warning:} If multiple parameter changes are requested,
-\verb|ip| aborts immediately after any of the changes has failed.
-This is the only case when \verb|ip| can move the system to
-an unpredictable state. The solution is to avoid changing
-several parameters with one {\tt ip link set} call.
-
-\paragraph{Examples:}
-\begin{itemize}
-\item \verb|ip link set dummy address 00:00:00:00:00:01|
-
---- change the station address of the interface \verb|dummy|.
-
-\item \verb|ip link set dummy up|
-
---- start the interface \verb|dummy|.
-
-\end{itemize}
-
-
-\subsection{{\tt ip link show} --- display device attributes}
-\label{IP-LINK-SHOW}
-
-\paragraph{Abbreviations:} \verb|show|, \verb|list|, \verb|lst|, \verb|sh|, \verb|ls|,
-\verb|l|.
-
-\paragraph{Arguments:}
-\begin{itemize}
-\item \verb|dev NAME| (default)
-
---- \verb|NAME| specifies the network device to show.
-If this argument is omitted all devices are listed.
-
-\item \verb|up|
-
---- only display running interfaces.
-
-\end{itemize}
-
-
-\paragraph{Output format:}
-
-\begin{verbatim}
-kuznet@alisa:~ $ ip link ls eth0
-3: eth0: <BROADCAST,MULTICAST,UP> mtu 1500 qdisc cbq qlen 100
- link/ether 00:a0:cc:66:18:78 brd ff:ff:ff:ff:ff:ff
-kuznet@alisa:~ $ ip link ls sit0
-5: sit0@NONE: <NOARP,UP> mtu 1480 qdisc noqueue
- link/sit 0.0.0.0 brd 0.0.0.0
-kuznet@alisa:~ $ ip link ls dummy
-2: dummy: <BROADCAST,NOARP> mtu 1500 qdisc noop
- link/ether 00:00:00:00:00:00 brd ff:ff:ff:ff:ff:ff
-kuznet@alisa:~ $
-\end{verbatim}
-
-
-The number before each colon is an {\em interface index\/} or {\em ifindex\/}.
-This number uniquely identifies the interface. This is followed by the {\em interface name\/}
-(\verb|eth0|, \verb|sit0| etc.). The interface name is also
-unique at every given moment. However, the interface may disappear from the
-list (f.e.\ when the corresponding driver module is unloaded) and another
-one with the same name may be created later. Besides that,
-the administrator may change the name of any device with
-\verb|ip| \verb|link| \verb|set| \verb|name|
-to make it more intelligible.
-
-The interface name may have another name or \verb|NONE| appended
-after the \verb|@| sign. This means that this device is bound to some other
-device,
-i.e.\ packets sent through it are encapsulated and sent via the ``master''
-device. If the name is \verb|NONE|, the master is unknown.
-
-Then we see the interface {\em mtu\/} (``maximal transfer unit''). This determines
-the maximal size of data which can be sent as a single packet over this interface.
-
-{\em qdisc\/} (``queuing discipline'') shows the queuing algorithm used
-on the interface. Particularly, \verb|noqueue| means that this interface
-does not queue anything and \verb|noop| means that the interface is in blackhole
-mode i.e.\ all packets sent to it are immediately discarded.
-{\em qlen\/} is the default transmit queue length of the device measured
-in packets.
-
-The interface flags are summarized in the angle brackets.
-
-\begin{itemize}
-\item \verb|UP| --- the device is turned on. It is ready to accept
-packets for transmission and it may inject into the kernel packets received
-from other nodes on the network.
-
-\item \verb|LOOPBACK| --- the interface does not communicate with other
-hosts. All packets sent through it will be returned
-and nothing but bounced packets can be received.
-
-\item \verb|BROADCAST| --- the device has the facility to send packets
-to all hosts sharing the same link. A typical example is an Ethernet link.
-
-\item \verb|POINTOPOINT| --- the link has only two ends with one node
-attached to each end. All packets sent to this link will reach the peer
-and all packets received by us came from this single peer.
-
-If neither \verb|LOOPBACK| nor \verb|BROADCAST| nor \verb|POINTOPOINT|
-are set, the interface is assumed to be NBMA (Non-Broadcast Multi-Access).
-This is the most generic type of device and the most complicated one, because
-the host attached to a NBMA link has no means to send to anyone
-without additionally configured information.
-
-\item \verb|MULTICAST| --- is an advisory flag indicating that the interface
-is aware of multicasting i.e.\ sending packets to some subset of neighbouring
-nodes. Broadcasting is a particular case of multicasting, where the multicast
-group consists of all nodes on the link. It is important to emphasize
-that software {\em must not\/} interpret the absence of this flag as the inability
-to use multicasting on this interface. Any \verb|POINTOPOINT| and
-\verb|BROADCAST| link is multicasting by definition, because we have
-direct access to all the neighbours and, hence, to any part of them.
-Certainly, the use of high bandwidth multicast transfers is not recommended
-on broadcast-only links because of high expense, but it is not strictly
-prohibited.
-
-\item \verb|PROMISC| --- the device listens to and feeds to the kernel all
-traffic on the link even if it is not destined for us, not broadcasted
-and not destined for a multicast group of which we are a member. Usually
-this mode exists only on broadcast links and is used by bridges and for network
-monitoring.
-
-\item \verb|ALLMULTI| --- the device receives all multicast packets
-wandering on the link. This mode is used by multicast routers.
-
-\item \verb|NOARP| --- this flag is different from the other ones. It has
-no invariant value and its interpretation depends on the network protocols
-involved. As a rule, it indicates that the device needs no address
-resolution and that the software or hardware knows how to deliver packets
-without any help from the protocol stacks.
-
-\item \verb|DYNAMIC| --- is an advisory flag indicating that the interface is
-dynamically created and destroyed.
-
-\item \verb|SLAVE| --- this interface is bonded to some other interfaces
-to share link capacities.
-
-\end{itemize}
-
-\vskip 1mm
-\begin{NB}
-There are other flags but they are either obsolete (\verb|NOTRAILERS|)
-or not implemented (\verb|DEBUG|) or specific to some devices
-(\verb|MASTER|, \verb|AUTOMEDIA| and \verb|PORTSEL|). We do not discuss
-them here.
-\end{NB}
-
-
-The second line contains information on the link layer addresses
-associated with the device. The first word (\verb|ether|, \verb|sit|)
-defines the interface hardware type. This type determines the format and semantics
-of the addresses and is logically part of the address.
-The default format of the station address and the broadcast address
-(or the peer address for pointopoint links) is a
-sequence of hexadecimal bytes separated by colons, but some link
-types may have their natural address format, f.e.\ addresses
-of tunnels over IP are printed as dotted-quad IP addresses.
-
-\vskip 1mm
-\begin{NB}
- NBMA links have no well-defined broadcast or peer address,
- however this field may contain useful information, f.e.\
- about the address of broadcast relay or about the address of the ARP server.
-\end{NB}
-\begin{NB}
-Multicast addresses are not shown by this command, see
-\verb|ip maddr ls| in~Sec.\ref{IP-MADDR} (p.\pageref{IP-MADDR} of this
-document).
-\end{NB}
-
-
-\paragraph{Statistics:} With the \verb|-statistics| option, \verb|ip| also
-prints interface statistics:
-
-\begin{verbatim}
-kuznet@alisa:~ $ ip -s link ls eth0
-3: eth0: <BROADCAST,MULTICAST,UP> mtu 1500 qdisc cbq qlen 100
- link/ether 00:a0:cc:66:18:78 brd ff:ff:ff:ff:ff:ff
- RX: bytes packets errors dropped overrun mcast
- 2449949362 2786187 0 0 0 0
- TX: bytes packets errors dropped carrier collsns
- 178558497 1783945 332 0 332 35172
-kuznet@alisa:~ $
-\end{verbatim}
-\verb|RX:| and \verb|TX:| lines summarize receiver and transmitter
-statistics. They contain:
-\begin{itemize}
-\item \verb|bytes| --- the total number of bytes received or transmitted
-on the interface. This number wraps when the maximal length of the data type
-natural for the architecture is exceeded, so continuous monitoring requires
-a user level daemon snapping it periodically.
-\item \verb|packets| --- the total number of packets received or transmitted
-on the interface.
-\item \verb|errors| --- the total number of receiver or transmitter errors.
-\item \verb|dropped| --- the total number of packets dropped due to lack
-of resources.
-\item \verb|overrun| --- the total number of receiver overruns resulting
-in dropped packets. As a rule, if the interface is overrun, it means
-serious problems in the kernel or that your machine is too slow
-for this interface.
-\item \verb|mcast| --- the total number of received multicast packets. This option
-is only supported by a few devices.
-\item \verb|carrier| --- total number of link media failures f.e.\ because
-of lost carrier.
-\item \verb|collsns| --- the total number of collision events
-on Ethernet-like media. This number may have a different sense on other
-link types.
-\item \verb|compressed| --- the total number of compressed packets. This is
-available only for links using VJ header compression.
-\end{itemize}
-
-
-If the \verb|-s| option is entered twice or more,
-\verb|ip| prints more detailed statistics on receiver
-and transmitter errors.
-
-\begin{verbatim}
-kuznet@alisa:~ $ ip -s -s link ls eth0
-3: eth0: <BROADCAST,MULTICAST,UP> mtu 1500 qdisc cbq qlen 100
- link/ether 00:a0:cc:66:18:78 brd ff:ff:ff:ff:ff:ff
- RX: bytes packets errors dropped overrun mcast
- 2449949362 2786187 0 0 0 0
- RX errors: length crc frame fifo missed
- 0 0 0 0 0
- TX: bytes packets errors dropped carrier collsns
- 178558497 1783945 332 0 332 35172
- TX errors: aborted fifo window heartbeat
- 0 0 0 332
-kuznet@alisa:~ $
-\end{verbatim}
-These error names are pure Ethernetisms. Other devices
-may have non zero values in these fields but they may be
-interpreted differently.
-
-
-\section{{\tt ip address} --- protocol address management}
-
-\paragraph{Abbreviations:} \verb|address|, \verb|addr|, \verb|a|.
-
-\paragraph{Object:} The \verb|address| is a protocol (IP or IPv6) address attached
-to a network device. Each device must have at least one address
-to use the corresponding protocol. It is possible to have several
-different addresses attached to one device. These addresses are not
-discriminated, so that the term {\em alias\/} is not quite appropriate
-for them and we do not use it in this document.
-
-The \verb|ip addr| command displays addresses and their properties,
-adds new addresses and deletes old ones.
-
-\paragraph{Commands:} \verb|add|, \verb|delete|, \verb|flush| and \verb|show|
-(or \verb|list|).
-
-
-\subsection{{\tt ip address add} --- add a new protocol address}
-\label{IP-ADDR-ADD}
-
-\paragraph{Abbreviations:} \verb|add|, \verb|a|.
-
-\paragraph{Arguments:}
-
-\begin{itemize}
-\item \verb|dev NAME|
-
-\noindent--- the name of the device to add the address to.
-
-\item \verb|local ADDRESS| (default)
-
---- the address of the interface. The format of the address depends
-on the protocol. It is a dotted quad for IP and a sequence of hexadecimal halfwords
-separated by colons for IPv6. The \verb|ADDRESS| may be followed by
-a slash and a decimal number which encodes the network prefix length.
-
-
-\item \verb|peer ADDRESS|
-
---- the address of the remote endpoint for pointopoint interfaces.
-Again, the \verb|ADDRESS| may be followed by a slash and a decimal number,
-encoding the network prefix length. If a peer address is specified,
-the local address {\em cannot\/} have a prefix length. The network prefix is associated
-with the peer rather than with the local address.
-
-
-\item \verb|broadcast ADDRESS|
-
---- the broadcast address on the interface.
-
-It is possible to use the special symbols \verb|'+'| and \verb|'-'|
-instead of the broadcast address. In this case, the broadcast address
-is derived by setting/resetting the host bits of the interface prefix.
-
-\vskip 1mm
-\begin{NB}
-Unlike \verb|ifconfig|, the \verb|ip| utility {\em does not\/} set any broadcast
-address unless explicitly requested.
-\end{NB}
-
-
-\item \verb|label NAME|
-
---- Each address may be tagged with a label string.
-In order to preserve compatibility with Linux-2.0 net aliases,
-this string must coincide with the name of the device or must be prefixed
-with the device name followed by colon.
-
-
-\item \verb|scope SCOPE_VALUE|
-
---- the scope of the area where this address is valid.
-The available scopes are listed in file \verb|/etc/iproute2/rt_scopes|.
-Predefined scope values are:
-
- \begin{itemize}
- \item \verb|global| --- the address is globally valid.
- \item \verb|site| --- (IPv6 only) the address is site local,
- i.e.\ it is valid inside this site.
- \item \verb|link| --- the address is link local, i.e.\
- it is valid only on this device.
- \item \verb|host| --- the address is valid only inside this host.
- \end{itemize}
-
-Appendix~\ref{ADDR-SEL} (p.\pageref{ADDR-SEL} of this document)
-contains more details on address scopes.
-
-\end{itemize}
-
-\paragraph{Examples:}
-\begin{itemize}
-\item \verb|ip addr add 127.0.0.1/8 dev lo brd + scope host|
-
---- add the usual loopback address to the loopback device.
-
-\item \verb|ip addr add 10.0.0.1/24 brd + dev eth0 label eth0:Alias|
-
---- add the address 10.0.0.1 with prefix length 24 (i.e.\ netmask
-\verb|255.255.255.0|), standard broadcast and label \verb|eth0:Alias|
-to the interface \verb|eth0|.
-\end{itemize}
-
-
-\subsection{{\tt ip address delete} --- delete a protocol address}
-
-\paragraph{Abbreviations:} \verb|delete|, \verb|del|, \verb|d|.
-
-\paragraph{Arguments:} coincide with the arguments of \verb|ip addr add|.
-The device name is a required argument. The rest are optional.
-If no arguments are given, the first address is deleted.
-
-\paragraph{Examples:}
-\begin{itemize}
-\item \verb|ip addr del 127.0.0.1/8 dev lo|
-
---- deletes the loopback address from the loopback device.
-It would be best not to repeat this experiment.
-
-\item Disable IP on the interface \verb|eth0|:
-\begin{verbatim}
- while ip -f inet addr del dev eth0; do
- : nothing
- done
-\end{verbatim}
-Another method to disable IP on an interface using {\tt ip addr flush}
-may be found in sec.\ref{IP-ADDR-FLUSH}, p.\pageref{IP-ADDR-FLUSH}.
-
-\end{itemize}
-
-
-\subsection{{\tt ip address show} --- display protocol addresses}
-
-\paragraph{Abbreviations:} \verb|show|, \verb|list|, \verb|lst|, \verb|sh|, \verb|ls|,
-\verb|l|.
-
-\paragraph{Arguments:}
-
-\begin{itemize}
-\item \verb|dev NAME| (default)
-
---- the name of the device.
-
-\item \verb|scope SCOPE_VAL|
-
---- only list addresses with this scope.
-
-\item \verb|to PREFIX|
-
---- only list addresses matching this prefix.
-
-\item \verb|label PATTERN|
-
---- only list addresses with labels matching the \verb|PATTERN|.
-\verb|PATTERN| is a usual shell style pattern.
-
-
-\item \verb|dynamic| and \verb|permanent|
-
---- (IPv6 only) only list addresses installed due to stateless
-address configuration or only list permanent (not dynamic) addresses.
-
-\item \verb|tentative|
-
---- (IPv6 only) only list addresses which did not pass duplicate
-address detection.
-
-\item \verb|deprecated|
-
---- (IPv6 only) only list deprecated addresses.
-
-
-\item \verb|primary| and \verb|secondary|
-
---- only list primary (or secondary) addresses.
-
-\end{itemize}
-
-
-\paragraph{Output format:}
-
-\begin{verbatim}
-kuznet@alisa:~ $ ip addr ls eth0
-3: eth0: <BROADCAST,MULTICAST,UP> mtu 1500 qdisc cbq qlen 100
- link/ether 00:a0:cc:66:18:78 brd ff:ff:ff:ff:ff:ff
- inet 193.233.7.90/24 brd 193.233.7.255 scope global eth0
- inet6 3ffe:2400:0:1:2a0:ccff:fe66:1878/64 scope global dynamic
- valid_lft forever preferred_lft 604746sec
- inet6 fe80::2a0:ccff:fe66:1878/10 scope link
-kuznet@alisa:~ $
-\end{verbatim}
-
-The first two lines coincide with the output of \verb|ip link ls|.
-It is natural to interpret link layer addresses
-as addresses of the protocol family \verb|AF_PACKET|.
-
-Then the list of IP and IPv6 addresses follows, accompanied by
-additional address attributes: scope value (see Sec.\ref{IP-ADDR-ADD},
-p.\pageref{IP-ADDR-ADD} above), flags and the address label.
-
-Address flags are set by the kernel and cannot be changed
-administratively. Currently, the following flags are defined:
-
-\begin{enumerate}
-\item \verb|secondary|
-
---- the address is not used when selecting the default source address
-of outgoing packets (Cf.\ Appendix~\ref{ADDR-SEL}, p.\pageref{ADDR-SEL}).
-An IP address becomes secondary if another address with the same
-prefix bits already exists. The first address is primary.
-It is the leader of the group of all secondary addresses. When the leader
-is deleted, all secondaries are purged too.
-There is a tweak in \verb|/proc/sys/net/ipv4/conf/<dev>/promote_secondaries|
-which activates promotion of secondaries when a primary is deleted.
-To permanently enable this feature on all devices add
-\verb|net.ipv4.conf.all.promote_secondaries=1| to \verb|/etc/sysctl.conf|.
-This tweak is available in linux 2.6.15 and later.
-
-
-\item \verb|dynamic|
-
---- the address was created due to stateless autoconfiguration~\cite{RFC-ADDRCONF}.
-In this case the output also contains information on times, when
-the address is still valid. After \verb|preferred_lft| expires the address is
-moved to the deprecated state. After \verb|valid_lft| expires the address
-is finally invalidated.
-
-\item \verb|deprecated|
-
---- the address is deprecated, i.e.\ it is still valid, but cannot
-be used by newly created connections.
-
-\item \verb|tentative|
-
---- the address is not used because duplicate address detection~\cite{RFC-ADDRCONF}
-is still not complete or failed.
-
-\end{enumerate}
-
-
-\subsection{{\tt ip address flush} --- flush protocol addresses}
-\label{IP-ADDR-FLUSH}
-
-\paragraph{Abbreviations:} \verb|flush|, \verb|f|.
-
-\paragraph{Description:}This command flushes the protocol addresses
-selected by some criteria.
-
-\paragraph{Arguments:} This command has the same arguments as \verb|show|.
-The difference is that it does not run when no arguments are given.
-
-\paragraph{Warning:} This command (and other \verb|flush| commands
-described below) is pretty dangerous. If you make a mistake, it will
-not forgive it, but will cruelly purge all the addresses.
-
-\paragraph{Statistics:} With the \verb|-statistics| option, the command
-becomes verbose. It prints out the number of deleted addresses and the number
-of rounds made to flush the address list. If this option is given
-twice, \verb|ip addr flush| also dumps all the deleted addresses
-in the format described in the previous subsection.
-
-\paragraph{Example:} Delete all the addresses from the private network
-10.0.0.0/8:
-\begin{verbatim}
-netadm@amber:~ # ip -s -s a f to 10/8
-2: dummy inet 10.7.7.7/16 brd 10.7.255.255 scope global dummy
-3: eth0 inet 10.10.7.7/16 brd 10.10.255.255 scope global eth0
-4: eth1 inet 10.8.7.7/16 brd 10.8.255.255 scope global eth1
-
-*** Round 1, deleting 3 addresses ***
-*** Flush is complete after 1 round ***
-netadm@amber:~ #
-\end{verbatim}
-Another instructive example is disabling IP on all the Ethernets:
-\begin{verbatim}
-netadm@amber:~ # ip -4 addr flush label "eth*"
-\end{verbatim}
-And the last example shows how to flush all the IPv6 addresses
-acquired by the host from stateless address autoconfiguration
-after you enabled forwarding or disabled autoconfiguration.
-\begin{verbatim}
-netadm@amber:~ # ip -6 addr flush dynamic
-\end{verbatim}
-
-
-
-\section{{\tt ip neighbour} --- neighbour/arp tables management}
-
-\paragraph{Abbreviations:} \verb|neighbour|, \verb|neighbor|, \verb|neigh|,
-\verb|n|.
-
-\paragraph{Object:} \verb|neighbour| objects establish bindings between protocol
-addresses and link layer addresses for hosts sharing the same link.
-Neighbour entries are organized into tables. The IPv4 neighbour table
-is known by another name --- the ARP table.
-
-The corresponding commands display neighbour bindings
-and their properties, add new neighbour entries and delete old ones.
-
-\paragraph{Commands:} \verb|add|, \verb|change|, \verb|replace|,
-\verb|delete|, \verb|flush| and \verb|show| (or \verb|list|).
-
-\paragraph{See also:} Appendix~\ref{PROXY-NEIGH}, p.\pageref{PROXY-NEIGH}
-describes how to manage proxy ARP/NDISC with the \verb|ip| utility.
-
-
-\subsection{{\tt ip neighbour add} --- add a new neighbour entry\\
- {\tt ip neighbour change} --- change an existing entry\\
- {\tt ip neighbour replace} --- add a new entry or change an existing one}
-
-\paragraph{Abbreviations:} \verb|add|, \verb|a|; \verb|change|, \verb|chg|;
-\verb|replace|, \verb|repl|.
-
-\paragraph{Description:} These commands create new neighbour records
-or update existing ones.
-
-\paragraph{Arguments:}
-
-\begin{itemize}
-\item \verb|to ADDRESS| (default)
-
---- the protocol address of the neighbour. It is either an IPv4 or IPv6 address.
-
-\item \verb|dev NAME|
-
---- the interface to which this neighbour is attached.
-
-
-\item \verb|lladdr LLADDRESS|
-
---- the link layer address of the neighbour. \verb|LLADDRESS| can also be
-\verb|null|.
-
-\item \verb|nud NUD_STATE|
-
---- the state of the neighbour entry. \verb|nud| is an abbreviation for ``Neighbour
-Unreachability Detection''. The state can take one of the following values:
-
-\begin{enumerate}
-\item \verb|permanent| --- the neighbour entry is valid forever and can only be removed
-administratively.
-\item \verb|noarp| --- the neighbour entry is valid. No attempts to validate
-this entry will be made but it can be removed when its lifetime expires.
-\item \verb|reachable| --- the neighbour entry is valid until the reachability
-timeout expires.
-\item \verb|stale| --- the neighbour entry is valid but suspicious.
-This option to \verb|ip neigh| does not change the neighbour state if
-it was valid and the address is not changed by this command.
-\end{enumerate}
-
-\end{itemize}
-
-\paragraph{Examples:}
-\begin{itemize}
-\item \verb|ip neigh add 10.0.0.3 lladdr 0:0:0:0:0:1 dev eth0 nud perm|
-
---- add a permanent ARP entry for the neighbour 10.0.0.3 on the device \verb|eth0|.
-
-\item \verb|ip neigh chg 10.0.0.3 dev eth0 nud reachable|
-
---- change its state to \verb|reachable|.
-\end{itemize}
-
-
-\subsection{{\tt ip neighbour delete} --- delete a neighbour entry}
-
-\paragraph{Abbreviations:} \verb|delete|, \verb|del|, \verb|d|.
-
-\paragraph{Description:} This command invalidates a neighbour entry.
-
-\paragraph{Arguments:} The arguments are the same as with \verb|ip neigh add|,
-except that \verb|lladdr| and \verb|nud| are ignored.
-
-
-\paragraph{Example:}
-\begin{itemize}
-\item \verb|ip neigh del 10.0.0.3 dev eth0|
-
---- invalidate an ARP entry for the neighbour 10.0.0.3 on the device \verb|eth0|.
-
-\end{itemize}
-
-\begin{NB}
- The deleted neighbour entry will not disappear from the tables
- immediately. If it is in use it cannot be deleted until the last
- client releases it. Otherwise it will be destroyed during
- the next garbage collection.
-\end{NB}
-
-
-\paragraph{Warning:} Attempts to delete or manually change
-a \verb|noarp| entry created by the kernel may result in unpredictable behaviour.
-Particularly, the kernel may try to resolve this address even
-on a \verb|NOARP| interface or if the address is multicast or broadcast.
-
-
-\subsection{{\tt ip neighbour show} --- list neighbour entries}
-
-\paragraph{Abbreviations:} \verb|show|, \verb|list|, \verb|sh|, \verb|ls|.
-
-\paragraph{Description:}This command displays neighbour tables.
-
-\paragraph{Arguments:}
-
-\begin{itemize}
-
-\item \verb|to ADDRESS| (default)
-
---- the prefix selecting the neighbours to list.
-
-\item \verb|dev NAME|
-
---- only list the neighbours attached to this device.
-
-\item \verb|unused|
-
---- only list neighbours which are not currently in use.
-
-\item \verb|nud NUD_STATE|
-
---- only list neighbour entries in this state. \verb|NUD_STATE| takes
-values listed below or the special value \verb|all| which means all states.
-This option may occur more than once. If this option is absent, \verb|ip|
-lists all entries except for \verb|none| and \verb|noarp|.
-
-\end{itemize}
-
-
-\paragraph{Output format:}
-
-\begin{verbatim}
-kuznet@alisa:~ $ ip neigh ls
-:: dev lo lladdr 00:00:00:00:00:00 nud noarp
-fe80::200:cff:fe76:3f85 dev eth0 lladdr 00:00:0c:76:3f:85 router \
- nud stale
-0.0.0.0 dev lo lladdr 00:00:00:00:00:00 nud noarp
-193.233.7.254 dev eth0 lladdr 00:00:0c:76:3f:85 nud reachable
-193.233.7.85 dev eth0 lladdr 00:e0:1e:63:39:00 nud stale
-kuznet@alisa:~ $
-\end{verbatim}
-
-The first word of each line is the protocol address of the neighbour.
-Then the device name follows. The rest of the line describes the contents of
-the neighbour entry identified by the pair (device, address).
-
-\verb|lladdr| is the link layer address of the neighbour.
-
-\verb|nud| is the state of the ``neighbour unreachability detection'' machine
-for this entry. The detailed description of the neighbour
-state machine can be found in~\cite{RFC-NDISC}. Here is the full list
-of the states with short descriptions:
-
-\begin{enumerate}
-\item\verb|none| --- the state of the neighbour is void.
-\item\verb|incomplete| --- the neighbour is in the process of resolution.
-\item\verb|reachable| --- the neighbour is valid and apparently reachable.
-\item\verb|stale| --- the neighbour is valid, but is probably already
-unreachable, so the kernel will try to check it at the first transmission.
-\item\verb|delay| --- a packet has been sent to the stale neighbour and the kernel is waiting
-for confirmation.
-\item\verb|probe| --- the delay timer expired but no confirmation was received.
-The kernel has started to probe the neighbour with ARP/NDISC messages.
-\item\verb|failed| --- resolution has failed.
-\item\verb|noarp| --- the neighbour is valid. No attempts to check the entry
-will be made.
-\item\verb|permanent| --- it is a \verb|noarp| entry, but only the administrator
-may remove the entry from the neighbour table.
-\end{enumerate}
-
-The link layer address is valid in all states except for \verb|none|,
-\verb|failed| and \verb|incomplete|.
-
-IPv6 neighbours can be marked with the additional flag \verb|router|
-which means that the neighbour introduced itself as an IPv6 router~\cite{RFC-NDISC}.
-
-\paragraph{Statistics:} The \verb|-statistics| option displays some usage
-statistics, f.e.\
-
-\begin{verbatim}
-kuznet@alisa:~ $ ip -s n ls 193.233.7.254
-193.233.7.254 dev eth0 lladdr 00:00:0c:76:3f:85 ref 5 used 12/13/20 \
- nud reachable
-kuznet@alisa:~ $
-\end{verbatim}
-
-Here \verb|ref| is the number of users of this entry
-and \verb|used| is a triplet of time intervals in seconds
-separated by slashes. In this case they show that:
-
-\begin{enumerate}
-\item the entry was used 12 seconds ago.
-\item the entry was confirmed 13 seconds ago.
-\item the entry was updated 20 seconds ago.
-\end{enumerate}
-
-\subsection{{\tt ip neighbour flush} --- flush neighbour entries}
-
-\paragraph{Abbreviations:} \verb|flush|, \verb|f|.
-
-\paragraph{Description:}This command flushes neighbour tables, selecting
-entries to flush by some criteria.
-
-\paragraph{Arguments:} This command has the same arguments as \verb|show|.
-The differences are that it does not run when no arguments are given,
-and that the default neighbour states to be flushed do not include
-\verb|permanent| and \verb|noarp|.
-
-
-\paragraph{Statistics:} With the \verb|-statistics| option, the command
-becomes verbose. It prints out the number of deleted neighbours and the number
-of rounds made to flush the neighbour table. If the option is given
-twice, \verb|ip neigh flush| also dumps all the deleted neighbours
-in the format described in the previous subsection.
-
-\paragraph{Example:}
-\begin{verbatim}
-netadm@alisa:~ # ip -s -s n f 193.233.7.254
-193.233.7.254 dev eth0 lladdr 00:00:0c:76:3f:85 ref 5 used 12/13/20 \
- nud reachable
-
-*** Round 1, deleting 1 entries ***
-*** Flush is complete after 1 round ***
-netadm@alisa:~ #
-\end{verbatim}
-
-
-\section{{\tt ip route} --- routing table management}
-\label{IP-ROUTE}
-
-\paragraph{Abbreviations:} \verb|route|, \verb|ro|, \verb|r|.
-
-\paragraph{Object:} \verb|route| entries in the kernel routing tables keep
-information about paths to other networked nodes.
-
-Each route entry has a {\em key\/} consisting of a {\em prefix\/}
-(i.e.\ a pair containing a network address and the length of its mask) and,
-optionally, the TOS value. An IP packet matches the route if the highest
-bits of its destination address are equal to the route prefix at least
-up to the prefix length and if the TOS of the route is zero or equal to
-the TOS of the packet.
-
-If several routes match the packet, the following pruning rules
-are used to select the best one (see~\cite{RFC1812}):
-\begin{enumerate}
-\item The longest matching prefix is selected. All shorter ones
-are dropped.
-
-\item If the TOS of some route with the longest prefix is equal to the TOS
-of the packet, the routes with different TOS are dropped.
-
-If no exact TOS match was found and routes with TOS=0 exist,
-the rest of the routes are pruned.
-
-Otherwise, the route lookup fails.
-
-\item If several routes remain after the previous steps, then
-the routes with the best preference values are selected.
-
-\item If we still have several routes, then the {\em first\/} of them
-is selected.
-
-\begin{NB}
- Note the ambiguity of the last step. Unfortunately, Linux
- historically allows such a bizarre situation. The sense of the
-word ``first'' depends on the order of route additions and it is practically
-impossible to maintain a bundle of such routes in this order.
-\end{NB}
-
-For simplicity we will limit ourselves to the case where such a situation
-is impossible and routes are uniquely identified by the triplet
-\{prefix, tos, preference\}. Actually, it is impossible to create
-non-unique routes with \verb|ip| commands described in this section.
-
-One useful exception to this rule is the default route on non-forwarding
-hosts. It is ``officially'' allowed to have several fallback routes
-when several routers are present on directly connected networks.
-In this case, Linux-2.2 makes ``dead gateway detection''~\cite{RFC1122}
-controlled by neighbour unreachability detection and by advice
-from transport protocols to select a working router, so the order
-of the routes is not essential. However, in this case,
-fiddling with default routes manually is not recommended. Use the Router Discovery
-protocol (see Appendix~\ref{EXAMPLE-SETUP}, p.\pageref{EXAMPLE-SETUP})
-instead. Actually, Linux-2.2 IPv6 does not give user level applications
-any access to default routes.
-\end{enumerate}
-
-Certainly, the steps above are not performed exactly
-in this sequence. Instead, the routing table in the kernel is kept
-in some data structure to achieve the final result
-with minimal cost. However, not depending on a particular
-routing algorithm implemented in the kernel, we can summarize
-the statements above as: a route is identified by the triplet
-\{prefix, tos, preference\}. This {\em key\/} lets us locate
-the route in the routing table.
-
-\paragraph{Route attributes:} Each route key refers to a routing
-information record containing
-the data required to deliver IP packets (f.e.\ output device and
-next hop router) and some optional attributes (f.e.\ the path MTU or
-the preferred source address when communicating with this destination).
-These attributes are described in the following subsection.
-
-\paragraph{Route types:} \label{IP-ROUTE-TYPES}
-It is important that the set
-of required and optional attributes depends on the route {\em type\/}.
-The most important route type
-is \verb|unicast|. It describes real paths to other hosts.
-As a rule, common routing tables contain only such routes. However,
-there are other types of routes with different semantics. The
-full list of types understood by Linux-2.2 is:
-\begin{itemize}
-\item \verb|unicast| --- the route entry describes real paths to the
-destinations covered by the route prefix.
-\item \verb|unreachable| --- these destinations are unreachable. Packets
-are discarded and the ICMP message {\em host unreachable\/} is generated.
-The local senders get an \verb|EHOSTUNREACH| error.
-\item \verb|blackhole| --- these destinations are unreachable. Packets
-are discarded silently. The local senders get an \verb|EINVAL| error.
-\item \verb|prohibit| --- these destinations are unreachable. Packets
-are discarded and the ICMP message {\em communication administratively
-prohibited\/} is generated. The local senders get an \verb|EACCES| error.
-\item \verb|local| --- the destinations are assigned to this
-host. The packets are looped back and delivered locally.
-\item \verb|broadcast| --- the destinations are broadcast addresses.
-The packets are sent as link broadcasts.
-\item \verb|throw| --- a special control route used together with policy
-rules (see sec.\ref{IP-RULE}, p.\pageref{IP-RULE}). If such a route is selected, lookup
-in this table is terminated pretending that no route was found.
-Without policy routing it is equivalent to the absence of the route in the routing
-table. The packets are dropped and the ICMP message {\em net unreachable\/}
-is generated. The local senders get an \verb|ENETUNREACH| error.
-\item \verb|nat| --- a special NAT route. Destinations covered by the prefix
-are considered to be dummy (or external) addresses which require translation
-to real (or internal) ones before forwarding. The addresses to translate to
-are selected with the attribute \verb|via|. More about NAT is
-in Appendix~\ref{ROUTE-NAT}, p.\pageref{ROUTE-NAT}.
-\item \verb|anycast| --- ({\em not implemented\/}) the destinations are
-{\em anycast\/} addresses assigned to this host. They are mainly equivalent
-to \verb|local| with one difference: such addresses are invalid when used
-as the source address of any packet.
-\item \verb|multicast| --- a special type used for multicast routing.
-It is not present in normal routing tables.
-\end{itemize}
-
-\paragraph{Route tables:} Linux-2.2 can pack routes into several routing
-tables identified by a number in the range from 1 to 255 or by
-name from the file \verb|/etc/iproute2/rt_tables|. By default all normal
-routes are inserted into the \verb|main| table (ID 254) and the kernel only uses
-this table when calculating routes.
-
-Actually, one other table always exists, which is invisible but
-even more important. It is the \verb|local| table (ID 255). This table
-consists of routes for local and broadcast addresses. The kernel maintains
-this table automatically and the administrator usually need not modify it
-or even look at it.
-
-The multiple routing tables enter the game when {\em policy routing\/}
-is used. See sec.\ref{IP-RULE}, p.\pageref{IP-RULE}.
-In this case, the table identifier effectively becomes
-one more parameter, which should be added to the triplet
-\{prefix, tos, preference\} to uniquely identify the route.
-
-
-\subsection{{\tt ip route add} --- add a new route\\
- {\tt ip route change} --- change a route\\
- {\tt ip route replace} --- change a route or add a new one}
-\label{IP-ROUTE-ADD}
-
-\paragraph{Abbreviations:} \verb|add|, \verb|a|; \verb|change|, \verb|chg|;
- \verb|replace|, \verb|repl|.
-
-
-\paragraph{Arguments:}
-\begin{itemize}
-\item \verb|to PREFIX| or \verb|to TYPE PREFIX| (default)
-
---- the destination prefix of the route. If \verb|TYPE| is omitted,
-\verb|ip| assumes type \verb|unicast|. Other values of \verb|TYPE|
-are listed above. \verb|PREFIX| is an IP or IPv6 address optionally followed
-by a slash and the prefix length. If the length of the prefix is missing,
-\verb|ip| assumes a full-length host route. There is also a special
-\verb|PREFIX| --- \verb|default| --- which is equivalent to IP \verb|0/0| or
-to IPv6 \verb|::/0|.
-
-\item \verb|tos TOS| or \verb|dsfield TOS|
-
---- the Type Of Service (TOS) key. This key has no associated mask and
-the longest match is understood as: First, compare the TOS
-of the route and of the packet. If they are not equal, then the packet
-may still match a route with a zero TOS. \verb|TOS| is either an 8 bit hexadecimal
-number or an identifier from {\tt /etc/iproute2/rt\_dsfield}.
-
-
-\item \verb|metric NUMBER| or \verb|preference NUMBER|
-
---- the preference value of the route. \verb|NUMBER| is an arbitrary 32bit number.
-
-\item \verb|table TABLEID|
-
---- the table to add this route to.
-\verb|TABLEID| may be a number or a string from the file
-\verb|/etc/iproute2/rt_tables|. If this parameter is omitted,
-\verb|ip| assumes the \verb|main| table, with the exception of
-\verb|local|, \verb|broadcast| and \verb|nat| routes, which are
-put into the \verb|local| table by default.
-
-\item \verb|dev NAME|
-
---- the output device name.
-
-\item \verb|via ADDRESS|
-
---- the address of the nexthop router. Actually, the sense of this field depends
-on the route type. For normal \verb|unicast| routes it is either the true nexthop
-router or, if it is a direct route installed in BSD compatibility mode,
-it can be a local address of the interface.
-For NAT routes it is the first address of the block of translated IP destinations.
-
-\item \verb|src ADDRESS|
-
---- the source address to prefer when sending to the destinations
-covered by the route prefix.
-
-\item \verb|realm REALMID|
-
---- the realm to which this route is assigned.
-\verb|REALMID| may be a number or a string from the file
-\verb|/etc/iproute2/rt_realms|. Sec.\ref{RT-REALMS} (p.\pageref{RT-REALMS})
-contains more information on realms.
-
-\item \verb|mtu MTU| or \verb|mtu lock MTU|
-
---- the MTU along the path to the destination. If the modifier \verb|lock| is
-not used, the MTU may be updated by the kernel due to Path MTU Discovery.
-If the modifier \verb|lock| is used, no path MTU discovery will be tried,
-all packets will be sent without the DF bit in IPv4 case
-or fragmented to MTU for IPv6.
-
-\item \verb|window NUMBER|
-
---- the maximal window for TCP to advertise to these destinations,
-measured in bytes. It limits maximal data bursts that our TCP
-peers are allowed to send to us.
-
-\item \verb|rtt NUMBER|
-
---- the initial RTT (``Round Trip Time'') estimate.
-
-
-\item \verb|rttvar NUMBER|
-
---- \threeonly the initial RTT variance estimate.
-
-
-\item \verb|ssthresh NUMBER|
-
---- \threeonly an estimate for the initial slow start threshold.
-
-
-\item \verb|cwnd NUMBER|
-
---- \threeonly the clamp for congestion window. It is ignored if the \verb|lock|
- flag is not used.
-
-
-\item \verb|advmss NUMBER|
-
---- \threeonly the MSS (``Maximal Segment Size'') to advertise to these
- destinations when establishing TCP connections. If it is not given,
- Linux uses a default value calculated from the first hop device MTU.
-
-\begin{NB}
- If the path to these destinations is asymmetric, this guess may be wrong.
-\end{NB}
-
-\item \verb|reordering NUMBER|
-
---- \threeonly Maximal reordering on the path to this destination.
- If it is not given, Linux uses the value selected with \verb|sysctl|
- variable \verb|net/ipv4/tcp_reordering|.
-
-\item \verb|hoplimit NUMBER|
-
---- [2.5.74+ only] Maximum number of hops on the path to this destination.
- The default is the value selected with the \verb|sysctl| variable
- \verb|net/ipv4/ip_default_ttl|.
-
-\item \verb|initcwnd NUMBER|
---- [2.5.70+ only] Initial congestion window size for connections to
- this destination. Actual window size is this value multiplied by the
- MSS (``Maximal Segment Size'') for the same connection. The default is
- zero, meaning to use the values specified in~\cite{RFC2414}.
-
-\item \verb|initrwnd NUMBER|
-
---- [2.6.33+ only] Initial receive window size for connections to
- this destination. The actual window size is this value multiplied
- by the MSS (``Maximal Segment Size'') of the connection. The default
- value is zero, meaning to use the Slow Start value.
-
-\item \verb|nexthop NEXTHOP|
-
---- the nexthop of a multipath route. \verb|NEXTHOP| is a complex value
-with its own syntax similar to the top level argument lists:
-\begin{itemize}
-\item \verb|via ADDRESS| is the nexthop router.
-\item \verb|dev NAME| is the output device.
-\item \verb|weight NUMBER| is a weight for this element of a multipath
-route reflecting its relative bandwidth or quality.
-\end{itemize}
-
-\item \verb|scope SCOPE_VAL|
-
---- the scope of the destinations covered by the route prefix.
-\verb|SCOPE_VAL| may be a number or a string from the file
-\verb|/etc/iproute2/rt_scopes|.
-If this parameter is omitted,
-\verb|ip| assumes scope \verb|global| for all gatewayed \verb|unicast|
-routes, scope \verb|link| for direct \verb|unicast| and \verb|broadcast| routes
-and scope \verb|host| for \verb|local| routes.
-
-\item \verb|protocol RTPROTO|
-
---- the routing protocol identifier of this route.
-\verb|RTPROTO| may be a number or a string from the file
-\verb|/etc/iproute2/rt_protos|. If the routing protocol ID is
-not given, \verb|ip| assumes protocol \verb|boot| (i.e.\
-it assumes the route was added by someone who doesn't
-understand what they are doing). Several protocol values have a fixed interpretation.
-Namely:
-\begin{itemize}
-\item \verb|redirect| --- the route was installed due to an ICMP redirect.
-\item \verb|kernel| --- the route was installed by the kernel during
-autoconfiguration.
-\item \verb|boot| --- the route was installed during the bootup sequence.
-If a routing daemon starts, it will purge all of them.
-\item \verb|static| --- the route was installed by the administrator
-to override dynamic routing. Routing daemons will respect them
-and, probably, even advertise them to its peers.
-\item \verb|ra| --- the route was installed by the Router Discovery protocol.
-\end{itemize}
-The rest of the values are not reserved and the administrator is free
-to assign (or not to assign) protocol tags. At least, routing
-daemons should take care of setting some unique protocol values,
-f.e.\ as they are assigned in \verb|rtnetlink.h| or in \verb|rt_protos|
-database.
-
-
-\item \verb|onlink|
-
---- pretend that the nexthop is directly attached to this link,
-even if it does not match any interface prefix. One application of this
-option may be found in~\cite{IP-TUNNELS}.
-
-\item \verb|pref PREF|
-
---- the IPv6 route preference.
-\verb|PREF| is a string specifying the route preference as defined in
-RFC4191 for Router Discovery messages. Namely:
-\begin{itemize}
-\item \verb|low| --- the route has the lowest priority.
-\item \verb|medium| --- the route has the default priority.
-\item \verb|high| --- the route has the highest priority.
-\end{itemize}
-
-\end{itemize}
-
-
-\begin{NB}
- Actually there are more commands: \verb|prepend| does the same
- thing as classic \verb|route add|, i.e.\ adds a route, even if another
- route to the same destination exists. Its opposite case is \verb|append|,
- which adds the route to the end of the list. Avoid these
- features.
-\end{NB}
-\begin{NB}
- More sad news, IPv6 only understands the \verb|append| command correctly.
- All the others are translated into \verb|append| commands. Certainly,
- this will change in the future.
-\end{NB}
-
-\paragraph{Examples:}
-\begin{itemize}
-\item add a plain route to network 10.0.0/24 via gateway 193.233.7.65
-\begin{verbatim}
- ip route add 10.0.0/24 via 193.233.7.65
-\end{verbatim}
-\item change it to a direct route via the \verb|dummy| device
-\begin{verbatim}
- ip ro chg 10.0.0/24 dev dummy
-\end{verbatim}
-\item add a default multipath route splitting the load between \verb|ppp0|
-and \verb|ppp1|
-\begin{verbatim}
- ip route add default scope global nexthop dev ppp0 \
- nexthop dev ppp1
-\end{verbatim}
-Note the scope value. It is not necessary but it informs the kernel
-that this route is gatewayed rather than direct. Actually, if you
-know the addresses of remote endpoints it would be better to use the
-\verb|via| parameter.
-\item announce that the address 192.203.80.144 is not a real one, but
-should be translated to 193.233.7.83 before forwarding
-\begin{verbatim}
- ip route add nat 192.203.80.144 via 193.233.7.83
-\end{verbatim}
-Backward translation is set up with policy rules described
-in the following section (sec.\ref{IP-RULE}, p.\pageref{IP-RULE}).
-\end{itemize}
-
-\subsection{{\tt ip route delete} --- delete a route}
-
-\paragraph{Abbreviations:} \verb|delete|, \verb|del|, \verb|d|.
-
-\paragraph{Arguments:} \verb|ip route del| has the same arguments as
-\verb|ip route add|, but their semantics are a bit different.
-
-Key values (\verb|to|, \verb|tos|, \verb|preference| and \verb|table|)
-select the route to delete. If optional attributes are present, \verb|ip|
-verifies that they coincide with the attributes of the route to delete.
-If no route with the given key and attributes was found, \verb|ip route del|
-fails.
-\begin{NB}
-Linux-2.0 had the option to delete a route selected only by prefix address,
-ignoring its length (i.e.\ netmask). This option no longer exists
-because it was ambiguous. However, look at {\tt ip route flush}
-(sec.\ref{IP-ROUTE-FLUSH}, p.\pageref{IP-ROUTE-FLUSH}) which
-provides similar and even richer functionality.
-\end{NB}
-
-\paragraph{Example:}
-\begin{itemize}
-\item delete the multipath route created by the command in the previous subsection
-\begin{verbatim}
- ip route del default scope global nexthop dev ppp0 \
- nexthop dev ppp1
-\end{verbatim}
-\end{itemize}
-
-
-
-\subsection{{\tt ip route show} --- list routes}
-
-\paragraph{Abbreviations:} \verb|show|, \verb|list|, \verb|sh|, \verb|ls|, \verb|l|.
-
-\paragraph{Description:} the command displays the contents of the routing tables
-or the route(s) selected by some criteria.
-
-
-\paragraph{Arguments:}
-\begin{itemize}
-\item \verb|to SELECTOR| (default)
-
---- only select routes from the given range of destinations. \verb|SELECTOR|
-consists of an optional modifier (\verb|root|, \verb|match| or \verb|exact|)
-and a prefix. \verb|root PREFIX| selects routes with prefixes not shorter
-than \verb|PREFIX|. F.e.\ \verb|root 0/0| selects the entire routing table.
-\verb|match PREFIX| selects routes with prefixes not longer than
-\verb|PREFIX|. F.e.\ \verb|match 10.0/16| selects \verb|10.0/16|,
-\verb|10/8| and \verb|0/0|, but it does not select \verb|10.1/16| and
-\verb|10.0.0/24|. And \verb|exact PREFIX| (or just \verb|PREFIX|)
-selects routes with this exact prefix. If none of these options
-is present, \verb|ip| assumes \verb|root 0/0| i.e.\ it lists the entire table.
-
-
-\item \verb|tos TOS| or \verb|dsfield TOS|
-
- --- only select routes with the given TOS.
-
-
-\item \verb|table TABLEID|
-
- --- show the routes from this table(s). The default setting is to show
-\verb|table| \verb|main|. \verb|TABLEID| may either be the ID of a real table
-or one of the special values:
- \begin{itemize}
- \item \verb|all| --- list all of the tables.
- \item \verb|cache| --- dump the routing cache.
- \end{itemize}
-\begin{NB}
- IPv6 has a single table. However, splitting it into \verb|main|, \verb|local|
- and \verb|cache| is emulated by the \verb|ip| utility.
-\end{NB}
-
-\item \verb|cloned| or \verb|cached|
-
---- list cloned routes i.e.\ routes which were dynamically forked from
-other routes because some route attribute (f.e.\ MTU) was updated.
-Actually, it is equivalent to \verb|table cache|.
-
-\item \verb|from SELECTOR|
-
---- the same syntax as for \verb|to|, but it binds the source address range
-rather than destinations. Note that the \verb|from| option only works with
-cloned routes.
-
-\item \verb|protocol RTPROTO|
-
---- only list routes of this protocol.
-
-
-\item \verb|scope SCOPE_VAL|
-
---- only list routes with this scope.
-
-\item \verb|type TYPE|
-
---- only list routes of this type.
-
-\item \verb|dev NAME|
-
---- only list routes going via this device.
-
-\item \verb|via PREFIX|
-
---- only list routes going via the nexthop routers selected by \verb|PREFIX|.
-
-\item \verb|src PREFIX|
-
---- only list routes with preferred source addresses selected
-by \verb|PREFIX|.
-
-\item \verb|realm REALMID| or \verb|realms FROMREALM/TOREALM|
-
---- only list routes with these realms.
-
-\end{itemize}
-
-\paragraph{Examples:} Let us count routes of protocol \verb|gated/bgp|
-on a router:
-\begin{verbatim}
-kuznet@amber:~ $ ip ro ls proto gated/bgp | wc
- 1413 9891 79010
-kuznet@amber:~ $
-\end{verbatim}
-To count the size of the routing cache, we have to use the \verb|-o| option
-because cached attributes can take more than one line of output:
-\begin{verbatim}
-kuznet@amber:~ $ ip -o ro ls cloned | wc
- 159 2543 18707
-kuznet@amber:~ $
-\end{verbatim}
-
-
-\paragraph{Output format:} The output of this command consists
-of per route records separated by line feeds.
-However, some records may consist
-of more than one line: particularly, this is the case when the route
-is cloned or you requested additional statistics. If the
-\verb|-o| option was given, then line feeds separating lines inside
-records are replaced with the backslash sign.
-
-The output has the same syntax as arguments given to {\tt ip route add},
-so that it can be understood easily. F.e.\
-\begin{verbatim}
-kuznet@amber:~ $ ip ro ls 193.233.7/24
-193.233.7.0/24 dev eth0 proto gated/conn scope link \
- src 193.233.7.65 realms inr.ac
-kuznet@amber:~ $
-\end{verbatim}
-
-If you list cloned entries, the output contains other attributes which
-are evaluated during route calculation and updated during route
-lifetime. An example of the output is:
-\begin{verbatim}
-kuznet@amber:~ $ ip ro ls 193.233.7.82 tab cache
-193.233.7.82 from 193.233.7.82 dev eth0 src 193.233.7.65 \
- realms inr.ac/inr.ac
- cache <src-direct,redirect> mtu 1500 rtt 300 iif eth0
-193.233.7.82 dev eth0 src 193.233.7.65 realms inr.ac
- cache mtu 1500 rtt 300
-kuznet@amber:~ $
-\end{verbatim}
-\begin{NB}
- \label{NB-strange-route}
- The route looks a bit strange, doesn't it? Did you notice that
- it is a path from 193.233.7.82 back to 193.233.7.82? Well, you will
- see in the section on \verb|ip route get| (p.\pageref{NB-nature-of-strangeness})
- how it appeared.
-\end{NB}
-The second line, starting with the word \verb|cache|, shows
-additional attributes which normal routes do not possess.
-Cached flags are summarized in angle brackets:
-\begin{itemize}
-\item \verb|local| --- packets are delivered locally.
-It stands for loopback unicast routes, for broadcast routes
-and for multicast routes, if this host is a member of the corresponding
-group.
-
-\item \verb|reject| --- the path is bad. Any attempt to use it results
-in an error. See attribute \verb|error| below (p.\pageref{IP-ROUTE-GET-error}).
-
-\item \verb|mc| --- the destination is multicast.
-
-\item \verb|brd| --- the destination is broadcast.
-
-\item \verb|src-direct| --- the source is on a directly connected
-interface.
-
-\item \verb|redirected| --- the route was created by an ICMP Redirect.
-
-\item \verb|redirect| --- packets going via this route will
-trigger an ICMP redirect.
-
-\item \verb|fastroute| --- the route is eligible to be used for fastroute.
-
-\item \verb|equalize| --- make packet by packet randomization
-along this path.
-
-\item \verb|dst-nat| --- the destination address requires translation.
-
-\item \verb|src-nat| --- the source address requires translation.
-
-\item \verb|masq| --- the source address requires masquerading.
-This feature disappeared in linux-2.4.
-
-\item \verb|notify| --- ({\em not implemented}) change/deletion
-of this route will trigger RTNETLINK notification.
-\end{itemize}
-
-Then some optional attributes follow:
-\begin{itemize}
-\item \verb|error| --- on \verb|reject| routes it is the error code
-returned to local senders when they try to use this route.
-These error codes are translated into ICMP error codes, sent to remote
-senders, according to the rules described above in the subsection
-devoted to route types (p.\pageref{IP-ROUTE-TYPES}).
-\label{IP-ROUTE-GET-error}
-
-\item \verb|expires| --- this entry will expire after this timeout.
-
-\item \verb|iif| --- the packets for this path are expected to arrive
-on this interface.
-\end{itemize}
-
-\paragraph{Statistics:} With the \verb|-statistics| option, more
-information about this route is shown:
-\begin{itemize}
-\item \verb|users| --- the number of users of this entry.
-\item \verb|age| --- shows when this route was last used.
-\item \verb|used| --- the number of lookups of this route since its creation.
-\end{itemize}
-
-\subsection{{\tt ip route save} --- save routing tables}
-\label{IP-ROUTE-SAVE}
-
-\paragraph{Description:} this command saves the contents of the routing
-tables or the route(s) selected by some criteria to standard output.
-
-\paragraph{Arguments:} \verb|ip route save| has the same arguments as
-\verb|ip route show|.
-
-\paragraph{Example:} This saves all the routes to the {\tt saved\_routes}
-file:
-\begin{verbatim}
-dan@caffeine:~ # ip route save > saved_routes
-\end{verbatim}
-
-\paragraph{Output format:} The format of the data stream provided by
-\verb|ip route save| is that of \verb|rtnetlink|. See
-\verb|rtnetlink(7)| for more information.
-
-\subsection{{\tt ip route restore} --- restore routing tables}
-\label{IP-ROUTE-RESTORE}
-
-\paragraph{Description:} this command restores the contents of the routing
-tables according to a data stream as provided by \verb|ip route save| via
-standard input. Note that any routes already in the table are left unchanged.
-Any routes in the input stream that already exist in the tables are ignored.
-
-\paragraph{Arguments:} This command takes no arguments.
-
-\paragraph{Example:} This restores all routes that were saved to the
-{\tt saved\_routes} file:
-
-\begin{verbatim}
-dan@caffeine:~ # ip route restore < saved_routes
-\end{verbatim}
-
-\subsection{{\tt ip route flush} --- flush routing tables}
-\label{IP-ROUTE-FLUSH}
-
-\paragraph{Abbreviations:} \verb|flush|, \verb|f|.
-
-\paragraph{Description:} this command flushes routes selected
-by some criteria.
-
-\paragraph{Arguments:} the arguments have the same syntax and semantics
-as the arguments of \verb|ip route show|, but routing tables are not
-listed but purged. The only difference is the default action: \verb|show|
-dumps the entire IP main routing table but \verb|flush| prints the helper page.
-The reason for this difference does not require any explanation, does it?
-
-
-\paragraph{Statistics:} With the \verb|-statistics| option, the command
-becomes verbose. It prints out the number of deleted routes and the number
-of rounds made to flush the routing table. If the option is given
-twice, \verb|ip route flush| also dumps all the deleted routes
-in the format described in the subsection on \verb|ip route show|.
-
-\paragraph{Examples:} The first example flushes all the
-gatewayed routes from the main table (f.e.\ after a routing daemon crash).
-\begin{verbatim}
-netadm@amber:~ # ip -4 ro flush scope global type unicast
-\end{verbatim}
-This option deserves to be put into a scriptlet \verb|routef|.
-\begin{NB}
-This option was described in the \verb|route(8)| man page borrowed
-from BSD, but was never implemented in Linux.
-\end{NB}
-
-The second example flushes all IPv6 cloned routes:
-\begin{verbatim}
-netadm@amber:~ # ip -6 -s -s ro flush cache
-3ffe:2400::220:afff:fef4:c5d1 via 3ffe:2400::220:afff:fef4:c5d1 \
- dev eth0 metric 0
- cache used 2 age 12sec mtu 1500 rtt 300
-3ffe:2400::280:adff:feb7:8034 via 3ffe:2400::280:adff:feb7:8034 \
- dev eth0 metric 0
- cache used 2 age 15sec mtu 1500 rtt 300
-3ffe:2400::280:c8ff:fe59:5bcc via 3ffe:2400::280:c8ff:fe59:5bcc \
- dev eth0 metric 0
- cache users 1 used 1 age 23sec mtu 1500 rtt 300
-3ffe:2400:0:1:2a0:ccff:fe66:1878 via 3ffe:2400:0:1:2a0:ccff:fe66:1878 \
- dev eth1 metric 0
- cache used 2 age 20sec mtu 1500 rtt 300
-3ffe:2400:0:1:a00:20ff:fe71:fb30 via 3ffe:2400:0:1:a00:20ff:fe71:fb30 \
- dev eth1 metric 0
- cache used 2 age 33sec mtu 1500 rtt 300
-ff02::1 via ff02::1 dev eth1 metric 0
- cache users 1 used 1 age 45sec mtu 1500 rtt 300
-
-*** Round 1, deleting 6 entries ***
-*** Flush is complete after 1 round ***
-netadm@amber:~ # ip -6 -s -s ro flush cache
-Nothing to flush.
-netadm@amber:~ #
-\end{verbatim}
-
-The third example flushes BGP routing tables after a \verb|gated|
-death.
-\begin{verbatim}
-netadm@amber:~ # ip ro ls proto gated/bgp | wc
- 1408 9856 78730
-netadm@amber:~ # ip -s ro f proto gated/bgp
-
-*** Round 1, deleting 1408 entries ***
-*** Flush is complete after 1 round ***
-netadm@amber:~ # ip ro f proto gated/bgp
-Nothing to flush.
-netadm@amber:~ # ip ro ls proto gated/bgp
-netadm@amber:~ #
-\end{verbatim}
-
-
-\subsection{{\tt ip route get} --- get a single route}
-\label{IP-ROUTE-GET}
-
-\paragraph{Abbreviations:} \verb|get|, \verb|g|.
-
-\paragraph{Description:} this command gets a single route to a destination
-and prints its contents exactly as the kernel sees it.
-
-\paragraph{Arguments:}
-\begin{itemize}
-\item \verb|to ADDRESS| (default)
-
---- the destination address.
-
-\item \verb|from ADDRESS|
-
---- the source address.
-
-\item \verb|tos TOS| or \verb|dsfield TOS|
-
---- the Type Of Service.
-
-\item \verb|iif NAME|
-
---- the device from which this packet is expected to arrive.
-
-\item \verb|oif NAME|
-
---- force the output device on which this packet will be routed.
-
-\item \verb|connected|
-
---- if no source address (option \verb|from|) was given, relookup
-the route with the source set to the preferred address received from the first lookup.
-If policy routing is used, it may be a different route.
-
-\end{itemize}
-
-Note that this operation is not equivalent to \verb|ip route show|.
-\verb|show| shows existing routes. \verb|get| resolves them and
-creates new clones if necessary. Essentially, \verb|get|
-is equivalent to sending a packet along this path.
-If the \verb|iif| argument is not given, the kernel creates a route
-to output packets towards the requested destination.
-This is equivalent to pinging the destination
-with a subsequent {\tt ip route ls cache}, however, no packets are
-actually sent. With the \verb|iif| argument, the kernel pretends
-that a packet arrived from this interface and searches for
-a path to forward the packet.
-
-\paragraph{Output format:} This command outputs routes in the same
-format as \verb|ip route ls|.
-
-\paragraph{Examples:}
-\begin{itemize}
-\item Find a route to output packets to 193.233.7.82:
-\begin{verbatim}
-kuznet@amber:~ $ ip route get 193.233.7.82
-193.233.7.82 dev eth0 src 193.233.7.65 realms inr.ac
- cache mtu 1500 rtt 300
-kuznet@amber:~ $
-\end{verbatim}
-
-\item Find a route to forward packets arriving on \verb|eth0|
-from 193.233.7.82 and destined for 193.233.7.82:
-\begin{verbatim}
-kuznet@amber:~ $ ip r g 193.233.7.82 from 193.233.7.82 iif eth0
-193.233.7.82 from 193.233.7.82 dev eth0 src 193.233.7.65 \
- realms inr.ac/inr.ac
- cache <src-direct,redirect> mtu 1500 rtt 300 iif eth0
-kuznet@amber:~ $
-\end{verbatim}
-\begin{NB}
- \label{NB-nature-of-strangeness}
- This is the command that created the funny route from 193.233.7.82
- looped back to 193.233.7.82 (cf.\ NB on~p.\pageref{NB-strange-route}).
- Note the \verb|redirect| flag on it.
-\end{NB}
-
-\item Find a multicast route for packets arriving on \verb|eth0|
-from host 193.233.7.82 and destined for multicast group 224.2.127.254
-(it is assumed that a multicast routing daemon is running.
-In this case, it is \verb|pimd|)
-\begin{verbatim}
-kuznet@amber:~ $ ip r g 224.2.127.254 from 193.233.7.82 iif eth0
-multicast 224.2.127.254 from 193.233.7.82 dev lo \
- src 193.233.7.65 realms inr.ac/cosmos
- cache <mc> iif eth0 Oifs: eth1 pimreg
-kuznet@amber:~ $
-\end{verbatim}
-This route differs from the ones seen before. It contains a ``normal'' part
-and a ``multicast'' part. The normal part is used to deliver (or not to
-deliver) the packet to local IP listeners. In this case the router
-is not a member
-of this group, so that route has no \verb|local| flag and only
-forwards packets. The output device for such entries is always loopback.
-The multicast part consists of an additional \verb|Oifs:| list showing
-the output interfaces.
-\end{itemize}
-
-
-It is time for a more complicated example. Let us add an invalid
-gatewayed route for a destination which is really directly connected:
-\begin{verbatim}
-netadm@alisa:~ # ip route add 193.233.7.98 via 193.233.7.254
-netadm@alisa:~ # ip route get 193.233.7.98
-193.233.7.98 via 193.233.7.254 dev eth0 src 193.233.7.90
- cache mtu 1500 rtt 3072
-netadm@alisa:~ #
-\end{verbatim}
-and probe it with ping:
-\begin{verbatim}
-netadm@alisa:~ # ping -n 193.233.7.98
-PING 193.233.7.98 (193.233.7.98) from 193.233.7.90 : 56 data bytes
-From 193.233.7.254: Redirect Host(New nexthop: 193.233.7.98)
-64 bytes from 193.233.7.98: icmp_seq=0 ttl=255 time=3.5 ms
-From 193.233.7.254: Redirect Host(New nexthop: 193.233.7.98)
-64 bytes from 193.233.7.98: icmp_seq=1 ttl=255 time=2.2 ms
-64 bytes from 193.233.7.98: icmp_seq=2 ttl=255 time=0.4 ms
-64 bytes from 193.233.7.98: icmp_seq=3 ttl=255 time=0.4 ms
-64 bytes from 193.233.7.98: icmp_seq=4 ttl=255 time=0.4 ms
-^C
---- 193.233.7.98 ping statistics ---
-5 packets transmitted, 5 packets received, 0% packet loss
-round-trip min/avg/max = 0.4/1.3/3.5 ms
-netadm@alisa:~ #
-\end{verbatim}
-What happened? Router 193.233.7.254 understood that we have a much
-better path to the destination and sent us an ICMP redirect message.
-We may retry \verb|ip route get| to see what we have in the routing
-tables now:
-\begin{verbatim}
-netadm@alisa:~ # ip route get 193.233.7.98
-193.233.7.98 dev eth0 src 193.233.7.90
- cache <redirected> mtu 1500 rtt 3072
-netadm@alisa:~ #
-\end{verbatim}
-
-
-
-\section{{\tt ip rule} --- routing policy database management}
-\label{IP-RULE}
-
-\paragraph{Abbreviations:} \verb|rule|, \verb|ru|.
-
-\paragraph{Object:} \verb|rule|s in the routing policy database control
-the route selection algorithm.
-
-Classic routing algorithms used in the Internet make routing decisions
-based only on the destination address of packets (and in theory,
-but not in practice, on the TOS field). The seminal review of classic
-routing algorithms and their modifications can be found in~\cite{RFC1812}.
-
-In some circumstances we want to route packets differently depending not only
-on destination addresses, but also on other packet fields: source address,
-IP protocol, transport protocol ports or even packet payload.
-This task is called ``policy routing''.
-
-\begin{NB}
- ``policy routing'' $\neq$ ``routing policy''.
-
-\noindent ``policy routing'' $=$ ``cunning routing''.
-
-\noindent ``routing policy'' $=$ ``routing tactics'' or ``routing plan''.
-\end{NB}
-
-To solve this task, the conventional destination based routing table, ordered
-according to the longest match rule, is replaced with a ``routing policy
-database'' (or RPDB), which selects routes
-by executing some set of rules. The rules may have lots of keys of different
-natures and therefore they have no natural ordering, but one imposed
-by the administrator. Linux-2.2 RPDB is a linear list of rules
-ordered by numeric priority value.
-RPDB explicitly allows matching a few packet fields:
-
-\begin{itemize}
-\item packet source address.
-\item packet destination address.
-\item TOS.
-\item incoming interface (which is packet metadata, rather than a packet field).
-\end{itemize}
-
-Matching IP protocols and transport ports is also possible,
-indirectly, via \verb|ipchains|, by exploiting their ability
-to mark some classes of packets with \verb|fwmark|. Therefore,
-\verb|fwmark| is also included in the set of keys checked by rules.
-
-Each policy routing rule consists of a {\em selector\/} and an {\em action\/}
-predicate. The RPDB is scanned in the order of increasing priority. The selector
-of each rule is applied to \{source address, destination address, incoming
-interface, tos, fwmark\} and, if the selector matches the packet,
-the action is performed. The action predicate may return with success.
-In this case, it will either give a route or failure indication
-and the RPDB lookup is terminated. Otherwise, the RPDB program
-continues on the next rule.
-
-What is the action, semantically? The natural action is to select the
-nexthop and the output device. This is what
-Cisco IOS~\cite{IOS} does. Let us call it ``match \& set''.
-The Linux-2.2 approach is more flexible. The action includes
-lookups in destination-based routing tables and selecting
-a route from these tables according to the classic longest match algorithm.
-The ``match \& set'' approach is the simplest case of the Linux one. It is realized
-when a second level routing table contains a single default route.
-Recall that Linux-2.2 supports multiple tables
-managed with the \verb|ip route| command, described in the previous section.
-
-At startup time the kernel configures the default RPDB consisting of three
-rules:
-
-\begin{enumerate}
-\item Priority: 0, Selector: match anything, Action: lookup routing
-table \verb|local| (ID 255).
-The \verb|local| table is a special routing table containing
-high priority control routes for local and broadcast addresses.
-
-\item Priority: 32766, Selector: match anything, Action: lookup routing
-table \verb|main| (ID 254).
-The \verb|main| table is the normal routing table containing all non-policy
-routes. This rule may be deleted and/or overridden with other
-ones by the administrator.
-
-\item Priority: 32767, Selector: match anything, Action: lookup routing
-table \verb|default| (ID 253).
-The \verb|default| table is empty. It is reserved for some
-post-processing if no previous default rules selected the packet.
-This rule may also be deleted.
-
-\end{enumerate}
-
-Do not confuse routing tables with rules: rules point to routing tables,
-several rules may refer to one routing table and some routing tables
-may have no rules pointing to them. If the administrator deletes all the rules
-referring to a table, the table is not used, but it still exists
-and will disappear only after all the routes contained in it are deleted.
-
-
-\paragraph{Rule attributes:} Each RPDB entry has additional
-attributes. F.e.\ each rule has a pointer to some routing
-table. NAT and masquerading rules have an attribute to select a new IP
-address to translate/masquerade. Besides that, rules have some
-optional attributes, which routes have, namely \verb|realms|.
-These values do not override those contained in the routing tables. They
-are only used if the route did not select any attributes.
-
-
-\paragraph{Rule types:} The RPDB may contain rules of the following
-types:
-\begin{itemize}
-\item \verb|unicast| --- the rule prescribes to return the route found
-in the routing table referenced by the rule.
-\item \verb|blackhole| --- the rule prescribes to silently drop the packet.
-\item \verb|unreachable| --- the rule prescribes to generate a ``Network
-is unreachable'' error.
-\item \verb|prohibit| --- the rule prescribes to generate a
-``Communication is administratively prohibited'' error.
-\item \verb|nat| --- the rule prescribes to translate the source address
-of the IP packet into some other value. More about NAT is
-in Appendix~\ref{ROUTE-NAT}, p.\pageref{ROUTE-NAT}.
-\end{itemize}
-
-
-\paragraph{Commands:} \verb|add|, \verb|delete| and \verb|show|
-(or \verb|list|).
-
-\subsection{{\tt ip rule add} --- insert a new rule\\
- {\tt ip rule delete} --- delete a rule}
-\label{IP-RULE-ADD}
-
-\paragraph{Abbreviations:} \verb|add|, \verb|a|; \verb|delete|, \verb|del|,
- \verb|d|.
-
-\paragraph{Arguments:}
-
-\begin{itemize}
-\item \verb|type TYPE| (default)
-
---- the type of this rule. The list of valid types was given in the previous
-subsection.
-
-\item \verb|from PREFIX|
-
---- select the source prefix to match.
-
-\item \verb|to PREFIX|
-
---- select the destination prefix to match.
-
-\item \verb|iif NAME|
-
---- select the incoming device to match. If the interface is loopback,
-the rule only matches packets originating from this host. This means that you
-may create separate routing tables for forwarded and local packets and,
-hence, completely segregate them.
-
-\item \verb|tos TOS| or \verb|dsfield TOS|
-
---- select the TOS value to match.
-
-\item \verb|fwmark MARK|
-
---- select the \verb|fwmark| value to match.
-
-\item \verb|priority PREFERENCE|
-
---- the priority of this rule. Each rule should have an explicitly
-set {\em unique\/} priority value.
-\begin{NB}
- Really, for historical reasons \verb|ip rule add| does not require a
- priority value and allows them to be non-unique.
- If the user does not supply a priority, it is selected by the kernel.
- If the user creates a rule with a priority value that
- already exists, the kernel does not reject the request. It adds
- the new rule before all old rules of the same priority.
-
- It is a mistake in design, no more. And it will be fixed one day,
- so do not rely on this feature. Use explicit priorities.
-\end{NB}
-
-
-\item \verb|table TABLEID|
-
---- the routing table identifier to look up if the rule selector matches.
-
-\item \verb|realms FROM/TO|
-
---- Realms to select if the rule matched and the routing table lookup
-succeeded. Realm \verb|TO| is only used if the route did not select
-any realm.
-
-\item \verb|nat ADDRESS|
-
---- The base of the IP address block to translate (for source addresses).
-The \verb|ADDRESS| may be either the start of the block of NAT addresses
-(selected by NAT routes) or in linux-2.2 a local host address (or even zero).
-In the last case the router does not translate the packets,
-but masquerades them to this address; this feature disappeared in 2.4.
-More about NAT is in Appendix~\ref{ROUTE-NAT},
-p.\pageref{ROUTE-NAT}.
-
-\end{itemize}
-
-\paragraph{Warning:} Changes to the RPDB made with these commands
-do not become active immediately. It is assumed that after
-a script finishes a batch of updates, it flushes the routing cache
-with \verb|ip route flush cache|.
-
-\paragraph{Examples:}
-\begin{itemize}
-\item Route packets with source addresses from 192.203.80/24
-according to routing table \verb|inr.ruhep|:
-\begin{verbatim}
-ip ru add from 192.203.80.0/24 table inr.ruhep prio 220
-\end{verbatim}
-
-\item Translate packet source address 193.233.7.83 into 192.203.80.144
-and route it according to table \#1 (actually, it is \verb|inr.ruhep|):
-\begin{verbatim}
-ip ru add from 193.233.7.83 nat 192.203.80.144 table 1 prio 320
-\end{verbatim}
-
-\item Delete the unused default rule:
-\begin{verbatim}
-ip ru del prio 32767
-\end{verbatim}
-
-\end{itemize}
-
-
-
-\subsection{{\tt ip rule show} --- list rules}
-\label{IP-RULE-SHOW}
-
-\paragraph{Abbreviations:} \verb|show|, \verb|list|, \verb|sh|, \verb|ls|, \verb|l|.
-
-
-\paragraph{Arguments:} Good news, this is one command that has no arguments.
-
-\paragraph{Output format:}
-
-\begin{verbatim}
-kuznet@amber:~ $ ip ru ls
-0: from all lookup local
-200: from 192.203.80.0/24 to 193.233.7.0/24 lookup main
-210: from 192.203.80.0/24 to 192.203.80.0/24 lookup main
-220: from 192.203.80.0/24 lookup inr.ruhep realms inr.ruhep/radio-msu
-300: from 193.233.7.83 to 193.233.7.0/24 lookup main
-310: from 193.233.7.83 to 192.203.80.0/24 lookup main
-320: from 193.233.7.83 lookup inr.ruhep map-to 192.203.80.144
-32766: from all lookup main
-kuznet@amber:~ $
-\end{verbatim}
-
-In the first column is the rule priority value followed
-by a colon. Then the selectors follow. Each key is prefixed
-with the same keyword that was used to create the rule.
-
-The keyword \verb|lookup| is followed by a routing table identifier,
-as it is recorded in the file \verb|/etc/iproute2/rt_tables|.
-
-If the rule does NAT (f.e.\ rule \#320), it is shown by the keyword
-\verb|map-to| followed by the start of the block of addresses to map.
-
-The sense of this example is pretty simple. The prefixes
-192.203.80.0/24 and 193.233.7.0/24 form the internal network, but
-they are routed differently when the packets leave it.
-Besides that, the host 193.233.7.83 is translated into
-another prefix to look like 192.203.80.144 when talking
-to the outer world.
-
-\subsection{{\tt ip rule save} --- save rules tables}
-\label{IP-RULE-SAVE}
-
-\paragraph{Description:} this command saves the contents of the rules
-tables or the rule(s) selected by some criteria to standard output.
-
-\paragraph{Arguments:} \verb|ip rule save| has the same arguments as
-\verb|ip rule show|.
-
-\paragraph{Example:} This saves all the rules to the {\tt saved\_rules}
-file:
-\begin{verbatim}
-dan@caffeine:~ # ip rule save > saved_rules
-\end{verbatim}
-
-\paragraph{Output format:} The format of the data stream provided by
-\verb|ip rule save| is that of \verb|rtnetlink|. See
-\verb|rtnetlink(7)| for more information.
-
-\subsection{{\tt ip rule restore} --- restore rules tables}
-\label{IP-RULE-RESTORE}
-
-\paragraph{Description:} this command restores the contents of the rules
-tables according to a data stream as provided by \verb|ip rule save| via
-standard input. Note that any rules already in the table are left unchanged,
-and duplicates are not ignored.
-
-\paragraph{Arguments:} This command takes no arguments.
-
-\paragraph{Example:} This restores all rules that were saved to the
-{\tt saved\_rules} file:
-
-\begin{verbatim}
-dan@caffeine:~ # ip rule restore < saved_rules
-\end{verbatim}
-
-
-
-\section{{\tt ip maddress} --- multicast addresses management}
-\label{IP-MADDR}
-
-\paragraph{Object:} \verb|maddress| objects are multicast addresses.
-
-\paragraph{Commands:} \verb|add|, \verb|delete|, \verb|show| (or \verb|list|).
-
-\subsection{{\tt ip maddress show} --- list multicast addresses}
-
-\paragraph{Abbreviations:} \verb|show|, \verb|list|, \verb|sh|, \verb|ls|, \verb|l|.
-
-\paragraph{Arguments:}
-
-\begin{itemize}
-
-\item \verb|dev NAME| (default)
-
---- the device name.
-
-\end{itemize}
-
-\paragraph{Output format:}
-
-\begin{verbatim}
-kuznet@alisa:~ $ ip maddr ls dummy
-2: dummy
- link 33:33:00:00:00:01
- link 01:00:5e:00:00:01
- inet 224.0.0.1 users 2
- inet6 ff02::1
-kuznet@alisa:~ $
-\end{verbatim}
-
-The first line of the output shows the interface index and its name.
-Then the multicast address list follows. Each line starts with the
-protocol identifier. The word \verb|link| denotes a link layer
-multicast address.
-
-If a multicast address has more than one user, the number
-of users is shown after the \verb|users| keyword.
-
-One additional feature not present in the example above
-is the \verb|static| flag, which indicates that the address was joined
-with \verb|ip maddr add|. See the following subsection.
-
-
-
-\subsection{{\tt ip maddress add} --- add a multicast address\\
- {\tt ip maddress delete} --- delete a multicast address}
-
-\paragraph{Abbreviations:} \verb|add|, \verb|a|; \verb|delete|, \verb|del|, \verb|d|.
-
-\paragraph{Description:} these commands attach/detach
-a static link layer multicast address to listen on the interface.
-Note that it is impossible to join protocol multicast groups
-statically. This command only manages link layer addresses.
-
-
-\paragraph{Arguments:}
-
-\begin{itemize}
-\item \verb|address LLADDRESS| (default)
-
---- the link layer multicast address.
-
-\item \verb|dev NAME|
-
---- the device to join/leave this multicast address.
-
-\end{itemize}
-
-
-\paragraph{Example:} Let us continue with the example from the previous subsection.
-
-\begin{verbatim}
-netadm@alisa:~ # ip maddr add 33:33:00:00:00:01 dev dummy
-netadm@alisa:~ # ip -0 maddr ls dummy
-2: dummy
- link 33:33:00:00:00:01 users 2 static
- link 01:00:5e:00:00:01
-netadm@alisa:~ # ip maddr del 33:33:00:00:00:01 dev dummy
-\end{verbatim}
-
-\begin{NB}
- Neither \verb|ip| nor the kernel check for multicast address validity.
- Particularly, this means that you can try to load a unicast address
- instead of a multicast address. Most drivers will ignore such addresses,
- but several (f.e.\ Tulip) will intern them into their on-board filter.
- The effects may be strange. Namely, the addresses become additional
- local link addresses and, if you loaded the address of another host
- to the router, wait for duplicated packets on the wire.
- It is not a bug, but rather a hole in the API and intra-kernel interfaces.
- This feature is really more useful for traffic monitoring, but using it
- with Linux-2.2 you {\em have to\/} be sure that the host is not
- a router and, especially, that it is not a transparent proxy or masquerading
- agent.
-\end{NB}
-
-
-
-\section{{\tt ip mroute} --- multicast routing cache management}
-\label{IP-MROUTE}
-
-\paragraph{Abbreviations:} \verb|mroute|, \verb|mr|.
-
-\paragraph{Object:} \verb|mroute| objects are multicast routing cache
-entries created by a user level mrouting daemon
-(f.e.\ \verb|pimd| or \verb|mrouted|).
-
-Due to the limitations of the current interface to the multicast routing
-engine, it is impossible to change \verb|mroute| objects administratively,
-so we may only display them. This limitation will be removed
-in the future.
-
-\paragraph{Commands:} \verb|show| (or \verb|list|).
-
-
-\subsection{{\tt ip mroute show} --- list mroute cache entries}
-
-\paragraph{Abbreviations:} \verb|show|, \verb|list|, \verb|sh|, \verb|ls|, \verb|l|.
-
-\paragraph{Arguments:}
-
-\begin{itemize}
-\item \verb|to PREFIX| (default)
-
---- the prefix selecting the destination multicast addresses to list.
-
-
-\item \verb|iif NAME|
-
---- the interface on which multicast packets are received.
-
-
-\item \verb|from PREFIX|
-
---- the prefix selecting the IP source addresses of the multicast route.
-
-
-\end{itemize}
-
-\paragraph{Output format:}
-
-\begin{verbatim}
-kuznet@amber:~ $ ip mroute ls
-(193.232.127.6, 224.0.1.39) Iif: unresolved
-(193.232.244.34, 224.0.1.40) Iif: unresolved
-(193.233.7.65, 224.66.66.66) Iif: eth0 Oifs: pimreg
-kuznet@amber:~ $
-\end{verbatim}
-
-Each line shows one (S,G) entry in the multicast routing cache,
-where S is the source address and G is the multicast group. \verb|Iif| is
-the interface on which multicast packets are expected to arrive.
-If the word \verb|unresolved| is there instead of the interface name,
-it means that the routing daemon still hasn't resolved this entry.
-The keyword \verb|Oifs| is followed by a list of output interfaces, separated
-by spaces. If a multicast routing entry is created with non-trivial
-TTL scope, administrative distances are appended to the device names
-in the \verb|Oifs| list.
-
-\paragraph{Statistics:} The \verb|-statistics| option also prints the
-number of packets and bytes forwarded along this route and
-the number of packets that arrived on the wrong interface, if this number is not zero.
-
-\begin{verbatim}
-kuznet@amber:~ $ ip -s mr ls 224.66/16
-(193.233.7.65, 224.66.66.66) Iif: eth0 Oifs: pimreg
- 9383 packets, 300256 bytes
-kuznet@amber:~ $
-\end{verbatim}
-
-
-\section{{\tt ip tunnel} --- tunnel configuration}
-\label{IP-TUNNEL}
-
-\paragraph{Abbreviations:} \verb|tunnel|, \verb|tunl|.
-
-\paragraph{Object:} \verb|tunnel| objects are tunnels, encapsulating
-packets in IPv4 packets and then sending them over the IP infrastructure.
-
-\paragraph{Commands:} \verb|add|, \verb|delete|, \verb|change|, \verb|show|
-(or \verb|list|).
-
-\paragraph{See also:} A more informal discussion of tunneling
-over IP and the \verb|ip tunnel| command can be found in~\cite{IP-TUNNELS}.
-
-\subsection{{\tt ip tunnel add} --- add a new tunnel\\
- {\tt ip tunnel change} --- change an existing tunnel\\
- {\tt ip tunnel delete} --- destroy a tunnel}
-
-\paragraph{Abbreviations:} \verb|add|, \verb|a|; \verb|change|, \verb|chg|;
-\verb|delete|, \verb|del|, \verb|d|.
-
-
-\paragraph{Arguments:}
-
-\begin{itemize}
-
-\item \verb|name NAME| (default)
-
---- select the tunnel device name.
-
-\item \verb|mode MODE|
-
---- set the tunnel mode. Three modes are currently available:
- \verb|ipip|, \verb|sit| and \verb|gre|.
-
-\item \verb|remote ADDRESS|
-
---- set the remote endpoint of the tunnel.
-
-\item \verb|local ADDRESS|
-
---- set the fixed local address for tunneled packets.
-It must be an address on another interface of this host.
-
-\item \verb|ttl N|
-
---- set a fixed TTL \verb|N| on tunneled packets.
- \verb|N| is a number in the range 1--255. 0 is a special value
- meaning that packets inherit the TTL value.
- The default value is: \verb|inherit|.
-
-\item \verb|tos T| or \verb|dsfield T|
-
---- set a fixed TOS \verb|T| on tunneled packets.
- The default value is: \verb|inherit|.
-
-
-
-\item \verb|dev NAME|
-
---- bind the tunnel to the device \verb|NAME| so that
- tunneled packets will only be routed via this device and will
- not be able to escape to another device when the route to endpoint changes.
-
-\item \verb|nopmtudisc|
-
---- disable Path MTU Discovery on this tunnel.
- It is enabled by default. Note that a fixed ttl is incompatible
- with this option: tunnelling with a fixed ttl always performs PMTU discovery.
-
-\item \verb|ignore-df|
-
---- (only GRE tunnels) enable IPv4 DF flag suppression on this tunnel.
- It is disabled by default. Enabling this option will cause IPv4
- payloads to be handled like any other GRE payload,
- regardless of the DF flag.
-
-\item \verb|key K|, \verb|ikey K|, \verb|okey K|
-
---- (only GRE tunnels) use keyed GRE with key \verb|K|. \verb|K| is
- either a number or an IP address-like dotted quad.
- The \verb|key| parameter sets the key to use in both directions.
- The \verb|ikey| and \verb|okey| parameters set different keys for input and output.
-
-
-\item \verb|csum|, \verb|icsum|, \verb|ocsum|
-
---- (only GRE tunnels) generate/require checksums for tunneled packets.
- The \verb|ocsum| flag calculates checksums for outgoing packets.
- The \verb|icsum| flag requires that all input packets have the correct
- checksum. The \verb|csum| flag is equivalent to the combination
- ``\verb|icsum| \verb|ocsum|''.
-
-\item \verb|seq|, \verb|iseq|, \verb|oseq|
-
---- (only GRE tunnels) serialize packets.
- The \verb|oseq| flag enables sequencing of outgoing packets.
- The \verb|iseq| flag requires that all input packets are serialized.
- The \verb|seq| flag is equivalent to the combination ``\verb|iseq| \verb|oseq|''.
-
-\begin{NB}
- I think this option does not
- work. At least, I did not test it, did not debug it and
- do not even understand how it is supposed to work or for what
- purpose Cisco planned to use it. Do not use it.
-\end{NB}
-
-
-\end{itemize}
-
-\paragraph{Example:} Create a pointopoint IPv6 tunnel with maximal TTL of 32.
-\begin{verbatim}
-netadm@amber:~ # ip tunl add Cisco mode sit remote 192.31.7.104 \
- local 192.203.80.142 ttl 32
-\end{verbatim}
-
-\subsection{{\tt ip tunnel show} --- list tunnels}
-
-\paragraph{Abbreviations:} \verb|show|, \verb|list|, \verb|sh|, \verb|ls|, \verb|l|.
-
-
-\paragraph{Arguments:} None.
-
-\paragraph{Output format:}
-\begin{verbatim}
-kuznet@amber:~ $ ip tunl ls Cisco
-Cisco: ipv6/ip remote 192.31.7.104 local 192.203.80.142 ttl 32
-kuznet@amber:~ $
-\end{verbatim}
-The line starts with the tunnel device name followed by a colon.
-Then the tunnel mode follows. The parameters of the tunnel are listed
-with the same keywords that were used when creating the tunnel.
-
-\paragraph{Statistics:}
-
-\begin{verbatim}
-kuznet@amber:~ $ ip -s tunl ls Cisco
-Cisco: ipv6/ip remote 192.31.7.104 local 192.203.80.142 ttl 32
-RX: Packets Bytes Errors CsumErrs OutOfSeq Mcasts
- 12566 1707516 0 0 0 0
-TX: Packets Bytes Errors DeadLoop NoRoute NoBufs
- 13445 1879677 0 0 0 0
-kuznet@amber:~ $
-\end{verbatim}
-Essentially, these numbers are the same as the numbers
-printed with {\tt ip -s link show}
-(sec.\ref{IP-LINK-SHOW}, p.\pageref{IP-LINK-SHOW}) but the tags are different
-to reflect that they are tunnel specific.
-\begin{itemize}
-\item \verb|CsumErrs| --- the total number of packets dropped
-because of checksum failures for a GRE tunnel with checksumming enabled.
-\item \verb|OutOfSeq| --- the total number of packets dropped
-because they arrived out of sequence for a GRE tunnel with
-serialization enabled.
-\item \verb|Mcasts| --- the total number of multicast packets
-received on a broadcast GRE tunnel.
-\item \verb|DeadLoop| --- the total number of packets which were not
-transmitted because the tunnel is looped back to itself.
-\item \verb|NoRoute| --- the total number of packets which were not
-transmitted because there is no IP route to the remote endpoint.
-\item \verb|NoBufs| --- the total number of packets which were not
-transmitted because the kernel failed to allocate a buffer.
-\end{itemize}
-
-
-\section{{\tt ip monitor} and {\tt rtmon} --- state monitoring}
-\label{IP-MONITOR}
-
-The \verb|ip| utility can monitor the state of devices, addresses
-and routes continuously. This option has a slightly different format.
-Namely,
-the \verb|monitor| command is the first in the command line and then
-the object list follows:
-\begin{verbatim}
- ip monitor [ file FILE ] [ all | OBJECT-LIST ] [ label ]
-\end{verbatim}
-\verb|OBJECT-LIST| is the list of object types that we want to
-monitor. It may contain \verb|link|, \verb|address| and \verb|route|.
-Specifying \verb|label| indicates that output lines should be labelled
-with the type of object being printed --- this happens by default if
-\verb|all| is specified. If no \verb|file| argument is given,
-\verb|ip| opens RTNETLINK, listens on it and dumps state changes in
-the format described in previous sections.
-
-If a file name is given, it does not listen on RTNETLINK,
-but opens the file containing RTNETLINK messages saved in binary format
-and dumps them. Such a history file can be generated with the
-\verb|rtmon| utility. This utility has a command line syntax similar to
-\verb|ip monitor|.
-Ideally, \verb|rtmon| should be started before
-the first network configuration command is issued. F.e.\ if
-you insert:
-\begin{verbatim}
- rtmon file /var/log/rtmon.log
-\end{verbatim}
-in a startup script, you will be able to view the full history
-later.
-
-Certainly, it is possible to start \verb|rtmon| at any time.
-It prepends the history with the state snapshot dumped at the moment
-of starting.
-
-
-\section{Route realms and policy propagation, {\tt rtacct}}
-\label{RT-REALMS}
-
-On routers using OSPF ASE or, especially, the BGP protocol, routing
-tables may be huge. If we want to classify or to account for the packets
-per route, we will have to keep lots of information. Even worse, if we
-want to distinguish the packets not only by their destination, but
-also by their source, the task gets quadratic complexity and its solution
-is physically impossible.
-
-One approach to propagating the policy from routing protocols
-to the forwarding engine has been proposed in~\cite{IOS-BGP-PP}.
-Essentially, Cisco Policy Propagation via BGP is based on the fact
-that dedicated routers all have the RIB (Routing Information Base)
-close to the forwarding engine, so policy routing rules can
-check all the route attributes, including ASPATH information
-and community strings.
-
-The Linux architecture, splitting the RIB (maintained by a user level
-daemon) and the kernel based FIB (Forwarding Information Base),
-does not allow such a simple approach.
-
-It is to our fortune because there is another solution
-which allows even more flexible policy and richer semantics.
-
-Namely, routes can be clustered together in user space, based on their
-attributes. F.e.\ a BGP router knows route ASPATH, its community;
-an OSPF router knows the route tag or its area. The administrator, when adding
-routes manually, also knows their nature. Providing that the number of such
-aggregates (we call them {\em realms\/}) is low, the task of full
-classification both by source and destination becomes quite manageable.
-
-So each route may be assigned to a realm. It is assumed that
-this identification is made by a routing daemon, but static routes
-can also be handled manually with \verb|ip route| (see sec.\ref{IP-ROUTE},
-p.\pageref{IP-ROUTE}).
-\begin{NB}
- There is a patch to \verb|gated|, allowing classification of routes
- to realms with all the set of policy rules implemented in \verb|gated|:
- by prefix, by ASPATH, by origin, by tag etc.
-\end{NB}
-
-To facilitate the construction (f.e.\ in case the routing
-daemon is not aware of realms), missing realms may be completed
-with routing policy rules, see sec.~\ref{IP-RULE}, p.\pageref{IP-RULE}.
-
-For each packet the kernel calculates a tuple of realms: source realm
-and destination realm, using the following algorithm:
-
-\begin{enumerate}
-\item If the route has a realm, the destination realm of the packet is set to it.
-\item If the rule has a source realm, the source realm of the packet is set to it.
-If the destination realm was not inherited from the route and the rule has a destination realm,
-it is also set.
-\item If at least one of the realms is still unknown, the kernel finds
-the reversed route to the source of the packet.
-\item If the source realm is still unknown, get it from the reversed route.
-\item If one of the realms is still unknown, swap the realms of reversed
-routes and apply step 2 again.
-\end{enumerate}
-
-After this procedure is completed we know what realm the packet
-arrived from and the realm where it is going to propagate to.
-If some of the realms are unknown, they are initialized to zero
-(or realm \verb|unknown|).
-
-The main application of realms is the TC \verb|route| classifier~\cite{TC-CREF},
-where they are used to help assign packets to traffic classes,
-to account, police and schedule them according to this
-classification.
-
-A much simpler but still very useful application is incoming packet
-accounting by realms. The kernel gathers a packet statistics summary
-which can be viewed with the \verb|rtacct| utility.
-\begin{verbatim}
-kuznet@amber:~ $ rtacct russia
-Realm BytesTo PktsTo BytesFrom PktsFrom
-russia 20576778 169176 47080168 153805
-kuznet@amber:~ $
-\end{verbatim}
-This shows that this router received 153805 packets from
-the realm \verb|russia| and forwarded 169176 packets to \verb|russia|.
-The realm \verb|russia| consists of routes with ASPATHs not leaving
-Russia.
-
-Note that locally originating packets are not accounted here,
-\verb|rtacct| shows incoming packets only. Using the \verb|route|
-classifier (see~\cite{TC-CREF}) you can get even more detailed
-accounting information about outgoing packets, optionally
-summarizing traffic not only by source or destination, but
-by any pair of source and destination realms.
-
-
-\begin{thebibliography}{99}
-\addcontentsline{toc}{section}{References}
-\bibitem{RFC-NDISC} T.~Narten, E.~Nordmark, W.~Simpson.
-``Neighbor Discovery for IP Version 6 (IPv6)'', RFC-2461.
-
-\bibitem{RFC-ADDRCONF} S.~Thomson, T.~Narten.
-``IPv6 Stateless Address Autoconfiguration'', RFC-2462.
-
-\bibitem{RFC1812} F.~Baker.
-``Requirements for IP Version 4 Routers'', RFC-1812.
-
-\bibitem{RFC1122} R.~T.~Braden.
-``Requirements for Internet hosts --- communication layers'', RFC-1122.
-
-\bibitem{IOS} ``Cisco IOS Release 12.0 Network Protocols
-Command Reference, Part 1'' and
-``Cisco IOS Release 12.0 Quality of Service Solutions
-Configuration Guide: Configuring Policy-Based Routing'',\\
-http://www.cisco.com/univercd/cc/td/doc/product/software/ios120.
-
-\bibitem{IP-TUNNELS} A.~N.~Kuznetsov.
-``Tunnels over IP in Linux-2.2'', \\
-In: {\tt ftp://ftp.inr.ac.ru/ip-routing/iproute2-current.tar.gz}.
-
-\bibitem{TC-CREF} A.~N.~Kuznetsov. ``TC Command Reference'',\\
-In: {\tt ftp://ftp.inr.ac.ru/ip-routing/iproute2-current.tar.gz}.
-
-\bibitem{IOS-BGP-PP} ``Cisco IOS Release 12.0 Quality of Service Solutions
-Configuration Guide: Configuring QoS Policy Propagation via
-Border Gateway Protocol'',\\
-http://www.cisco.com/univercd/cc/td/doc/product/software/ios120.
-
-\bibitem{RFC-DHCP} R.~Droms.
-``Dynamic Host Configuration Protocol'', RFC-2131.
-
-\bibitem{RFC2414} M.~Allman, S.~Floyd, C.~Partridge.
-``Increasing TCP's Initial Window'', RFC-2414.
-
-\end{thebibliography}
-
-
-
-
-\appendix
-\addcontentsline{toc}{section}{Appendix}
-
-\section{Source address selection}
-\label{ADDR-SEL}
-
-When a host creates an IP packet, it must select some source
-address. Correct source address selection is a critical procedure,
-because it gives the receiver the information needed to deliver a
-reply. If the source is selected incorrectly, in the best case,
-the backward path may appear different to the forward one which
-is harmful for performance. In the worst case, when the addresses
-are administratively scoped, the reply may be lost entirely.
-
-Linux-2.2 selects source addresses using the following algorithm:
-
-\begin{itemize}
-\item
-The application may select a source address explicitly with \verb|bind(2)|
-syscall or supplying it to \verb|sendmsg(2)| via the ancillary data object
-\verb|IP_PKTINFO|. In this case the kernel only checks the validity
-of the address and never tries to ``improve'' an incorrect user choice,
-generating an error instead.
-\begin{NB}
- Never say ``Never''. The sysctl option \verb|ip_dynaddr| breaks
- this axiom. It has been made deliberately with the purpose
- of automatically reselecting the address on hosts with dynamic dial-out interfaces.
- However, this hack {\em must not\/} be used on multihomed hosts
- and especially on routers: it would break them.
-\end{NB}
-
-
-\item Otherwise, IP routing tables can contain an explicit source
-address hint for this destination. The hint is set with the \verb|src| parameter
-to the \verb|ip route| command, sec.\ref{IP-ROUTE}, p.\pageref{IP-ROUTE}.
-
-
-\item Otherwise, the kernel searches through the list of addresses
-attached to the interface through which the packets will be routed.
-The search strategies are different for IP and IPv6. Namely:
-
-\begin{itemize}
-\item IPv6 searches for the first valid, not deprecated address
-with the same scope as the destination.
-
-\item IP searches for the first valid address with a scope wider
-than the scope of the destination but it prefers addresses
-which fall to the same subnet as the nexthop of the route
-to the destination. Unlike IPv6, the scopes of IPv4 destinations
-are not encoded in their addresses but are supplied
-in routing tables instead (the \verb|scope| parameter to the \verb|ip route| command,
-sec.\ref{IP-ROUTE}, p.\pageref{IP-ROUTE}).
-
-\end{itemize}
-
-
-\item Otherwise, if the scope of the destination is \verb|link| or \verb|host|,
-the algorithm fails and returns a zero source address.
-
-\item Otherwise, all interfaces are scanned to search for an address
-with an appropriate scope. The loopback device \verb|lo| is always the first
-in the search list, so that if an address with global scope (not 127.0.0.1!)
-is configured on loopback, it is always preferred.
-
-\end{itemize}
-
-
-\section{Proxy ARP/NDISC}
-\label{PROXY-NEIGH}
-
-Routers may answer ARP/NDISC solicitations on behalf of other hosts.
-In Linux-2.2 proxy ARP on an interface may be enabled
-by setting the kernel \verb|sysctl| variable
-\verb|/proc/sys/net/ipv4/conf/<dev>/proxy_arp| to 1. After this, the router
-starts to answer ARP requests on the interface \verb|<dev>|, provided
-the route to the requested destination does {\em not\/} go back via the same
-device.
-
-The variable \verb|/proc/sys/net/ipv4/conf/all/proxy_arp| enables proxy
-ARP on all the IP devices.
-
-However, this approach fails in the case of IPv6 because the router
-must join the solicited node multicast address to listen for the corresponding
-NDISC queries. It means that proxy NDISC is possible only on a per destination
-basis.
-
-Logically, proxy ARP/NDISC is not a kernel task. It can easily be implemented
-in user space. However, similar functionality was present in BSD kernels
-and in Linux-2.0, so we have to preserve it at least to the extent that
-is standardized in BSD.
-\begin{NB}
- Linux-2.0 ARP had a feature called {\em subnet\/} proxy ARP.
- It is replaced with the sysctl flag in Linux-2.2.
-\end{NB}
-
-
-The \verb|ip| utility provides a way to manage proxy ARP/NDISC
-with the \verb|ip neigh| command, namely:
-\begin{verbatim}
- ip neigh add proxy ADDRESS [ dev NAME ]
-\end{verbatim}
-adds a new proxy ARP/NDISC record and
-\begin{verbatim}
- ip neigh del proxy ADDRESS [ dev NAME ]
-\end{verbatim}
-deletes it.
-
-If the name of the device is not given, the router will answer solicitations
-for address \verb|ADDRESS| on all devices, otherwise it will only serve
-the device \verb|NAME|. Even if the proxy entry is created with
-\verb|ip neigh|, the router {\em will not\/} answer a query if the route
-to the destination goes back via the interface from which the solicitation
-was received.
-
-It is important to emphasize that proxy entries have {\em no\/}
-parameters other than these (IP/IPv6 address and optional device).
-Particularly, the entry does not store any link layer address.
-It always advertises the station address of the interface
-on which it sends advertisements (i.e. its own station address).
-
-\section{Route NAT status}
-\label{ROUTE-NAT}
-
-NAT (or ``Network Address Translation'') remaps some parts
-of the IP address space into other ones. Linux-2.2 route NAT is supposed
-to be used to facilitate policy routing by rewriting addresses
-to other routing domains or to help while renumbering sites
-to another prefix.
-
-\paragraph{What it is not:}
-It is necessary to emphasize that {\em it is not supposed\/}
-to be used to compress address space or to split load.
-This is not missing functionality but a design principle.
-Route NAT is {\em stateless\/}. It does not hold any state
-about translated sessions. This means that it handles any number
-of sessions flawlessly. But it also means that it is {\em static\/}.
-It cannot detect the moment when the last TCP client stops
-using an address. For the same reason, it will not help to split
-load between several servers.
-\begin{NB}
-It is a pretty commonly held belief that it is useful to split load between
-several servers with NAT. This is a mistake. All you get from this
-is the requirement that the router keep the state of all the TCP connections
-going via it. Well, if the router is so powerful, run apache on it. 8)
-\end{NB}
-
-The second feature: it does not touch packet payload,
-does not try to ``improve'' broken protocols by looking
-through its data and mangling it. It mangles IP addresses,
-only IP addresses and nothing but IP addresses.
-This, too, is not missing functionality but a design principle.
-
-To summarize: if you need to compress address space or keep
-active FTP clients happy, your choice is not route NAT but masquerading,
-port forwarding, NAPT etc.
-\begin{NB}
-By the way, you may also want to look at
-http://www.suse.com/\~{}mha/HyperNews/get/linux-ip-nat.html
-\end{NB}
-
-
-\paragraph{How it works.}
-Some part of the address space is reserved for dummy addresses
-which will look for all the world like some host addresses
-inside your network. No other hosts may use these addresses,
-however other routers may also be configured to translate them.
-\begin{NB}
-A great advantage of route NAT is that it may be used not
-only in stub networks but in environments with arbitrarily complicated
-structure. It does not firewall, it {\em forwards.}
-\end{NB}
-These addresses are selected by the \verb|ip route| command
-(sec.\ref{IP-ROUTE-ADD}, p.\pageref{IP-ROUTE-ADD}). F.e.\
-\begin{verbatim}
- ip route add nat 192.203.80.144 via 193.233.7.83
-\end{verbatim}
-states that the single address 192.203.80.144 is a dummy NAT address.
-For all the world it looks like a host address inside our network.
-For neighbouring hosts and routers it looks like the local address
-of the translating router. The router answers ARP for it, advertises
-this address as routed via it, {\em et al\/}. When the router
-receives a packet destined for 192.203.80.144, it replaces
-this address with 193.233.7.83 which is the address of some real
-host and forwards the packet. If you need to remap
-blocks of addresses, you may use a command like:
-\begin{verbatim}
- ip route add nat 192.203.80.192/26 via 193.233.7.64
-\end{verbatim}
-This command will map a block of 64 addresses 192.203.80.192-255 to
-193.233.7.64-127.
-
-When an internal host (193.233.7.83 in the example above)
-sends something to the outer world and these packets are forwarded
-by our router, it should translate the source address 193.233.7.83
-into 192.203.80.144. This task is solved by setting a special
-policy rule (sec.\ref{IP-RULE-ADD}, p.\pageref{IP-RULE-ADD}):
-\begin{verbatim}
- ip rule add prio 320 from 193.233.7.83 nat 192.203.80.144
-\end{verbatim}
-This rule says that the source address 193.233.7.83
-should be translated into 192.203.80.144 before forwarding.
-It is important that the address after the \verb|nat| keyword
-is some NAT address, declared by {\tt ip route add nat}.
-If it is just a random address the router will not map to it.
-\begin{NB}
-The exception is when the address is a local address of this
-router (or 0.0.0.0) and masquerading is configured in the linux-2.2
-kernel. In this case the router will masquerade the packets as this address.
-If 0.0.0.0 is selected, the result is equivalent to one
-obtained with firewalling rules. Otherwise, you have the way
-to order Linux to masquerade to this fixed address.
-The NAT mechanism used in linux-2.4 is more flexible than
-masquerading, so this feature has lost its meaning and is disabled.
-\end{NB}
-
-If the network has non-trivial internal structure, it is
-useful and even necessary to add rules disabling translation
-when a packet does not leave this network. Let us return to the
-example from sec.\ref{IP-RULE-SHOW} (p.\pageref{IP-RULE-SHOW}).
-\begin{verbatim}
-300: from 193.233.7.83 to 193.233.7.0/24 lookup main
-310: from 193.233.7.83 to 192.203.80.0/24 lookup main
-320: from 193.233.7.83 lookup inr.ruhep map-to 192.203.80.144
-\end{verbatim}
-This block of rules causes normal forwarding when
-packets from 193.233.7.83 do not leave networks 193.233.7/24
-and 192.203.80/24. Also, if the \verb|inr.ruhep| table does not
-contain a route to the destination (which means that the routing
-domain owning addresses from 192.203.80/24 is dead), no translation
-will occur. Otherwise, the packets are translated.
-
-\paragraph{How to only translate selected ports:}
-If you only want to translate selected ports (f.e.\ http)
-and leave the rest intact, you may use \verb|ipchains|
-to \verb|fwmark| a class of packets.
-Suppose you did and all the packets from 193.233.7.83
-destined for port 80 are marked with marker 0x1234 in input fwchain.
-In this case you may replace rule \#320 with:
-\begin{verbatim}
-320: from 193.233.7.83 fwmark 1234 lookup main map-to 192.203.80.144
-\end{verbatim}
-and translation will only be enabled for outgoing http requests.
-
-\section{Example: minimal host setup}
-\label{EXAMPLE-SETUP}
-
-The following script gives an example of a fault safe
-setup of IP (and IPv6, if it is compiled into the kernel)
-in the common case of a node attached to a single broadcast
-network. A more advanced script, which may be used both on multihomed
-hosts and on routers, is described in the following
-section.
-
-The utilities used in the script may be found in the
-directory ftp://ftp.inr.ac.ru/ip-routing/:
-\begin{enumerate}
-\item \verb|ip| --- package \verb|iproute2|.
-\item \verb|arping| --- package \verb|iputils|.
-\item \verb|rdisc| --- package \verb|iputils|.
-\end{enumerate}
-\begin{NB}
-It also refers to a DHCP client, \verb|dhcpcd|. I should refrain from
-recommending a good DHCP client to use. All that I can
-say is that ISC \verb|dhcp-2.0b1pl6| patched with the patch that
-can be found in the \verb|dhcp.bootp.rarp| subdirectory of
-the same ftp site {\em does\/} work,
-at least on Ethernet and Token Ring.
-\end{NB}
-
-\begin{verbatim}
-#! /bin/bash
-\end{verbatim}
-\begin{flushleft}
-\# {\bf Usage: \verb|ifone ADDRESS[/PREFIX-LENGTH] [DEVICE]|}\\
-\# {\bf Parameters:}\\
-\# \$1 --- Static IP address, optionally followed by prefix length.\\
-\# \$2 --- Device name. If it is missing, \verb|eth0| is assumed.\\
-\# F.e. \verb|ifone 193.233.7.90|
-\end{flushleft}
-\begin{verbatim}
-dev=$2
-: ${dev:=eth0}
-ipaddr=
-\end{verbatim}
-\# Parse IP address, splitting prefix length.
-\begin{verbatim}
-if [ "$1" != "" ]; then
- ipaddr=${1%/*}
- if [ "$1" != "$ipaddr" ]; then
- pfxlen=${1#*/}
- fi
- : ${pfxlen:=24}
-fi
-pfx="${ipaddr}/${pfxlen}"
-\end{verbatim}
-
-\begin{flushleft}
-\# {\bf Step 0} --- enable loopback.\\
-\#\\
-\# This step is necessary on any networked box before attempting\\
-\# to configure any other device.\\
-\end{flushleft}
-\begin{verbatim}
-ip link set up dev lo
-ip addr add 127.0.0.1/8 dev lo brd + scope host
-\end{verbatim}
-\begin{flushleft}
-\# IPv6 autoconfigures itself on loopback.\\
-\#\\
-\# If user gave loopback as device, we add the address as alias and exit.
-\end{flushleft}
-\begin{verbatim}
-if [ "$dev" = "lo" ]; then
- if [ "$ipaddr" != "" -a "$ipaddr" != "127.0.0.1" ]; then
- ip address add $ipaddr dev $dev
- exit $?
- fi
- exit 0
-fi
-\end{verbatim}
-
-\noindent\# {\bf Step 1} --- enable device \verb|$dev|
-
-\begin{verbatim}
-if ! ip link set up dev $dev ; then
- echo "Cannot enable interface $dev. Aborting." 1>&2
- exit 1
-fi
-\end{verbatim}
-\begin{flushleft}
-\# The interface is \verb|UP|. IPv6 started stateless autoconfiguration itself,\\
-\# and its configuration finishes here. However,\\
-\# IP still needs some static preconfigured address.
-\end{flushleft}
-\begin{verbatim}
-if [ "$ipaddr" = "" ]; then
- echo "No address for $dev is configured, trying DHCP..." 1>&2
- dhcpcd
- exit $?
-fi
-\end{verbatim}
-
-\begin{flushleft}
-\# {\bf Step 2} --- IP Duplicate Address Detection~\cite{RFC-DHCP}.\\
-\# Send two probes and wait for result for 3 seconds.\\
-\# If the interface opens slower f.e.\ due to long media detection,\\
-\# you want to increase the timeout.\\
-\end{flushleft}
-\begin{verbatim}
-if ! arping -q -c 2 -w 3 -D -I $dev $ipaddr ; then
- echo "Address $ipaddr is busy, trying DHCP..." 1>&2
- dhcpcd
- exit $?
-fi
-\end{verbatim}
-\begin{flushleft}
-\# OK, the address is unique, we may add it on the interface.\\
-\#\\
-\# {\bf Step 3} --- Configure the address on the interface.
-\end{flushleft}
-
-\begin{verbatim}
-if ! ip address add $pfx brd + dev $dev; then
- echo "Failed to add $pfx on $dev, trying DHCP..." 1>&2
- dhcpcd
- exit $?
-fi
-\end{verbatim}
-
-\noindent\# {\bf Step 4} --- Announce our presence on the link.
-\begin{verbatim}
-arping -A -c 1 -I $dev $ipaddr
-noarp=$?
-( sleep 2;
- arping -U -c 1 -I $dev $ipaddr ) >& /dev/null </dev/null &
-\end{verbatim}
-
-\begin{flushleft}
-\# {\bf Step 5} (optional) --- Add some control routes.\\
-\#\\
-\# 1. Prohibit link local multicast addresses.\\
-\# 2. Prohibit link local (alias, limited) broadcast.\\
-\# 3. Add default multicast route.
-\end{flushleft}
-\begin{verbatim}
-ip route add unreachable 224.0.0.0/24
-ip route add unreachable 255.255.255.255
-if [ `ip link ls $dev | grep -c MULTICAST` -ge 1 ]; then
- ip route add 224.0.0.0/4 dev $dev scope global
-fi
-\end{verbatim}
-
-\begin{flushleft}
-\# {\bf Step 6} --- Add fallback default route with huge metric.\\
-\# If a proxy ARP server is present on the interface, we will be\\
-\# able to talk to all the Internet without further configuration.\\
-\# It is not so cheap though and we still hope that this route\\
-\# will be overridden by more correct one by rdisc.\\
-\# Do not make this step if the device is not ARPable,\\
-\# because dead nexthop detection does not work on them.
-\end{flushleft}
-\begin{verbatim}
-if [ "$noarp" = "0" ]; then
- ip ro add default dev $dev metric 30000 scope global
-fi
-\end{verbatim}
-
-\begin{flushleft}
-\# {\bf Step 7} --- Restart router discovery and exit.
-\end{flushleft}
-\begin{verbatim}
-killall -HUP rdisc || rdisc -fs
-exit 0
-\end{verbatim}
-
-
-\section{Example: {\protect\tt ifcfg} --- interface address management}
-\label{EXAMPLE-IFCFG}
-
-This is a simplistic script replacing one option of \verb|ifconfig|,
-namely, IP address management. It not only adds
-addresses, but also carries out Duplicate Address Detection~\cite{RFC-DHCP},
-sends unsolicited ARP to update the caches of other hosts sharing
-the interface, adds some control routes and restarts Router Discovery
-when it is necessary.
-
-I strongly recommend using it {\em instead\/} of \verb|ifconfig| both
-on hosts and on routers.
-
-\begin{verbatim}
-#! /bin/bash
-\end{verbatim}
-\begin{flushleft}
-\# {\bf Usage: \verb?ifcfg DEVICE[:ALIAS] [add|del] ADDRESS[/LENGTH] [PEER]?}\\
-\# {\bf Parameters:}\\
-\# ---Device name. It may have alias suffix, separated by colon.\\
-\# ---Command: add, delete or stop.\\
-\# ---IP address, optionally followed by prefix length.\\
-\# ---Optional peer address for pointopoint interfaces.\\
-\# F.e. \verb|ifcfg eth0 193.233.7.90/24|
-
-\noindent\# This function determines whether it is a router or a host.\\
-\# It returns 0 if the host is apparently not a router.
-\end{flushleft}
-\begin{verbatim}
-CheckForwarding () {
- local sbase fwd
- sbase=/proc/sys/net/ipv4/conf
- fwd=0
- if [ -d $sbase ]; then
- for dir in $sbase/*/forwarding; do
- fwd=$[$fwd + `cat $dir`]
- done
- else
- fwd=2
- fi
- return $fwd
-}
-\end{verbatim}
-\begin{flushleft}
-\# This function restarts Router Discovery.\\
-\end{flushleft}
-\begin{verbatim}
-RestartRDISC () {
- killall -HUP rdisc || rdisc -fs
-}
-\end{verbatim}
-\begin{flushleft}
-\# Calculate ABC "natural" mask length\\
-\# Arg: \$1 = dotquad address
-\end{flushleft}
-\begin{verbatim}
-ABCMaskLen () {
- local class;
- class=${1%%.*}
- if [ $class -eq 0 -o $class -ge 224 ]; then return 0
- elif [ $class -ge 192 ]; then return 24
- elif [ $class -ge 128 ]; then return 16
- else return 8 ; fi
-}
-\end{verbatim}
-
-
-\begin{flushleft}
-\# {\bf MAIN()}\\
-\#\\
-\# Strip alias suffix separated by colon.
-\end{flushleft}
-\begin{verbatim}
-label="label $1"
-ldev=$1
-dev=${1%:*}
-if [ "$dev" = "" -o "$1" = "help" ]; then
- echo "Usage: ifcfg DEV [[add|del [ADDR[/LEN]] [PEER] | stop]" 1>&2
- echo " add - add new address" 1>&2
- echo " del - delete address" 1>&2
- echo " stop - completely disable IP" 1>&2
- exit 1
-fi
-shift
-
-CheckForwarding
-fwd=$?
-\end{verbatim}
-\begin{flushleft}
-\# Parse command. If it is ``stop'', flush and exit.
-\end{flushleft}
-\begin{verbatim}
-deleting=0
-case "$1" in
-add) shift ;;
-stop)
- if [ "$ldev" != "$dev" ]; then
- echo "Cannot stop alias $ldev" 1>&2
- exit 1;
- fi
- ip -4 addr flush dev $dev $label || exit 1
- if [ $fwd -eq 0 ]; then RestartRDISC; fi
- exit 0 ;;
-del*)
- deleting=1; shift ;;
-*)
-esac
-\end{verbatim}
-\begin{flushleft}
-\# Parse prefix, split prefix length, separated by slash.
-\end{flushleft}
-\begin{verbatim}
-ipaddr=
-pfxlen=
-if [ "$1" != "" ]; then
- ipaddr=${1%/*}
- if [ "$1" != "$ipaddr" ]; then
- pfxlen=${1#*/}
- fi
- if [ "$ipaddr" = "" ]; then
- echo "$1 is bad IP address." 1>&2
- exit 1
- fi
-fi
-shift
-\end{verbatim}
-\begin{flushleft}
-\# If peer address is present, prefix length is 32.\\
-\# Otherwise, if prefix length was not given, guess it.
-\end{flushleft}
-\begin{verbatim}
-peer=$1
-if [ "$peer" != "" ]; then
- if [ "$pfxlen" != "" -a "$pfxlen" != "32" ]; then
- echo "Peer address with non-trivial netmask." 1>&2
- exit 1
- fi
- pfx="$ipaddr peer $peer"
-else
- if [ "$pfxlen" = "" ]; then
- ABCMaskLen $ipaddr
- pfxlen=$?
- fi
- pfx="$ipaddr/$pfxlen"
-fi
-if [ "$ldev" = "$dev" -a "$ipaddr" != "" ]; then
- label=
-fi
-\end{verbatim}
-\begin{flushleft}
-\# If deletion was requested, delete the address and restart RDISC
-\end{flushleft}
-\begin{verbatim}
-if [ $deleting -ne 0 ]; then
- ip addr del $pfx dev $dev $label || exit 1
- if [ $fwd -eq 0 ]; then RestartRDISC; fi
- exit 0
-fi
-\end{verbatim}
-\begin{flushleft}
-\# Start interface initialization.\\
-\#\\
-\# {\bf Step 0} --- enable device \verb|$dev|
-\end{flushleft}
-\begin{verbatim}
-if ! ip link set up dev $dev ; then
- echo "Error: cannot enable interface $dev." 1>&2
- exit 1
-fi
-if [ "$ipaddr" = "" ]; then exit 0; fi
-\end{verbatim}
-\begin{flushleft}
-\# {\bf Step 1} --- IP Duplicate Address Detection~\cite{RFC-DHCP}.\\
-\# Send two probes and wait for result for 3 seconds.\\
-\# If the interface opens slower f.e.\ due to long media detection,\\
-\# you want to increase the timeout.\\
-\end{flushleft}
-\begin{verbatim}
-if ! arping -q -c 2 -w 3 -D -I $dev $ipaddr ; then
- echo "Error: some host already uses address $ipaddr on $dev." 1>&2
- exit 1
-fi
-\end{verbatim}
-\begin{flushleft}
-\# OK, the address is unique. We may add it to the interface.\\
-\#\\
-\# {\bf Step 2} --- Configure the address on the interface.
-\end{flushleft}
-\begin{verbatim}
-if ! ip address add $pfx brd + dev $dev $label; then
- echo "Error: failed to add $pfx on $dev." 1>&2
- exit 1
-fi
-\end{verbatim}
-\noindent\# {\bf Step 3} --- Announce our presence on the link
-\begin{verbatim}
-arping -q -A -c 1 -I $dev $ipaddr
-noarp=$?
-( sleep 2 ;
- arping -q -U -c 1 -I $dev $ipaddr ) >& /dev/null </dev/null &
-\end{verbatim}
-\begin{flushleft}
-\# {\bf Step 4} (optional) --- Add some control routes.\\
-\#\\
-\# 1. Prohibit link local multicast addresses.\\
-\# 2. Prohibit link local (alias, limited) broadcast.\\
-\# 3. Add default multicast route.
-\end{flushleft}
-\begin{verbatim}
-ip route add unreachable 224.0.0.0/24 >& /dev/null
-ip route add unreachable 255.255.255.255 >& /dev/null
-if [ `ip link ls $dev | grep -c MULTICAST` -ge 1 ]; then
- ip route add 224.0.0.0/4 dev $dev scope global >& /dev/null
-fi
-\end{verbatim}
-\begin{flushleft}
-\# {\bf Step 5} --- Add fallback default route with huge metric.\\
-\# If a proxy ARP server is present on the interface, we will be\\
-\# able to talk to all the Internet without further configuration.\\
-\# Do not make this step on a router or if the device is not ARPable,\\
-\# because dead nexthop detection does not work on them.
-\end{flushleft}
-\begin{verbatim}
-if [ $fwd -eq 0 ]; then
- if [ $noarp -eq 0 ]; then
- ip ro append default dev $dev metric 30000 scope global
- elif [ "$peer" != "" ]; then
- if ping -q -c 2 -w 4 $peer ; then
- ip ro append default via $peer dev $dev metric 30001
- fi
- fi
- RestartRDISC
-fi
-
-exit 0
-\end{verbatim}
-\begin{flushleft}
-\# End of {\bf MAIN()}
-\end{flushleft}
-
-
-\end{document}
diff --git a/doc/ip-tunnels.tex b/doc/ip-tunnels.tex
deleted file mode 100644
index 0a8c930c..00000000
--- a/doc/ip-tunnels.tex
+++ /dev/null
@@ -1,469 +0,0 @@
-\documentstyle[12pt,twoside]{article}
-\def\TITLE{Tunnels over IP}
-\input preamble
-\begin{center}
-\Large\bf Tunnels over IP in Linux-2.2
-\end{center}
-
-
-\begin{center}
-{ \large Alexey~N.~Kuznetsov } \\
-\em Institute for Nuclear Research, Moscow \\
-\verb|kuznet@ms2.inr.ac.ru| \\
-\rm March 17, 1999
-\end{center}
-
-\vspace{5mm}
-
-\tableofcontents
-
-
-\section{Instead of introduction: micro-FAQ.}
-
-\begin{itemize}
-
-\item
-Q: In linux-2.0.36 I used:
-\begin{verbatim}
- ifconfig tunl1 10.0.0.1 pointopoint 193.233.7.65
-\end{verbatim}
-to create tunnel. It does not work in 2.2.0!
-
-A: You are right, it does not work. The command written above is split into two commands.
-\begin{verbatim}
- ip tunnel add MY-TUNNEL mode ipip remote 193.233.7.65
-\end{verbatim}
-will create tunnel device with name \verb|MY-TUNNEL|. Now you may configure
-it with:
-\begin{verbatim}
- ifconfig MY-TUNNEL 10.0.0.1
-\end{verbatim}
-Certainly, if you prefer name \verb|tunl1| to \verb|MY-TUNNEL|,
-you still may use it.
-
-\item
-Q: In linux-2.0.36 I used:
-\begin{verbatim}
- ifconfig tunl0 10.0.0.1
- route add -net 10.0.0.0 gw 193.233.7.65 dev tunl0
-\end{verbatim}
-to tunnel net 10.0.0.0 via router 193.233.7.65. It does not
-work in 2.2.0! Moreover, \verb|route| prints a funny error sort of
-``network unreachable'' and after this I found a strange direct route
-to 10.0.0.0 via \verb|tunl0| in routing table.
-
-A: Yes, in 2.2 the rule that {\em normal} gateway must reside on directly
-connected network has not any exceptions. You may tell kernel, that
-this particular route is {\em abnormal}:
-\begin{verbatim}
- ifconfig tunl0 10.0.0.1 netmask 255.255.255.255
- ip route add 10.0.0.0/8 via 193.233.7.65 dev tunl0 onlink
-\end{verbatim}
-Note keyword \verb|onlink|, it is the magic key that orders kernel
-not to check for consistency of gateway address.
-Probably, after this explanation you have already guessed another method
-to cheat kernel:
-\begin{verbatim}
- ifconfig tunl0 10.0.0.1 netmask 255.255.255.255
- route add -host 193.233.7.65 dev tunl0
- route add -net 10.0.0.0 netmask 255.0.0.0 gw 193.233.7.65
- route del -host 193.233.7.65 dev tunl0
-\end{verbatim}
-Well, if you like such tricks, nobody may prohibit you to use them.
-Only do not forget
-that between \verb|route add| and \verb|route del| host 193.233.7.65 is
-unreachable.
-
-\item
-Q: In 2.0.36 I used to load \verb|tunnel| device module and \verb|ipip| module.
-I cannot find any \verb|tunnel| in 2.2!
-
-A: Linux-2.2 has single module \verb|ipip| for both directions of tunneling
-and for all IPIP tunnel devices.
-
-\item
-Q: \verb|traceroute| does not work over tunnel! Well, stop... It works,
- only skips some number of hops.
-
-A: Yes. By default tunnel driver copies \verb|ttl| value from
-inner packet to outer one. It means that path traversed by tunneled
-packets to another endpoint is not hidden. If you dislike this, or if you
-are going to use some routing protocol expecting that packets
-with ttl 1 will reach peering host (f.e.\ RIP, OSPF or EBGP)
-and you are not afraid of
-tunnel loops, you may append option \verb|ttl 64|, when creating tunnel
-with \verb|ip tunnel add|.
-
-\item
-Q: ... Well, list of things, which 2.0 was able to do finishes.
-
-\end{itemize}
-
-\paragraph{Summary of differences between 2.2 and 2.0.}
-
-\begin{itemize}
-
-\item {\bf In 2.0} you could compile tunnel device into kernel
- and got set of 4 devices \verb|tunl0| ... \verb|tunl3| or,
- alternatively, compile it as module and load new module
- for each new tunnel. Also, module \verb|ipip| was necessary
- to receive tunneled packets.
-
- {\bf 2.2} has {\em one\/} module \verb|ipip|. Loading it you get base
- tunnel device \verb|tunl0| and another tunnels may be created with command
- \verb|ip tunnel add|. These new devices may have arbitrary names.
-
-
-\item {\bf In 2.0} you set remote tunnel endpoint address with
- the command \verb|ifconfig| ... \verb|pointopoint A|.
-
- {\bf In 2.2} this command has the same semantics on all
- the interfaces, namely it sets not tunnel endpoint,
- but address of peering host, which is directly reachable
- via this tunnel,
- rather than via Internet. Actual tunnel endpoint address \verb|A|
- should be set with \verb|ip tunnel add ... remote A|.
-
-\item {\bf In 2.0} you create tunnel routes with the command:
-\begin{verbatim}
- route add -net 10.0.0.0 gw A dev tunl0
-\end{verbatim}
-
- {\bf 2.2} interprets this command equally for all device
- kinds and gateway is required to be directly reachable via this tunnel,
- rather than via Internet. You still may use \verb|ip route add ... onlink|
- to override this behaviour.
-
-\end{itemize}
-
-
-\section{Tunnel setup: basics}
-
-Standard Linux-2.2 kernel supports three flavors of tunnels,
-listed in the following table:
-\vspace{2mm}
-
-\begin{tabular}{lll}
-\vrule depth 0.8ex width 0pt\relax
-Mode & Description & Base device \\
-ipip & IP over IP & tunl0 \\
-sit & IPv6 over IP & sit0 \\
-gre & ANY over GRE over IP & gre0
-\end{tabular}
-
-\vspace{2mm}
-
-\noindent All the kinds of tunnels are created with one command:
-\begin{verbatim}
- ip tunnel add <NAME> mode <MODE> [ local <S> ] [ remote <D> ]
-\end{verbatim}
-
-This command creates new tunnel device with name \verb|<NAME>|.
-The \verb|<NAME>| is an arbitrary string. Particularly,
-it may be even \verb|eth0|. The rest of parameters set
-different tunnel characteristics.
-
-\begin{itemize}
-
-\item
-\verb|mode <MODE>| sets tunnel mode. Three modes are available now
- \verb|ipip|, \verb|sit| and \verb|gre|.
-
-\item
-\verb|remote <D>| sets remote endpoint of the tunnel to IP
- address \verb|<D>|.
-\item
-\verb|local <S>| sets fixed local address for tunneled
- packets. It must be an address on another interface of this host.
-
-\end{itemize}
-
-\let\thefootnote\oldthefootnote
-
-Both \verb|remote| and \verb|local| may be omitted. In this case we
-say that they are zero or wildcard. Two tunnels of one mode cannot
-have the same \verb|remote| and \verb|local|. Particularly it means
-that base device or fallback tunnel cannot be replicated.\footnote{
-This restriction is relaxed for keyed GRE tunnels.}
-
-Tunnels are divided into two classes: {\bf pointopoint} tunnels, which
-have a non-wildcard \verb|remote| address and deliver all the packets
-to this destination, and {\bf NBMA} (i.e. Non-Broadcast Multi-Access) tunnels,
-which have no \verb|remote|. Particularly, base devices (f.e.\ \verb|tunl0|)
-are NBMA, because they have neither \verb|remote| nor
-\verb|local| addresses.
-
-
-After a tunnel device is created you should configure it as you would
-any other device. Certainly, the configuration of tunnels has
-some features related to the fact that they work over existing Internet
-routing infrastructure and simultaneously create new virtual links,
-which changes this infrastructure. The danger that not enough careful
-tunnel setup will result in formation of tunnel loops,
-collapse of routing or flooding network with exponentially
-growing number of tunneled fragments is very real.
-
-
-Protocol setup on pointopoint tunnels does not differ from configuration
-of other devices. You should set a protocol address with \verb|ifconfig|
-and add routes with \verb|route| utility.
-
-NBMA tunnels are different. To route something via NBMA tunnel
-you have to explain to driver, where it should deliver packets to.
-The only way to make it is to create special routes with gateway
-address pointing to desired endpoint. F.e.\
-\begin{verbatim}
- ip route add 10.0.0.0/24 via <A> dev tunl0 onlink
-\end{verbatim}
-It is important to use option \verb|onlink|, otherwise
-kernel will refuse request to create route via gateway not directly
-reachable over device \verb|tunl0|. With IPv6 the situation is much simpler:
-when you start device \verb|sit0|, it automatically configures itself
-with all IPv4 addresses mapped to IPv6 space, so that all IPv4
-Internet is {\em really reachable} via \verb|sit0|! Excellent, the command
-\begin{verbatim}
- ip route add 3FFE::/16 via ::193.233.7.65 dev sit0
-\end{verbatim}
-will route \verb|3FFE::/16| via \verb|sit0|, sending all the packets
-destined to this prefix to 193.233.7.65.
-
-\section{Tunnel setup: options}
-
-Command \verb|ip tunnel add| has several additional options.
-\begin{itemize}
-
-\item \verb|ttl N| --- set fixed TTL \verb|N| on tunneled packets.
- \verb|N| is number in the range 1--255. 0 is special value,
- meaning that packets inherit TTL value.
- Default value is: \verb|inherit|.
-
-\item \verb|tos T| --- set fixed tos \verb|T| on tunneled packets.
- Default value is: \verb|inherit|.
-
-\item \verb|dev DEV| --- bind tunnel to device \verb|DEV|, so that
- tunneled packets will be routed only via this device and will
- not be able to escape to another device, when route to endpoint changes.
-
-\item \verb|nopmtudisc| --- disable Path MTU Discovery on this tunnel.
- It is enabled by default. Note that fixed ttl is incompatible
- with this option: tunnels with fixed ttl always make pmtu discovery.
-
-\end{itemize}
-
-\verb|ipip| and \verb|sit| tunnels have no more options. \verb|gre|
-tunnels are more complicated:
-
-\begin{itemize}
-
-\item \verb|key K| --- use keyed GRE with key \verb|K|. \verb|K| is
- either number or IP address-like dotted quad.
-
-\item \verb|csum| --- checksum tunneled packets.
-
-\item \verb|seq| --- serialize packets.
-\begin{NB}
- I think this option does not
- work. At least, I did not test it, did not debug it and
- even do not understand, how it is supposed to work and for what
- purpose Cisco planned to use it.
-\end{NB}
-
-\end{itemize}
-
-
-Actually, these GRE options can be set separately for input and
-output directions by prefixing corresponding keywords with letter
-\verb|i| or \verb|o|. F.e.\ \verb|icsum| orders to accept only
-packets with correct checksum and \verb|ocsum| means, that
-our host will calculate and send checksum.
-
-Command \verb|ip tunnel add| is not the only operation,
-which can be made with tunnels. Certainly, you may get short help page
-with:
-\begin{verbatim}
- ip tunnel help
-\end{verbatim}
-
-Besides that, you may view list of installed tunnels with the help of command:
-\begin{verbatim}
- ip tunnel ls
-\end{verbatim}
-Also you may look at statistics:
-\begin{verbatim}
- ip -s tunnel ls Cisco
-\end{verbatim}
-where \verb|Cisco| is name of tunnel device. Command
-\begin{verbatim}
- ip tunnel del Cisco
-\end{verbatim}
-destroys tunnel \verb|Cisco|. And, finally,
-\begin{verbatim}
- ip tunnel change Cisco mode sit local ME remote HE ttl 32
-\end{verbatim}
-changes its parameters.
-
-\section{Differences between 2.2 and 2.0 tunnels revisited.}
-
-Now we can discuss more subtle differences between tunneling in 2.0
-and 2.2.
-
-\begin{itemize}
-
-\item In 2.0 all tunneled packets were received promiscuously
-as soon as you loaded module \verb|ipip|. 2.2 tries to select the best
-tunnel device and packet looks as received on this. F.e.\ if host
-received \verb|ipip| packet from host \verb|D| destined to our
-local address \verb|S|, kernel searches for matching tunnels
-in order:
-
-\begin{tabular}{ll}
-1 & \verb|remote| is \verb|D| and \verb|local| is \verb|S| \\
-2 & \verb|remote| is \verb|D| and \verb|local| is wildcard \\
-3 & \verb|remote| is wildcard and \verb|local| is \verb|S| \\
-4 & \verb|tunl0|
-\end{tabular}
-
-If tunnel exists, but it is not in \verb|UP| state, the tunnel is ignored.
-Note, that if \verb|tunl0| is \verb|UP| it receives all the IPIP packets,
-not acknowledged by more specific tunnels.
-Be careful, it means that without carefully installed firewall rules
-anyone on the Internet may inject to your network any packets with
-source addresses indistinguishable from local ones. It is not so bad idea
-to design tunnels in the way enforcing maximal route symmetry
-and to enable reversed path filter (\verb|rp_filter| sysctl option) on
-tunnel devices.
-
-\item In 2.2 you can monitor and debug tunnels with \verb|tcpdump|.
-F.e.\ \verb|tcpdump| \verb|-i Cisco| \verb|-nvv| will dump packets,
-which kernel output, via tunnel \verb|Cisco| and the packets received on it
-from kernel viewpoint.
-
-\end{itemize}
-
-
-\section{Linux and Cisco IOS tunnels.}
-
-Among other tunnel types, Cisco IOS supports IPIP and GRE.
-Essentially, Cisco setup is subset of options, available for Linux.
-Let us consider the simplest example:
-
-\begin{verbatim}
-interface Tunnel0
- tunnel mode gre ip
- tunnel source 10.10.14.1
- tunnel destination 10.10.13.2
-\end{verbatim}
-
-
-This command set translates to:
-
-\begin{verbatim}
- ip tunnel add Tunnel0 \
- mode gre \
- local 10.10.14.1 \
- remote 10.10.13.2
-\end{verbatim}
-
-Any questions? No questions.
-
-\section{Interaction of IPIP tunnels and DVMRP.}
-
-DVMRP exploits IPIP tunnels to route multicasts via Internet.
-\verb|mrouted| creates
-IPIP tunnels listed in its configuration file automatically.
-From kernel and user viewpoints there are no differences between
-tunnels, created in this way, and tunnels created by \verb|ip tunnel|.
-I.e.\ if \verb|mrouted| created some tunnel, it may be used to
-route unicast packets, provided appropriate routes are added.
-And vice versa, if administrator has already created a tunnel,
-it will be reused by \verb|mrouted|, if it requests DVMRP
-tunnel with the same local and remote addresses.
-
-Do not wonder, if your manually configured tunnel is
-destroyed, when mrouted exits.
-
-
-\section{Broadcast GRE ``tunnels''.}
-
-It is possible to set \verb|remote| for GRE tunnel to a multicast
-address. Such tunnel becomes {\bf broadcast} tunnel (though word
-tunnel is not quite appropriate in this case, it is rather virtual network).
-\begin{verbatim}
- ip tunnel add Universe local 193.233.7.65 \
- remote 224.66.66.66 ttl 16
- ip addr add 10.0.0.1/16 dev Universe
- ip link set Universe up
-\end{verbatim}
-This tunnel is true broadcast network and broadcast packets are
-sent to multicast group 224.66.66.66. By default such tunnel starts
-to resolve both IP and IPv6 addresses via ARP/NDISC, so that
-if multicast routing is supported in surrounding network, all GRE nodes
-will find one another automatically and will form virtual Ethernet-like
-broadcast network. If multicast routing does not work, it is unpleasant
-but not fatal flaw. The tunnel becomes NBMA rather than broadcast network.
-You may disable dynamic ARPing by:
-\begin{verbatim}
- echo 0 > /proc/sys/net/ipv4/neigh/Universe/mcast_solicit
-\end{verbatim}
-and to add required information to ARP tables manually:
-\begin{verbatim}
- ip neigh add 10.0.0.2 lladdr 128.6.190.2 dev Universe nud permanent
-\end{verbatim}
-In this case packets sent to 10.0.0.2 will be encapsulated in GRE
-and sent to 128.6.190.2. It is possible to facilitate address resolution
-using methods typical for other NBMA networks, f.e.\ to start user
-level \verb|arpd| daemon, which will maintain database of hosts attached
-to GRE virtual network, or to ask a dedicated ARP or NHRP server
-for information.
-
-
-Actually, such setup is the most natural for tunneling,
-it is really flexible, scalable and easily manageable, so that
-it is strongly recommended to be used with GRE tunnels instead of ugly
-hack with NBMA mode and \verb|onlink| modifier. Unfortunately,
-for historical reasons broadcast mode is not supported by IPIP tunnels,
-but this probably will change in future.
-
-
-
-\section{Traffic control issues.}
-
-Tunnels are devices, hence all the power of Linux traffic control
-applies to them. The simplest (and the most useful in practice)
-example is limiting tunnel bandwidth. The following command:
-\begin{verbatim}
- tc qdisc add dev tunl0 root tbf \
- rate 128Kbit burst 4K limit 10K
-\end{verbatim}
-will limit tunneled traffic to 128Kbit with maximal burst size of 4K
-and queuing not more than 10K.
-
-However, you should remember, that tunnels are {\em virtual} devices
-implemented in software and true queue management is impossible for them
-just because they have no queues. Instead, it is better to create classes
-on real physical interfaces and to map tunneled packets to them.
-In general case of dynamic routing you should create such classes
-on all outgoing interfaces, or, alternatively,
-to use option \verb|dev DEV| to bind tunnel to a fixed physical device.
-In the last case packets will be routed only via specified device
-and you need to setup corresponding classes only on it.
-Though you have to pay for this convenience,
-if routing will change, your tunnel will fail.
-
-Suppose that CBQ class \verb|1:ABC| has been created on device \verb|eth0|
-specially for tunnel \verb|Cisco| with endpoints \verb|S| and \verb|D|.
-Now you can select IPIP packets with addresses \verb|S| and \verb|D|
-with some classifier and map them to class \verb|1:ABC|. F.e.\
-it is easy to make with \verb|rsvp| classifier:
-\begin{verbatim}
- tc filter add dev eth0 pref 100 proto ip rsvp \
- session D ipproto ipip filter S \
- classid 1:ABC
-\end{verbatim}
-
-If you want to make more detailed classification of sub-flows
-transmitted via tunnel, you can build CBQ subtree,
-rooted at \verb|1:ABC| and attach to subroot set of rules parsing
-IPIP packets more deeply.
-
-\end{document}
diff --git a/doc/nstat.sgml b/doc/nstat.sgml
deleted file mode 100644
index 48cacc69..00000000
--- a/doc/nstat.sgml
+++ /dev/null
@@ -1,110 +0,0 @@
-<!doctype linuxdoc system>
-
-<article>
-
-<title>NSTAT, IFSTAT and RTACCT Utilities
-<author>Alexey Kuznetsov, <tt/kuznet@ms2.inr.ac.ru/
-<date>some_negative_number, 20 Sep 2001
-<abstract>
-<tt/nstat/, <tt/ifstat/ and <tt/rtacct/ are simple tools helping
-to monitor kernel snmp counters and network interface statistics.
-</abstract>
-
-<p> These utilities are very similar, so that I describe
-them simultaneously, using name <tt/Xstat/ in the places which apply
-to all of them.
-
-<p>The format of the command is:
-
-<tscreen><verb>
- Xstat [ OPTIONS ] [ PATTERN [ PATTERN ... ] ]
-</verb></tscreen>
-
-<p>
-<tt/PATTERN/ is shell style pattern, selecting identifier
-of SNMP variables or interfaces to show. Variable is displayed
-if one of patterns matches its name. If no patterns are given,
-<tt/Xstat/ assumes that user wants to see all the variables.
-
-<p> <tt/OPTIONS/ is list of single letter options, using common unix
-conventions.
-
-<itemize>
-<item><tt/-h/ - show help page
-<item><tt/-?/ - the same, of course
-<item><tt/-v/, <tt/-V/ - print version of <tt/Xstat/ and exit
-<item><tt/-z/ - dump zero counters too. By default they are not shown.
-<item><tt/-a/ - dump absolute values of counters. By default <tt/Xstat/
- calculates increments since the previous use.
-<item><tt/-s/ - do not update history, so that the next time you will
- see counters including values accumulated to the moment
- of this measurement too.
-<item><tt/-n/ - do not display anything, only update history.
-<item><tt/-r/ - reset history.
-<item><tt/-d INTERVAL/ - <tt/Xstat/ is run in daemon mode collecting
- statistics. <tt/INTERVAL/ is interval between measurements
- in seconds.
-<item><tt/-t INTERVAL/ - time interval to average rates. Default value
- is 60 seconds.
-<item><tt/-e/ - display extended information about errors (<tt/ifstat/ only).
-</itemize>
-
-<p>
-History is just dump saved in file <tt>/tmp/.Xstat.uUID</tt>
-or in file given by environment variables <tt/NSTAT_HISTORY/,
-<tt/IFSTAT_HISTORY/ and <tt/RTACCT_HISTORY/.
-Each time you use <tt/Xstat/ the values stored there are updated.
-If you use patterns, only the values which you _really_ see
-are updated. If you want to skip an uninteresting period,
-use option <tt/-n/, or just output to <tt>/dev/null</tt>.
-
-<p>
-<tt/Xstat/ understands when history is invalidated by system reboot
-or source of information switched between different instances
-of daemonic <tt/Xstat/ and kernel SNMP tables and does not
-use invalid history.
-
-<p> Beware, <tt/Xstat/ will not produce sane output,
-when many processes use it simultaneously. If several processes
-under single user need this utility they should use environment
-variables to put their history in safe places
-or to use it with options <tt/-a -s/.
-
-<p>
-Well, that's all. The utility is very simple, but nevertheless
-very handy.
-
-<p> <bf/Output of XSTAT/
-<p> The first line of output is <tt/#/ followed by identifier
-of source of information, it may be word <tt/kernel/, when <tt/Xstat/
-gets information from kernel or some dotted decimal number followed
-by parameters, when it obtains information from running <tt/Xstat/ daemon.
-
-<p>In the case of <tt/nstat/ the rest of output consists of three columns:
-SNMP MIB identifier,
-its value (or increment since previous measurement) and average
-rate of increase of the counter per second. <tt/ifstat/ outputs
-interface name followed by pairs of counter and rate of its change.
-
-<p> <bf/Daemonic Xstat/
-<p> <tt/Xstat/ may be started as daemon by any user. This makes sense
-to avoid wrapped counters and to obtain reasonable long counters
-for large time. Also <tt/Xstat/ daemon calculates average rates.
-For the first goal sampling interval (option <tt/-d/) may be large enough,
-f.e. for gigabit rates byte counters overflow not more frequently than
-each 40 seconds and you may select interval of 20 seconds.
-On the other hand, when <tt/Xstat/ is used for estimating rates
-interval should be less than averaging period (option <tt/-t/), otherwise
-estimation loses in quality.
-
-Client <tt/Xstat/, before trying to get information from the kernel,
-contacts daemon started by this user, then it tries system wide
-daemon, which is supposed to be started by superuser. And only if
-none of them replied it gets information from kernel.
-
-<p> <bf/Environment/
-<p> <tt/NSTAT_HISTORY/ - name of history file for <tt/nstat/.
-<p> <tt/IFSTAT_HISTORY/ - name of history file for <tt/ifstat/.
-<p> <tt/RTACCT_HISTORY/ - name of history file for <tt/rtacct/.
-
-</article>
diff --git a/doc/preamble.tex b/doc/preamble.tex
deleted file mode 100644
index 80ca5087..00000000
--- a/doc/preamble.tex
+++ /dev/null
@@ -1,26 +0,0 @@
-\textwidth 6.0in
-\textheight 8.5in
-
-\input SNAPSHOT
-
-\pagestyle{myheadings}
-\markboth{\protect\TITLE}{}
-\markright{{\protect\sc iproute2-ss\Draft}}
-
-% To print it in compact form: both sides on one sheet (psnup -2)
-\evensidemargin=\oddsidemargin
-
-\newenvironment{NB}{\bgroup \vskip 1mm\leftskip 1cm \footnotesize \noindent NB.
-}{\par\egroup \vskip 1mm}
-
-\def\threeonly{[2.3.15+ only] }
-
-\begin{document}
-
-\makeatletter
-\renewcommand{\@oddhead}{{\protect\sc iproute2-ss\Draft} \hfill \protect\arabic{page}}
-\makeatother
-\let\oldthefootnote\thefootnote
-\def\thefootnote{}
-\footnotetext{Copyright \copyright~1999 A.N.Kuznetsov}
-
diff --git a/doc/rtstat.sgml b/doc/rtstat.sgml
deleted file mode 100644
index 07391c39..00000000
--- a/doc/rtstat.sgml
+++ /dev/null
@@ -1,52 +0,0 @@
-<!doctype linuxdoc system>
-
-<article>
-
-<title>RTSTAT Utility
-<author>Robert Olsson
-<date>some_negative_number, 20 Dec 2001
-
-<p>
-Here is some code for monitoring the route cache. For systems handling high
-network load, servers, routers, firewalls etc the route cache and its garbage
-collection is crucial. Linux has a solid implementation.
-
-<p>
-The kernel patch (not required since linux-2.4.7) adds statistics counters
-from route cache process into
-/proc/net/rt_cache_stat. A companion user mode program presents the statistics
-in a vmstat or iostat manner. The ratio between cache hits and misses gives
-the flow length.
-
-<p>
-Hopefully it can help understanding performance and DoS and other related
-issues.
-
-<p> A URL where newer versions of this utility can be (probably) found
-is ftp://robur.slu.se/pub/Linux/net-development/rt_cache_stat/
-
-
-<p><bf/Description/
-
-<p>The format of the command is:
-
-<tscreen><verb>
- rtstat [ OPTIONS ]
-</verb></tscreen>
-
-<p> <tt/OPTIONS/ are:
-
-<itemize>
-
-<item><tt/-h/, <tt/-help/ - show help page and version of the utility.
-
-<item><tt/-i INTERVAL/ - interval between snapshots, default value is
-2 seconds.
-
-<item><tt/-s NUMBER/ - whether to print header line. 0 inhibits header line,
-1 prescribes to print it once and 2 (this is default setting) forces header
-line each 20 lines.
-
-</itemize>
-
-</article>
diff --git a/doc/ss.sgml b/doc/ss.sgml
deleted file mode 100644
index 3024b574..00000000
--- a/doc/ss.sgml
+++ /dev/null
@@ -1,525 +0,0 @@
-<!doctype linuxdoc system>
-
-<article>
-
-<title>SS Utility: Quick Intro
-<author>Alexey Kuznetsov, <tt/kuznet@ms2.inr.ac.ru/
-<date>some_negative_number, 20 Sep 2001
-<abstract>
-<tt/ss/ is yet another utility to investigate sockets.
-Functionally it is NOT better than <tt/netstat/ combined
-with some perl/awk scripts and though it is surely faster
-it is not enough to make it much better. :-)
-So, stop reading this now and do not waste your time.
-Well, certainly, it proposes some functionality, which current
-netstat is still not able to do, but surely will soon.
-</abstract>
-
-<sect>Why?
-
-<p> <tt>/proc</tt> interface is inadequate, unfortunately.
-When the number of sockets is large enough, <tt/netstat/ or even
-plain <tt>cat /proc/net/tcp/</tt> cause nothing but pains and curses.
-In linux-2.4 the disease became worse: even if the number
-of sockets is small reading <tt>/proc/net/tcp/</tt> is slow enough.
-
-This utility presents a new approach, which is supposed to scale
-well. I am not going to describe technical details here and
-will concentrate on description of the command.
-The only important thing to say is that it is not so bad idea
-to load module <tt/tcp_diag/, which can be found in directory
-<tt/Modules/ of <tt/iproute2/. If you do not make this <tt/ss/
-will work, but it falls back to <tt>/proc</tt> and becomes slow
-like <tt/netstat/, well, a bit faster yet (see section "Some numbers").
-
-<sect>Old news
-
-<p>
-In the simplest form <tt/ss/ is equivalent to netstat
-with some small deviations.
-
-<itemize>
-<item><tt/ss -t -a/ dumps all TCP sockets
-<item><tt/ss -u -a/ dumps all UDP sockets
-<item><tt/ss -w -a/ dumps all RAW sockets
-<item><tt/ss -x -a/ dumps all UNIX sockets
-</itemize>
-
-<p>
-Option <tt/-o/ shows TCP timers state.
-Option <tt/-e/ shows some extended information.
-Etc. etc. etc. Seems, all the options of netstat related to sockets
-are supported. Though not AX.25 and other bizarres. :-)
-If someone wants, he can make support for decnet and ipx.
-Some rudimentary support for them is already present in iproute2 libutils,
-and I will be glad to see these new members.
-
-<p>
-However, standard functionality is a bit different:
-
-<p>
-The first: without option <tt/-a/ sockets in states
-<tt/TIME-WAIT/ and <tt/SYN-RECV/ are skipped too.
-It is more reasonable default, I think.
-
-<p>
-The second: format of UNIX sockets is different. It coincides
-with tcp/udp. Though standard kernel still does not allow to
-see write/read queues and peer address of connected UNIX sockets,
-the patch doing this exists.
-
-<p>
-The third: default is to dump only TCP sockets, rather than all of the types.
-
-<p>
-The next: by default it does not resolve numeric host addresses (like <tt/ip/)!
-Resolving is enabled with option <tt/-r/. Service names, usually stored
-in local files, are resolved by default. Also, if service database
-does not contain references to a port, <tt/ss/ queries system
-<tt/rpcbind/. RPC services are prefixed with <tt/rpc./
-Resolution of services may be suppressed with option <tt/-n/.
-
-<p>
-It does not accept "long" options (I dislike them, sorry).
-So, address family is given with family identifier following
-option <tt/-f/ to be aligned to iproute2 conventions.
-Mostly, it is to allow option parser to parse
-addresses correctly, but as side effect it really limits dumping
-to sockets supporting only given family. Option <tt/-A/ followed
-by list of socket tables to dump is also supported.
-Logically, id of socket table is different from _address_ family, which is
-another point of incompatibility. So, id is one of
-<tt/all/, <tt/tcp/, <tt/udp/,
-<tt/raw/, <tt/inet/, <tt/unix/, <tt/packet/, <tt/netlink/. See?
-Well, <tt/inet/ is just abbreviation for <tt/tcp|udp|raw/
-and it is not difficult to guess that <tt/packet/ allows
-to look at packet sockets. Actually, there are also some other abbreviations,
-f.e. <tt/unix_dgram/ selects only datagram UNIX sockets.
-
-<p>
-The next: well, I still do not know. :-)
-
-
-
-
-<sect>Time to talk about new functionality.
-
-<p>It is builtin filtering of socket lists.
-
-<sect1> Filtering by state.
-
-<p>
-<tt/ss/ allows to filter socket states, using keywords
-<tt/state/ and <tt/exclude/, followed by some state
-identifier.
-
-<p>
-State identifier are standard TCP state names (not listed,
-they are useless for you if you already do not know them)
-or abbreviations:
-
-<itemize>
-<item><tt/all/ - for all the states
-<item><tt/bucket/ - for TCP minisockets (<tt/TIME-WAIT|SYN-RECV/)
-<item><tt/big/ - all except for minisockets
-<item><tt/connected/ - not closed and not listening
-<item><tt/synchronized/ - connected and not <tt/SYN-SENT/
-</itemize>
-
-<p>
- F.e. to dump all tcp sockets except <tt/SYN-RECV/:
-
-<tscreen><verb>
- ss exclude SYN-RECV
-</verb></tscreen>
-
-<p>
- If neither <tt/state/ nor <tt/exclude/ directives
- are present,
- state filter defaults to <tt/all/ with option <tt/-a/
- or to <tt/all/,
- excluding listening, syn-recv, time-wait and closed sockets.
-
-<sect1> Filtering by addresses and ports.
-
-<p>
-Option list may contain address/port filter.
-It is boolean expression which consists of boolean operation
-<tt/or/, <tt/and/, <tt/not/ and predicates.
-Actually, all the flavors of names for boolean operations are eaten:
-<tt/&amp/, <tt/&amp&amp/, <tt/|/, <tt/||/, <tt/!/, but do not forget
-about special sense given to these symbols by unix shells and escape
-them correctly, when used from command line.
-
-<p>
-Predicates may be of the following kinds:
-
-<itemize>
-<item>A. Address/port match, where address is checked against mask
- and port is either wildcard or exact. It is one of:
-
-<tscreen><verb>
- dst prefix:port
- src prefix:port
- src unix:STRING
- src link:protocol:ifindex
- src nl:channel:pid
-</verb></tscreen>
-
- Both prefix and port may be absent or replaced with <tt/*/,
- which means wildcard. UNIX socket use more powerful scheme
- matching to socket names by shell wildcards. Also, prefixes
- unix: and link: may be omitted, if address family is evident
- from context (with option <tt/-x/ or with <tt/-f unix/
- or with <tt/unix/ keyword)
-
-<p>
- F.e.
-
-<tscreen><verb>
- dst 10.0.0.1
- dst 10.0.0.1:
- dst 10.0.0.1/32:
- dst 10.0.0.1:*
-</verb></tscreen>
- are equivalent and mean socket connected to
- any port on host 10.0.0.1
-
-<tscreen><verb>
- dst 10.0.0.0/24:22
-</verb></tscreen>
- sockets connected to port 22 on network
- 10.0.0.0...255.
-
-<p>
- Note that port is separated from address with a colon, which creates
- troubles with IPv6 addresses. Generally, we interpret the last
- colon as splitting port. To allow to give IPv6 addresses,
- trick like used in IPv6 HTTP URLs may be used:
-
-<tscreen><verb>
- dst [::1]
-</verb></tscreen>
- are sockets connected to ::1 on any port
-
-<p>
- Another way is <tt/dst ::1/128/. / helps to understand that
- colon is part of IPv6 address.
-
-<p>
- Now we can add another alias for <tt/dst 10.0.0.1/:
- <tt/dst [10.0.0.1]/. :-)
-
-<p> Address may be a DNS name. In this case all the addresses are looked
- up (in all the address families, if it is not limited by option <tt/-f/
- or special address prefix <tt/inet:/, <tt/inet6/) and resulting
- expression is <tt/or/ over all of them.
-
-<item> B. Port expressions:
-<tscreen><verb>
- dport &gt= :1024
- dport != :22
- sport &lt :32000
-</verb></tscreen>
- etc.
-
- All the relations: <tt/&lt/, <tt/&gt/, <tt/&lt=/, <tt/&gt=/, <tt/=/, <tt/==/,
- <tt/!=/, <tt/eq/, <tt/ge/, <tt/lt/, <tt/ne/...
- Use variant which you like more, but not forget to escape special
- characters when typing them in command line. :-)
-
- Note that port number syntactically coincides with the case A!
- You may even add an IP address, but it will not participate
- in comparison, except for <tt/==/ and <tt/!=/, which are equivalent
- to corresponding predicates of type A. F.e.
-<p>
-<tt/dst 10.0.0.1:22/
- is equivalent to <tt/dport eq 10.0.0.1:22/
- and
- <tt/not dst 10.0.0.1:22/ is equivalent to
- <tt/dport neq 10.0.0.1:22/
-
-<item>C. Keyword <tt/autobound/. It matches to sockets bound automatically
- on local system.
-
-</itemize>
-
-
-<sect> Examples
-
-<p>
-<itemize>
-<item>1. List all the tcp sockets in state <tt/FIN-WAIT-1/ for our apache
- to network 193.233.7/24 and look at their timers:
-
-<tscreen><verb>
- ss -o state fin-wait-1 \( sport = :http or sport = :https \) \
- dst 193.233.7/24
-</verb></tscreen>
-
- Oops, forgot to say that missing logical operation is
- equivalent to <tt/and/.
-
-<item> 2. Well, now look at the rest...
-
-<tscreen><verb>
- ss -o excl fin-wait-1
- ss state fin-wait-1 \( sport neq :http and sport neq :https \) \
- or not dst 193.233.7/24
-</verb></tscreen>
-
- Note that we have to do _two_ calls of ss to do this.
- State match is always anded to address/port match.
- The reason for this is purely technical: ss does fast skip of
- not matching states before parsing addresses and I consider the
- ability to skip fastly gobs of time-wait and syn-recv sockets
- as more important than logical generality.
-
-<item> 3. So, let's look at all our sockets using autobound ports:
-
-<tscreen><verb>
- ss -a -A all autobound
-</verb></tscreen>
-
-
-<item> 4. And eventually find all the local processes connected
- to local X servers:
-
-<tscreen><verb>
- ss -xp dst "/tmp/.X11-unix/*"
-</verb></tscreen>
-
- Pardon, this does not work with current kernel, patching is required.
- But we still can look at server side:
-
-<tscreen><verb>
- ss -x src "/tmp/.X11-unix/*"
-</verb></tscreen>
-
-</itemize>
-
-
-<sect> Returning to ground: real manual
-
-<p>
-<sect1> Command arguments
-
-<p> General format of arguments to <tt/ss/ is:
-
-<tscreen><verb>
- ss [ OPTIONS ] [ STATE-FILTER ] [ ADDRESS-FILTER ]
-</verb></tscreen>
-
-<sect2><tt/OPTIONS/
-<p> <tt/OPTIONS/ is list of single letter options, using common unix
-conventions.
-
-<itemize>
-<item><tt/-h/ - show help page
-<item><tt/-?/ - the same, of course
-<item><tt/-v/, <tt/-V/ - print version of <tt/ss/ and exit
-<item><tt/-s/ - print summary statistics. This option does not parse
-socket lists obtaining summary from various sources. It is useful
-when amount of sockets is so huge that parsing <tt>/proc/net/tcp</tt>
-is painful.
-<item><tt/-D FILE/ - do not display anything, just dump raw information
-about TCP sockets to <tt/FILE/ after applying filters. If <tt/FILE/ is <tt/-/
-<tt/stdout/ is used.
-<item><tt/-F FILE/ - read continuation of filter from <tt/FILE/.
-Each line of <tt/FILE/ is interpreted like single command line option.
-If <tt/FILE/ is <tt/-/ <tt/stdin/ is used.
-<item><tt/-r/ - try to resolve numeric address/ports
-<item><tt/-n/ - do not try to resolve ports
-<item><tt/-o/ - show some optional information, f.e. TCP timers
-<item><tt/-i/ - show some information specific to TCP (RTO, congestion
-window, slow start threshold etc.)
-<item><tt/-e/ - show even more optional information
-<item><tt/-m/ - show extended information on memory used by the socket.
-It is available only with <tt/tcp_diag/ enabled.
-<item><tt/-p/ - show list of processes owning the socket
-<item><tt/-f FAMILY/ - default address family used for parsing addresses.
- Also this option limits listing to sockets supporting
- given address family. Currently the following families
- are supported: <tt/unix/, <tt/inet/, <tt/inet6/, <tt/link/,
- <tt/netlink/.
-<item><tt/-4/ - alias for <tt/-f inet/
-<item><tt/-6/ - alias for <tt/-f inet6/
-<item><tt/-0/ - alias for <tt/-f link/
-<item><tt/-A LIST-OF-TABLES/ - list of socket tables to dump, separated
- by commas. The following identifiers are understood:
- <tt/all/, <tt/inet/, <tt/tcp/, <tt/udp/, <tt/raw/,
- <tt/unix/, <tt/packet/, <tt/netlink/, <tt/unix_dgram/,
- <tt/unix_stream/, <tt/packet_raw/, <tt/packet_dgram/.
-<item><tt/-x/ - alias for <tt/-A unix/
-<item><tt/-t/ - alias for <tt/-A tcp/
-<item><tt/-u/ - alias for <tt/-A udp/
-<item><tt/-w/ - alias for <tt/-A raw/
-<item><tt/-a/ - show sockets of all the states. By default sockets
- in states <tt/LISTEN/, <tt/TIME-WAIT/, <tt/SYN_RECV/
- and <tt/CLOSE/ are skipped.
-<item><tt/-l/ - show only sockets in state <tt/LISTEN/
-</itemize>
-
-<sect2><tt/STATE-FILTER/
-
-<p><tt/STATE-FILTER/ allows to construct arbitrary set of
-states to match. Its syntax is sequence of keywords <tt/state/
-and <tt/exclude/ followed by identifier of state.
-Available identifiers are:
-
-<p>
-<itemize>
-<item> All standard TCP states: <tt/established/, <tt/syn-sent/,
-<tt/syn-recv/, <tt/fin-wait-1/, <tt/fin-wait-2/, <tt/time-wait/,
-<tt/closed/, <tt/close-wait/, <tt/last-ack/, <tt/listen/ and <tt/closing/.
-
-<item><tt/all/ - for all the states
-<item><tt/connected/ - all the states except for <tt/listen/ and <tt/closed/
-<item><tt/synchronized/ - all the <tt/connected/ states except for
-<tt/syn-sent/
-<item><tt/bucket/ - states, which are maintained as minisockets, i.e.
-<tt/time-wait/ and <tt/syn-recv/.
-<item><tt/big/ - opposite to <tt/bucket/
-</itemize>
-
-<sect2><tt/ADDRESS_FILTER/
-
-<p><tt/ADDRESS_FILTER/ is boolean expression with operations <tt/and/, <tt/or/
-and <tt/not/, which can be abbreviated in C style f.e. as <tt/&amp/,
-<tt/&amp&amp/.
-
-<p>
-Predicates check socket addresses, both local and remote.
-There are the following kinds of predicates:
-
-<itemize>
-<item> <tt/dst ADDRESS_PATTERN/ - matches remote address and port
-<item> <tt/src ADDRESS_PATTERN/ - matches local address and port
-<item> <tt/dport RELOP PORT/ - compares remote port to a number
-<item> <tt/sport RELOP PORT/ - compares local port to a number
-<item> <tt/autobound/ - checks that socket is bound to an ephemeral
- port
-</itemize>
-
-<p><tt/RELOP/ is some of <tt/&lt=/, <tt/&gt=/, <tt/==/ etc.
-To make this more convenient for use in unix shell, alphabetic
-FORTRAN-like notations <tt/le/, <tt/gt/ etc. are accepted as well.
-
-<p>The format and semantics of <tt/ADDRESS_PATTERN/ depends on address
-family.
-
-<itemize>
-<item><tt/inet/ - <tt/ADDRESS_PATTERN/ consists of IP prefix, optionally
-followed by colon and port. If prefix or port part is absent or replaced
-with <tt/*/, this means wildcard match.
-<item><tt/inet6/ - The same as <tt/inet/, only prefix refers to an IPv6
-address. Unlike <tt/inet/ colon becomes ambiguous, so that <tt/ss/ allows
-to use scheme, like used in URLs, where address is surrounded with
-<tt/[/ ... <tt/]/.
-<item><tt/unix/ - <tt/ADDRESS_PATTERN/ is shell-style wildcard.
-<item><tt/packet/ - format looks like <tt/inet/, only interface index
-stays instead of port and link layer protocol id instead of address.
-<item><tt/netlink/ - format looks like <tt/inet/, only socket pid
-stays instead of port and netlink channel instead of address.
-</itemize>
-
-<p><tt/PORT/ is syntactically <tt/ADDRESS_PATTERN/ with wildcard
-address part. Certainly, it is undefined for UNIX sockets.
-
-<sect1> Environment variables
-
-<p>
-<tt/ss/ allows to change source of information using various
-environment variables:
-
-<p>
-<itemize>
-<item> <tt/PROC_SLABINFO/ to override <tt>/proc/slabinfo</tt>
-<item> <tt/PROC_NET_TCP/ to override <tt>/proc/net/tcp</tt>
-<item> <tt/PROC_NET_UDP/ to override <tt>/proc/net/udp</tt>
-<item> etc.
-</itemize>
-
-<p>
-Variable <tt/PROC_ROOT/ allows to change root of all the <tt>/proc/</tt>
-hierarchy.
-
-<p>
-Variable <tt/TCPDIAG_FILE/ prescribes to open a file instead of
-requesting kernel to dump information about TCP sockets.
-
-
-<p> This option is used mainly to investigate bug reports,
-when dumps of files usually found in <tt>/proc/</tt> are received
-by e-mail.
-
-<sect1> Output format
-
-<p>Six columns. The first is <tt/Netid/, it denotes socket type and
-transport protocol, when it is ambiguous: <tt/tcp/, <tt/udp/, <tt/raw/,
-<tt/u_str/ is abbreviation for <tt/unix_stream/, <tt/u_dgr/ for UNIX
-datagram sockets, <tt/nl/ for netlink, <tt/p_raw/ and <tt/p_dgr/ for
-raw and datagram packet sockets. This column is optional, it will
-be hidden, if filter selects a unique netid.
-
-<p>
-The second column is <tt/State/. Socket state is displayed here.
-The names are standard TCP names, except for <tt/UNCONN/, which
-cannot happen for TCP, but is normal for unconnected sockets
-of other types. Again, this column can be hidden.
-
-<p>
-Then two columns (<tt/Recv-Q/ and <tt/Send-Q/) showing amount of data
-queued for receive and transmit.
-
-<p>
-And the last two columns display local address and port of the socket
-and its peer address, if the socket is connected.
-
-<p>
-If options <tt/-o/, <tt/-e/ or <tt/-p/ were given, options are
-displayed not in fixed positions but separated by spaces pairs:
-<tt/option:value/. If value is not a single number, it is presented
-as list of values, enclosed to <tt/(/ ... <tt/)/ and separated with
-commas. F.e.
-
-<tscreen><verb>
- timer:(keepalive,111min,0)
-</verb></tscreen>
-is typical format for TCP timer (option <tt/-o/).
-
-<tscreen><verb>
- users:((X,113,3))
-</verb></tscreen>
-is typical for list of users (option <tt/-p/).
-
-
-<sect>Some numbers
-
-<p>
-Well, let us use <tt/pidentd/ and a tool <tt/ibench/ to measure
-its performance. It is 30 requests per second here. Nothing to test,
-it is too slow. OK, let us patch pidentd with patch from directory
-Patches. After this it handles about 4300 requests per second
-and becomes handy tool to pollute socket tables with lots of timewait
-buckets.
-
-<p>
-So, each test starts from pollution tables with 30000 sockets
-and then doing full dump of the table piped to wc and measuring
-timings with time:
-
-<p>Results:
-
-<itemize>
-<item> <tt/netstat -at/ - 15.6 seconds
-<item> <tt/ss -atr/, but without <tt/tcp_diag/ - 5.4 seconds
-<item> <tt/ss -atr/ with <tt/tcp_diag/ - 0.47 seconds
-</itemize>
-
-No comments. Though one comment is necessary, most of time
-without <tt/tcp_diag/ is wasted inside kernel with completely
-blocked networking. More than 10 seconds, yes. <tt/tcp_diag/
-does the same work for 100 milliseconds of system time.
-
-</article>
diff --git a/doc/tc-filters.tex b/doc/tc-filters.tex
deleted file mode 100644
index 54cc0c99..00000000
--- a/doc/tc-filters.tex
+++ /dev/null
@@ -1,514 +0,0 @@
-\documentclass[12pt,twoside]{article}
-
-\usepackage[hidelinks]{hyperref} % \url
-\usepackage{booktabs} % nicer tabulars
-\usepackage{fancyvrb}
-\usepackage{fullpage}
-\usepackage{float}
-
-\newcommand{\iface}{\textit}
-\newcommand{\cmd}{\texttt}
-\newcommand{\man}{\textit}
-\newcommand{\qdisc}{\texttt}
-\newcommand{\filter}{\texttt}
-
-\begin{document}
-\title{QoS in Linux with TC and Filters}
-\author{Phil Sutter (phil@nwl.cc)}
-\date{January 2016}
-\maketitle
-
-Standard practice when transmitting packets over a medium which may block (due
-to congestion, e.g.) is to use a queue which temporarily holds these packets. In
-Linux, this queueing approach is where QoS happens: A Queueing Discipline
-(qdisc) holds multiple packet queues with different priorities for dequeueing to
-the network driver. The classification (i.e. deciding which queue a packet
-should go into) is typically done based on Type Of Service (IPv4) or Traffic
-Class (IPv6) header fields but depending on qdisc implementation, might be
-controlled by the user as well.
-
-Qdiscs come in two flavors, classful or classless. While classless qdiscs are
-not as flexible as classful ones, they also require much less customizing. Often
-it is enough to just attach them to an interface, without exact knowledge of
-what is done internally. Classful qdiscs are the exact opposite: flexible in
-application, they are often not even usable without insightful configuration.
-
-As the name implies, classful qdiscs provide configurable classes to sort
-traffic into. In its basic form, this is not much different than, say, the
-classless \qdisc{pfifo\_fast} which holds three queues and classifies per
-packet upon priority field. Though typically classes go beyond that by
-supporting nesting and additional characteristics like e.g. maximum traffic
-rate or quantum.
-
-When it comes to controlling the classification process, filters come into play.
-They attach to the parent of a set of classes (i.e. either the qdisc itself or
-a parent class) and specify how a packet (or its associated flow) has to look
-like in order to suit a given class. To overcome this simplification, it is
-possible to attach multiple filters to the same parent, which then consults each
-of them in a row until the first one accepts the packet.
-
-Before getting into detail about what filters there are and how to use them, a
-simple setup of a qdisc with classes is necessary:
-\begin{figure}[H]
-\begin{Verbatim}
- .-------------------------------------------------------.
- | |
- | HTB |
- | |
- | .----------------------------------------------------.|
- | | ||
- | | Class 1:1 ||
- | | ||
- | | .---------------..---------------..---------------.||
- | | | || || |||
- | | | Class 1:10 || Class 1:20 || Class 1:30 |||
- | | | || || |||
- | | | .------------.|| .------------.|| .------------.|||
- | | | | ||| | ||| | ||||
- | | | | fq_codel ||| | fq_codel ||| | fq_codel ||||
- | | | | ||| | ||| | ||||
- | | | '------------'|| '------------'|| '------------'|||
- | | '---------------''---------------''---------------'||
- | '----------------------------------------------------'|
- '-------------------------------------------------------'
-\end{Verbatim}
-\end{figure}
-\noindent
-The following commands establish the basic setup shown:
-\begin{Verbatim}
-(1) # tc qdisc replace dev eth0 root handle 1: htb default 30
-(2) # tc class add dev eth0 parent 1: classid 1:1 htb rate 95mbit
-(3) # alias tclass='tc class add dev eth0 parent 1:1'
-(4) # tclass classid 1:10 htb rate 1mbit ceil 20mbit prio 1
-(4) # tclass classid 1:20 htb rate 90mbit ceil 95mbit prio 2
-(4) # tclass classid 1:30 htb rate 1mbit ceil 95mbit prio 3
-(5) # tc qdisc add dev eth0 parent 1:10 fq_codel
-(5) # tc qdisc add dev eth0 parent 1:20 fq_codel
-(5) # tc qdisc add dev eth0 parent 1:30 fq_codel
-\end{Verbatim}
-A little explanation for the unfamiliar reader:
-\begin{enumerate}
-\item Replace the root qdisc of \iface{eth0} by an instance of \qdisc{HTB}.
- Specifying the handle is necessary so it can be referenced in consecutive
- calls to \cmd{tc}. The default class for unclassified traffic is set to
- 30.
-\item Create a single top-level class with handle 1:1 which limits the total
- bandwidth allowed to 95mbit/s. It is assumed that \iface{eth0} is a 100mbit/s link,
- staying a little below that helps to keep the main point of enqueueing in
- the qdisc layer instead of the interface hardware queue or at another
- bottleneck in the network.
-\item Define an alias for the common part of the remaining three calls in order
- to improve readability. This means all remaining classes are attached to the
- common parent class from (2).
-\item Create three child classes for different uses: Class 1:10 has highest
- priority but is tightly limited in bandwidth - fine for interactive
- connections. Class 1:20 has mid priority and high guaranteed bandwidth, for
- high priority bulk traffic. Finally, there's the default class 1:30 with
- lowest priority, low guaranteed bandwidth and the ability to use the full
- link in case it's unused otherwise. This should be fine for uninteresting
- traffic not explicitly taken care of.
-\item Attach a leaf qdisc to each of the child classes created in (4). Since
- \qdisc{HTB} by default attaches \qdisc{pfifo} as leaf qdisc, this step is optional. Still,
- the fairness between different flows provided by the classless \qdisc{fq\_codel} is
- worth the effort.
-\end{enumerate}
-More information about the qdiscs and fine-tuning parameters can be found in
-\man{tc-htb(8)} and \man{tc-fq\_codel(8)}.
-
-Without any additional setup done, now all traffic leaving \iface{eth0} is shaped to
-95mbit/s and directed through class 1:30. This can be verified by looking at the
-\texttt{Sent} field of the class statistics printed via \cmd{tc -s class show dev eth0}:
-Only the root class 1:1 and its child 1:30 should show any traffic.
-
-
-\section*{Finally time to start filtering!}
-
-Let's begin with a simple one, i.e. reestablishing what \qdisc{pfifo\_fast} did
-automatically based on TOS/Priority field. Linux internally translates the
-header field into the priority field of struct skbuff, which
-\qdisc{pfifo\_fast} uses for
-classification. \man{tc-prio(8)} contains a table listing the priority (and
-ultimately, \qdisc{pfifo\_fast} queue index) each TOS value is being translated into.
-Here is a shorter version:
-\begin{center}
-\begin{tabular}{lll}
-TOS Values & Linux Priority (Number) & Queue Index \\
-\midrule
-0x0 - 0x6 & Best Effort (0) & 1 \\
-0x8 - 0xe & Bulk (2) & 2 \\
-0x10 - 0x16 & Interactive (6) & 0 \\
-0x18 - 0x1e & Interactive Bulk (4) & 1 \\
-\end{tabular}
-\end{center}
-Using the \filter{basic} filter, it is possible to match packets based on that skbuff
-field, which has the added benefit of being IP version agnostic. Since the
-\qdisc{HTB} setup above defaults to class ID 1:30, the Bulk priority can be
-ignored. The \filter{basic} filter allows to combine matches, therefore we get along
-with only two filters:
-\begin{Verbatim}
-# tc filter add dev eth0 parent 1: basic \
- match 'meta(priority eq 6)' classid 1:10
-# tc filter add dev eth0 parent 1: basic \
- match 'meta(priority eq 0)' \
- or 'meta(priority eq 4)' classid 1:20
-\end{Verbatim}
-A detailed description of the \filter{basic} filter and the ematch syntax it uses can be
-found in \man{tc-basic(8)} and \man{tc-ematch(8)}.
-
-Obviously, this first example cries for optimization. A simple one would be to
-just change the default class from 1:30 to 1:20, so filters are only needed for
-Bulk and Interactive priorities:
-\begin{Verbatim}
-# tc filter add dev eth0 parent 1: basic \
- match 'meta(priority eq 6)' classid 1:10
-# tc filter add dev eth0 parent 1: basic \
- match 'meta(priority eq 2)' classid 1:20
-\end{Verbatim}
-Given that class IDs are random, choosing them wisely allows for a direct
-mapping. So first, recreate the qdisc and classes configuration:
-\begin{Verbatim}
-# tc qdisc replace dev eth0 root handle 1: htb default 10
-# tc class add dev eth0 parent 1: classid 1:1 htb rate 95mbit
-# alias tclass='tc class add dev eth0 parent 1:1'
-# tclass classid 1:16 htb rate 1mbit ceil 20mbit prio 1
-# tclass classid 1:10 htb rate 90mbit ceil 95mbit prio 2
-# tclass classid 1:12 htb rate 1mbit ceil 95mbit prio 3
-# tc qdisc add dev eth0 parent 1:16 fq_codel
-# tc qdisc add dev eth0 parent 1:10 fq_codel
-# tc qdisc add dev eth0 parent 1:12 fq_codel
-\end{Verbatim}
-This is basically identical to above, but with changed leaf class IDs and the
-second priority class being the default. Using the \filter{flow} filter with its \texttt{map}
-functionality, a single filter command is enough:
-\begin{Verbatim}
-# tc filter add dev eth0 parent 1: handle 0x1337 flow \
- map key priority baseclass 1:10
-\end{Verbatim}
-The \filter{flow} filter now uses the priority value to construct a destination class ID
-by adding it to the value of \texttt{baseclass}. While this works for priority values of
-0, 2 and 6, it will result in non-existent class ID 1:14 for Interactive Bulk
-traffic. In that case, the \qdisc{HTB} default applies so that traffic goes into class
-ID 1:10 just as intended. Please note that specifying a handle is a mandatory
-requirement by the \filter{flow} filter, although I didn't see where one would use that
-later. For more information about \filter{flow}, see \man{tc-flow(8)}.
-
-While \filter{flow} and \filter{basic} filters are relatively easy to apply and understand, they
-are as well quite limited to their intended purpose. A more flexible option is
-the \filter{u32} filter, which allows to match on arbitrary parts of the packet data -
-yet only on that, not any meta data associated to it by the kernel (with the
-exception of firewall mark value). So in order to continue this little
-exercise with \filter{u32}, we have to base classification directly upon the actual TOS
-value. An intuitive attempt might look like this:
-\begin{Verbatim}
-# alias tcfilter='tc filter add dev eth0 parent 1:'
-# tcfilter u32 match ip dsfield 0x10 0x1e classid 1:16
-# tcfilter u32 match ip dsfield 0x12 0x1e classid 1:16
-# tcfilter u32 match ip dsfield 0x14 0x1e classid 1:16
-# tcfilter u32 match ip dsfield 0x16 0x1e classid 1:16
-# tcfilter u32 match ip dsfield 0x8 0x1e classid 1:12
-# tcfilter u32 match ip dsfield 0xa 0x1e classid 1:12
-# tcfilter u32 match ip dsfield 0xc 0x1e classid 1:12
-# tcfilter u32 match ip dsfield 0xe 0x1e classid 1:12
-\end{Verbatim}
-The obvious drawback here is the amount of filters needed. And without the
-default class, eight more filters would be necessary. This also has performance
-implications: A packet with TOS value 0xe will be checked eight times in total
-in order to determine its destination class. While there's not much to be done
-about the number of filters, at least the performance problem can be eliminated
-by using \filter{u32}'s hash table support:
-\begin{Verbatim}
-# tc filter add dev eth0 parent 1: prio 99 handle 1: u32 divisor 16
-\end{Verbatim}
-This creates a hash table with 16 buckets. The table size is arbitrary, but not
-random: Since the first bit of the TOS field is not interesting, it can be
-ignored and therefore the range of values to consider is just [0;15], i.e. a
-number of 16 different values. The next step is to populate the hash table:
-\begin{Verbatim}
-# alias tcfilter='tc filter add dev eth0 parent 1: prio 99'
-# tcfilter u32 match u8 0 0 ht 1:0: classid 1:10
-# tcfilter u32 match u8 0 0 ht 1:1: classid 1:10
-# tcfilter u32 match u8 0 0 ht 1:2: classid 1:10
-# tcfilter u32 match u8 0 0 ht 1:3: classid 1:10
-# tcfilter u32 match u8 0 0 ht 1:4: classid 1:12
-# tcfilter u32 match u8 0 0 ht 1:5: classid 1:12
-# tcfilter u32 match u8 0 0 ht 1:6: classid 1:12
-# tcfilter u32 match u8 0 0 ht 1:7: classid 1:12
-# tcfilter u32 match u8 0 0 ht 1:8: classid 1:16
-# tcfilter u32 match u8 0 0 ht 1:9: classid 1:16
-# tcfilter u32 match u8 0 0 ht 1:a: classid 1:16
-# tcfilter u32 match u8 0 0 ht 1:b: classid 1:16
-# tcfilter u32 match u8 0 0 ht 1:c: classid 1:10
-# tcfilter u32 match u8 0 0 ht 1:d: classid 1:10
-# tcfilter u32 match u8 0 0 ht 1:e: classid 1:10
-# tcfilter u32 match u8 0 0 ht 1:f: classid 1:10
-\end{Verbatim}
-The parameter \texttt{ht} denotes the hash table and bucket the filter should be added
-to. Since the first TOS bit is ignored, its value has to be divided by two in
-order to get to the bucket it maps to. E.g. a TOS value of 0x10 will therefore
-map to bucket 0x8. For the sake of completeness, all possible values are mapped
-and therefore a configurable default class is not required. Note that the used
-match expression is not functionally necessary, but syntactically mandatory. Therefore anything that
-matches any packet will suffice. Finally, a filter which links to the defined
-hash table is needed:
-\begin{Verbatim}
-# tc filter add dev eth0 parent 1: prio 1 protocol ip u32 \
- link 1: hashkey mask 0x001e0000 match u8 0 0
-\end{Verbatim}
-Here again, the actual match statement is not necessary, but syntactically
-required. All the magic lies within the \texttt{hashkey} parameter, which defines which
-part of the packet should be used directly as hash key. Here's a drawing of the
-first four bytes of the IPv4 header, with the area selected by \texttt{hashkey mask}
-highlighted:
-\begin{figure}[H]
-\begin{Verbatim}
- 0 1 2 3
- .-----------------------------------------------------------------.
- | | | ######## | | |
- | Version| IHL | #DSCP### | ECN| Total Length |
- | | | ######## | | |
- '-----------------------------------------------------------------'
-\end{Verbatim}
-\end{figure}
-\noindent
-This may look confusing at first, but keep in mind that bit- as well as
-byte-ordering here is LSB while the mask value is written in MSB we humans use.
-Therefore reading the mask is done like so, starting from left:
-\begin{enumerate}
-\item Skip the first byte (which contains Version and IHL fields).
-\item Skip the lowest bit of the second byte (0x1e is even).
-\item Mark the four following bits (0x1e is 11110 in binary).
-\item Skip the remaining three bits of the second byte as well as the remaining two
- bytes.
-\end{enumerate}
-Before doing the lookup, the kernel right-shifts the masked value by the amount
-of zero-bits in \texttt{mask}, which implicitly also does the division by two which the
-hash table depends on. With this setup, every packet has to pass exactly two
-filters to be classified. Note that this filter is limited to IPv4 packets: Due
-to the related Traffic Class field being at a different offset in the packet, it
-would not work for IPv6. To use the same setup for IPv6 as well, a second
-entry-level filter is necessary:
-\begin{Verbatim}
-# tc filter add dev eth0 parent 1: prio 2 protocol ipv6 u32 \
- link 1: hashkey mask 0x01e00000 match u8 0 0
-\end{Verbatim}
-For illustration purposes, here again is a drawing of the first four bytes of
-the IPv6 header, again with masked area highlighted:
-\begin{figure}[H]
-\begin{Verbatim}
- 0 1 2 3
- .-----------------------------------------------------------------.
- | | ######## | |
- | Version| #Traffic Class| Flow Label |
- | | ######## | |
- '-----------------------------------------------------------------'
-\end{Verbatim}
-\end{figure}
-\noindent
-Reading the mask value is analogous to IPv4 with the added complexity that
-Traffic Class spans over two bytes. Yet, for comparison there's a simple trick:
-IPv6 has the interesting field shifted by four bits to the left, and the new
-mask's value is shifted by the same amount. For further information about
-\filter{u32} and what can be done with it, consult its man page
-\man{tc-u32(8)}.
-
-Of course, the kernel provides many more filters than just \filter{basic},
-\filter{flow} and \filter{u32} which have been presented above. As of now, the
-remaining ones are:
-\begin{description}
-\item[bpf]
- Filtering using Berkeley Packet Filter programs. The program's return
- code determines the packet's destination class ID.
-
-\item[cgroup]
- Filter packets based on control groups. This is only useful for packets
- originating from the local host, as control groups only exist in that
- scope.
-
-\item[flower]
- An extended variant of the flow filter.
-
-\item[fw]
- Matches on firewall mark values previously assigned to the packet by
- netfilter (or a filter action, see below for details). This allows to
- export the classification algorithm into netfilter, which is very
- convenient if appropriate rules exist on the same system in there
- already.
-
-\item[route]
- Filter packets based on matching routing table entry. Basically
- equivalent to the \texttt{fw} filter above, to make use of an already existing
- extensive routing table setup.
-
-\item[rsvp, rsvp6]
- Implementation of the Resource Reservation Protocol in Linux, to react
- upon requests sent by an RSVP daemon.
-
-\item[tcindex]
- Match packets based on tcindex value, which is usually set by the dsmark
- qdisc. This is part of an approach to support Differentiated Services in
- Linux, which is another topic on its own.
-\end{description}
-
-
-\section*{Filter Actions}
-
-The tc filter framework provides the infrastructure to another extensible set of
-tools as well, namely tc actions. As the name suggests, they allow to do things
-with packets (or associated data). (The list of) Actions are part of a given
-filter. If it matches, each action it contains is executed in order before
-returning the classification result. Since the action has direct access to the
-latter, it is in theory possible for an action to react upon or even change the
-filtering result - as long as the packet matched, of course. Yet none of the
-currently in-tree actions make use of this.
-
-The Generic Actions framework originally evolved out of the filters' ability to
-police traffic to a given maximum bandwidth. One common use case for that is to
-limit ingress traffic, dropping packets which exceed the threshold. A classic
-setup example is like so:
-\begin{Verbatim}
-# tc qdisc add dev eth0 handle ffff: ingress
-# tc filter add dev eth0 parent ffff: u32 \
- match u32 0 0 \
- police rate 1mbit burst 100k
-\end{Verbatim}
-The ingress qdisc is not a real one, but merely a point of reference for filters
-to attach to which should get applied to incoming traffic. The \filter{u32} filter added
-above matches on any packet and therefore limits the total incoming bandwidth to
-1mbit/s, allowing bursts of up to 100kbytes. Using the new syntax, the filter
-command changes slightly:
-\begin{Verbatim}
-# tc filter add dev eth0 parent ffff: u32 \
- match u32 0 0 \
- action police rate 1mbit burst 100k
-\end{Verbatim}
-The important detail is that this syntax allows to define multiple actions.
-E.g. for testing purposes, it is possible to redirect exceeding traffic to the
-loopback interface instead of dropping it:
-\begin{Verbatim}
-# tc filter add dev eth0 parent ffff: u32 \
- match u32 0 0 \
- action police rate 1mbit burst 100k conform-exceed pipe \
- action mirred egress redirect dev lo
-\end{Verbatim}
-The added parameter \texttt{conform-exceed pipe} tells the police action to allow for
-further actions to handle the exceeding packet.
-
-Apart from \texttt{police} and \texttt{mirred} actions, there are a few more. Here's a full
-list of the currently implemented ones:
-\begin{description}
-\item[bpf]
- Apply a Berkeley Packet Filter program to the packet.
-
-\item[connmark]
- Set the packet's firewall mark to that of its connection. This works by
- searching the conntrack table for a matching entry. If found, the mark
- is restored.
-
-\item[csum]
- Trigger recalculation of packet checksums. The supported protocols are:
- IPv4, ICMP, IGMP, TCP, UDP and UDPLite.
-
-\item[ipt]
- Pass the packet to an iptables target. This allows to use iptables
- extensions directly instead of having to go the extra mile via setting
- an arbitrary firewall mark and matching on that from within netfilter.
-
-\item[mirred]
- Mirror or redirect packets. This is often combined with the ifb pseudo
- device to share a common QoS setup between multiple interfaces or even
- ingress traffic.
-
-\item[nat]
- Perform stateless Network Address Translation. This is certainly not
- complete and therefore inferior to NAT using iptables: Although the
- kernel module decides between TCP, UDP and ICMP traffic, it does not
- handle typical problematic protocols such as active FTP or SIP.
-
-\item[pedit]
- Generic packet editing. This allows to alter arbitrary bytes of the
- packet, either by specifying an offset into the packet or by naming a
- packet header and field name to change. Currently, the latter is
- implemented only for IPv4.
-
-\item[police]
- Apply a bandwidth rate limiting policy. Packets exceeding it are dropped
- by default, but may optionally be handled differently.
-
-\item[simple]
- This is rather an example than real action. All it does is print a
- user-defined string together with a packet counter. Useful maybe for
- debugging when filter statistics are not available or too complicated.
-
-\item[skbedit]
- Edit associated packet data, supports changing queue mapping, priority
- field and firewall mark value.
-
-\item[vlan]
- Add/remove a VLAN header to/from the packet. This might serve as
- alternative to using 802.1Q pseudo-interfaces in combination with
- routing rules when e.g. packets for a given destination need to be
- encapsulated.
-\end{description}
-
-
-\section*{Intermediate Functional Block}
-
-The Intermediate Functional Block (\texttt{ifb}) pseudo network interface acts as a QoS
-concentrator for multiple different sources of traffic. Packets from or to other
-interfaces have to be redirected to it using the \texttt{mirred} action in order to be
-handled, regularly routed traffic will be dropped. This way, a single stack of
-qdiscs, classes and filters can be shared between multiple interfaces.
-
-Here's a simple example to feed incoming traffic from multiple interfaces
-through a Stochastic Fairness Queue (\qdisc{sfq}):
-\begin{Verbatim}
-(1) # modprobe ifb
-(2) # ip link set ifb0 up
-(3) # tc qdisc add dev ifb0 root sfq
-\end{Verbatim}
-The first step is to load the \texttt{ifb} kernel module (1). By default, this will
-create two ifb devices: \iface{ifb0} and \iface{ifb1}. After setting
-\iface{ifb0} up in (2), the root
-qdisc is replaced by \qdisc{sfq} in (3). Finally, one can start redirecting ingress
-traffic to \iface{ifb0}, e.g. from \iface{eth0}:
-\begin{Verbatim}
-# tc qdisc add dev eth0 handle ffff: ingress
-# tc filter add dev eth0 parent ffff: u32 \
- match u32 0 0 \
- action mirred egress redirect dev ifb0
-\end{Verbatim}
-The same can be done for other interfaces, just replacing \iface{eth0} in the two
-commands above. One thing to keep in mind here is the asymmetrical routing this
-creates within the host doing the QoS: Incoming packets enter the system via
-\iface{ifb0}, while corresponding replies leave directly via \iface{eth0}. This can be observed
-using \cmd{tcpdump} on \iface{ifb0}, which shows the input part of the traffic only. What's
-more confusing is that \cmd{tcpdump} on \iface{eth0} shows both incoming and outgoing traffic,
-but the redirection is still effective - a simple proof is setting
-\iface{ifb0} down,
-which will interrupt the communication. Obviously \cmd{tcpdump} catches the packets to
-dump before they enter the ingress qdisc, which is why it sees them while the
-kernel itself doesn't.
-
-
-\section*{Conclusion}
-
-Once the steep learning curve has been mastered, the conglomerate of (classful)
-qdiscs, filters and actions provides a highly sophisticated and flexible
-infrastructure to perform QoS, which plays nicely along with routing and
-firewalling setups.
-
-
-\section*{Further Reading}
-
-A good starting point for novice users and experienced ones diving into unknown
-areas is the extensive HOWTO at \url{http://lartc.org}. The iproute2 package ships
-some examples (usually in /usr/share/doc/, depending on distribution) as well as
-man pages for \cmd{tc} in general, qdiscs and filters. The latter have been added
-just recently though, so if your distribution does not ship iproute2 version
-4.3.0 yet, these are not in there. Apart from that, the internet is a spring of
-HOWTOs and scripts people wrote - though these should be taken with a grain of
-salt: The complexity of the matter often leads to copying others' solutions
-without much validation, which allows for less optimal or even obsolete
-implementations to survive much longer than desired.
-
-\end{document}
diff --git a/include/color.h b/include/color.h
index 1cd6f7d2..7fd685d0 100644
--- a/include/color.h
+++ b/include/color.h
@@ -2,14 +2,13 @@
#define __COLOR_H__ 1
enum color_attr {
- COLOR_NONE,
COLOR_IFNAME,
COLOR_MAC,
COLOR_INET,
COLOR_INET6,
COLOR_OPERSTATE_UP,
COLOR_OPERSTATE_DOWN,
- COLOR_CLEAR
+ COLOR_NONE
};
void enable_color(void);
diff --git a/include/json_print.h b/include/json_print.h
index b6ce1f9f..dc4d2bb3 100644
--- a/include/json_print.h
+++ b/include/json_print.h
@@ -53,7 +53,7 @@ void close_json_array(enum output_type type, const char *delim);
const char *fmt, \
type value) \
{ \
- print_color_##type_name(t, -1, key, fmt, value); \
+ print_color_##type_name(t, COLOR_NONE, key, fmt, value); \
}
_PRINT_FUNC(int, int);
_PRINT_FUNC(bool, bool);
diff --git a/include/linux/atm.h b/include/uapi/linux/atm.h
index 08e27beb..08e27beb 100644
--- a/include/linux/atm.h
+++ b/include/uapi/linux/atm.h
diff --git a/include/linux/atmapi.h b/include/uapi/linux/atmapi.h
index 8fe54d90..8fe54d90 100644
--- a/include/linux/atmapi.h
+++ b/include/uapi/linux/atmapi.h
diff --git a/include/linux/atmarp.h b/include/uapi/linux/atmarp.h
index 231f4bde..231f4bde 100644
--- a/include/linux/atmarp.h
+++ b/include/uapi/linux/atmarp.h
diff --git a/include/linux/atmdev.h b/include/uapi/linux/atmdev.h
index 8faa8b94..8faa8b94 100644
--- a/include/linux/atmdev.h
+++ b/include/uapi/linux/atmdev.h
diff --git a/include/linux/atmioc.h b/include/uapi/linux/atmioc.h
index 37f67aa8..37f67aa8 100644
--- a/include/linux/atmioc.h
+++ b/include/uapi/linux/atmioc.h
diff --git a/include/linux/atmsap.h b/include/uapi/linux/atmsap.h
index 799b1045..799b1045 100644
--- a/include/linux/atmsap.h
+++ b/include/uapi/linux/atmsap.h
diff --git a/include/linux/bpf.h b/include/uapi/linux/bpf.h
index 0895a529..0895a529 100644
--- a/include/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
diff --git a/include/linux/bpf_common.h b/include/uapi/linux/bpf_common.h
index afe7433b..afe7433b 100644
--- a/include/linux/bpf_common.h
+++ b/include/uapi/linux/bpf_common.h
diff --git a/include/linux/can.h b/include/uapi/linux/can.h
index f7a810de..f7a810de 100644
--- a/include/linux/can.h
+++ b/include/uapi/linux/can.h
diff --git a/include/linux/can/netlink.h b/include/uapi/linux/can/netlink.h
index b9214bd7..b9214bd7 100644
--- a/include/linux/can/netlink.h
+++ b/include/uapi/linux/can/netlink.h
diff --git a/include/linux/can/vxcan.h b/include/uapi/linux/can/vxcan.h
index 5b29e8a7..5b29e8a7 100644
--- a/include/linux/can/vxcan.h
+++ b/include/uapi/linux/can/vxcan.h
diff --git a/include/linux/devlink.h b/include/uapi/linux/devlink.h
index a62695e2..a62695e2 100644
--- a/include/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
diff --git a/include/linux/elf-em.h b/include/uapi/linux/elf-em.h
index 9cd1de95..9cd1de95 100644
--- a/include/linux/elf-em.h
+++ b/include/uapi/linux/elf-em.h
diff --git a/include/linux/fib_rules.h b/include/uapi/linux/fib_rules.h
index bbf02a63..bbf02a63 100644
--- a/include/linux/fib_rules.h
+++ b/include/uapi/linux/fib_rules.h
diff --git a/include/linux/filter.h b/include/uapi/linux/filter.h
index e4f2f74c..e4f2f74c 100644
--- a/include/linux/filter.h
+++ b/include/uapi/linux/filter.h
diff --git a/include/linux/fou.h b/include/uapi/linux/fou.h
index 744c3238..744c3238 100644
--- a/include/linux/fou.h
+++ b/include/uapi/linux/fou.h
diff --git a/include/linux/gen_stats.h b/include/uapi/linux/gen_stats.h
index 52deccc2..52deccc2 100644
--- a/include/linux/gen_stats.h
+++ b/include/uapi/linux/gen_stats.h
diff --git a/include/linux/genetlink.h b/include/uapi/linux/genetlink.h
index 08239d8e..08239d8e 100644
--- a/include/linux/genetlink.h
+++ b/include/uapi/linux/genetlink.h
diff --git a/include/linux/hdlc/ioctl.h b/include/uapi/linux/hdlc/ioctl.h
index 04bc0274..04bc0274 100644
--- a/include/linux/hdlc/ioctl.h
+++ b/include/uapi/linux/hdlc/ioctl.h
diff --git a/include/linux/icmpv6.h b/include/uapi/linux/icmpv6.h
index a2e839ee..a2e839ee 100644
--- a/include/linux/icmpv6.h
+++ b/include/uapi/linux/icmpv6.h
diff --git a/include/linux/if.h b/include/uapi/linux/if.h
index b4ba0207..b4ba0207 100644
--- a/include/linux/if.h
+++ b/include/uapi/linux/if.h
diff --git a/include/linux/if_addr.h b/include/uapi/linux/if_addr.h
index 26f0ecff..26f0ecff 100644
--- a/include/linux/if_addr.h
+++ b/include/uapi/linux/if_addr.h
diff --git a/include/linux/if_addrlabel.h b/include/uapi/linux/if_addrlabel.h
index 54580c29..54580c29 100644
--- a/include/linux/if_addrlabel.h
+++ b/include/uapi/linux/if_addrlabel.h
diff --git a/include/linux/if_alg.h b/include/uapi/linux/if_alg.h
index f2acd2fd..f2acd2fd 100644
--- a/include/linux/if_alg.h
+++ b/include/uapi/linux/if_alg.h
diff --git a/include/linux/if_arp.h b/include/uapi/linux/if_arp.h
index 199f253b..199f253b 100644
--- a/include/linux/if_arp.h
+++ b/include/uapi/linux/if_arp.h
diff --git a/include/linux/if_bonding.h b/include/uapi/linux/if_bonding.h
index 9635a62f..9635a62f 100644
--- a/include/linux/if_bonding.h
+++ b/include/uapi/linux/if_bonding.h
diff --git a/include/linux/if_bridge.h b/include/uapi/linux/if_bridge.h
index 156f4434..156f4434 100644
--- a/include/linux/if_bridge.h
+++ b/include/uapi/linux/if_bridge.h
diff --git a/include/linux/if_ether.h b/include/uapi/linux/if_ether.h
index 7dde037a..7dde037a 100644
--- a/include/linux/if_ether.h
+++ b/include/uapi/linux/if_ether.h
diff --git a/include/linux/if_link.h b/include/uapi/linux/if_link.h
index 1f97d056..1f97d056 100644
--- a/include/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
diff --git a/include/linux/if_macsec.h b/include/uapi/linux/if_macsec.h
index 22939a3e..22939a3e 100644
--- a/include/linux/if_macsec.h
+++ b/include/uapi/linux/if_macsec.h
diff --git a/include/linux/if_packet.h b/include/uapi/linux/if_packet.h
index 4df96a7d..4df96a7d 100644
--- a/include/linux/if_packet.h
+++ b/include/uapi/linux/if_packet.h
diff --git a/include/linux/if_tun.h b/include/uapi/linux/if_tun.h
index d5ecb425..d5ecb425 100644
--- a/include/linux/if_tun.h
+++ b/include/uapi/linux/if_tun.h
diff --git a/include/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h
index 21834cac..21834cac 100644
--- a/include/linux/if_tunnel.h
+++ b/include/uapi/linux/if_tunnel.h
diff --git a/include/linux/if_vlan.h b/include/uapi/linux/if_vlan.h
index 24ae0071..24ae0071 100644
--- a/include/linux/if_vlan.h
+++ b/include/uapi/linux/if_vlan.h
diff --git a/include/linux/ife.h b/include/uapi/linux/ife.h
index 2954da32..2954da32 100644
--- a/include/linux/ife.h
+++ b/include/uapi/linux/ife.h
diff --git a/include/linux/ila.h b/include/uapi/linux/ila.h
index 7e328d72..7e328d72 100644
--- a/include/linux/ila.h
+++ b/include/uapi/linux/ila.h
diff --git a/include/linux/in.h b/include/uapi/linux/in.h
index 9439efaa..9439efaa 100644
--- a/include/linux/in.h
+++ b/include/uapi/linux/in.h
diff --git a/include/linux/in6.h b/include/uapi/linux/in6.h
index 6f3bdee7..6f3bdee7 100644
--- a/include/linux/in6.h
+++ b/include/uapi/linux/in6.h
diff --git a/include/linux/in_route.h b/include/uapi/linux/in_route.h
index b261b8c9..b261b8c9 100644
--- a/include/linux/in_route.h
+++ b/include/uapi/linux/in_route.h
diff --git a/include/linux/inet_diag.h b/include/uapi/linux/inet_diag.h
index bada4d7b..bada4d7b 100644
--- a/include/linux/inet_diag.h
+++ b/include/uapi/linux/inet_diag.h
diff --git a/include/linux/ip.h b/include/uapi/linux/ip.h
index 1907284c..1907284c 100644
--- a/include/linux/ip.h
+++ b/include/uapi/linux/ip.h
diff --git a/include/linux/ip6_tunnel.h b/include/uapi/linux/ip6_tunnel.h
index 425926c4..425926c4 100644
--- a/include/linux/ip6_tunnel.h
+++ b/include/uapi/linux/ip6_tunnel.h
diff --git a/include/linux/ipsec.h b/include/uapi/linux/ipsec.h
index d17a6302..d17a6302 100644
--- a/include/linux/ipsec.h
+++ b/include/uapi/linux/ipsec.h
diff --git a/include/linux/kernel.h b/include/uapi/linux/kernel.h
index 527549f5..527549f5 100644
--- a/include/linux/kernel.h
+++ b/include/uapi/linux/kernel.h
diff --git a/include/linux/l2tp.h b/include/uapi/linux/l2tp.h
index 8a80007b..8a80007b 100644
--- a/include/linux/l2tp.h
+++ b/include/uapi/linux/l2tp.h
diff --git a/include/linux/libc-compat.h b/include/uapi/linux/libc-compat.h
index f38571da..f38571da 100644
--- a/include/linux/libc-compat.h
+++ b/include/uapi/linux/libc-compat.h
diff --git a/include/linux/limits.h b/include/uapi/linux/limits.h
index 2d0f9416..2d0f9416 100644
--- a/include/linux/limits.h
+++ b/include/uapi/linux/limits.h
diff --git a/include/linux/lwtunnel.h b/include/uapi/linux/lwtunnel.h
index 32984262..32984262 100644
--- a/include/linux/lwtunnel.h
+++ b/include/uapi/linux/lwtunnel.h
diff --git a/include/linux/magic.h b/include/uapi/linux/magic.h
index e439565d..e439565d 100644
--- a/include/linux/magic.h
+++ b/include/uapi/linux/magic.h
diff --git a/include/linux/mpls.h b/include/uapi/linux/mpls.h
index bf5b6259..bf5b6259 100644
--- a/include/linux/mpls.h
+++ b/include/uapi/linux/mpls.h
diff --git a/include/linux/mpls_iptunnel.h b/include/uapi/linux/mpls_iptunnel.h
index 1a0e57b4..1a0e57b4 100644
--- a/include/linux/mpls_iptunnel.h
+++ b/include/uapi/linux/mpls_iptunnel.h
diff --git a/include/linux/neighbour.h b/include/uapi/linux/neighbour.h
index 3199d289..3199d289 100644
--- a/include/linux/neighbour.h
+++ b/include/uapi/linux/neighbour.h
diff --git a/include/linux/net_namespace.h b/include/uapi/linux/net_namespace.h
index 9a92b7e1..9a92b7e1 100644
--- a/include/linux/net_namespace.h
+++ b/include/uapi/linux/net_namespace.h
diff --git a/include/linux/netconf.h b/include/uapi/linux/netconf.h
index 4afbd7db..4afbd7db 100644
--- a/include/linux/netconf.h
+++ b/include/uapi/linux/netconf.h
diff --git a/include/linux/netdevice.h b/include/uapi/linux/netdevice.h
index 66fceb44..66fceb44 100644
--- a/include/linux/netdevice.h
+++ b/include/uapi/linux/netdevice.h
diff --git a/include/linux/netfilter.h b/include/uapi/linux/netfilter.h
index ff4a4a52..ff4a4a52 100644
--- a/include/linux/netfilter.h
+++ b/include/uapi/linux/netfilter.h
diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/uapi/linux/netfilter/ipset/ip_set.h
index a6c96b00..a6c96b00 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/uapi/linux/netfilter/ipset/ip_set.h
diff --git a/include/linux/netfilter/x_tables.h b/include/uapi/linux/netfilter/x_tables.h
index 41209700..41209700 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/uapi/linux/netfilter/x_tables.h
diff --git a/include/linux/netfilter/xt_set.h b/include/uapi/linux/netfilter/xt_set.h
index d4e02348..d4e02348 100644
--- a/include/linux/netfilter/xt_set.h
+++ b/include/uapi/linux/netfilter/xt_set.h
diff --git a/include/linux/netfilter/xt_tcpudp.h b/include/uapi/linux/netfilter/xt_tcpudp.h
index 38aa7b39..38aa7b39 100644
--- a/include/linux/netfilter/xt_tcpudp.h
+++ b/include/uapi/linux/netfilter/xt_tcpudp.h
diff --git a/include/linux/netfilter_ipv4.h b/include/uapi/linux/netfilter_ipv4.h
index a5f4dc78..a5f4dc78 100644
--- a/include/linux/netfilter_ipv4.h
+++ b/include/uapi/linux/netfilter_ipv4.h
diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/uapi/linux/netfilter_ipv4/ip_tables.h
index 456fb863..456fb863 100644
--- a/include/linux/netfilter_ipv4/ip_tables.h
+++ b/include/uapi/linux/netfilter_ipv4/ip_tables.h
diff --git a/include/linux/netfilter_ipv6.h b/include/uapi/linux/netfilter_ipv6.h
index 8483d1d4..8483d1d4 100644
--- a/include/linux/netfilter_ipv6.h
+++ b/include/uapi/linux/netfilter_ipv6.h
diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/uapi/linux/netfilter_ipv6/ip6_tables.h
index fcc8ccaf..fcc8ccaf 100644
--- a/include/linux/netfilter_ipv6/ip6_tables.h
+++ b/include/uapi/linux/netfilter_ipv6/ip6_tables.h
diff --git a/include/linux/netlink.h b/include/uapi/linux/netlink.h
index ec0690b5..ec0690b5 100644
--- a/include/linux/netlink.h
+++ b/include/uapi/linux/netlink.h
diff --git a/include/linux/netlink_diag.h b/include/uapi/linux/netlink_diag.h
index c8c8c7d2..c8c8c7d2 100644
--- a/include/linux/netlink_diag.h
+++ b/include/uapi/linux/netlink_diag.h
diff --git a/include/linux/packet_diag.h b/include/uapi/linux/packet_diag.h
index 0c5d5dd6..0c5d5dd6 100644
--- a/include/linux/packet_diag.h
+++ b/include/uapi/linux/packet_diag.h
diff --git a/include/linux/param.h b/include/uapi/linux/param.h
index 092e92f6..092e92f6 100644
--- a/include/linux/param.h
+++ b/include/uapi/linux/param.h
diff --git a/include/linux/pfkeyv2.h b/include/uapi/linux/pfkeyv2.h
index ada7f017..ada7f017 100644
--- a/include/linux/pfkeyv2.h
+++ b/include/uapi/linux/pfkeyv2.h
diff --git a/include/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index d5e2bf68..d5e2bf68 100644
--- a/include/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
diff --git a/include/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
index 099bf552..099bf552 100644
--- a/include/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h
diff --git a/include/linux/posix_types.h b/include/uapi/linux/posix_types.h
index 988f76e6..988f76e6 100644
--- a/include/linux/posix_types.h
+++ b/include/uapi/linux/posix_types.h
diff --git a/include/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index 813e9e07..813e9e07 100644
--- a/include/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
diff --git a/include/linux/sctp.h b/include/uapi/linux/sctp.h
index fec24c41..fec24c41 100644
--- a/include/linux/sctp.h
+++ b/include/uapi/linux/sctp.h
diff --git a/include/linux/seg6.h b/include/uapi/linux/seg6.h
index 07152792..07152792 100644
--- a/include/linux/seg6.h
+++ b/include/uapi/linux/seg6.h
diff --git a/include/linux/seg6_genl.h b/include/uapi/linux/seg6_genl.h
index 99382f94..99382f94 100644
--- a/include/linux/seg6_genl.h
+++ b/include/uapi/linux/seg6_genl.h
diff --git a/include/linux/seg6_hmac.h b/include/uapi/linux/seg6_hmac.h
index 704f93e8..704f93e8 100644
--- a/include/linux/seg6_hmac.h
+++ b/include/uapi/linux/seg6_hmac.h
diff --git a/include/linux/seg6_iptunnel.h b/include/uapi/linux/seg6_iptunnel.h
index a5dc05a1..a5dc05a1 100644
--- a/include/linux/seg6_iptunnel.h
+++ b/include/uapi/linux/seg6_iptunnel.h
diff --git a/include/linux/seg6_local.h b/include/uapi/linux/seg6_local.h
index 76b90d60..76b90d60 100644
--- a/include/linux/seg6_local.h
+++ b/include/uapi/linux/seg6_local.h
diff --git a/include/linux/sock_diag.h b/include/uapi/linux/sock_diag.h
index 901231e6..901231e6 100644
--- a/include/linux/sock_diag.h
+++ b/include/uapi/linux/sock_diag.h
diff --git a/include/linux/socket.h b/include/uapi/linux/socket.h
index 8c1e5017..8c1e5017 100644
--- a/include/linux/socket.h
+++ b/include/uapi/linux/socket.h
diff --git a/include/linux/sockios.h b/include/uapi/linux/sockios.h
index 79d029d2..79d029d2 100644
--- a/include/linux/sockios.h
+++ b/include/uapi/linux/sockios.h
diff --git a/include/linux/stddef.h b/include/uapi/linux/stddef.h
index 4bb69dec..4bb69dec 100644
--- a/include/linux/stddef.h
+++ b/include/uapi/linux/stddef.h
diff --git a/include/linux/sysinfo.h b/include/uapi/linux/sysinfo.h
index 934335a2..934335a2 100644
--- a/include/linux/sysinfo.h
+++ b/include/uapi/linux/sysinfo.h
diff --git a/include/linux/tc_act/tc_bpf.h b/include/uapi/linux/tc_act/tc_bpf.h
index 8dc2ac05..8dc2ac05 100644
--- a/include/linux/tc_act/tc_bpf.h
+++ b/include/uapi/linux/tc_act/tc_bpf.h
diff --git a/include/linux/tc_act/tc_connmark.h b/include/uapi/linux/tc_act/tc_connmark.h
index 62a5e944..62a5e944 100644
--- a/include/linux/tc_act/tc_connmark.h
+++ b/include/uapi/linux/tc_act/tc_connmark.h
diff --git a/include/linux/tc_act/tc_csum.h b/include/uapi/linux/tc_act/tc_csum.h
index a11bb355..a11bb355 100644
--- a/include/linux/tc_act/tc_csum.h
+++ b/include/uapi/linux/tc_act/tc_csum.h
diff --git a/include/linux/tc_act/tc_defact.h b/include/uapi/linux/tc_act/tc_defact.h
index d2a3abb7..d2a3abb7 100644
--- a/include/linux/tc_act/tc_defact.h
+++ b/include/uapi/linux/tc_act/tc_defact.h
diff --git a/include/linux/tc_act/tc_gact.h b/include/uapi/linux/tc_act/tc_gact.h
index 70b536a8..70b536a8 100644
--- a/include/linux/tc_act/tc_gact.h
+++ b/include/uapi/linux/tc_act/tc_gact.h
diff --git a/include/linux/tc_act/tc_ife.h b/include/uapi/linux/tc_act/tc_ife.h
index 7c281786..7c281786 100644
--- a/include/linux/tc_act/tc_ife.h
+++ b/include/uapi/linux/tc_act/tc_ife.h
diff --git a/include/linux/tc_act/tc_ipt.h b/include/uapi/linux/tc_act/tc_ipt.h
index 7c6e155d..7c6e155d 100644
--- a/include/linux/tc_act/tc_ipt.h
+++ b/include/uapi/linux/tc_act/tc_ipt.h
diff --git a/include/linux/tc_act/tc_mirred.h b/include/uapi/linux/tc_act/tc_mirred.h
index 3d7a2b35..3d7a2b35 100644
--- a/include/linux/tc_act/tc_mirred.h
+++ b/include/uapi/linux/tc_act/tc_mirred.h
diff --git a/include/linux/tc_act/tc_nat.h b/include/uapi/linux/tc_act/tc_nat.h
index 923457c9..923457c9 100644
--- a/include/linux/tc_act/tc_nat.h
+++ b/include/uapi/linux/tc_act/tc_nat.h
diff --git a/include/linux/tc_act/tc_pedit.h b/include/uapi/linux/tc_act/tc_pedit.h
index 143d2b31..143d2b31 100644
--- a/include/linux/tc_act/tc_pedit.h
+++ b/include/uapi/linux/tc_act/tc_pedit.h
diff --git a/include/linux/tc_act/tc_sample.h b/include/uapi/linux/tc_act/tc_sample.h
index edc9058b..edc9058b 100644
--- a/include/linux/tc_act/tc_sample.h
+++ b/include/uapi/linux/tc_act/tc_sample.h
diff --git a/include/linux/tc_act/tc_skbedit.h b/include/uapi/linux/tc_act/tc_skbedit.h
index 28844257..28844257 100644
--- a/include/linux/tc_act/tc_skbedit.h
+++ b/include/uapi/linux/tc_act/tc_skbedit.h
diff --git a/include/linux/tc_act/tc_skbmod.h b/include/uapi/linux/tc_act/tc_skbmod.h
index 10fc07da..10fc07da 100644
--- a/include/linux/tc_act/tc_skbmod.h
+++ b/include/uapi/linux/tc_act/tc_skbmod.h
diff --git a/include/linux/tc_act/tc_tunnel_key.h b/include/uapi/linux/tc_act/tc_tunnel_key.h
index afcd4be9..afcd4be9 100644
--- a/include/linux/tc_act/tc_tunnel_key.h
+++ b/include/uapi/linux/tc_act/tc_tunnel_key.h
diff --git a/include/linux/tc_act/tc_vlan.h b/include/uapi/linux/tc_act/tc_vlan.h
index bddb272b..bddb272b 100644
--- a/include/linux/tc_act/tc_vlan.h
+++ b/include/uapi/linux/tc_act/tc_vlan.h
diff --git a/include/linux/tc_ematch/tc_em_cmp.h b/include/uapi/linux/tc_ematch/tc_em_cmp.h
index f34bb1ba..f34bb1ba 100644
--- a/include/linux/tc_ematch/tc_em_cmp.h
+++ b/include/uapi/linux/tc_ematch/tc_em_cmp.h
diff --git a/include/linux/tc_ematch/tc_em_meta.h b/include/uapi/linux/tc_ematch/tc_em_meta.h
index b11f8ce2..b11f8ce2 100644
--- a/include/linux/tc_ematch/tc_em_meta.h
+++ b/include/uapi/linux/tc_ematch/tc_em_meta.h
diff --git a/include/linux/tc_ematch/tc_em_nbyte.h b/include/uapi/linux/tc_ematch/tc_em_nbyte.h
index 7172cfb9..7172cfb9 100644
--- a/include/linux/tc_ematch/tc_em_nbyte.h
+++ b/include/uapi/linux/tc_ematch/tc_em_nbyte.h
diff --git a/include/linux/tcp.h b/include/uapi/linux/tcp.h
index 8edad3f9..8edad3f9 100644
--- a/include/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
diff --git a/include/linux/tcp_metrics.h b/include/uapi/linux/tcp_metrics.h
index 80ad90d0..80ad90d0 100644
--- a/include/linux/tcp_metrics.h
+++ b/include/uapi/linux/tcp_metrics.h
diff --git a/include/linux/tipc.h b/include/uapi/linux/tipc.h
index 924fb5cf..924fb5cf 100644
--- a/include/linux/tipc.h
+++ b/include/uapi/linux/tipc.h
diff --git a/include/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h
index f9edd20f..f9edd20f 100644
--- a/include/linux/tipc_netlink.h
+++ b/include/uapi/linux/tipc_netlink.h
diff --git a/include/linux/types.h b/include/uapi/linux/types.h
index c640657a..c640657a 100644
--- a/include/linux/types.h
+++ b/include/uapi/linux/types.h
diff --git a/include/linux/unix_diag.h b/include/uapi/linux/unix_diag.h
index 1eb0b8dd..1eb0b8dd 100644
--- a/include/linux/unix_diag.h
+++ b/include/uapi/linux/unix_diag.h
diff --git a/include/linux/veth.h b/include/uapi/linux/veth.h
index 3354c1eb..3354c1eb 100644
--- a/include/linux/veth.h
+++ b/include/uapi/linux/veth.h
diff --git a/include/linux/xfrm.h b/include/uapi/linux/xfrm.h
index 5790293b..5790293b 100644
--- a/include/linux/xfrm.h
+++ b/include/uapi/linux/xfrm.h
diff --git a/include/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h
index 861440a8..13875a3f 100644
--- a/include/rdma/rdma_netlink.h
+++ b/include/uapi/rdma/rdma_netlink.h
@@ -1,5 +1,5 @@
-#ifndef _UAPI_RDMA_NETLINK_H
-#define _UAPI_RDMA_NETLINK_H
+#ifndef _RDMA_NETLINK_H
+#define _RDMA_NETLINK_H
#include <linux/types.h>
@@ -304,4 +304,4 @@ enum rdma_nldev_attr {
RDMA_NLDEV_ATTR_MAX
};
-#endif /* _UAPI_RDMA_NETLINK_H */
+#endif /* _RDMA_NETLINK_H */
diff --git a/include/utils.h b/include/utils.h
index c9ed230b..3d91c50d 100644
--- a/include/utils.h
+++ b/include/utils.h
@@ -133,6 +133,8 @@ void missarg(const char *) __attribute__((noreturn));
void invarg(const char *, const char *) __attribute__((noreturn));
void duparg(const char *, const char *) __attribute__((noreturn));
void duparg2(const char *, const char *) __attribute__((noreturn));
+int check_ifname(const char *);
+int get_ifname(char *, const char *);
int matches(const char *arg, const char *pattern);
int inet_addr_match(const inet_prefix *a, const inet_prefix *b, int bits);
@@ -193,6 +195,8 @@ static inline void __jiffies_to_tv(struct timeval *tv, unsigned long jiffies)
tv->tv_usec = tvusec - 1000000 * tv->tv_sec;
}
+void print_escape_buf(const __u8 *buf, size_t len, const char *escape);
+
int print_timestamp(FILE *fp);
void print_nlmsg_timestamp(FILE *fp, const struct nlmsghdr *n);
diff --git a/ip/ip6tunnel.c b/ip/ip6tunnel.c
index b4a7def1..bc44bef7 100644
--- a/ip/ip6tunnel.c
+++ b/ip/ip6tunnel.c
@@ -136,7 +136,7 @@ static void print_tunnel(struct ip6_tnl_parm2 *p)
static int parse_args(int argc, char **argv, int cmd, struct ip6_tnl_parm2 *p)
{
int count = 0;
- char medium[IFNAMSIZ] = {};
+ const char *medium = NULL;
while (argc > 0) {
if (strcmp(*argv, "mode") == 0) {
@@ -180,7 +180,7 @@ static int parse_args(int argc, char **argv, int cmd, struct ip6_tnl_parm2 *p)
memcpy(&p->laddr, &laddr.data, sizeof(p->laddr));
} else if (strcmp(*argv, "dev") == 0) {
NEXT_ARG();
- strncpy(medium, *argv, IFNAMSIZ - 1);
+ medium = *argv;
} else if (strcmp(*argv, "encaplimit") == 0) {
NEXT_ARG();
if (strcmp(*argv, "none") == 0) {
@@ -273,7 +273,8 @@ static int parse_args(int argc, char **argv, int cmd, struct ip6_tnl_parm2 *p)
usage();
if (p->name[0])
duparg2("name", *argv);
- strncpy(p->name, *argv, IFNAMSIZ - 1);
+ if (get_ifname(p->name, *argv))
+ invarg("\"name\" not a valid ifname", *argv);
if (cmd == SIOCCHGTUNNEL && count == 0) {
struct ip6_tnl_parm2 old_p = {};
@@ -285,7 +286,7 @@ static int parse_args(int argc, char **argv, int cmd, struct ip6_tnl_parm2 *p)
count++;
argc--; argv++;
}
- if (medium[0]) {
+ if (medium) {
p->link = ll_name_to_index(medium);
if (p->link == 0) {
fprintf(stderr, "Cannot find device \"%s\"\n", medium);
diff --git a/ip/ipl2tp.c b/ip/ipl2tp.c
index 88664c90..1e37b175 100644
--- a/ip/ipl2tp.c
+++ b/ip/ipl2tp.c
@@ -182,7 +182,7 @@ static int create_session(struct l2tp_parm *p)
if (p->peer_cookie_len)
addattr_l(&req.n, 1024, L2TP_ATTR_PEER_COOKIE,
p->peer_cookie, p->peer_cookie_len);
- if (p->ifname && p->ifname[0])
+ if (p->ifname)
addattrstrz(&req.n, 1024, L2TP_ATTR_IFNAME, p->ifname);
if (rtnl_talk(&genl_rth, &req.n, NULL, 0) < 0)
@@ -545,6 +545,8 @@ static int parse_args(int argc, char **argv, int cmd, struct l2tp_parm *p)
}
} else if (strcmp(*argv, "name") == 0) {
NEXT_ARG();
+ if (check_ifname(*argv))
+ invarg("\"name\" not a valid ifname", *argv);
p->ifname = *argv;
} else if (strcmp(*argv, "remote") == 0) {
NEXT_ARG();
diff --git a/ip/iplink.c b/ip/iplink.c
index ff5b56c0..6a96ea9f 100644
--- a/ip/iplink.c
+++ b/ip/iplink.c
@@ -573,6 +573,8 @@ int iplink_parse(int argc, char **argv, struct iplink_req *req,
req->i.ifi_flags &= ~IFF_UP;
} else if (strcmp(*argv, "name") == 0) {
NEXT_ARG();
+ if (check_ifname(*argv))
+ invarg("\"name\" not a valid ifname", *argv);
*name = *argv;
} else if (strcmp(*argv, "index") == 0) {
NEXT_ARG();
@@ -848,6 +850,8 @@ int iplink_parse(int argc, char **argv, struct iplink_req *req,
NEXT_ARG();
if (*dev)
duparg2("dev", *argv);
+ if (check_ifname(*argv))
+ invarg("\"dev\" not a valid ifname", *argv);
*dev = *argv;
dev_index = ll_name_to_index(*dev);
}
@@ -870,7 +874,6 @@ int iplink_parse(int argc, char **argv, struct iplink_req *req,
static int iplink_modify(int cmd, unsigned int flags, int argc, char **argv)
{
- int len;
char *dev = NULL;
char *name = NULL;
char *link = NULL;
@@ -960,13 +963,8 @@ static int iplink_modify(int cmd, unsigned int flags, int argc, char **argv)
}
if (name) {
- len = strlen(name) + 1;
- if (len == 1)
- invarg("\"\" is not a valid device identifier\n",
- "name");
- if (len > IFNAMSIZ)
- invarg("\"name\" too long\n", name);
- addattr_l(&req.n, sizeof(req), IFLA_IFNAME, name, len);
+ addattr_l(&req.n, sizeof(req),
+ IFLA_IFNAME, name, strlen(name) + 1);
}
if (type) {
@@ -1016,7 +1014,6 @@ static int iplink_modify(int cmd, unsigned int flags, int argc, char **argv)
int iplink_get(unsigned int flags, char *name, __u32 filt_mask)
{
- int len;
struct iplink_req req = {
.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
.n.nlmsg_flags = NLM_F_REQUEST | flags,
@@ -1029,13 +1026,8 @@ int iplink_get(unsigned int flags, char *name, __u32 filt_mask)
} answer;
if (name) {
- len = strlen(name) + 1;
- if (len == 1)
- invarg("\"\" is not a valid device identifier\n",
- "name");
- if (len > IFNAMSIZ)
- invarg("\"name\" too long\n", name);
- addattr_l(&req.n, sizeof(req), IFLA_IFNAME, name, len);
+ addattr_l(&req.n, sizeof(req),
+ IFLA_IFNAME, name, strlen(name) + 1);
}
addattr32(&req.n, sizeof(req), IFLA_EXT_MASK, filt_mask);
@@ -1265,6 +1257,8 @@ static int do_set(int argc, char **argv)
flags &= ~IFF_UP;
} else if (strcmp(*argv, "name") == 0) {
NEXT_ARG();
+ if (check_ifname(*argv))
+ invarg("\"name\" not a valid ifname", *argv);
newname = *argv;
} else if (matches(*argv, "address") == 0) {
NEXT_ARG();
@@ -1355,6 +1349,8 @@ static int do_set(int argc, char **argv)
if (dev)
duparg2("dev", *argv);
+ if (check_ifname(*argv))
+ invarg("\"dev\" not a valid ifname", *argv);
dev = *argv;
}
argc--; argv++;
@@ -1383,9 +1379,6 @@ static int do_set(int argc, char **argv)
}
if (newname && strcmp(dev, newname)) {
- if (strlen(newname) == 0)
- invarg("\"\" is not a valid device identifier\n",
- "name");
if (do_changename(dev, newname) < 0)
return -1;
dev = newname;
diff --git a/ip/ipmaddr.c b/ip/ipmaddr.c
index 85a69e77..5683f6fa 100644
--- a/ip/ipmaddr.c
+++ b/ip/ipmaddr.c
@@ -284,7 +284,8 @@ static int multiaddr_modify(int cmd, int argc, char **argv)
NEXT_ARG();
if (ifr.ifr_name[0])
duparg("dev", *argv);
- strncpy(ifr.ifr_name, *argv, IFNAMSIZ);
+ if (get_ifname(ifr.ifr_name, *argv))
+ invarg("\"dev\" not a valid ifname", *argv);
} else {
if (matches(*argv, "address") == 0) {
NEXT_ARG();
diff --git a/ip/iproute.c b/ip/iproute.c
index a8733f45..e81bc05e 100644
--- a/ip/iproute.c
+++ b/ip/iproute.c
@@ -574,10 +574,10 @@ int print_route(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
for (i = 2; i <= RTAX_MAX; i++) {
__u32 val = 0U;
- if (mxrta[i] == NULL)
+ if (mxrta[i] == NULL && !(mxlock & (1 << i)))
continue;
- if (i != RTAX_CC_ALGO)
+ if (mxrta[i] != NULL && i != RTAX_CC_ALGO)
val = rta_getattr_u32(mxrta[i]);
if (i == RTAX_HOPLIMIT && (int)val == -1)
diff --git a/ip/iprule.c b/ip/iprule.c
index 8313138d..36c57fa7 100644
--- a/ip/iprule.c
+++ b/ip/iprule.c
@@ -472,11 +472,13 @@ static int iprule_list_flush_or_save(int argc, char **argv, int action)
} else if (strcmp(*argv, "dev") == 0 ||
strcmp(*argv, "iif") == 0) {
NEXT_ARG();
- strncpy(filter.iif, *argv, IFNAMSIZ);
+ if (get_ifname(filter.iif, *argv))
+ invarg("\"iif\"/\"dev\" not a valid ifname", *argv);
filter.iifmask = 1;
} else if (strcmp(*argv, "oif") == 0) {
NEXT_ARG();
- strncpy(filter.oif, *argv, IFNAMSIZ);
+ if (get_ifname(filter.oif, *argv))
+ invarg("\"oif\" not a valid ifname", *argv);
filter.oifmask = 1;
} else if (strcmp(*argv, "l3mdev") == 0) {
filter.l3mdev = 1;
@@ -695,10 +697,14 @@ static int iprule_modify(int cmd, int argc, char **argv)
} else if (strcmp(*argv, "dev") == 0 ||
strcmp(*argv, "iif") == 0) {
NEXT_ARG();
+ if (check_ifname(*argv))
+ invarg("\"iif\"/\"dev\" not a valid ifname", *argv);
addattr_l(&req.n, sizeof(req), FRA_IFNAME,
*argv, strlen(*argv)+1);
} else if (strcmp(*argv, "oif") == 0) {
NEXT_ARG();
+ if (check_ifname(*argv))
+ invarg("\"oif\" not a valid ifname", *argv);
addattr_l(&req.n, sizeof(req), FRA_OIFNAME,
*argv, strlen(*argv)+1);
} else if (strcmp(*argv, "l3mdev") == 0) {
diff --git a/ip/iptunnel.c b/ip/iptunnel.c
index 105d0f55..208a1f06 100644
--- a/ip/iptunnel.c
+++ b/ip/iptunnel.c
@@ -60,7 +60,7 @@ static void set_tunnel_proto(struct ip_tunnel_parm *p, int proto)
static int parse_args(int argc, char **argv, int cmd, struct ip_tunnel_parm *p)
{
int count = 0;
- char medium[IFNAMSIZ] = {};
+ const char *medium = NULL;
int isatap = 0;
memset(p, 0, sizeof(*p));
@@ -139,7 +139,7 @@ static int parse_args(int argc, char **argv, int cmd, struct ip_tunnel_parm *p)
p->iph.saddr = htonl(INADDR_ANY);
} else if (strcmp(*argv, "dev") == 0) {
NEXT_ARG();
- strncpy(medium, *argv, IFNAMSIZ - 1);
+ medium = *argv;
} else if (strcmp(*argv, "ttl") == 0 ||
strcmp(*argv, "hoplimit") == 0 ||
strcmp(*argv, "hlim") == 0) {
@@ -178,7 +178,8 @@ static int parse_args(int argc, char **argv, int cmd, struct ip_tunnel_parm *p)
if (p->name[0])
duparg2("name", *argv);
- strncpy(p->name, *argv, IFNAMSIZ - 1);
+ if (get_ifname(p->name, *argv))
+ invarg("\"name\" not a valid ifname", *argv);
if (cmd == SIOCCHGTUNNEL && count == 0) {
struct ip_tunnel_parm old_p = {};
@@ -216,7 +217,7 @@ static int parse_args(int argc, char **argv, int cmd, struct ip_tunnel_parm *p)
}
}
- if (medium[0]) {
+ if (medium) {
p->link = ll_name_to_index(medium);
if (p->link == 0) {
fprintf(stderr, "Cannot find device \"%s\"\n", medium);
@@ -465,9 +466,8 @@ static int do_prl(int argc, char **argv)
{
struct ip_tunnel_prl p = {};
int count = 0;
- int devname = 0;
int cmd = 0;
- char medium[IFNAMSIZ] = {};
+ const char *medium = NULL;
while (argc > 0) {
if (strcmp(*argv, "prl-default") == 0) {
@@ -488,8 +488,9 @@ static int do_prl(int argc, char **argv)
count++;
} else if (strcmp(*argv, "dev") == 0) {
NEXT_ARG();
- strncpy(medium, *argv, IFNAMSIZ-1);
- devname++;
+ if (check_ifname(*argv))
+ invarg("\"dev\" not a valid ifname", *argv);
+ medium = *argv;
} else {
fprintf(stderr,
"Invalid PRL parameter \"%s\"\n", *argv);
@@ -502,7 +503,7 @@ static int do_prl(int argc, char **argv)
}
argc--; argv++;
}
- if (devname == 0) {
+ if (!medium) {
fprintf(stderr, "Must specify device\n");
exit(-1);
}
@@ -513,9 +514,8 @@ static int do_prl(int argc, char **argv)
static int do_6rd(int argc, char **argv)
{
struct ip_tunnel_6rd ip6rd = {};
- int devname = 0;
int cmd = 0;
- char medium[IFNAMSIZ] = {};
+ const char *medium = NULL;
inet_prefix prefix;
while (argc > 0) {
@@ -537,8 +537,9 @@ static int do_6rd(int argc, char **argv)
cmd = SIOCDEL6RD;
} else if (strcmp(*argv, "dev") == 0) {
NEXT_ARG();
- strncpy(medium, *argv, IFNAMSIZ-1);
- devname++;
+ if (check_ifname(*argv))
+ invarg("\"dev\" not a valid ifname", *argv);
+ medium = *argv;
} else {
fprintf(stderr,
"Invalid 6RD parameter \"%s\"\n", *argv);
@@ -546,7 +547,7 @@ static int do_6rd(int argc, char **argv)
}
argc--; argv++;
}
- if (devname == 0) {
+ if (!medium) {
fprintf(stderr, "Must specify device\n");
exit(-1);
}
diff --git a/ip/iptuntap.c b/ip/iptuntap.c
index 451f7f0e..b46e452f 100644
--- a/ip/iptuntap.c
+++ b/ip/iptuntap.c
@@ -176,7 +176,8 @@ static int parse_args(int argc, char **argv,
ifr->ifr_flags |= IFF_MULTI_QUEUE;
} else if (matches(*argv, "dev") == 0) {
NEXT_ARG();
- strncpy(ifr->ifr_name, *argv, IFNAMSIZ-1);
+ if (get_ifname(ifr->ifr_name, *argv))
+ invarg("\"dev\" not a valid ifname", *argv);
} else {
if (matches(*argv, "name") == 0) {
NEXT_ARG();
@@ -184,7 +185,8 @@ static int parse_args(int argc, char **argv,
usage();
if (ifr->ifr_name[0])
duparg2("name", *argv);
- strncpy(ifr->ifr_name, *argv, IFNAMSIZ);
+ if (get_ifname(ifr->ifr_name, *argv))
+ invarg("\"name\" not a valid ifname", *argv);
}
count++;
argc--; argv++;
diff --git a/ip/xfrm_state.c b/ip/xfrm_state.c
index 4483fb8f..99fdec23 100644
--- a/ip/xfrm_state.c
+++ b/ip/xfrm_state.c
@@ -539,7 +539,7 @@ static int xfrm_state_modify(int cmd, unsigned int flags, int argc, char **argv)
xfrm_algo_parse((void *)&alg, type, name, key,
buf, sizeof(alg.buf));
- len += alg.u.alg.alg_key_len;
+ len += alg.u.alg.alg_key_len / 8;
addattr_l(&req.n, sizeof(req.buf), type,
(void *)&alg, len);
diff --git a/lib/color.c b/lib/color.c
index 79d5e289..8d049a01 100644
--- a/lib/color.c
+++ b/lib/color.c
@@ -45,8 +45,8 @@ static const char * const color_codes[] = {
NULL,
};
-static enum color attr_colors[] = {
- /* light background */
+/* light background */
+static enum color attr_colors_light[] = {
C_CYAN,
C_YELLOW,
C_MAGENTA,
@@ -54,8 +54,10 @@ static enum color attr_colors[] = {
C_GREEN,
C_RED,
C_CLEAR,
+};
- /* dark background */
+/* dark background */
+static enum color attr_colors_dark[] = {
C_BOLD_CYAN,
C_BOLD_YELLOW,
C_BOLD_MAGENTA,
@@ -109,8 +111,9 @@ int color_fprintf(FILE *fp, enum color_attr attr, const char *fmt, ...)
goto end;
}
- ret += fprintf(fp, "%s",
- color_codes[attr_colors[is_dark_bg ? attr + 8 : attr]]);
+ ret += fprintf(fp, "%s", color_codes[is_dark_bg ?
+ attr_colors_dark[attr] : attr_colors_light[attr]]);
+
ret += vfprintf(fp, fmt, args);
ret += fprintf(fp, "%s", color_codes[C_CLEAR]);
@@ -127,7 +130,7 @@ enum color_attr ifa_family_color(__u8 ifa_family)
case AF_INET6:
return COLOR_INET6;
default:
- return COLOR_CLEAR;
+ return COLOR_NONE;
}
}
@@ -139,6 +142,6 @@ enum color_attr oper_state_color(__u8 state)
case IF_OPER_DOWN:
return COLOR_OPERSTATE_DOWN;
default:
- return COLOR_CLEAR;
+ return COLOR_NONE;
}
}
diff --git a/lib/utils.c b/lib/utils.c
index bbd3cbc4..ac155bf5 100644
--- a/lib/utils.c
+++ b/lib/utils.c
@@ -20,6 +20,7 @@
#include <sys/socket.h>
#include <netinet/in.h>
#include <string.h>
+#include <ctype.h>
#include <netdb.h>
#include <arpa/inet.h>
#include <asm/types.h>
@@ -30,6 +31,7 @@
#include <time.h>
#include <sys/time.h>
#include <errno.h>
+#include <ctype.h>
#include "rt_names.h"
#include "utils.h"
@@ -699,6 +701,34 @@ void duparg2(const char *key, const char *arg)
exit(-1);
}
+/*
+ * Check whether name is acceptable as a network interface name.
+ *
+ * name must be a NUL-terminated string. Returns 0 when the name is
+ * non-empty, shorter than IFNAMSIZ, and contains neither '/' nor any
+ * whitespace character; returns -1 otherwise.
+ */
+int check_ifname(const char *name)
+{
+	/* These checks mimic kernel checks in dev_valid_name */
+	if (*name == '\0')
+		return -1;
+	if (strlen(name) >= IFNAMSIZ)
+		return -1;
+
+	while (*name) {
+		/*
+		 * isspace() is only defined for EOF and values representable
+		 * as unsigned char, so convert before calling it; a plain
+		 * char may be negative for bytes >= 0x80.
+		 */
+		if (*name == '/' || isspace((unsigned char)*name))
+			return -1;
+		++name;
+	}
+	return 0;
+}
+
+/* Validate name with check_ifname() and, only when it is accepted, copy
+ * it into buf with strncpy(buf, name, IFNAMSIZ).
+ *
+ * buf is assumed to be IFNAMSIZ bytes. Returns the result of
+ * check_ifname() unchanged: 0 when the name was accepted and copied,
+ * non-zero when it was rejected, in which case buf is not written.
+ */
+int get_ifname(char *buf, const char *name)
+{
+ int ret;
+
+ ret = check_ifname(name);
+ if (ret == 0)
+ strncpy(buf, name, IFNAMSIZ);
+
+ return ret;
+}
+
int matches(const char *cmd, const char *pattern)
{
int len = strlen(cmd);
@@ -1018,6 +1048,20 @@ int addr64_n2a(__u64 addr, char *buff, size_t len)
return written;
}
+/* Print buffer and escape bytes that are !isprint or among 'escape'.
+ *
+ * Writes the first len bytes of buf to stdout through printf(). A byte
+ * is printed unchanged when it satisfies isprint(), is not a backslash
+ * and does not occur in the 'escape' string. Every other byte is printed
+ * as a backslash followed by its value in octal, zero-padded to three
+ * digits.
+ */
+void print_escape_buf(const __u8 *buf, size_t len, const char *escape)
+{
+ size_t i;
+
+ for (i = 0; i < len; ++i) {
+ if (isprint(buf[i]) && buf[i] != '\\' &&
+ !strchr(escape, buf[i]))
+ printf("%c", buf[i]);
+ else
+ printf("\\%03o", buf[i]);
+ }
+}
+
int print_timestamp(FILE *fp)
{
struct timeval tv;
@@ -1231,6 +1275,7 @@ int get_real_family(int rtm_type, int rtm_family)
return rtm_family;
}
+#ifdef NEED_STRLCPY
size_t strlcpy(char *dst, const char *src, size_t size)
{
size_t srclen = strlen(src);
@@ -1253,3 +1298,4 @@ size_t strlcat(char *dst, const char *src, size_t size)
return dlen + strlcpy(dst + dlen, src, size - dlen);
}
+#endif
diff --git a/misc/arpd.c b/misc/arpd.c
index bfab4454..c2666f76 100644
--- a/misc/arpd.c
+++ b/misc/arpd.c
@@ -664,7 +664,8 @@ int main(int argc, char **argv)
struct ifreq ifr = {};
for (i = 0; i < ifnum; i++) {
- strncpy(ifr.ifr_name, ifnames[i], IFNAMSIZ);
+ if (get_ifname(ifr.ifr_name, ifnames[i]))
+ invarg("not a valid ifname", ifnames[i]);
if (ioctl(udp_sock, SIOCGIFINDEX, &ifr)) {
perror("ioctl(SIOCGIFINDEX)");
exit(-1);
diff --git a/misc/ss.c b/misc/ss.c
index dd8dfaa4..09bff8a7 100644
--- a/misc/ss.c
+++ b/misc/ss.c
@@ -2153,6 +2153,16 @@ static void print_skmeminfo(struct rtattr *tb[], int attrtype)
printf(")");
}
+static void print_md5sig(struct tcp_diag_md5sig *sig)
+{ /* Print one key entry to stdout as "<host>/<prefixlen>=<key>".
+ * The host string comes from format_host() applied to tcpm_addr, with
+ * an address length of 16 for AF_INET6 and 4 for any other family.
+ * The key (tcpm_keylen bytes of tcpm_key) is written by
+ * print_escape_buf() with space and comma added to the escaped set.
+ */
+ printf("%s/%d=",
+ format_host(sig->tcpm_family,
+ sig->tcpm_family == AF_INET6 ? 16 : 4,
+ &sig->tcpm_addr),
+ sig->tcpm_prefixlen);
+ print_escape_buf(sig->tcpm_key, sig->tcpm_keylen, " ,");
+}
+
#define TCPI_HAS_OPT(info, opt) !!(info->tcpi_options & (opt))
static void tcp_show_info(const struct nlmsghdr *nlh, struct inet_diag_msg *r,
@@ -2289,6 +2299,17 @@ static void tcp_show_info(const struct nlmsghdr *nlh, struct inet_diag_msg *r,
free(s.dctcp);
free(s.bbr_info);
}
+ if (tb[INET_DIAG_MD5SIG]) {
+ struct tcp_diag_md5sig *sig = RTA_DATA(tb[INET_DIAG_MD5SIG]);
+ int len = RTA_PAYLOAD(tb[INET_DIAG_MD5SIG]);
+
+ printf(" md5keys:");
+ print_md5sig(sig++);
+ for (len -= sizeof(*sig); len > 0; len -= sizeof(*sig)) {
+ printf(",");
+ print_md5sig(sig++);
+ }
+ }
}
static const char *format_host_sa(struct sockaddr_storage *sa)
diff --git a/tc/f_flower.c b/tc/f_flower.c
index 934832e2..b1802107 100644
--- a/tc/f_flower.c
+++ b/tc/f_flower.c
@@ -629,11 +629,10 @@ static int flower_parse_opt(struct filter_util *qu, char *handle,
} else if (matches(*argv, "skip_sw") == 0) {
flags |= TCA_CLS_FLAGS_SKIP_SW;
} else if (matches(*argv, "indev") == 0) {
- char ifname[IFNAMSIZ] = {};
-
NEXT_ARG();
- strncpy(ifname, *argv, sizeof(ifname) - 1);
- addattrstrz(n, MAX_MSG, TCA_FLOWER_INDEV, ifname);
+ if (check_ifname(*argv))
+ invarg("\"indev\" not a valid ifname", *argv);
+ addattrstrz(n, MAX_MSG, TCA_FLOWER_INDEV, *argv);
} else if (matches(*argv, "vlan_id") == 0) {
__u16 vid;
diff --git a/tc/f_u32.c b/tc/f_u32.c
index 5815be9c..14b95889 100644
--- a/tc/f_u32.c
+++ b/tc/f_u32.c
@@ -385,8 +385,7 @@ static int parse_ip6_addr(int *argc_p, char ***argv_p,
plen = addr.bitlen;
for (i = 0; i < plen; i += 32) {
- /* if (((i + 31) & ~0x1F) <= plen) { */
- if (i + 31 <= plen) {
+ if (i + 31 < plen) {
res = pack_key(sel, addr.data[i / 32],
0xFFFFFFFF, off + 4 * (i / 32), offmask);
if (res < 0)
diff --git a/tc/q_netem.c b/tc/q_netem.c
index 5a9e7474..cdaddce9 100644
--- a/tc/q_netem.c
+++ b/tc/q_netem.c
@@ -231,7 +231,7 @@ static int netem_parse_opt(struct qdisc_util *qu, int argc, char **argv,
if (!strcmp(*argv, "random")) {
NEXT_ARG();
-random_loss_model:
+ random_loss_model:
if (get_percent(&opt.loss, *argv)) {
explain1("loss percent");
return -1;
@@ -338,7 +338,7 @@ random_loss_model:
return -1;
}
} else if (matches(*argv, "ecn") == 0) {
- present[TCA_NETEM_ECN] = 1;
+ present[TCA_NETEM_ECN] = 1;
} else if (matches(*argv, "reorder") == 0) {
NEXT_ARG();
present[TCA_NETEM_REORDER] = 1;
@@ -469,7 +469,7 @@ random_loss_model:
if (present[TCA_NETEM_CORR] &&
addattr_l(n, 1024, TCA_NETEM_CORR, &cor, sizeof(cor)) < 0)
- return -1;
+ return -1;
if (present[TCA_NETEM_REORDER] &&
addattr_l(n, 1024, TCA_NETEM_REORDER, &reorder, sizeof(reorder)) < 0)
@@ -478,7 +478,7 @@ random_loss_model:
if (present[TCA_NETEM_ECN] &&
addattr_l(n, 1024, TCA_NETEM_ECN, &present[TCA_NETEM_ECN],
sizeof(present[TCA_NETEM_ECN])) < 0)
- return -1;
+ return -1;
if (present[TCA_NETEM_CORRUPT] &&
addattr_l(n, 1024, TCA_NETEM_CORRUPT, &corrupt, sizeof(corrupt)) < 0)
@@ -491,11 +491,11 @@ random_loss_model:
if (loss_type == NETEM_LOSS_GI) {
if (addattr_l(n, 1024, NETEM_LOSS_GI,
&gimodel, sizeof(gimodel)) < 0)
- return -1;
+ return -1;
} else if (loss_type == NETEM_LOSS_GE) {
if (addattr_l(n, 1024, NETEM_LOSS_GE,
&gemodel, sizeof(gemodel)) < 0)
- return -1;
+ return -1;
} else {
fprintf(stderr, "loss in the weeds!\n");
return -1;
diff --git a/testsuite/tests/ip/link/new_link.t b/testsuite/tests/ip/link/new_link.t
index 699adbcd..c17650a2 100755
--- a/testsuite/tests/ip/link/new_link.t
+++ b/testsuite/tests/ip/link/new_link.t
@@ -1,6 +1,6 @@
#!/bin/sh
-source lib/generic.sh
+. lib/generic.sh
ts_log "[Testing add/del virtual links]"
diff --git a/testsuite/tests/ip/link/show_dev_wo_vf_rate.t b/testsuite/tests/ip/link/show_dev_wo_vf_rate.t
index a600ba65..5b3c004e 100755
--- a/testsuite/tests/ip/link/show_dev_wo_vf_rate.t
+++ b/testsuite/tests/ip/link/show_dev_wo_vf_rate.t
@@ -1,6 +1,6 @@
#!/bin/sh
-source lib/generic.sh
+. lib/generic.sh
NL_FILE="tests/ip/link/dev_wo_vf_rate.nl"
ts_ip "$0" "Show VF devices w/o VF rate info" -d monitor file $NL_FILE
diff --git a/testsuite/tests/ip/netns/set_nsid.t b/testsuite/tests/ip/netns/set_nsid.t
index 606d45ab..8f8c7792 100755
--- a/testsuite/tests/ip/netns/set_nsid.t
+++ b/testsuite/tests/ip/netns/set_nsid.t
@@ -1,6 +1,6 @@
#!/bin/sh
-source lib/generic.sh
+. lib/generic.sh
ts_log "[Testing netns nsid]"
diff --git a/testsuite/tests/ip/netns/set_nsid_batch.t b/testsuite/tests/ip/netns/set_nsid_batch.t
index abb3f1bb..196fd4b3 100755
--- a/testsuite/tests/ip/netns/set_nsid_batch.t
+++ b/testsuite/tests/ip/netns/set_nsid_batch.t
@@ -1,6 +1,6 @@
#!/bin/sh
-source lib/generic.sh
+. lib/generic.sh
ts_log "[Testing netns nsid in batch mode]"
diff --git a/testsuite/tests/ip/route/add_default_route.t b/testsuite/tests/ip/route/add_default_route.t
index e5ea6473..569ba1f8 100755
--- a/testsuite/tests/ip/route/add_default_route.t
+++ b/testsuite/tests/ip/route/add_default_route.t
@@ -1,6 +1,6 @@
#!/bin/sh
-source lib/generic.sh
+. lib/generic.sh
ts_log "[Testing add default route]"
diff --git a/testsuite/tests/ip/tunnel/add_tunnel.t b/testsuite/tests/ip/tunnel/add_tunnel.t
index 18f6e370..3f5a9d3c 100755
--- a/testsuite/tests/ip/tunnel/add_tunnel.t
+++ b/testsuite/tests/ip/tunnel/add_tunnel.t
@@ -1,6 +1,6 @@
#!/bin/sh
-source lib/generic.sh
+. lib/generic.sh
TUNNEL_NAME="tunnel_test_ip"
diff --git a/testsuite/tests/tc/cls-testbed.t b/testsuite/tests/tc/cls-testbed.t
index 2afc26fc..d5c21e5c 100755
--- a/testsuite/tests/tc/cls-testbed.t
+++ b/testsuite/tests/tc/cls-testbed.t
@@ -1,7 +1,7 @@
#!/bin/bash
# vim: ft=sh
-source lib/generic.sh
+. lib/generic.sh
QDISCS="cbq htb dsmark"
diff --git a/testsuite/tests/tc/dsmark.t b/testsuite/tests/tc/dsmark.t
index 6934165e..177585e6 100755
--- a/testsuite/tests/tc/dsmark.t
+++ b/testsuite/tests/tc/dsmark.t
@@ -1,7 +1,7 @@
#!/bin/bash
# vim: ft=sh
-source lib/generic.sh
+. lib/generic.sh
ts_qdisc_available "dsmark"
if [ $? -eq 0 ]; then
diff --git a/testsuite/tests/tc/pedit.t b/testsuite/tests/tc/pedit.t
index e9b6c333..8d531a05 100755
--- a/testsuite/tests/tc/pedit.t
+++ b/testsuite/tests/tc/pedit.t
@@ -1,6 +1,6 @@
#!/bin/sh
-source lib/generic.sh
+. lib/generic.sh
DEV="$(rand_dev)"
ts_ip "$0" "Add $DEV dummy interface" link add dev $DEV type dummy
diff --git a/tipc/Makefile b/tipc/Makefile
index 2212beb0..d3c957e2 100644
--- a/tipc/Makefile
+++ b/tipc/Makefile
@@ -10,8 +10,6 @@ TIPCOBJ=bearer.o \
peer.o tipc.o
TARGETS=tipc
-CFLAGS += $(shell $(PKG_CONFIG) libmnl --cflags)
-LDLIBS += $(shell $(PKG_CONFIG) libmnl --libs)
endif