aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2018-03-04 13:04:24 -0500
committerDavid S. Miller <davem@davemloft.net>2018-03-04 13:04:24 -0500
commitb33cc2cec9e2e58ec5d220e26f1111117ea782d9 (patch)
tree7c717d159d438db0ac887315bbe55d2fb1d75101
parente871cae7927ba5d86508066a5f4bec0ea1d7eb95 (diff)
parent91a5c1ecba9f23f247b3772e6d54aabfebc0e300 (diff)
downloadkernel_replicant_linux-b33cc2cec9e2e58ec5d220e26f1111117ea782d9.tar.gz
kernel_replicant_linux-b33cc2cec9e2e58ec5d220e26f1111117ea782d9.tar.bz2
kernel_replicant_linux-b33cc2cec9e2e58ec5d220e26f1111117ea782d9.zip
Merge branch 'net-ipv6-Add-support-for-path-selection-using-hash-of-5-tuple'
David Ahern says: ==================== net/ipv6: Add support for path selection using hash of 5-tuple Hardware supports multipath selection using the standard L4 5-tuple instead of just L3 and the flow label. In addition, some network operators prefer IPv6 path selection to use the 5-tuple. To that end, add support to IPv6 for multipath hash policy similar to bf4e0a3db97eb ("net: ipv4: add support for ECMP hash policy choice"). The default is still L3 which covers source and destination addresses along with flow label and IPv6 protocol. This gives users a choice in hash algorithms if they believe L3 only and the IPv6 flow label are not sufficient for their use case. A separate sysctl is added for IPv6, allowing IPv4 and IPv6 to use different algorithms if desired. The first 3 patches modify the IPv4 variant so that at the end of the patch set the ipv4 and ipv6 implementations are direct parallels. Patch 4 refactors the existing rt6_multipath_hash in preparation for adding the policy option. Patch 5 renames the existing netevent to have IPv4 in the name so ipv4 changes can be distinguished from IPv6 if the netevent handler cares. Patch 6 adds the skb as an argument through the FIB lookup functions to the multipath selection. Needed for the forwarding case. Patch 7 adds the L4 hash support. Patch 8 adds the hook for the netevent to the spectrum driver to update the ASIC. Patch 9 removes no longer used code. Patch 10 adds a testcase for IPv6 multipath with L4 hash. v3 - comments from Ido: - removed fib_info arg in patch 1; left by mistake on rebase to net-next - removed __get_hash_from_flowi4 declaration - line wrap change to spectrum_router.c to maintain 80 chars v2 - rebased to top of tree - added refactor of fib_multipath_hash following recent change - plumb skb through lookup functions to multipath selection - fix sysctl setting; was missing the data set in ipv6_sysctl_net_init - added test case RFC to v1: - rebase to top of net-next - fix addr_type in hash_keys and removed flow label as noticed by Ido - added a comment to cover letter about choice in algorithms based on use case per Or's comments ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--Documentation/networking/ip-sysctl.txt7
-rw-r--r--drivers/infiniband/core/cma.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c17
-rw-r--r--drivers/net/ipvlan/ipvlan_core.c3
-rw-r--r--drivers/net/vrf.c7
-rw-r--r--include/net/fib_rules.h1
-rw-r--r--include/net/flow.h16
-rw-r--r--include/net/ip6_fib.h4
-rw-r--r--include/net/ip6_route.h15
-rw-r--r--include/net/ip_fib.h2
-rw-r--r--include/net/netevent.h3
-rw-r--r--include/net/netns/ipv6.h1
-rw-r--r--net/core/flow_dissector.c16
-rw-r--r--net/ipv4/fib_semantics.c2
-rw-r--r--net/ipv4/route.c49
-rw-r--r--net/ipv4/sysctl_net_ipv4.c2
-rw-r--r--net/ipv6/anycast.c2
-rw-r--r--net/ipv6/fib6_rules.c8
-rw-r--r--net/ipv6/icmp.c5
-rw-r--r--net/ipv6/ip6_fib.c3
-rw-r--r--net/ipv6/ip6_gre.c2
-rw-r--r--net/ipv6/ip6_tunnel.c4
-rw-r--r--net/ipv6/ip6_vti.c2
-rw-r--r--net/ipv6/mcast.c4
-rw-r--r--net/ipv6/netfilter/ip6t_rpfilter.c2
-rw-r--r--net/ipv6/netfilter/nft_fib_ipv6.c3
-rw-r--r--net/ipv6/route.c134
-rw-r--r--net/ipv6/seg6_local.c4
-rw-r--r--net/ipv6/sysctl_net_ipv6.c27
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_multipath.sh44
30 files changed, 260 insertions, 131 deletions
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index a553d4e4a0fb..783675a730e5 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -1363,6 +1363,13 @@ flowlabel_reflect - BOOLEAN
FALSE: disabled
Default: FALSE
+fib_multipath_hash_policy - INTEGER
+ Controls which hash policy to use for multipath routes.
+ Default: 0 (Layer 3)
+ Possible values:
+ 0 - Layer 3 (source and destination addresses plus flow label)
+ 1 - Layer 4 (standard 5-tuple)
+
anycast_src_echo_reply - BOOLEAN
Controls the use of anycast addresses as source addresses for ICMPv6
echo reply
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 3ae32d1ddd27..915bbd867b61 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -1334,7 +1334,7 @@ static bool validate_ipv6_net_dev(struct net_device *net_dev,
IPV6_ADDR_LINKLOCAL;
struct rt6_info *rt = rt6_lookup(dev_net(net_dev), &dst_addr->sin6_addr,
&src_addr->sin6_addr, net_dev->ifindex,
- strict);
+ NULL, strict);
bool ret;
if (!rt)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 69f16c605b9d..a8a578610a7b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -2430,7 +2430,8 @@ static int mlxsw_sp_router_netevent_event(struct notifier_block *nb,
mlxsw_core_schedule_work(&net_work->work);
mlxsw_sp_port_dev_put(mlxsw_sp_port);
break;
- case NETEVENT_MULTIPATH_HASH_UPDATE:
+ case NETEVENT_IPV4_MPATH_HASH_UPDATE:
+ case NETEVENT_IPV6_MPATH_HASH_UPDATE:
net = ptr;
if (!net_eq(net, &init_net))
@@ -7030,13 +7031,25 @@ static void mlxsw_sp_mp4_hash_init(char *recr2_pl)
static void mlxsw_sp_mp6_hash_init(char *recr2_pl)
{
+ bool only_l3 = !init_net.ipv6.sysctl.multipath_hash_policy;
+
mlxsw_sp_mp_hash_header_set(recr2_pl,
MLXSW_REG_RECR2_IPV6_EN_NOT_TCP_NOT_UDP);
mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV6_EN_TCP_UDP);
mlxsw_reg_recr2_ipv6_sip_enable(recr2_pl);
mlxsw_reg_recr2_ipv6_dip_enable(recr2_pl);
- mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_FLOW_LABEL);
mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_NEXT_HEADER);
+ if (only_l3) {
+ mlxsw_sp_mp_hash_field_set(recr2_pl,
+ MLXSW_REG_RECR2_IPV6_FLOW_LABEL);
+ } else {
+ mlxsw_sp_mp_hash_header_set(recr2_pl,
+ MLXSW_REG_RECR2_TCP_UDP_EN_IPV6);
+ mlxsw_sp_mp_hash_field_set(recr2_pl,
+ MLXSW_REG_RECR2_TCP_UDP_SPORT);
+ mlxsw_sp_mp_hash_field_set(recr2_pl,
+ MLXSW_REG_RECR2_TCP_UDP_DPORT);
+ }
}
static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c
index 17daebd19e65..1a8132eb2a3e 100644
--- a/drivers/net/ipvlan/ipvlan_core.c
+++ b/drivers/net/ipvlan/ipvlan_core.c
@@ -817,7 +817,8 @@ struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb,
};
skb_dst_drop(skb);
- dst = ip6_route_input_lookup(dev_net(sdev), sdev, &fl6, flags);
+ dst = ip6_route_input_lookup(dev_net(sdev), sdev, &fl6,
+ skb, flags);
skb_dst_set(skb, dst);
break;
}
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index e459e601c57f..c6be49d3a9eb 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -941,6 +941,7 @@ static struct rt6_info *vrf_ip6_route_lookup(struct net *net,
const struct net_device *dev,
struct flowi6 *fl6,
int ifindex,
+ const struct sk_buff *skb,
int flags)
{
struct net_vrf *vrf = netdev_priv(dev);
@@ -959,7 +960,7 @@ static struct rt6_info *vrf_ip6_route_lookup(struct net *net,
if (!table)
return NULL;
- return ip6_pol_route(net, table, ifindex, fl6, flags);
+ return ip6_pol_route(net, table, ifindex, fl6, skb, flags);
}
static void vrf_ip6_input_dst(struct sk_buff *skb, struct net_device *vrf_dev,
@@ -977,7 +978,7 @@ static void vrf_ip6_input_dst(struct sk_buff *skb, struct net_device *vrf_dev,
struct net *net = dev_net(vrf_dev);
struct rt6_info *rt6;
- rt6 = vrf_ip6_route_lookup(net, vrf_dev, &fl6, ifindex,
+ rt6 = vrf_ip6_route_lookup(net, vrf_dev, &fl6, ifindex, skb,
RT6_LOOKUP_F_HAS_SADDR | RT6_LOOKUP_F_IFACE);
if (unlikely(!rt6))
return;
@@ -1110,7 +1111,7 @@ static struct dst_entry *vrf_link_scope_lookup(const struct net_device *dev,
if (!ipv6_addr_any(&fl6->saddr))
flags |= RT6_LOOKUP_F_HAS_SADDR;
- rt = vrf_ip6_route_lookup(net, dev, fl6, fl6->flowi6_oif, flags);
+ rt = vrf_ip6_route_lookup(net, dev, fl6, fl6->flowi6_oif, NULL, flags);
if (rt)
dst = &rt->dst;
diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index 1c9e17c11953..e5cfcfc7dd93 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -47,6 +47,7 @@ struct fib_rule {
struct fib_lookup_arg {
void *lookup_ptr;
+ const void *lookup_data;
void *result;
struct fib_rule *rule;
u32 table;
diff --git a/include/net/flow.h b/include/net/flow.h
index 64e7ee9cb980..8ce21793094e 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -222,20 +222,4 @@ static inline unsigned int flow_key_size(u16 family)
__u32 __get_hash_from_flowi6(const struct flowi6 *fl6, struct flow_keys *keys);
-static inline __u32 get_hash_from_flowi6(const struct flowi6 *fl6)
-{
- struct flow_keys keys;
-
- return __get_hash_from_flowi6(fl6, &keys);
-}
-
-__u32 __get_hash_from_flowi4(const struct flowi4 *fl4, struct flow_keys *keys);
-
-static inline __u32 get_hash_from_flowi4(const struct flowi4 *fl4)
-{
- struct flow_keys keys;
-
- return __get_hash_from_flowi4(fl4, &keys);
-}
-
#endif
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 8d906a35b534..5e86fd9dc857 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -350,7 +350,8 @@ struct fib6_table {
typedef struct rt6_info *(*pol_lookup_t)(struct net *,
struct fib6_table *,
- struct flowi6 *, int);
+ struct flowi6 *,
+ const struct sk_buff *, int);
struct fib6_entry_notifier_info {
struct fib_notifier_info info; /* must be first */
@@ -364,6 +365,7 @@ struct fib6_entry_notifier_info {
struct fib6_table *fib6_get_table(struct net *net, u32 id);
struct fib6_table *fib6_new_table(struct net *net, u32 id);
struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
+ const struct sk_buff *skb,
int flags, pol_lookup_t lookup);
struct fib6_node *fib6_lookup(struct fib6_node *root,
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index da2bde5fda8f..ce2abc0ff102 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -75,7 +75,8 @@ static inline bool rt6_qualify_for_ecmp(const struct rt6_info *rt)
void ip6_route_input(struct sk_buff *skb);
struct dst_entry *ip6_route_input_lookup(struct net *net,
struct net_device *dev,
- struct flowi6 *fl6, int flags);
+ struct flowi6 *fl6,
+ const struct sk_buff *skb, int flags);
struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
struct flowi6 *fl6, int flags);
@@ -88,9 +89,10 @@ static inline struct dst_entry *ip6_route_output(struct net *net,
}
struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
- int flags);
+ const struct sk_buff *skb, int flags);
struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
- int ifindex, struct flowi6 *fl6, int flags);
+ int ifindex, struct flowi6 *fl6,
+ const struct sk_buff *skb, int flags);
void ip6_route_init_special_entries(void);
int ip6_route_init(void);
@@ -126,9 +128,10 @@ static inline int ip6_route_get_saddr(struct net *net, struct rt6_info *rt,
}
struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
- const struct in6_addr *saddr, int oif, int flags);
-u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb,
- struct flow_keys *hkeys);
+ const struct in6_addr *saddr, int oif,
+ const struct sk_buff *skb, int flags);
+u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
+ const struct sk_buff *skb, struct flow_keys *hkeys);
struct dst_entry *icmp6_dst_alloc(struct net_device *dev, struct flowi6 *fl6);
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 8812582a94d5..7c7522e8585b 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -395,7 +395,7 @@ int fib_sync_down_addr(struct net_device *dev, __be32 local);
int fib_sync_up(struct net_device *dev, unsigned int nh_flags);
#ifdef CONFIG_IP_ROUTE_MULTIPATH
-int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
+int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
const struct sk_buff *skb, struct flow_keys *flkeys);
#endif
void fib_select_multipath(struct fib_result *res, int hash);
diff --git a/include/net/netevent.h b/include/net/netevent.h
index 40e7bab68490..d9918261701c 100644
--- a/include/net/netevent.h
+++ b/include/net/netevent.h
@@ -26,7 +26,8 @@ enum netevent_notif_type {
NETEVENT_NEIGH_UPDATE = 1, /* arg is struct neighbour ptr */
NETEVENT_REDIRECT, /* arg is struct netevent_redirect ptr */
NETEVENT_DELAY_PROBE_TIME_UPDATE, /* arg is struct neigh_parms ptr */
- NETEVENT_MULTIPATH_HASH_UPDATE, /* arg is struct net ptr */
+ NETEVENT_IPV4_MPATH_HASH_UPDATE, /* arg is struct net ptr */
+ NETEVENT_IPV6_MPATH_HASH_UPDATE, /* arg is struct net ptr */
};
int register_netevent_notifier(struct notifier_block *nb);
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index e286fda09fcf..5b51110435fc 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -28,6 +28,7 @@ struct netns_sysctl_ipv6 {
int ip6_rt_gc_elasticity;
int ip6_rt_mtu_expires;
int ip6_rt_min_advmss;
+ int multipath_hash_policy;
int flowlabel_consistency;
int auto_flowlabels;
int icmpv6_time;
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 559db9ea8d86..d29f09bc5ff9 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -1341,22 +1341,6 @@ __u32 __get_hash_from_flowi6(const struct flowi6 *fl6, struct flow_keys *keys)
}
EXPORT_SYMBOL(__get_hash_from_flowi6);
-__u32 __get_hash_from_flowi4(const struct flowi4 *fl4, struct flow_keys *keys)
-{
- memset(keys, 0, sizeof(*keys));
-
- keys->addrs.v4addrs.src = fl4->saddr;
- keys->addrs.v4addrs.dst = fl4->daddr;
- keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
- keys->ports.src = fl4->fl4_sport;
- keys->ports.dst = fl4->fl4_dport;
- keys->keyid.keyid = fl4->fl4_gre_key;
- keys->basic.ip_proto = fl4->flowi4_proto;
-
- return flow_hash_from_keys(keys);
-}
-EXPORT_SYMBOL(__get_hash_from_flowi4);
-
static const struct flow_dissector_key flow_keys_dissector_keys[] = {
{
.key_id = FLOW_DISSECTOR_KEY_CONTROL,
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 181b0d8d589c..e7c602c600ac 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -1770,7 +1770,7 @@ void fib_select_path(struct net *net, struct fib_result *res,
#ifdef CONFIG_IP_ROUTE_MULTIPATH
if (res->fi->fib_nhs > 1) {
- int h = fib_multipath_hash(res->fi, fl4, skb, NULL);
+ int h = fib_multipath_hash(net, fl4, skb, NULL);
fib_select_multipath(res, h);
}
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 3bb686dac273..6a7b3cba3972 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1748,44 +1748,45 @@ static void ip_multipath_l3_keys(const struct sk_buff *skb,
struct flow_keys *hash_keys)
{
const struct iphdr *outer_iph = ip_hdr(skb);
+ const struct iphdr *key_iph = outer_iph;
const struct iphdr *inner_iph;
const struct icmphdr *icmph;
struct iphdr _inner_iph;
struct icmphdr _icmph;
- hash_keys->addrs.v4addrs.src = outer_iph->saddr;
- hash_keys->addrs.v4addrs.dst = outer_iph->daddr;
if (likely(outer_iph->protocol != IPPROTO_ICMP))
- return;
+ goto out;
if (unlikely((outer_iph->frag_off & htons(IP_OFFSET)) != 0))
- return;
+ goto out;
icmph = skb_header_pointer(skb, outer_iph->ihl * 4, sizeof(_icmph),
&_icmph);
if (!icmph)
- return;
+ goto out;
if (icmph->type != ICMP_DEST_UNREACH &&
icmph->type != ICMP_REDIRECT &&
icmph->type != ICMP_TIME_EXCEEDED &&
icmph->type != ICMP_PARAMETERPROB)
- return;
+ goto out;
inner_iph = skb_header_pointer(skb,
outer_iph->ihl * 4 + sizeof(_icmph),
sizeof(_inner_iph), &_inner_iph);
if (!inner_iph)
- return;
- hash_keys->addrs.v4addrs.src = inner_iph->saddr;
- hash_keys->addrs.v4addrs.dst = inner_iph->daddr;
+ goto out;
+
+ key_iph = inner_iph;
+out:
+ hash_keys->addrs.v4addrs.src = key_iph->saddr;
+ hash_keys->addrs.v4addrs.dst = key_iph->daddr;
}
/* if skb is set it will be used and fl4 can be NULL */
-int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
+int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
const struct sk_buff *skb, struct flow_keys *flkeys)
{
- struct net *net = fi->fib_net;
struct flow_keys hash_keys;
u32 mhash;
@@ -1809,24 +1810,20 @@ int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
/* short-circuit if we already have L4 hash present */
if (skb->l4_hash)
return skb_get_hash_raw(skb) >> 1;
+
memset(&hash_keys, 0, sizeof(hash_keys));
- if (flkeys) {
- hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
- hash_keys.addrs.v4addrs.src = flkeys->addrs.v4addrs.src;
- hash_keys.addrs.v4addrs.dst = flkeys->addrs.v4addrs.dst;
- hash_keys.ports.src = flkeys->ports.src;
- hash_keys.ports.dst = flkeys->ports.dst;
- hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
- } else {
+ if (!flkeys) {
skb_flow_dissect_flow_keys(skb, &keys, flag);
- hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
- hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src;
- hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst;
- hash_keys.ports.src = keys.ports.src;
- hash_keys.ports.dst = keys.ports.dst;
- hash_keys.basic.ip_proto = keys.basic.ip_proto;
+ flkeys = &keys;
}
+
+ hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+ hash_keys.addrs.v4addrs.src = flkeys->addrs.v4addrs.src;
+ hash_keys.addrs.v4addrs.dst = flkeys->addrs.v4addrs.dst;
+ hash_keys.ports.src = flkeys->ports.src;
+ hash_keys.ports.dst = flkeys->ports.dst;
+ hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
} else {
memset(&hash_keys, 0, sizeof(hash_keys));
hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
@@ -1852,7 +1849,7 @@ static int ip_mkroute_input(struct sk_buff *skb,
{
#ifdef CONFIG_IP_ROUTE_MULTIPATH
if (res->fi && res->fi->fib_nhs > 1) {
- int h = fib_multipath_hash(res->fi, NULL, skb, hkeys);
+ int h = fib_multipath_hash(res->fi->fib_net, NULL, skb, hkeys);
fib_select_multipath(res, h);
}
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 89683d868b37..011de9a20ec6 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -400,7 +400,7 @@ static int proc_fib_multipath_hash_policy(struct ctl_table *table, int write,
ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (write && ret == 0)
- call_netevent_notifiers(NETEVENT_MULTIPATH_HASH_UPDATE, net);
+ call_netevent_notifiers(NETEVENT_IPV4_MPATH_HASH_UPDATE, net);
return ret;
}
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index d7d0abc7fd0e..c61718dba2e6 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -78,7 +78,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
if (ifindex == 0) {
struct rt6_info *rt;
- rt = rt6_lookup(net, addr, NULL, 0, 0);
+ rt = rt6_lookup(net, addr, NULL, 0, NULL, 0);
if (rt) {
dev = rt->dst.dev;
ip6_rt_put(rt);
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 04e5f523e50f..00ef9467f3c0 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -61,11 +61,13 @@ unsigned int fib6_rules_seq_read(struct net *net)
}
struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
+ const struct sk_buff *skb,
int flags, pol_lookup_t lookup)
{
if (net->ipv6.fib6_has_custom_rules) {
struct fib_lookup_arg arg = {
.lookup_ptr = lookup,
+ .lookup_data = skb,
.flags = FIB_LOOKUP_NOREF,
};
@@ -80,11 +82,11 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
} else {
struct rt6_info *rt;
- rt = lookup(net, net->ipv6.fib6_local_tbl, fl6, flags);
+ rt = lookup(net, net->ipv6.fib6_local_tbl, fl6, skb, flags);
if (rt != net->ipv6.ip6_null_entry && rt->dst.error != -EAGAIN)
return &rt->dst;
ip6_rt_put(rt);
- rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, flags);
+ rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, skb, flags);
if (rt->dst.error != -EAGAIN)
return &rt->dst;
ip6_rt_put(rt);
@@ -130,7 +132,7 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
goto out;
}
- rt = lookup(net, table, flp6, flags);
+ rt = lookup(net, table, flp6, arg->lookup_data, flags);
if (rt != net->ipv6.ip6_null_entry) {
struct fib6_rule *r = (struct fib6_rule *)rule;
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index b0778d323b6e..6f84668be6ea 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -522,7 +522,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
fl6.fl6_icmp_type = type;
fl6.fl6_icmp_code = code;
fl6.flowi6_uid = sock_net_uid(net, NULL);
- fl6.mp_hash = rt6_multipath_hash(&fl6, skb, NULL);
+ fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL);
security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
sk = icmpv6_xmit_lock(net);
@@ -629,7 +629,8 @@ int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
skb_pull(skb2, nhs);
skb_reset_network_header(skb2);
- rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0, 0);
+ rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0,
+ skb, 0);
if (rt && rt->dst.dev)
skb2->dev = rt->dst.dev;
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index cab95cf3b39f..2f995e9e3050 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -299,11 +299,12 @@ struct fib6_table *fib6_get_table(struct net *net, u32 id)
}
struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
+ const struct sk_buff *skb,
int flags, pol_lookup_t lookup)
{
struct rt6_info *rt;
- rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, flags);
+ rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, skb, flags);
if (rt->dst.error == -EAGAIN) {
ip6_rt_put(rt);
rt = net->ipv6.ip6_null_entry;
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 4f150a394387..83c7766c8c75 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -1053,7 +1053,7 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)
struct rt6_info *rt = rt6_lookup(t->net,
&p->raddr, &p->laddr,
- p->link, strict);
+ p->link, NULL, strict);
if (!rt)
return;
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 869e2e6750f7..1124f310df5a 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -679,7 +679,7 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
/* Try to guess incoming interface */
rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr,
- NULL, 0, 0);
+ NULL, 0, skb2, 0);
if (rt && rt->dst.dev)
skb2->dev = rt->dst.dev;
@@ -1444,7 +1444,7 @@ static void ip6_tnl_link_config(struct ip6_tnl *t)
struct rt6_info *rt = rt6_lookup(t->net,
&p->raddr, &p->laddr,
- p->link, strict);
+ p->link, NULL, strict);
if (!rt)
return;
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index c617ea17faa8..a482b854eeea 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -645,7 +645,7 @@ static void vti6_link_config(struct ip6_tnl *t)
(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
struct rt6_info *rt = rt6_lookup(t->net,
&p->raddr, &p->laddr,
- p->link, strict);
+ p->link, NULL, strict);
if (rt)
tdev = rt->dst.dev;
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index d9bb933dd5c4..d1a0cefac273 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -165,7 +165,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
if (ifindex == 0) {
struct rt6_info *rt;
- rt = rt6_lookup(net, addr, NULL, 0, 0);
+ rt = rt6_lookup(net, addr, NULL, 0, NULL, 0);
if (rt) {
dev = rt->dst.dev;
ip6_rt_put(rt);
@@ -254,7 +254,7 @@ static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net,
struct inet6_dev *idev = NULL;
if (ifindex == 0) {
- struct rt6_info *rt = rt6_lookup(net, group, NULL, 0, 0);
+ struct rt6_info *rt = rt6_lookup(net, group, NULL, 0, NULL, 0);
if (rt) {
dev = rt->dst.dev;
diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c
index 94deb69bbbda..910a27318f58 100644
--- a/net/ipv6/netfilter/ip6t_rpfilter.c
+++ b/net/ipv6/netfilter/ip6t_rpfilter.c
@@ -53,7 +53,7 @@ static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb,
lookup_flags |= RT6_LOOKUP_F_IFACE;
}
- rt = (void *) ip6_route_lookup(net, &fl6, lookup_flags);
+ rt = (void *)ip6_route_lookup(net, &fl6, skb, lookup_flags);
if (rt->dst.error)
goto out;
diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c
index cc5174c7254c..3230b3d7b11b 100644
--- a/net/ipv6/netfilter/nft_fib_ipv6.c
+++ b/net/ipv6/netfilter/nft_fib_ipv6.c
@@ -181,7 +181,8 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
*dest = 0;
again:
- rt = (void *)ip6_route_lookup(nft_net(pkt), &fl6, lookup_flags);
+ rt = (void *)ip6_route_lookup(nft_net(pkt), &fl6, pkt->skb,
+ lookup_flags);
if (rt->dst.error)
goto put_rt_err;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index e2bb40824c85..f0ae58424c45 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -450,8 +450,10 @@ static bool rt6_check_expired(const struct rt6_info *rt)
return false;
}
-static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
+static struct rt6_info *rt6_multipath_select(const struct net *net,
+ struct rt6_info *match,
struct flowi6 *fl6, int oif,
+ const struct sk_buff *skb,
int strict)
{
struct rt6_info *sibling, *next_sibling;
@@ -460,7 +462,7 @@ static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
* case it will always be non-zero. Otherwise now is the time to do it.
*/
if (!fl6->mp_hash)
- fl6->mp_hash = rt6_multipath_hash(fl6, NULL, NULL);
+ fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
if (fl6->mp_hash <= atomic_read(&match->rt6i_nh_upper_bound))
return match;
@@ -914,7 +916,9 @@ static bool ip6_hold_safe(struct net *net, struct rt6_info **prt,
static struct rt6_info *ip6_pol_route_lookup(struct net *net,
struct fib6_table *table,
- struct flowi6 *fl6, int flags)
+ struct flowi6 *fl6,
+ const struct sk_buff *skb,
+ int flags)
{
struct rt6_info *rt, *rt_cache;
struct fib6_node *fn;
@@ -929,8 +933,8 @@ restart:
rt = rt6_device_match(net, rt, &fl6->saddr,
fl6->flowi6_oif, flags);
if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
- rt = rt6_multipath_select(rt, fl6,
- fl6->flowi6_oif, flags);
+ rt = rt6_multipath_select(net, rt, fl6, fl6->flowi6_oif,
+ skb, flags);
}
if (rt == net->ipv6.ip6_null_entry) {
fn = fib6_backtrack(fn, &fl6->saddr);
@@ -954,14 +958,15 @@ restart:
}
struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
- int flags)
+ const struct sk_buff *skb, int flags)
{
- return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
+ return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_lookup);
}
EXPORT_SYMBOL_GPL(ip6_route_lookup);
struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
- const struct in6_addr *saddr, int oif, int strict)
+ const struct in6_addr *saddr, int oif,
+ const struct sk_buff *skb, int strict)
{
struct flowi6 fl6 = {
.flowi6_oif = oif,
@@ -975,7 +980,7 @@ struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
flags |= RT6_LOOKUP_F_HAS_SADDR;
}
- dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
+ dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup);
if (dst->error == 0)
return (struct rt6_info *) dst;
@@ -1647,7 +1652,8 @@ void rt6_age_exceptions(struct rt6_info *rt,
}
struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
- int oif, struct flowi6 *fl6, int flags)
+ int oif, struct flowi6 *fl6,
+ const struct sk_buff *skb, int flags)
{
struct fib6_node *fn, *saved_fn;
struct rt6_info *rt, *rt_cache;
@@ -1669,7 +1675,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
redo_rt6_select:
rt = rt6_select(net, fn, oif, strict);
if (rt->rt6i_nsiblings)
- rt = rt6_multipath_select(rt, fl6, oif, strict);
+ rt = rt6_multipath_select(net, rt, fl6, oif, skb, strict);
if (rt == net->ipv6.ip6_null_entry) {
fn = fib6_backtrack(fn, &fl6->saddr);
if (fn)
@@ -1768,20 +1774,25 @@ uncached_rt_out:
}
EXPORT_SYMBOL_GPL(ip6_pol_route);
-static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
- struct flowi6 *fl6, int flags)
+static struct rt6_info *ip6_pol_route_input(struct net *net,
+ struct fib6_table *table,
+ struct flowi6 *fl6,
+ const struct sk_buff *skb,
+ int flags)
{
- return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
+ return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, skb, flags);
}
struct dst_entry *ip6_route_input_lookup(struct net *net,
struct net_device *dev,
- struct flowi6 *fl6, int flags)
+ struct flowi6 *fl6,
+ const struct sk_buff *skb,
+ int flags)
{
if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
flags |= RT6_LOOKUP_F_IFACE;
- return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
+ return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_input);
}
EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
@@ -1815,8 +1826,6 @@ static void ip6_multipath_l3_keys(const struct sk_buff *skb,
key_iph = inner_iph;
_flkeys = NULL;
out:
- memset(keys, 0, sizeof(*keys));
- keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
if (_flkeys) {
keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src;
keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst;
@@ -1831,17 +1840,60 @@ out:
}
/* if skb is set it will be used and fl6 can be NULL */
-u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb,
- struct flow_keys *flkeys)
+u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
+ const struct sk_buff *skb, struct flow_keys *flkeys)
{
struct flow_keys hash_keys;
+ u32 mhash;
- if (skb) {
- ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
- return flow_hash_from_keys(&hash_keys) >> 1;
+ switch (net->ipv6.sysctl.multipath_hash_policy) {
+ case 0:
+ memset(&hash_keys, 0, sizeof(hash_keys));
+ hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+ if (skb) {
+ ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
+ } else {
+ hash_keys.addrs.v6addrs.src = fl6->saddr;
+ hash_keys.addrs.v6addrs.dst = fl6->daddr;
+ hash_keys.tags.flow_label = (__force u32)fl6->flowlabel;
+ hash_keys.basic.ip_proto = fl6->flowi6_proto;
+ }
+ break;
+ case 1:
+ if (skb) {
+ unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
+ struct flow_keys keys;
+
+ /* short-circuit if we already have L4 hash present */
+ if (skb->l4_hash)
+ return skb_get_hash_raw(skb) >> 1;
+
+ memset(&hash_keys, 0, sizeof(hash_keys));
+
+ if (!flkeys) {
+ skb_flow_dissect_flow_keys(skb, &keys, flag);
+ flkeys = &keys;
+ }
+ hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+ hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
+ hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
+ hash_keys.ports.src = flkeys->ports.src;
+ hash_keys.ports.dst = flkeys->ports.dst;
+ hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
+ } else {
+ memset(&hash_keys, 0, sizeof(hash_keys));
+ hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+ hash_keys.addrs.v6addrs.src = fl6->saddr;
+ hash_keys.addrs.v6addrs.dst = fl6->daddr;
+ hash_keys.ports.src = fl6->fl6_sport;
+ hash_keys.ports.dst = fl6->fl6_dport;
+ hash_keys.basic.ip_proto = fl6->flowi6_proto;
+ }
+ break;
}
+ mhash = flow_hash_from_keys(&hash_keys);
- return get_hash_from_flowi6(fl6) >> 1;
+ return mhash >> 1;
}
void ip6_route_input(struct sk_buff *skb)
@@ -1868,15 +1920,19 @@ void ip6_route_input(struct sk_buff *skb)
flkeys = &_flkeys;
if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
- fl6.mp_hash = rt6_multipath_hash(&fl6, skb, flkeys);
+ fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
skb_dst_drop(skb);
- skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
+ skb_dst_set(skb,
+ ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags));
}
-static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
- struct flowi6 *fl6, int flags)
+static struct rt6_info *ip6_pol_route_output(struct net *net,
+ struct fib6_table *table,
+ struct flowi6 *fl6,
+ const struct sk_buff *skb,
+ int flags)
{
- return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
+ return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
}
struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
@@ -1904,7 +1960,7 @@ struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
else if (sk)
flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
- return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
+ return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
}
EXPORT_SYMBOL_GPL(ip6_route_output_flags);
@@ -2153,6 +2209,7 @@ struct ip6rd_flowi {
static struct rt6_info *__ip6_route_redirect(struct net *net,
struct fib6_table *table,
struct flowi6 *fl6,
+ const struct sk_buff *skb,
int flags)
{
struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
@@ -2226,8 +2283,9 @@ out:
};
static struct dst_entry *ip6_route_redirect(struct net *net,
- const struct flowi6 *fl6,
- const struct in6_addr *gateway)
+ const struct flowi6 *fl6,
+ const struct sk_buff *skb,
+ const struct in6_addr *gateway)
{
int flags = RT6_LOOKUP_F_HAS_SADDR;
struct ip6rd_flowi rdfl;
@@ -2235,7 +2293,7 @@ static struct dst_entry *ip6_route_redirect(struct net *net,
rdfl.fl6 = *fl6;
rdfl.gateway = *gateway;
- return fib6_rule_lookup(net, &rdfl.fl6,
+ return fib6_rule_lookup(net, &rdfl.fl6, skb,
flags, __ip6_route_redirect);
}
@@ -2255,7 +2313,7 @@ void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
fl6.flowlabel = ip6_flowinfo(iph);
fl6.flowi6_uid = uid;
- dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
+ dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr);
rt6_do_redirect(dst, NULL, skb);
dst_release(dst);
}
@@ -2277,7 +2335,7 @@ void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
fl6.saddr = iph->daddr;
fl6.flowi6_uid = sock_net_uid(net, NULL);
- dst = ip6_route_redirect(net, &fl6, &iph->saddr);
+ dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr);
rt6_do_redirect(dst, NULL, skb);
dst_release(dst);
}
@@ -2479,7 +2537,7 @@ static struct rt6_info *ip6_nh_lookup_table(struct net *net,
flags |= RT6_LOOKUP_F_HAS_SADDR;
flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
- rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags);
+ rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, NULL, flags);
/* if table lookup failed, fall back to full lookup */
if (rt == net->ipv6.ip6_null_entry) {
@@ -2542,7 +2600,7 @@ static int ip6_route_check_nh(struct net *net,
}
if (!grt)
- grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
+ grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, NULL, 1);
if (!grt)
goto out;
@@ -4607,7 +4665,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
if (!ipv6_addr_any(&fl6.saddr))
flags |= RT6_LOOKUP_F_HAS_SADDR;
- dst = ip6_route_input_lookup(net, dev, &fl6, flags);
+ dst = ip6_route_input_lookup(net, dev, &fl6, NULL, flags);
rcu_read_unlock();
} else {
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
index ba3767ef5e93..45722327375a 100644
--- a/net/ipv6/seg6_local.c
+++ b/net/ipv6/seg6_local.c
@@ -161,7 +161,7 @@ static void lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
fl6.flowi6_flags = FLOWI_FLAG_KNOWN_NH;
if (!tbl_id) {
- dst = ip6_route_input_lookup(net, skb->dev, &fl6, flags);
+ dst = ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags);
} else {
struct fib6_table *table;
@@ -169,7 +169,7 @@ static void lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
if (!table)
goto out;
- rt = ip6_pol_route(net, table, 0, &fl6, flags);
+ rt = ip6_pol_route(net, table, 0, &fl6, skb, flags);
dst = &rt->dst;
}
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 262f791f1b9b..966c42af92f4 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -16,14 +16,31 @@
#include <net/ipv6.h>
#include <net/addrconf.h>
#include <net/inet_frag.h>
+#include <net/netevent.h>
#ifdef CONFIG_NETLABEL
#include <net/calipso.h>
#endif
+static int zero;
static int one = 1;
static int auto_flowlabels_min;
static int auto_flowlabels_max = IP6_AUTO_FLOW_LABEL_MAX;
+static int proc_rt6_multipath_hash_policy(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ struct net *net;
+ int ret;
+
+ net = container_of(table->data, struct net,
+ ipv6.sysctl.multipath_hash_policy);
+ ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+ if (write && ret == 0)
+ call_netevent_notifiers(NETEVENT_IPV6_MPATH_HASH_UPDATE, net);
+
+ return ret;
+}
static struct ctl_table ipv6_table_template[] = {
{
@@ -126,6 +143,15 @@ static struct ctl_table ipv6_table_template[] = {
.mode = 0644,
.proc_handler = proc_dointvec
},
+ {
+ .procname = "fib_multipath_hash_policy",
+ .data = &init_net.ipv6.sysctl.multipath_hash_policy,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_rt6_multipath_hash_policy,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
{ }
};
@@ -190,6 +216,7 @@ static int __net_init ipv6_sysctl_net_init(struct net *net)
ipv6_table[11].data = &net->ipv6.sysctl.max_hbh_opts_cnt;
ipv6_table[12].data = &net->ipv6.sysctl.max_dst_opts_len;
ipv6_table[13].data = &net->ipv6.sysctl.max_hbh_opts_len;
+ ipv6_table[14].data = &net->ipv6.sysctl.multipath_hash_policy,
ipv6_route_table = ipv6_route_sysctl_init(net);
if (!ipv6_route_table)
diff --git a/tools/testing/selftests/net/forwarding/router_multipath.sh b/tools/testing/selftests/net/forwarding/router_multipath.sh
index 55595305a604..3bc351008db6 100755
--- a/tools/testing/selftests/net/forwarding/router_multipath.sh
+++ b/tools/testing/selftests/net/forwarding/router_multipath.sh
@@ -235,6 +235,45 @@ multipath4_test()
sysctl -q -w net.ipv4.fib_multipath_hash_policy=$hash_policy
}
+multipath6_l4_test()
+{
+ local desc="$1"
+ local weight_rp12=$2
+ local weight_rp13=$3
+ local t0_rp12 t0_rp13 t1_rp12 t1_rp13
+ local packets_rp12 packets_rp13
+ local hash_policy
+
+ # Transmit multiple flows from h1 to h2 and make sure they are
+ # distributed between both multipath links (rp12 and rp13)
+ # according to the configured weights.
+ hash_policy=$(sysctl -n net.ipv6.fib_multipath_hash_policy)
+ sysctl -q -w net.ipv6.fib_multipath_hash_policy=1
+
+ ip route replace 2001:db8:2::/64 vrf vrf-r1 \
+ nexthop via fe80:2::22 dev $rp12 weight $weight_rp12 \
+ nexthop via fe80:3::23 dev $rp13 weight $weight_rp13
+
+ t0_rp12=$(link_stats_tx_packets_get $rp12)
+ t0_rp13=$(link_stats_tx_packets_get $rp13)
+
+ $MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \
+ -d 1msec -t udp "sp=1024,dp=0-32768"
+
+ t1_rp12=$(link_stats_tx_packets_get $rp12)
+ t1_rp13=$(link_stats_tx_packets_get $rp13)
+
+ let "packets_rp12 = $t1_rp12 - $t0_rp12"
+ let "packets_rp13 = $t1_rp13 - $t0_rp13"
+ multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
+
+ ip route replace 2001:db8:2::/64 vrf vrf-r1 \
+ nexthop via fe80:2::22 dev $rp12 \
+ nexthop via fe80:3::23 dev $rp13
+
+ sysctl -q -w net.ipv6.fib_multipath_hash_policy=$hash_policy
+}
+
multipath6_test()
{
local desc="$1"
@@ -278,6 +317,11 @@ multipath_test()
multipath6_test "ECMP" 1 1
multipath6_test "Weighted MP 2:1" 2 1
multipath6_test "Weighted MP 11:45" 11 45
+
+ log_info "Running IPv6 L4 hash multipath tests"
+ multipath6_l4_test "ECMP" 1 1
+ multipath6_l4_test "Weighted MP 2:1" 2 1
+ multipath6_l4_test "Weighted MP 11:45" 11 45
}
setup_prepare()