diff options
Diffstat (limited to 'net/ipv6')
-rw-r--r-- | net/ipv6/addrconf.c | 16 | ||||
-rw-r--r-- | net/ipv6/addrlabel.c | 10 | ||||
-rw-r--r-- | net/ipv6/af_inet6.c | 6 | ||||
-rw-r--r-- | net/ipv6/exthdrs_core.c | 4 | ||||
-rw-r--r-- | net/ipv6/fib6_rules.c | 3 | ||||
-rw-r--r-- | net/ipv6/ip6_fib.c | 9 | ||||
-rw-r--r-- | net/ipv6/ip6_output.c | 22 | ||||
-rw-r--r-- | net/ipv6/ip6_tunnel.c | 153 | ||||
-rw-r--r-- | net/ipv6/ip6mr.c | 1 | ||||
-rw-r--r-- | net/ipv6/ndisc.c | 18 | ||||
-rw-r--r-- | net/ipv6/netfilter/ip6_tables.c | 14 | ||||
-rw-r--r-- | net/ipv6/raw.c | 12 | ||||
-rw-r--r-- | net/ipv6/route.c | 80 | ||||
-rw-r--r-- | net/ipv6/sit.c | 163 | ||||
-rw-r--r-- | net/ipv6/tcp_ipv6.c | 2 | ||||
-rw-r--r-- | net/ipv6/xfrm6_policy.c | 10 | ||||
-rw-r--r-- | net/ipv6/xfrm6_state.c | 33 | ||||
-rw-r--r-- | net/ipv6/xfrm6_tunnel.c | 4 |
18 files changed, 372 insertions, 188 deletions
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 5bc893e28008..ec7a91d9e865 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -243,7 +243,7 @@ static inline bool addrconf_qdisc_ok(const struct net_device *dev) /* Check if a route is valid prefix route */ static inline int addrconf_is_prefix_route(const struct rt6_info *rt) { - return ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0); + return (rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0; } static void addrconf_del_timer(struct inet6_ifaddr *ifp) @@ -1544,7 +1544,7 @@ static int addrconf_ifid_infiniband(u8 *eui, struct net_device *dev) return 0; } -int __ipv6_isatap_ifid(u8 *eui, __be32 addr) +static int __ipv6_isatap_ifid(u8 *eui, __be32 addr) { if (addr == 0) return -1; @@ -1560,7 +1560,6 @@ int __ipv6_isatap_ifid(u8 *eui, __be32 addr) memcpy(eui + 4, &addr, 4); return 0; } -EXPORT_SYMBOL(__ipv6_isatap_ifid); static int addrconf_ifid_sit(u8 *eui, struct net_device *dev) { @@ -4638,10 +4637,12 @@ int __init addrconf_init(void) if (err < 0) { printk(KERN_CRIT "IPv6 Addrconf:" " cannot initialize default policy table: %d.\n", err); - return err; + goto out; } - register_pernet_subsys(&addrconf_ops); + err = register_pernet_subsys(&addrconf_ops); + if (err < 0) + goto out_addrlabel; /* The addrconf netdev notifier requires that loopback_dev * has it's ipv6 private information allocated and setup @@ -4693,7 +4694,9 @@ errout: unregister_netdevice_notifier(&ipv6_dev_notf); errlo: unregister_pernet_subsys(&addrconf_ops); - +out_addrlabel: + ipv6_addr_label_cleanup(); +out: return err; } @@ -4704,6 +4707,7 @@ void addrconf_cleanup(void) unregister_netdevice_notifier(&ipv6_dev_notf); unregister_pernet_subsys(&addrconf_ops); + ipv6_addr_label_cleanup(); rtnl_lock(); diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index f0e774cea386..c8993e5a337c 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -393,6 +393,11 @@ int __init ipv6_addr_label_init(void) return register_pernet_subsys(&ipv6_addr_label_ops); } +void ipv6_addr_label_cleanup(void) +{ + unregister_pernet_subsys(&ipv6_addr_label_ops); +} + static const struct nla_policy ifal_policy[IFAL_MAX+1] = { [IFAL_ADDRESS] = { .len = sizeof(struct in6_addr), }, [IFAL_LABEL] = { .len = sizeof(u32), }, @@ -513,10 +518,9 @@ static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb) static inline int ip6addrlbl_msgsize(void) { - return (NLMSG_ALIGN(sizeof(struct ifaddrlblmsg)) + return NLMSG_ALIGN(sizeof(struct ifaddrlblmsg)) + nla_total_size(16) /* IFAL_ADDRESS */ - + nla_total_size(4) /* IFAL_LABEL */ - ); + + nla_total_size(4); /* IFAL_LABEL */ } static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh, diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 4869797c1afa..54e8e42f7a88 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -468,7 +468,7 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr, if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL) sin->sin6_scope_id = sk->sk_bound_dev_if; *uaddr_len = sizeof(*sin); - return(0); + return 0; } EXPORT_SYMBOL(inet6_getname); @@ -489,7 +489,7 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) case SIOCADDRT: case SIOCDELRT: - return(ipv6_route_ioctl(net, cmd, (void __user *)arg)); + return ipv6_route_ioctl(net, cmd, (void __user *)arg); case SIOCSIFADDR: return addrconf_add_ifaddr(net, (void __user *) arg); @@ -503,7 +503,7 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) return sk->sk_prot->ioctl(sk, cmd, arg); } /*NOTREACHED*/ - return(0); + return 0; } EXPORT_SYMBOL(inet6_ioctl); diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c index e1caa5d526c2..14ed0a955b56 100644 --- a/net/ipv6/exthdrs_core.c +++ b/net/ipv6/exthdrs_core.c @@ -13,12 +13,12 @@ int ipv6_ext_hdr(u8 nexthdr) /* * find out if nexthdr is an extension header or a protocol */ - return ( (nexthdr == NEXTHDR_HOP) || + return (nexthdr == NEXTHDR_HOP) || (nexthdr == NEXTHDR_ROUTING) || (nexthdr == NEXTHDR_FRAGMENT) || (nexthdr == NEXTHDR_AUTH) || (nexthdr == NEXTHDR_NONE) || - (nexthdr == NEXTHDR_DEST) ); + (nexthdr == NEXTHDR_DEST); } /* diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index b1108ede18e1..d829874d8946 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -34,11 +34,10 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi *fl, { struct fib_lookup_arg arg = { .lookup_ptr = lookup, + .flags = FIB_LOOKUP_NOREF, }; fib_rules_lookup(net->ipv6.fib6_rules_ops, fl, flags, &arg); - if (arg.rule) - fib_rule_put(arg.rule); if (arg.result) return arg.result; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index b6a585909d35..de382114609b 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1500,15 +1500,18 @@ static void fib6_gc_timer_cb(unsigned long arg) static int __net_init fib6_net_init(struct net *net) { + size_t size = sizeof(struct hlist_head) * FIB6_TABLE_HASHSZ; + setup_timer(&net->ipv6.ip6_fib_timer, fib6_gc_timer_cb, (unsigned long)net); net->ipv6.rt6_stats = kzalloc(sizeof(*net->ipv6.rt6_stats), GFP_KERNEL); if (!net->ipv6.rt6_stats) goto out_timer; - net->ipv6.fib_table_hash = kcalloc(FIB6_TABLE_HASHSZ, - sizeof(*net->ipv6.fib_table_hash), - GFP_KERNEL); + /* Avoid false sharing : Use at least a full cache line */ + size = max_t(size_t, size, L1_CACHE_BYTES); + + net->ipv6.fib_table_hash = kzalloc(size, GFP_KERNEL); if (!net->ipv6.fib_table_hash) goto out_rt6_stats; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 1838927a2243..99157b4cd56e 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -639,7 +639,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) if (skb_has_frag_list(skb)) { int first_len = skb_pagelen(skb); - int truesizes = 0; + struct sk_buff *frag2; if (first_len - hlen > mtu || ((first_len - hlen) & 7) || @@ -651,18 +651,18 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) if (frag->len > mtu || ((frag->len & 7) && frag->next) || skb_headroom(frag) < hlen) - goto slow_path; + goto slow_path_clean; /* Partially cloned skb? */ if (skb_shared(frag)) - goto slow_path; + goto slow_path_clean; BUG_ON(frag->sk); if (skb->sk) { frag->sk = skb->sk; frag->destructor = sock_wfree; - truesizes += frag->truesize; } + skb->truesize -= frag->truesize; } err = 0; @@ -693,7 +693,6 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) first_len = skb_pagelen(skb); skb->data_len = first_len - skb_headlen(skb); - skb->truesize -= truesizes; skb->len = first_len; ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr)); @@ -756,6 +755,15 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) IPSTATS_MIB_FRAGFAILS); dst_release(&rt->dst); return err; + +slow_path_clean: + skb_walk_frags(skb, frag2) { + if (frag2 == frag) + break; + frag2->sk = NULL; + frag2->destructor = NULL; + skb->truesize += frag2->truesize; + } } slow_path: @@ -870,8 +878,8 @@ static inline int ip6_rt_check(struct rt6key *rt_key, struct in6_addr *fl_addr, struct in6_addr *addr_cache) { - return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) && - (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache))); + return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) && + (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)); } static struct dst_entry *ip6_sk_dst_check(struct sock *sk, diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 29f99dd75bc6..c2c0f89397b1 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -75,7 +75,7 @@ MODULE_LICENSE("GPL"); (addr)->s6_addr32[2] ^ (addr)->s6_addr32[3]) & \ (HASH_SIZE - 1)) -static void ip6_tnl_dev_init(struct net_device *dev); +static int ip6_tnl_dev_init(struct net_device *dev); static void ip6_tnl_dev_setup(struct net_device *dev); static int ip6_tnl_net_id __read_mostly; @@ -83,15 +83,42 @@ struct ip6_tnl_net { /* the IPv6 tunnel fallback device */ struct net_device *fb_tnl_dev; /* lists for storing tunnels in use */ - struct ip6_tnl *tnls_r_l[HASH_SIZE]; - struct ip6_tnl *tnls_wc[1]; - struct ip6_tnl **tnls[2]; + struct ip6_tnl __rcu *tnls_r_l[HASH_SIZE]; + struct ip6_tnl __rcu *tnls_wc[1]; + struct ip6_tnl __rcu **tnls[2]; }; +/* often modified stats are per cpu, other are shared (netdev->stats) */ +struct pcpu_tstats { + unsigned long rx_packets; + unsigned long rx_bytes; + unsigned long tx_packets; + unsigned long tx_bytes; +}; + +static struct net_device_stats *ip6_get_stats(struct net_device *dev) +{ + struct pcpu_tstats sum = { 0 }; + int i; + + for_each_possible_cpu(i) { + const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); + + sum.rx_packets += tstats->rx_packets; + sum.rx_bytes += tstats->rx_bytes; + sum.tx_packets += tstats->tx_packets; + sum.tx_bytes += tstats->tx_bytes; + } + dev->stats.rx_packets = sum.rx_packets; + dev->stats.rx_bytes = sum.rx_bytes; + dev->stats.tx_packets = sum.tx_packets; + dev->stats.tx_bytes = sum.tx_bytes; + return &dev->stats; +} + /* - * Locking : hash tables are protected by RCU and a spinlock + * Locking : hash tables are protected by RCU and RTNL */ -static DEFINE_SPINLOCK(ip6_tnl_lock); static inline struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t) { @@ -138,8 +165,8 @@ static inline void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst) static struct ip6_tnl * ip6_tnl_lookup(struct net *net, struct in6_addr *remote, struct in6_addr *local) { - unsigned h0 = HASH(remote); - unsigned h1 = HASH(local); + unsigned int h0 = HASH(remote); + unsigned int h1 = HASH(local); struct ip6_tnl *t; struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); @@ -167,7 +194,7 @@ ip6_tnl_lookup(struct net *net, struct in6_addr *remote, struct in6_addr *local) * Return: head of IPv6 tunnel list **/ -static struct ip6_tnl ** +static struct ip6_tnl __rcu ** ip6_tnl_bucket(struct ip6_tnl_net *ip6n, struct ip6_tnl_parm *p) { struct in6_addr *remote = &p->raddr; @@ -190,12 +217,10 @@ ip6_tnl_bucket(struct ip6_tnl_net *ip6n, struct ip6_tnl_parm *p) static void ip6_tnl_link(struct ip6_tnl_net *ip6n, struct ip6_tnl *t) { - struct ip6_tnl **tp = ip6_tnl_bucket(ip6n, &t->parms); + struct ip6_tnl __rcu **tp = ip6_tnl_bucket(ip6n, &t->parms); - spin_lock_bh(&ip6_tnl_lock); - t->next = *tp; + rcu_assign_pointer(t->next , rtnl_dereference(*tp)); rcu_assign_pointer(*tp, t); - spin_unlock_bh(&ip6_tnl_lock); } /** @@ -206,18 +231,25 @@ ip6_tnl_link(struct ip6_tnl_net *ip6n, struct ip6_tnl *t) static void ip6_tnl_unlink(struct ip6_tnl_net *ip6n, struct ip6_tnl *t) { - struct ip6_tnl **tp; - - for (tp = ip6_tnl_bucket(ip6n, &t->parms); *tp; tp = &(*tp)->next) { - if (t == *tp) { - spin_lock_bh(&ip6_tnl_lock); - *tp = t->next; - spin_unlock_bh(&ip6_tnl_lock); + struct ip6_tnl __rcu **tp; + struct ip6_tnl *iter; + + for (tp = ip6_tnl_bucket(ip6n, &t->parms); + (iter = rtnl_dereference(*tp)) != NULL; + tp = &iter->next) { + if (t == iter) { + rcu_assign_pointer(*tp, t->next); break; } } } +static void ip6_dev_free(struct net_device *dev) +{ + free_percpu(dev->tstats); + free_netdev(dev); +} + /** * ip6_tnl_create() - create a new tunnel * @p: tunnel parameters @@ -256,7 +288,9 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct ip6_tnl_parm *p) t = netdev_priv(dev); t->parms = *p; - ip6_tnl_dev_init(dev); + err = ip6_tnl_dev_init(dev); + if (err < 0) + goto failed_free; if ((err = register_netdevice(dev)) < 0) goto failed_free; @@ -266,7 +300,7 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct ip6_tnl_parm *p) return t; failed_free: - free_netdev(dev); + ip6_dev_free(dev); failed: return NULL; } @@ -290,10 +324,13 @@ static struct ip6_tnl *ip6_tnl_locate(struct net *net, { struct in6_addr *remote = &p->raddr; struct in6_addr *local = &p->laddr; + struct ip6_tnl __rcu **tp; struct ip6_tnl *t; struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); - for (t = *ip6_tnl_bucket(ip6n, p); t; t = t->next) { + for (tp = ip6_tnl_bucket(ip6n, p); + (t = rtnl_dereference(*tp)) != NULL; + tp = &t->next) { if (ipv6_addr_equal(local, &t->parms.laddr) && ipv6_addr_equal(remote, &t->parms.raddr)) return t; @@ -318,13 +355,10 @@ ip6_tnl_dev_uninit(struct net_device *dev) struct net *net = dev_net(dev); struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); - if (dev == ip6n->fb_tnl_dev) { - spin_lock_bh(&ip6_tnl_lock); - ip6n->tnls_wc[0] = NULL; - spin_unlock_bh(&ip6_tnl_lock); - } else { + if (dev == ip6n->fb_tnl_dev) + rcu_assign_pointer(ip6n->tnls_wc[0], NULL); + else ip6_tnl_unlink(ip6n, t); - } ip6_tnl_dst_reset(t); dev_put(dev); } @@ -702,6 +736,8 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol, if ((t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, &ipv6h->daddr)) != NULL) { + struct pcpu_tstats *tstats; + if (t->parms.proto != ipproto && t->parms.proto != 0) { rcu_read_unlock(); goto discard; @@ -724,10 +760,16 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol, skb->pkt_type = PACKET_HOST; memset(skb->cb, 0, sizeof(struct inet6_skb_parm)); - skb_tunnel_rx(skb, t->dev); + tstats = this_cpu_ptr(t->dev->tstats); + tstats->rx_packets++; + tstats->rx_bytes += skb->len; + + __skb_tunnel_rx(skb, t->dev); dscp_ecn_decapsulate(t, ipv6h, skb); + netif_rx(skb); + rcu_read_unlock(); return 0; } @@ -934,8 +976,10 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, err = ip6_local_out(skb); if (net_xmit_eval(err) == 0) { - stats->tx_bytes += pkt_len; - stats->tx_packets++; + struct pcpu_tstats *tstats = this_cpu_ptr(t->dev->tstats); + + tstats->tx_bytes += pkt_len; + tstats->tx_packets++; } else { stats->tx_errors++; stats->tx_aborted_errors++; @@ -1300,12 +1344,14 @@ ip6_tnl_change_mtu(struct net_device *dev, int new_mtu) static const struct net_device_ops ip6_tnl_netdev_ops = { - .ndo_uninit = ip6_tnl_dev_uninit, + .ndo_uninit = ip6_tnl_dev_uninit, .ndo_start_xmit = ip6_tnl_xmit, - .ndo_do_ioctl = ip6_tnl_ioctl, + .ndo_do_ioctl = ip6_tnl_ioctl, .ndo_change_mtu = ip6_tnl_change_mtu, + .ndo_get_stats = ip6_get_stats, }; + /** * ip6_tnl_dev_setup - setup virtual tunnel device * @dev: virtual device associated with tunnel @@ -1317,7 +1363,7 @@ static const struct net_device_ops ip6_tnl_netdev_ops = { static void ip6_tnl_dev_setup(struct net_device *dev) { dev->netdev_ops = &ip6_tnl_netdev_ops; - dev->destructor = free_netdev; + dev->destructor = ip6_dev_free; dev->type = ARPHRD_TUNNEL6; dev->hard_header_len = LL_MAX_HEADER + sizeof (struct ipv6hdr); @@ -1333,12 +1379,17 @@ static void ip6_tnl_dev_setup(struct net_device *dev) * @dev: virtual device associated with tunnel **/ -static inline void +static inline int ip6_tnl_dev_init_gen(struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); + t->dev = dev; strcpy(t->parms.name, dev->name); + dev->tstats = alloc_percpu(struct pcpu_tstats); + if (!dev->tstats) + return -ENOMEM; + return 0; } /** @@ -1346,11 +1397,15 @@ ip6_tnl_dev_init_gen(struct net_device *dev) * @dev: virtual device associated with tunnel **/ -static void ip6_tnl_dev_init(struct net_device *dev) +static int ip6_tnl_dev_init(struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); - ip6_tnl_dev_init_gen(dev); + int err = ip6_tnl_dev_init_gen(dev); + + if (err) + return err; ip6_tnl_link_config(t); + return 0; } /** @@ -1360,16 +1415,20 @@ static void ip6_tnl_dev_init(struct net_device *dev) * Return: 0 **/ -static void __net_init ip6_fb_tnl_dev_init(struct net_device *dev) +static int __net_init ip6_fb_tnl_dev_init(struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); struct net *net = dev_net(dev); struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); + int err = ip6_tnl_dev_init_gen(dev); + + if (err) + return err; - ip6_tnl_dev_init_gen(dev); t->parms.proto = IPPROTO_IPV6; dev_hold(dev); - ip6n->tnls_wc[0] = t; + rcu_assign_pointer(ip6n->tnls_wc[0], t); + return 0; } static struct xfrm6_tunnel ip4ip6_handler __read_mostly = { @@ -1391,14 +1450,14 @@ static void __net_exit ip6_tnl_destroy_tunnels(struct ip6_tnl_net *ip6n) LIST_HEAD(list); for (h = 0; h < HASH_SIZE; h++) { - t = ip6n->tnls_r_l[h]; + t = rtnl_dereference(ip6n->tnls_r_l[h]); while (t != NULL) { unregister_netdevice_queue(t->dev, &list); - t = t->next; + t = rtnl_dereference(t->next); } } - t = ip6n->tnls_wc[0]; + t = rtnl_dereference(ip6n->tnls_wc[0]); unregister_netdevice_queue(t->dev, &list); unregister_netdevice_many(&list); } @@ -1419,7 +1478,9 @@ static int __net_init ip6_tnl_init_net(struct net *net) goto err_alloc_dev; dev_net_set(ip6n->fb_tnl_dev, net); - ip6_fb_tnl_dev_init(ip6n->fb_tnl_dev); + err = ip6_fb_tnl_dev_init(ip6n->fb_tnl_dev); + if (err < 0) + goto err_register; err = register_netdev(ip6n->fb_tnl_dev); if (err < 0) @@ -1427,7 +1488,7 @@ static int __net_init ip6_tnl_init_net(struct net *net) return 0; err_register: - free_netdev(ip6n->fb_tnl_dev); + ip6_dev_free(ip6n->fb_tnl_dev); err_alloc_dev: return err; } diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 66078dad7fe8..6f32ffce7022 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -667,6 +667,7 @@ static int pim6_rcv(struct sk_buff *skb) skb_tunnel_rx(skb, reg_dev); netif_rx(skb); + dev_put(reg_dev); return 0; drop: diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 69a0051cea67..998d6d27e7cf 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -91,7 +91,9 @@ #include <linux/netfilter.h> #include <linux/netfilter_ipv6.h> -static u32 ndisc_hash(const void *pkey, const struct net_device *dev); +static u32 ndisc_hash(const void *pkey, + const struct net_device *dev, + __u32 rnd); static int ndisc_constructor(struct neighbour *neigh); static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb); static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb); @@ -228,12 +230,12 @@ static struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur, do { cur = ((void *)cur) + (cur->nd_opt_len << 3); } while(cur < end && cur->nd_opt_type != type); - return (cur <= end && cur->nd_opt_type == type ? cur : NULL); + return cur <= end && cur->nd_opt_type == type ? cur : NULL; } static inline int ndisc_is_useropt(struct nd_opt_hdr *opt) { - return (opt->nd_opt_type == ND_OPT_RDNSS); + return opt->nd_opt_type == ND_OPT_RDNSS; } static struct nd_opt_hdr *ndisc_next_useropt(struct nd_opt_hdr *cur, @@ -244,7 +246,7 @@ static struct nd_opt_hdr *ndisc_next_useropt(struct nd_opt_hdr *cur, do { cur = ((void *)cur) + (cur->nd_opt_len << 3); } while(cur < end && !ndisc_is_useropt(cur)); - return (cur <= end && ndisc_is_useropt(cur) ? cur : NULL); + return cur <= end && ndisc_is_useropt(cur) ? cur : NULL; } static struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len, @@ -319,7 +321,7 @@ static inline u8 *ndisc_opt_addr_data(struct nd_opt_hdr *p, int prepad = ndisc_addr_option_pad(dev->type); if (lladdrlen != NDISC_OPT_SPACE(dev->addr_len + prepad)) return NULL; - return (lladdr + prepad); + return lladdr + prepad; } int ndisc_mc_map(struct in6_addr *addr, char *buf, struct net_device *dev, int dir) @@ -350,7 +352,9 @@ int ndisc_mc_map(struct in6_addr *addr, char *buf, struct net_device *dev, int d EXPORT_SYMBOL(ndisc_mc_map); -static u32 ndisc_hash(const void *pkey, const struct net_device *dev) +static u32 ndisc_hash(const void *pkey, + const struct net_device *dev, + __u32 hash_rnd) { const u32 *p32 = pkey; u32 addr_hash, i; @@ -359,7 +363,7 @@ static u32 ndisc_hash(const void *pkey, const struct net_device *dev) for (i = 0; i < (sizeof(struct in6_addr) / sizeof(u32)); i++) addr_hash ^= *p32++; - return jhash_2words(addr_hash, dev->ifindex, nd_tbl.hash_rnd); + return jhash_2words(addr_hash, dev->ifindex, hash_rnd); } static int ndisc_constructor(struct neighbour *neigh) diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index c683e9e7023b..51df035897e7 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -82,13 +82,13 @@ EXPORT_SYMBOL_GPL(ip6t_alloc_initial_table); int ip6t_ext_hdr(u8 nexthdr) { - return ( (nexthdr == IPPROTO_HOPOPTS) || - (nexthdr == IPPROTO_ROUTING) || - (nexthdr == IPPROTO_FRAGMENT) || - (nexthdr == IPPROTO_ESP) || - (nexthdr == IPPROTO_AH) || - (nexthdr == IPPROTO_NONE) || - (nexthdr == IPPROTO_DSTOPTS) ); + return (nexthdr == IPPROTO_HOPOPTS) || + (nexthdr == IPPROTO_ROUTING) || + (nexthdr == IPPROTO_FRAGMENT) || + (nexthdr == IPPROTO_ESP) || + (nexthdr == IPPROTO_AH) || + (nexthdr == IPPROTO_NONE) || + (nexthdr == IPPROTO_DSTOPTS); } /* Returns whether matches rule or not. */ diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index e677937a07fc..45e6efb7f171 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -764,7 +764,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, return -EINVAL; if (sin6->sin6_family && sin6->sin6_family != AF_INET6) - return(-EAFNOSUPPORT); + return -EAFNOSUPPORT; /* port is the proto value [0..255] carried in nexthdr */ proto = ntohs(sin6->sin6_port); @@ -772,10 +772,10 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, if (!proto) proto = inet->inet_num; else if (proto != inet->inet_num) - return(-EINVAL); + return -EINVAL; if (proto > 255) - return(-EINVAL); + return -EINVAL; daddr = &sin6->sin6_addr; if (np->sndflow) { @@ -985,7 +985,7 @@ static int do_rawv6_setsockopt(struct sock *sk, int level, int optname, /* You may get strange result with a positive odd offset; RFC2292bis agrees with me. */ if (val > 0 && (val&1)) - return(-EINVAL); + return -EINVAL; if (val < 0) { rp->checksum = 0; } else { @@ -997,7 +997,7 @@ static int do_rawv6_setsockopt(struct sock *sk, int level, int optname, break; default: - return(-ENOPROTOOPT); + return -ENOPROTOOPT; } } @@ -1190,7 +1190,7 @@ static int rawv6_init_sk(struct sock *sk) default: break; } - return(0); + return 0; } struct proto rawv6_prot = { diff --git a/net/ipv6/route.c b/net/ipv6/route.c index d126365ac046..25661f968f3f 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -109,7 +109,6 @@ static struct dst_ops ip6_dst_ops_template = { .link_failure = ip6_link_failure, .update_pmtu = ip6_rt_update_pmtu, .local_out = __ip6_local_out, - .entries = ATOMIC_INIT(0), }; static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) @@ -122,7 +121,6 @@ static struct dst_ops ip6_dst_blackhole_ops = { .destroy = ip6_dst_destroy, .check = ip6_dst_check, .update_pmtu = ip6_rt_blackhole_update_pmtu, - .entries = ATOMIC_INIT(0), }; static struct rt6_info ip6_null_entry_template = { @@ -217,14 +215,14 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, static __inline__ int rt6_check_expired(const struct rt6_info *rt) { - return (rt->rt6i_flags & RTF_EXPIRES && - time_after(jiffies, rt->rt6i_expires)); + return (rt->rt6i_flags & RTF_EXPIRES) && + time_after(jiffies, rt->rt6i_expires); } static inline int rt6_need_strict(struct in6_addr *daddr) { - return (ipv6_addr_type(daddr) & - (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK)); + return ipv6_addr_type(daddr) & + (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK); } /* @@ -440,7 +438,7 @@ static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict) __func__, match); net = dev_net(rt0->rt6i_dev); - return (match ? match : net->ipv6.ip6_null_entry); + return match ? match : net->ipv6.ip6_null_entry; } #ifdef CONFIG_IPV6_ROUTE_INFO @@ -670,7 +668,7 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *dad if (net_ratelimit()) printk(KERN_WARNING - "Neighbour table overflow.\n"); + "ipv6: Neighbour table overflow.\n"); dst_free(&rt->dst); return NULL; } @@ -859,7 +857,7 @@ int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl dst_release(*dstp); *dstp = new; - return (new ? 0 : -ENOMEM); + return new ? 0 : -ENOMEM; } EXPORT_SYMBOL_GPL(ip6_dst_blackhole); @@ -1058,19 +1056,22 @@ static int ip6_dst_gc(struct dst_ops *ops) int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity; int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout; unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc; + int entries; + entries = dst_entries_get_fast(ops); if (time_after(rt_last_gc + rt_min_interval, now) && - atomic_read(&ops->entries) <= rt_max_size) + entries <= rt_max_size) goto out; net->ipv6.ip6_rt_gc_expire++; fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net); net->ipv6.ip6_rt_last_gc = now; - if (atomic_read(&ops->entries) < ops->gc_thresh) + entries = dst_entries_get_slow(ops); + if (entries < ops->gc_thresh) net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1; out: net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity; - return (atomic_read(&ops->entries) > rt_max_size); + return entries > rt_max_size; } /* Clean host part of a prefix. Not necessary in radix tree, @@ -1169,6 +1170,8 @@ int ip6_route_add(struct fib6_config *cfg) if (addr_type & IPV6_ADDR_MULTICAST) rt->dst.input = ip6_mc_input; + else if (cfg->fc_flags & RTF_LOCAL) + rt->dst.input = ip6_input; else rt->dst.input = ip6_forward; @@ -1190,7 +1193,8 @@ int ip6_route_add(struct fib6_config *cfg) they would result in kernel looping; promote them to reject routes */ if ((cfg->fc_flags & RTF_REJECT) || - (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) { + (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK) + && !(cfg->fc_flags&RTF_LOCAL))) { /* hold loopback dev/idev if we haven't done so. */ if (dev != net->loopback_dev) { if (dev) { @@ -1556,14 +1560,13 @@ out: * i.e. Path MTU discovery */ -void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, - struct net_device *dev, u32 pmtu) +static void rt6_do_pmtu_disc(struct in6_addr *daddr, struct in6_addr *saddr, + struct net *net, u32 pmtu, int ifindex) { struct rt6_info *rt, *nrt; - struct net *net = dev_net(dev); int allfrag = 0; - rt = rt6_lookup(net, daddr, saddr, dev->ifindex, 0); + rt = rt6_lookup(net, daddr, saddr, ifindex, 0); if (rt == NULL) return; @@ -1631,6 +1634,27 @@ out: dst_release(&rt->dst); } +void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, + struct net_device *dev, u32 pmtu) +{ + struct net *net = dev_net(dev); + + /* + * RFC 1981 states that a node "MUST reduce the size of the packets it + * is sending along the path" that caused the Packet Too Big message. + * Since it's not possible in the general case to determine which + * interface was used to send the original packet, we update the MTU + * on the interface that will be used to send future packets. We also + * update the MTU on the interface that received the Packet Too Big in + * case the original packet was forced out that interface with + * SO_BINDTODEVICE or similar. This is the next best thing to the + * correct behaviour, which would be to update the MTU on all + * interfaces. + */ + rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0); + rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex); +} + /* * Misc support functions */ @@ -2082,6 +2106,9 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, if (rtm->rtm_type == RTN_UNREACHABLE) cfg->fc_flags |= RTF_REJECT; + if (rtm->rtm_type == RTN_LOCAL) + cfg->fc_flags |= RTF_LOCAL; + cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid; cfg->fc_nlinfo.nlh = nlh; cfg->fc_nlinfo.nl_net = sock_net(skb->sk); @@ -2202,6 +2229,8 @@ static int rt6_fill_node(struct net *net, NLA_PUT_U32(skb, RTA_TABLE, table); if (rt->rt6i_flags&RTF_REJECT) rtm->rtm_type = RTN_UNREACHABLE; + else if (rt->rt6i_flags&RTF_LOCAL) + rtm->rtm_type = RTN_LOCAL; else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK)) rtm->rtm_type = RTN_LOCAL; else @@ -2496,7 +2525,7 @@ static int rt6_stats_seq_show(struct seq_file *seq, void *v) net->ipv6.rt6_stats->fib_rt_alloc, net->ipv6.rt6_stats->fib_rt_entries, net->ipv6.rt6_stats->fib_rt_cache, - atomic_read(&net->ipv6.ip6_dst_ops.entries), + dst_entries_get_slow(&net->ipv6.ip6_dst_ops), net->ipv6.rt6_stats->fib_discarded_routes); return 0; @@ -2638,11 +2667,14 @@ static int __net_init ip6_route_net_init(struct net *net) memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template, sizeof(net->ipv6.ip6_dst_ops)); + if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0) + goto out_ip6_dst_ops; + net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template, sizeof(*net->ipv6.ip6_null_entry), GFP_KERNEL); if (!net->ipv6.ip6_null_entry) - goto out_ip6_dst_ops; + goto out_ip6_dst_entries; net->ipv6.ip6_null_entry->dst.path = (struct dst_entry *)net->ipv6.ip6_null_entry; net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops; @@ -2692,6 +2724,8 @@ out_ip6_prohibit_entry: out_ip6_null_entry: kfree(net->ipv6.ip6_null_entry); #endif +out_ip6_dst_entries: + dst_entries_destroy(&net->ipv6.ip6_dst_ops); out_ip6_dst_ops: goto out; } @@ -2730,10 +2764,14 @@ int __init ip6_route_init(void) if (!ip6_dst_ops_template.kmem_cachep) goto out; - ret = register_pernet_subsys(&ip6_route_net_ops); + ret = dst_entries_init(&ip6_dst_blackhole_ops); if (ret) goto out_kmem_cache; + ret = register_pernet_subsys(&ip6_route_net_ops); + if (ret) + goto out_dst_entries; + ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep; /* Registering of the loopback is done before this portion of code, @@ -2780,6 +2818,8 @@ out_fib6_init: fib6_gc_cleanup(); out_register_subsys: unregister_pernet_subsys(&ip6_route_net_ops); +out_dst_entries: + dst_entries_destroy(&ip6_dst_blackhole_ops); out_kmem_cache: kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep); goto out; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 86618eb30335..367a6cc584cc 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -63,36 +63,63 @@ #define HASH_SIZE 16 #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF) -static void ipip6_tunnel_init(struct net_device *dev); +static int ipip6_tunnel_init(struct net_device *dev); static void ipip6_tunnel_setup(struct net_device *dev); +static void ipip6_dev_free(struct net_device *dev); static int sit_net_id __read_mostly; struct sit_net { - struct ip_tunnel *tunnels_r_l[HASH_SIZE]; - struct ip_tunnel *tunnels_r[HASH_SIZE]; - struct ip_tunnel *tunnels_l[HASH_SIZE]; - struct ip_tunnel *tunnels_wc[1]; - struct ip_tunnel **tunnels[4]; + struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE]; + struct ip_tunnel __rcu *tunnels_r[HASH_SIZE]; + struct ip_tunnel __rcu *tunnels_l[HASH_SIZE]; + struct ip_tunnel __rcu *tunnels_wc[1]; + struct ip_tunnel __rcu **tunnels[4]; struct net_device *fb_tunnel_dev; }; /* - * Locking : hash tables are protected by RCU and a spinlock + * Locking : hash tables are protected by RCU and RTNL */ -static DEFINE_SPINLOCK(ipip6_lock); #define for_each_ip_tunnel_rcu(start) \ for (t = rcu_dereference(start); t; t = rcu_dereference(t->next)) +/* often modified stats are per cpu, other are shared (netdev->stats) */ +struct pcpu_tstats { + unsigned long rx_packets; + unsigned long rx_bytes; + unsigned long tx_packets; + unsigned long tx_bytes; +}; + +static struct net_device_stats *ipip6_get_stats(struct net_device *dev) +{ + struct pcpu_tstats sum = { 0 }; + int i; + + for_each_possible_cpu(i) { + const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); + + sum.rx_packets += tstats->rx_packets; + sum.rx_bytes += tstats->rx_bytes; + sum.tx_packets += tstats->tx_packets; + sum.tx_bytes += tstats->tx_bytes; + } + dev->stats.rx_packets = sum.rx_packets; + dev->stats.rx_bytes = sum.rx_bytes; + dev->stats.tx_packets = sum.tx_packets; + dev->stats.tx_bytes = sum.tx_bytes; + return &dev->stats; +} /* * Must be invoked with rcu_read_lock */ static struct ip_tunnel * ipip6_tunnel_lookup(struct net *net, struct net_device *dev, __be32 remote, __be32 local) { - unsigned h0 = HASH(remote); - unsigned h1 = HASH(local); + unsigned int h0 = HASH(remote); + unsigned int h1 = HASH(local); struct ip_tunnel *t; struct sit_net *sitn = net_generic(net, sit_net_id); @@ -121,12 +148,12 @@ static struct ip_tunnel * ipip6_tunnel_lookup(struct net *net, return NULL; } -static struct ip_tunnel **__ipip6_bucket(struct sit_net *sitn, +static struct ip_tunnel __rcu **__ipip6_bucket(struct sit_net *sitn, struct ip_tunnel_parm *parms) { __be32 remote = parms->iph.daddr; __be32 local = parms->iph.saddr; - unsigned h = 0; + unsigned int h = 0; int prio = 0; if (remote) { @@ -140,7 +167,7 @@ static struct ip_tunnel **__ipip6_bucket(struct sit_net *sitn, return &sitn->tunnels[prio][h]; } -static inline struct ip_tunnel **ipip6_bucket(struct sit_net *sitn, +static inline struct ip_tunnel __rcu **ipip6_bucket(struct sit_net *sitn, struct ip_tunnel *t) { return __ipip6_bucket(sitn, &t->parms); @@ -148,13 +175,14 @@ static inline struct ip_tunnel **ipip6_bucket(struct sit_net *sitn, static void ipip6_tunnel_unlink(struct sit_net *sitn, struct ip_tunnel *t) { - struct ip_tunnel **tp; - - for (tp = ipip6_bucket(sitn, t); *tp; tp = &(*tp)->next) { - if (t == *tp) { - spin_lock_bh(&ipip6_lock); - *tp = t->next; - spin_unlock_bh(&ipip6_lock); + struct ip_tunnel __rcu **tp; + struct ip_tunnel *iter; + + for (tp = ipip6_bucket(sitn, t); + (iter = rtnl_dereference(*tp)) != NULL; + tp = &iter->next) { + if (t == iter) { + rcu_assign_pointer(*tp, t->next); break; } } @@ -162,12 +190,10 @@ static void ipip6_tunnel_unlink(struct sit_net *sitn, struct ip_tunnel *t) static void ipip6_tunnel_link(struct sit_net *sitn, struct ip_tunnel *t) { - struct ip_tunnel **tp = ipip6_bucket(sitn, t); + struct ip_tunnel __rcu **tp = ipip6_bucket(sitn, t); - spin_lock_bh(&ipip6_lock); - t->next = *tp; + rcu_assign_pointer(t->next, rtnl_dereference(*tp)); rcu_assign_pointer(*tp, t); - spin_unlock_bh(&ipip6_lock); } static void ipip6_tunnel_clone_6rd(struct net_device *dev, struct sit_net *sitn) @@ -187,17 +213,20 @@ static void ipip6_tunnel_clone_6rd(struct net_device *dev, struct sit_net *sitn) #endif } -static struct ip_tunnel * ipip6_tunnel_locate(struct net *net, +static struct ip_tunnel *ipip6_tunnel_locate(struct net *net, struct ip_tunnel_parm *parms, int create) { __be32 remote = parms->iph.daddr; __be32 local = parms->iph.saddr; - struct ip_tunnel *t, **tp, *nt; + struct ip_tunnel *t, *nt; + struct ip_tunnel __rcu **tp; struct net_device *dev; char name[IFNAMSIZ]; struct sit_net *sitn = net_generic(net, sit_net_id); - for (tp = __ipip6_bucket(sitn, parms); (t = *tp) != NULL; tp = &t->next) { + for (tp = __ipip6_bucket(sitn, parms); + (t = rtnl_dereference(*tp)) != NULL; + tp = &t->next) { if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr && parms->link == t->parms.link) { @@ -213,7 +242,7 @@ static struct ip_tunnel * ipip6_tunnel_locate(struct net *net, if (parms->name[0]) strlcpy(name, parms->name, IFNAMSIZ); else - sprintf(name, "sit%%d"); + strcpy(name, "sit%d"); dev = alloc_netdev(sizeof(*t), name, ipip6_tunnel_setup); if (dev == NULL) @@ -229,7 +258,8 @@ static struct ip_tunnel * ipip6_tunnel_locate(struct net *net, nt = netdev_priv(dev); nt->parms = *parms; - ipip6_tunnel_init(dev); + if (ipip6_tunnel_init(dev) < 0) + goto failed_free; ipip6_tunnel_clone_6rd(dev, sitn); if (parms->i_flags & SIT_ISATAP) @@ -244,7 +274,7 @@ static struct ip_tunnel * ipip6_tunnel_locate(struct net *net, return nt; failed_free: - free_netdev(dev); + ipip6_dev_free(dev); failed: return NULL; } @@ -340,7 +370,7 @@ ipip6_tunnel_add_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a, int chg) ASSERT_RTNL(); - for (p = t->prl; p; p = p->next) { + for (p = rtnl_dereference(t->prl); p; p = rtnl_dereference(p->next)) { if (p->addr == a->addr) { if (chg) { p->flags = a->flags; @@ -451,15 +481,12 @@ static void ipip6_tunnel_uninit(struct net_device *dev) struct sit_net *sitn = net_generic(net, sit_net_id); if (dev == sitn->fb_tunnel_dev) { - spin_lock_bh(&ipip6_lock); - sitn->tunnels_wc[0] = NULL; - spin_unlock_bh(&ipip6_lock); - dev_put(dev); + rcu_assign_pointer(sitn->tunnels_wc[0], NULL); } else { ipip6_tunnel_unlink(sitn, netdev_priv(dev)); ipip6_tunnel_del_prl(netdev_priv(dev), NULL); - dev_put(dev); } + dev_put(dev); } @@ -548,6 +575,8 @@ static int ipip6_rcv(struct sk_buff *skb) tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev, iph->saddr, iph->daddr); if (tunnel != NULL) { + struct pcpu_tstats *tstats; + secpath_reset(skb); skb->mac_header = skb->network_header; skb_reset_network_header(skb); @@ -563,10 +592,16 @@ static int ipip6_rcv(struct sk_buff *skb) return 0; } - skb_tunnel_rx(skb, tunnel->dev); + tstats = this_cpu_ptr(tunnel->dev->tstats); + tstats->rx_packets++; + tstats->rx_bytes += skb->len; + + __skb_tunnel_rx(skb, tunnel->dev); ipip6_ecn_decapsulate(iph, skb); + netif_rx(skb); + rcu_read_unlock(); return 0; } @@ -590,7 +625,7 @@ __be32 try_6rd(struct in6_addr *v6dst, struct ip_tunnel *tunnel) #ifdef CONFIG_IPV6_SIT_6RD if (ipv6_prefix_equal(v6dst, &tunnel->ip6rd.prefix, tunnel->ip6rd.prefixlen)) { - unsigned pbw0, pbi0; + unsigned int pbw0, pbi0; int pbi1; u32 d; @@ -625,14 +660,13 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); - struct net_device_stats *stats = &dev->stats; - struct netdev_queue *txq = netdev_get_tx_queue(dev, 0); + struct pcpu_tstats *tstats; struct iphdr *tiph = &tunnel->parms.iph; struct ipv6hdr *iph6 = ipv6_hdr(skb); u8 tos = tunnel->parms.iph.tos; __be16 df = tiph->frag_off; struct rtable *rt; /* Route to the other host */ - struct net_device *tdev; /* Device to other host */ + struct net_device *tdev; /* Device to other host */ struct iphdr *iph; /* Our new IP header */ unsigned int max_headroom; /* The extra header space needed */ __be32 dst = tiph->daddr; @@ -703,20 +737,20 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, .oif = tunnel->parms.link, .proto = IPPROTO_IPV6 }; if (ip_route_output_key(dev_net(dev), &rt, &fl)) { - stats->tx_carrier_errors++; + dev->stats.tx_carrier_errors++; goto tx_error_icmp; } } if (rt->rt_type != RTN_UNICAST) { ip_rt_put(rt); - stats->tx_carrier_errors++; + dev->stats.tx_carrier_errors++; goto tx_error_icmp; } tdev = rt->dst.dev; if (tdev == dev) { ip_rt_put(rt); - stats->collisions++; + dev->stats.collisions++; goto tx_error; } @@ -724,7 +758,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr); if (mtu < 68) { - stats->collisions++; + dev->stats.collisions++; ip_rt_put(rt); goto tx_error; } @@ -763,7 +797,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); if (!new_skb) { ip_rt_put(rt); - txq->tx_dropped++; + dev->stats.tx_dropped++; dev_kfree_skb(skb); return NETDEV_TX_OK; } @@ -799,14 +833,14 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, iph->ttl = iph6->hop_limit; nf_reset(skb); - - IPTUNNEL_XMIT(); + tstats = this_cpu_ptr(dev->tstats); + __IPTUNNEL_XMIT(tstats, &dev->stats); return NETDEV_TX_OK; tx_error_icmp: dst_link_failure(skb); tx_error: - stats->tx_errors++; + dev->stats.tx_errors++; dev_kfree_skb(skb); return NETDEV_TX_OK; } @@ -1083,12 +1117,19 @@ static const struct net_device_ops ipip6_netdev_ops = { .ndo_start_xmit = ipip6_tunnel_xmit, .ndo_do_ioctl = ipip6_tunnel_ioctl, .ndo_change_mtu = ipip6_tunnel_change_mtu, + .ndo_get_stats = ipip6_get_stats, }; +static void ipip6_dev_free(struct net_device *dev) +{ + free_percpu(dev->tstats); + free_netdev(dev); +} + static void ipip6_tunnel_setup(struct net_device *dev) { dev->netdev_ops = &ipip6_netdev_ops; - dev->destructor = free_netdev; + dev->destructor = ipip6_dev_free; dev->type = ARPHRD_SIT; dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr); @@ -1098,9 +1139,10 @@ static void ipip6_tunnel_setup(struct net_device *dev) dev->iflink = 0; dev->addr_len = 4; dev->features |= NETIF_F_NETNS_LOCAL; + dev->features |= NETIF_F_LLTX; } -static void ipip6_tunnel_init(struct net_device *dev) +static int ipip6_tunnel_init(struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); @@ -1111,9 +1153,14 @@ static void ipip6_tunnel_init(struct net_device *dev) memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); ipip6_tunnel_bind_dev(dev); + dev->tstats = alloc_percpu(struct pcpu_tstats); + if (!dev->tstats) + return -ENOMEM; + + return 0; } -static void __net_init ipip6_fb_tunnel_init(struct net_device *dev) +static int __net_init ipip6_fb_tunnel_init(struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); struct iphdr *iph = &tunnel->parms.iph; @@ -1128,8 +1175,12 @@ static void __net_init ipip6_fb_tunnel_init(struct net_device *dev) iph->ihl = 5; iph->ttl = 64; + dev->tstats = alloc_percpu(struct pcpu_tstats); + if (!dev->tstats) + return -ENOMEM; dev_hold(dev); sitn->tunnels_wc[0] = tunnel; + return 0; } static struct xfrm_tunnel sit_handler __read_mostly = { @@ -1173,7 +1224,10 @@ static int __net_init sit_init_net(struct net *net) } dev_net_set(sitn->fb_tunnel_dev, net); - ipip6_fb_tunnel_init(sitn->fb_tunnel_dev); + err = ipip6_fb_tunnel_init(sitn->fb_tunnel_dev); + if (err) + goto err_dev_free; + ipip6_tunnel_clone_6rd(sitn->fb_tunnel_dev, sitn); if ((err = register_netdev(sitn->fb_tunnel_dev))) @@ -1183,7 +1237,8 @@ static int __net_init sit_init_net(struct net *net) err_reg_dev: dev_put(sitn->fb_tunnel_dev); - free_netdev(sitn->fb_tunnel_dev); +err_dev_free: + ipip6_dev_free(sitn->fb_tunnel_dev); err_alloc_dev: return err; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index ba5258ef1c57..7e41e2cbb85e 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -139,7 +139,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, return -EINVAL; if (usin->sin6_family != AF_INET6) - return(-EAFNOSUPPORT); + return -EAFNOSUPPORT; memset(&fl, 0, sizeof(fl)); diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 6baeabbbca82..7e74023ea6e4 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -199,7 +199,7 @@ static inline int xfrm6_garbage_collect(struct dst_ops *ops) struct net *net = container_of(ops, struct net, xfrm.xfrm6_dst_ops); xfrm6_policy_afinfo.garbage_collect(net); - return (atomic_read(&ops->entries) > ops->gc_thresh * 2); + return dst_entries_get_fast(ops) > ops->gc_thresh * 2; } static void xfrm6_update_pmtu(struct dst_entry *dst, u32 mtu) @@ -255,7 +255,6 @@ static struct dst_ops xfrm6_dst_ops = { .ifdown = xfrm6_dst_ifdown, .local_out = __ip6_local_out, .gc_thresh = 1024, - .entries = ATOMIC_INIT(0), }; static struct xfrm_policy_afinfo xfrm6_policy_afinfo = { @@ -312,11 +311,13 @@ int __init xfrm6_init(void) */ gc_thresh = FIB6_TABLE_HASHSZ * 8; xfrm6_dst_ops.gc_thresh = (gc_thresh < 1024) ? 1024 : gc_thresh; + dst_entries_init(&xfrm6_dst_ops); ret = xfrm6_policy_init(); - if (ret) + if (ret) { + dst_entries_destroy(&xfrm6_dst_ops); goto out; - + } ret = xfrm6_state_init(); if (ret) goto out_policy; @@ -341,4 +342,5 @@ void xfrm6_fini(void) //xfrm6_input_fini(); xfrm6_policy_fini(); xfrm6_state_fini(); + dst_entries_destroy(&xfrm6_dst_ops); } diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index f417b77fa0e1..a67575d472a3 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -20,23 +20,27 @@ #include <net/addrconf.h> static void -__xfrm6_init_tempsel(struct xfrm_state *x, struct flowi *fl, - struct xfrm_tmpl *tmpl, - xfrm_address_t *daddr, xfrm_address_t *saddr) +__xfrm6_init_tempsel(struct xfrm_selector *sel, struct flowi *fl) { /* Initialize temporary selector matching only * to current session. */ - ipv6_addr_copy((struct in6_addr *)&x->sel.daddr, &fl->fl6_dst); - ipv6_addr_copy((struct in6_addr *)&x->sel.saddr, &fl->fl6_src); - x->sel.dport = xfrm_flowi_dport(fl); - x->sel.dport_mask = htons(0xffff); - x->sel.sport = xfrm_flowi_sport(fl); - x->sel.sport_mask = htons(0xffff); - x->sel.family = AF_INET6; - x->sel.prefixlen_d = 128; - x->sel.prefixlen_s = 128; - x->sel.proto = fl->proto; - x->sel.ifindex = fl->oif; + ipv6_addr_copy((struct in6_addr *)&sel->daddr, &fl->fl6_dst); + ipv6_addr_copy((struct in6_addr *)&sel->saddr, &fl->fl6_src); + sel->dport = xfrm_flowi_dport(fl); + sel->dport_mask = htons(0xffff); + sel->sport = xfrm_flowi_sport(fl); + sel->sport_mask = htons(0xffff); + sel->family = AF_INET6; + sel->prefixlen_d = 128; + sel->prefixlen_s = 128; + sel->proto = fl->proto; + sel->ifindex = fl->oif; +} + +static void +xfrm6_init_temprop(struct xfrm_state *x, struct xfrm_tmpl *tmpl, + xfrm_address_t *daddr, xfrm_address_t *saddr) +{ x->id = tmpl->id; if (ipv6_addr_any((struct in6_addr*)&x->id.daddr)) memcpy(&x->id.daddr, daddr, sizeof(x->sel.daddr)); @@ -168,6 +172,7 @@ static struct xfrm_state_afinfo xfrm6_state_afinfo = { .eth_proto = htons(ETH_P_IPV6), .owner = THIS_MODULE, .init_tempsel = __xfrm6_init_tempsel, + .init_temprop = xfrm6_init_temprop, .tmpl_sort = __xfrm6_tmpl_sort, .state_sort = __xfrm6_state_sort, .output = xfrm6_output, diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index ac7584b946a5..2969cad408de 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -199,7 +199,7 @@ static void x6spi_destroy_rcu(struct rcu_head *head) container_of(head, struct xfrm6_tunnel_spi, rcu_head)); } -void xfrm6_tunnel_free_spi(struct net *net, xfrm_address_t *saddr) +static void xfrm6_tunnel_free_spi(struct net *net, xfrm_address_t *saddr) { struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net); struct xfrm6_tunnel_spi *x6spi; @@ -223,8 +223,6 @@ void xfrm6_tunnel_free_spi(struct net *net, xfrm_address_t *saddr) spin_unlock_bh(&xfrm6_tunnel_spi_lock); } -EXPORT_SYMBOL(xfrm6_tunnel_free_spi); - static int xfrm6_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) { skb_push(skb, -skb_network_offset(skb)); |