diff options
Diffstat (limited to 'net')
188 files changed, 5438 insertions, 1195 deletions
diff --git a/net/802/garp.c b/net/802/garp.c index 16102951d36a..070bf4403bf8 100644 --- a/net/802/garp.c +++ b/net/802/garp.c @@ -553,7 +553,7 @@ static void garp_release_port(struct net_device *dev) if (rtnl_dereference(port->applicants[i])) return; } - rcu_assign_pointer(dev->garp_port, NULL); + RCU_INIT_POINTER(dev->garp_port, NULL); kfree_rcu(port, rcu); } @@ -605,7 +605,7 @@ void garp_uninit_applicant(struct net_device *dev, struct garp_application *appl ASSERT_RTNL(); - rcu_assign_pointer(port->applicants[appl->type], NULL); + RCU_INIT_POINTER(port->applicants[appl->type], NULL); /* Delete timer and generate a final TRANSMIT_PDU event to flush out * all pending messages before the applicant is gone. */ diff --git a/net/802/stp.c b/net/802/stp.c index 978c30b1b36b..0e136ef1e4ba 100644 --- a/net/802/stp.c +++ b/net/802/stp.c @@ -88,9 +88,9 @@ void stp_proto_unregister(const struct stp_proto *proto) { mutex_lock(&stp_proto_mutex); if (is_zero_ether_addr(proto->group_address)) - rcu_assign_pointer(stp_proto, NULL); + RCU_INIT_POINTER(stp_proto, NULL); else - rcu_assign_pointer(garp_protos[proto->group_address[5] - + RCU_INIT_POINTER(garp_protos[proto->group_address[5] - GARP_ADDR_MIN], NULL); synchronize_rcu(); diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 8970ba139d73..5471628d3ffe 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -133,7 +133,7 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head) if (grp->nr_vlans == 0) { vlan_gvrp_uninit_applicant(real_dev); - rcu_assign_pointer(real_dev->vlgrp, NULL); + RCU_INIT_POINTER(real_dev->vlgrp, NULL); /* Free the group, after all cpu's are done. */ call_rcu(&grp->rcu, vlan_rcu_free); diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 5f27f8e30254..f1f2f7bb6661 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -167,6 +167,8 @@ struct sk_buff *vlan_untag(struct sk_buff *skb) if (unlikely(!skb)) goto err_free; + skb_reset_network_header(skb); + skb_reset_transport_header(skb); return skb; err_free: diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 9d40a071d038..c8cf9391417e 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -610,7 +610,8 @@ static int vlan_ethtool_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) { const struct vlan_dev_info *vlan = vlan_dev_info(dev); - return dev_ethtool_get_settings(vlan->real_dev, cmd); + + return __ethtool_get_settings(vlan->real_dev, cmd); } static void vlan_ethtool_get_drvinfo(struct net_device *dev, @@ -674,7 +675,6 @@ static const struct net_device_ops vlan_netdev_ops = { .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = vlan_dev_set_mac_address, .ndo_set_rx_mode = vlan_dev_set_rx_mode, - .ndo_set_multicast_list = vlan_dev_set_rx_mode, .ndo_change_rx_flags = vlan_dev_change_rx_flags, .ndo_do_ioctl = vlan_dev_ioctl, .ndo_neigh_setup = vlan_dev_neigh_setup, diff --git a/net/atm/br2684.c b/net/atm/br2684.c index 2252c2085dac..52cfd0c3ea71 100644 --- a/net/atm/br2684.c +++ b/net/atm/br2684.c @@ -242,8 +242,6 @@ static int br2684_xmit_vcc(struct sk_buff *skb, struct net_device *dev, if (brdev->payload == p_bridged) { skb_push(skb, 2); memset(skb->data, 0, 2); - } else { /* p_routed */ - skb_pull(skb, ETH_HLEN); } } skb_debug(skb); diff --git a/net/atm/lec.c b/net/atm/lec.c index 215c9fad7cdf..f1964caa0f83 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -643,7 +643,7 @@ static const struct net_device_ops lec_netdev_ops = { .ndo_start_xmit = lec_start_xmit, .ndo_change_mtu = lec_change_mtu, .ndo_tx_timeout = lec_tx_timeout, - .ndo_set_multicast_list = lec_set_multicast_list, + .ndo_set_rx_mode = lec_set_multicast_list, }; static const unsigned char lec_ctrl_magic[] = { diff --git a/net/batman-adv/aggregation.h b/net/batman-adv/aggregation.h index 216337bb841f..df4a5a943088 100644 --- a/net/batman-adv/aggregation.h +++ b/net/batman-adv/aggregation.h @@ -28,8 +28,7 @@ static inline int aggregated_packet(int buff_pos, int packet_len, int tt_num_changes) { - int next_buff_pos = buff_pos + BAT_PACKET_LEN + (tt_num_changes * - sizeof(struct tt_change)); + int next_buff_pos = buff_pos + BAT_PACKET_LEN + tt_len(tt_num_changes); return (next_buff_pos <= packet_len) && (next_buff_pos <= MAX_AGGREGATION_BYTES); diff --git a/net/batman-adv/bat_sysfs.c b/net/batman-adv/bat_sysfs.c index cd15deba60a1..b8a7414c3571 100644 --- a/net/batman-adv/bat_sysfs.c +++ b/net/batman-adv/bat_sysfs.c @@ -380,6 +380,7 @@ static ssize_t store_gw_bwidth(struct kobject *kobj, struct attribute *attr, BAT_ATTR_BOOL(aggregated_ogms, S_IRUGO | S_IWUSR, NULL); BAT_ATTR_BOOL(bonding, S_IRUGO | S_IWUSR, NULL); BAT_ATTR_BOOL(fragmentation, S_IRUGO | S_IWUSR, update_min_mtu); +BAT_ATTR_BOOL(ap_isolation, S_IRUGO | S_IWUSR, NULL); static BAT_ATTR(vis_mode, S_IRUGO | S_IWUSR, show_vis_mode, store_vis_mode); static BAT_ATTR(gw_mode, S_IRUGO | S_IWUSR, show_gw_mode, store_gw_mode); BAT_ATTR_UINT(orig_interval, S_IRUGO | S_IWUSR, 2 * JITTER, INT_MAX, NULL); @@ -396,6 +397,7 @@ static struct bat_attribute *mesh_attrs[] = { &bat_attr_aggregated_ogms, &bat_attr_bonding, &bat_attr_fragmentation, + &bat_attr_ap_isolation, &bat_attr_vis_mode, &bat_attr_gw_mode, &bat_attr_orig_interval, diff --git a/net/batman-adv/bitarray.c b/net/batman-adv/bitarray.c index c1f4bfc09cc3..0be9ff346fa0 100644 --- a/net/batman-adv/bitarray.c +++ b/net/batman-adv/bitarray.c @@ -97,12 +97,12 @@ static void bit_shift(unsigned long *seq_bits, int32_t n) (seq_bits[i - word_num - 1] >> (WORD_BIT_SIZE-word_offset)); /* and the upper part of the right half and shift it left to - * it's position */ + * its position */ /* for our example that would be: word[0] = 9800 + 0076 = * 9876 */ } - /* now for our last word, i==word_num, we only have the it's "left" - * half. that's the 1000 word in our example.*/ + /* now for our last word, i==word_num, we only have its "left" half. + * that's the 1000 word in our example.*/ seq_bits[i] = (seq_bits[i - word_num] << word_offset); diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index 056180ef9e1a..619fb73b3b76 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -532,14 +532,14 @@ static bool is_type_dhcprequest(struct sk_buff *skb, int header_len) pkt_len -= header_len + DHCP_OPTIONS_OFFSET + 1; /* Access the dhcp option lists. Each entry is made up by: - * - octect 1: option type - * - octect 2: option data len (only if type != 255 and 0) - * - octect 3: option data */ + * - octet 1: option type + * - octet 2: option data len (only if type != 255 and 0) + * - octet 3: option data */ while (*p != 255 && !ret) { - /* p now points to the first octect: option type */ + /* p now points to the first octet: option type */ if (*p == 53) { /* type 53 is the message type option. - * Jump the len octect and go to the data octect */ + * Jump the len octet and go to the data octet */ if (pkt_len < 2) goto out; p += 2; diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index db7aacf1e095..bf91e4d8a47f 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -249,7 +249,7 @@ static void hardif_activate_interface(struct hard_iface *hard_iface) /** * the first active interface becomes our primary interface or - * the next active interface after the old primay interface was removed + * the next active interface after the old primary interface was removed */ primary_if = primary_if_get_selected(bat_priv); if (!primary_if) @@ -573,7 +573,7 @@ out: return NOTIFY_DONE; } -/* receive a packet with the batman ethertype coming on a hard +/* incoming packets with the batman ethertype received on any active hard * interface */ static int batman_skb_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *ptype, @@ -681,6 +681,36 @@ err_out: return NET_RX_DROP; } +/* This function returns true if the interface represented by ifindex is a + * 802.11 wireless device */ +bool is_wifi_iface(int ifindex) +{ + struct net_device *net_device = NULL; + bool ret = false; + + if (ifindex == NULL_IFINDEX) + goto out; + + net_device = dev_get_by_index(&init_net, ifindex); + if (!net_device) + goto out; + +#ifdef CONFIG_WIRELESS_EXT + /* pre-cfg80211 drivers have to implement WEXT, so it is possible to + * check for wireless_handlers != NULL */ + if (net_device->wireless_handlers) + ret = true; + else +#endif + /* cfg80211 drivers have to set ieee80211_ptr */ + if (net_device->ieee80211_ptr) + ret = true; +out: + if (net_device) + dev_put(net_device); + return ret; +} + struct notifier_block hard_if_notifier = { .notifier_call = hard_if_event, }; diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h index 442eacbc9e3a..67f78d1a63b4 100644 --- a/net/batman-adv/hard-interface.h +++ b/net/batman-adv/hard-interface.h @@ -42,6 +42,7 @@ void hardif_remove_interfaces(void); int hardif_min_mtu(struct net_device *soft_iface); void update_min_mtu(struct net_device *soft_iface); void hardif_free_rcu(struct rcu_head *rcu); +bool is_wifi_iface(int ifindex); static inline void hardif_free_ref(struct hard_iface *hard_iface) { diff --git a/net/batman-adv/hash.h b/net/batman-adv/hash.h index dd5c9fd7a905..d20aa71ba1e8 100644 --- a/net/batman-adv/hash.h +++ b/net/batman-adv/hash.h @@ -76,19 +76,30 @@ static inline void hash_delete(struct hashtable_t *hash, hash_destroy(hash); } -/* adds data to the hashtable. returns 0 on success, -1 on error */ +/** + * hash_add - adds data to the hashtable + * @hash: storage hash table + * @compare: callback to determine if 2 hash elements are identical + * @choose: callback calculating the hash index + * @data: data passed to the aforementioned callbacks as argument + * @data_node: to be added element + * + * Returns 0 on success, 1 if the element already is in the hash + * and -1 on error. + */ + static inline int hash_add(struct hashtable_t *hash, hashdata_compare_cb compare, hashdata_choose_cb choose, const void *data, struct hlist_node *data_node) { - int index; + int index, ret = -1; struct hlist_head *head; struct hlist_node *node; spinlock_t *list_lock; /* spinlock to protect write access */ if (!hash) - goto err; + goto out; index = choose(data, hash->size); head = &hash->table[index]; @@ -99,6 +110,7 @@ static inline int hash_add(struct hashtable_t *hash, if (!compare(node, data)) continue; + ret = 1; goto err_unlock; } rcu_read_unlock(); @@ -108,12 +120,13 @@ static inline int hash_add(struct hashtable_t *hash, hlist_add_head_rcu(data_node, head); spin_unlock_bh(list_lock); - return 0; + ret = 0; + goto out; err_unlock: rcu_read_unlock(); -err: - return -1; +out: + return ret; } /* removes data from hash, if found. returns pointer do data on success, so you diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index b0f9068ade57..79b9ae522ce9 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -107,7 +107,7 @@ int mesh_init(struct net_device *soft_iface) if (tt_init(bat_priv) < 1) goto err; - tt_local_add(soft_iface, soft_iface->dev_addr); + tt_local_add(soft_iface, soft_iface->dev_addr, NULL_IFINDEX); if (vis_init(bat_priv) < 1) goto err; diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index a6df61a6933b..60b369635b4d 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -44,7 +44,7 @@ #define PURGE_TIMEOUT 200 #define TT_LOCAL_TIMEOUT 3600 /* in seconds */ #define TT_CLIENT_ROAM_TIMEOUT 600 -/* sliding packet range of received originator messages in squence numbers +/* sliding packet range of received originator messages in sequence numbers * (should be a multiple of our word size) */ #define TQ_LOCAL_WINDOW_SIZE 64 #define TT_REQUEST_TIMEOUT 3 /* seconds we have to keep pending tt_req */ @@ -62,6 +62,8 @@ #define NO_FLAGS 0 +#define NULL_IFINDEX 0 /* dummy ifindex used to avoid iface checks */ + #define NUM_WORDS (TQ_LOCAL_WINDOW_SIZE / WORD_BIT_SIZE) #define LOG_BUF_LEN 8192 /* has to be a power of 2 */ @@ -133,7 +135,7 @@ enum dbg_level { #include <linux/mutex.h> /* mutex */ #include <linux/module.h> /* needed by all modules */ #include <linux/netdevice.h> /* netdevice */ -#include <linux/etherdevice.h> /* ethernet address classifaction */ +#include <linux/etherdevice.h> /* ethernet address classification */ #include <linux/if_ether.h> /* ethernet header */ #include <linux/poll.h> /* poll_table */ #include <linux/kthread.h> /* kernel threads */ diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index f3c3f620d195..d448018e514f 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -252,7 +252,7 @@ struct orig_node *get_orig_node(struct bat_priv *bat_priv, const uint8_t *addr) hash_added = hash_add(bat_priv->orig_hash, compare_orig, choose_orig, orig_node, &orig_node->hash_entry); - if (hash_added < 0) + if (hash_added != 0) goto free_bcast_own_sum; return orig_node; diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h index b76b4be10b92..8802eab2a46d 100644 --- a/net/batman-adv/packet.h +++ b/net/batman-adv/packet.h @@ -84,6 +84,7 @@ enum tt_query_flags { enum tt_client_flags { TT_CLIENT_DEL = 1 << 0, TT_CLIENT_ROAM = 1 << 1, + TT_CLIENT_WIFI = 1 << 2, TT_CLIENT_NOPURGE = 1 << 8, TT_CLIENT_NEW = 1 << 9, TT_CLIENT_PENDING = 1 << 10 diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 0f32c818874d..19499281b695 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -64,66 +64,6 @@ void slide_own_bcast_window(struct hard_iface *hard_iface) } } -static void update_transtable(struct bat_priv *bat_priv, - struct orig_node *orig_node, - const unsigned char *tt_buff, - uint8_t tt_num_changes, uint8_t ttvn, - uint16_t tt_crc) -{ - uint8_t orig_ttvn = (uint8_t)atomic_read(&orig_node->last_ttvn); - bool full_table = true; - - /* the ttvn increased by one -> we can apply the attached changes */ - if (ttvn - orig_ttvn == 1) { - /* the OGM could not contain the changes because they were too - * many to fit in one frame or because they have already been - * sent TT_OGM_APPEND_MAX times. In this case send a tt - * request */ - if (!tt_num_changes) { - full_table = false; - goto request_table; - } - - tt_update_changes(bat_priv, orig_node, tt_num_changes, ttvn, - (struct tt_change *)tt_buff); - - /* Even if we received the crc into the OGM, we prefer - * to recompute it to spot any possible inconsistency - * in the global table */ - orig_node->tt_crc = tt_global_crc(bat_priv, orig_node); - - /* The ttvn alone is not enough to guarantee consistency - * because a single value could repesent different states - * (due to the wrap around). Thus a node has to check whether - * the resulting table (after applying the changes) is still - * consistent or not. E.g. a node could disconnect while its - * ttvn is X and reconnect on ttvn = X + TTVN_MAX: in this case - * checking the CRC value is mandatory to detect the - * inconsistency */ - if (orig_node->tt_crc != tt_crc) - goto request_table; - - /* Roaming phase is over: tables are in sync again. I can - * unset the flag */ - orig_node->tt_poss_change = false; - } else { - /* if we missed more than one change or our tables are not - * in sync anymore -> request fresh tt data */ - if (ttvn != orig_ttvn || orig_node->tt_crc != tt_crc) { -request_table: - bat_dbg(DBG_TT, bat_priv, "TT inconsistency for %pM. " - "Need to retrieve the correct information " - "(ttvn: %u last_ttvn: %u crc: %u last_crc: " - "%u num_changes: %u)\n", orig_node->orig, ttvn, - orig_ttvn, tt_crc, orig_node->tt_crc, - tt_num_changes); - send_tt_request(bat_priv, orig_node, ttvn, tt_crc, - full_table); - return; - } - } -} - static void update_route(struct bat_priv *bat_priv, struct orig_node *orig_node, struct neigh_node *neigh_node) @@ -228,7 +168,7 @@ static int is_bidirectional_neigh(struct orig_node *orig_node, if (!neigh_node) goto out; - /* if orig_node is direct neighbour update neigh_node last_valid */ + /* if orig_node is direct neighbor update neigh_node last_valid */ if (orig_node == orig_neigh_node) neigh_node->last_valid = jiffies; @@ -473,7 +413,7 @@ static void update_orig(struct bat_priv *bat_priv, struct orig_node *orig_node, if (router && (router->tq_avg > neigh_node->tq_avg)) goto update_tt; - /* if the TQ is the same and the link not more symetric we + /* if the TQ is the same and the link not more symmetric we * won't consider it either */ if (router && (neigh_node->tq_avg == router->tq_avg)) { orig_node_tmp = router->orig_node; @@ -500,10 +440,9 @@ update_tt: if (((batman_packet->orig != ethhdr->h_source) && (batman_packet->ttl > 2)) || (batman_packet->flags & PRIMARIES_FIRST_HOP)) - update_transtable(bat_priv, orig_node, tt_buff, - batman_packet->tt_num_changes, - batman_packet->ttvn, - batman_packet->tt_crc); + tt_update_orig(bat_priv, orig_node, tt_buff, + batman_packet->tt_num_changes, + batman_packet->ttvn, batman_packet->tt_crc); if (orig_node->gw_flags != batman_packet->gw_flags) gw_node_update(bat_priv, orig_node, batman_packet->gw_flags); @@ -1243,7 +1182,7 @@ int recv_tt_query(struct sk_buff *skb, struct hard_iface *recv_if) } break; case TT_RESPONSE: - /* packet needs to be linearised to access the TT changes */ + /* packet needs to be linearized to access the TT changes */ if (skb_linearize(skb) < 0) goto out; @@ -1300,7 +1239,7 @@ int recv_roam_adv(struct sk_buff *skb, struct hard_iface *recv_if) roam_adv_packet->client); tt_global_add(bat_priv, orig_node, roam_adv_packet->client, - atomic_read(&orig_node->last_ttvn) + 1, true); + atomic_read(&orig_node->last_ttvn) + 1, true, false); /* Roaming phase starts: I have new information but the ttvn has not * been incremented yet. This flag will make me check all the incoming @@ -1536,7 +1475,7 @@ static int check_unicast_ttvn(struct bat_priv *bat_priv, ethhdr = (struct ethhdr *)(skb->data + sizeof(struct unicast_packet)); - orig_node = transtable_search(bat_priv, ethhdr->h_dest); + orig_node = transtable_search(bat_priv, NULL, ethhdr->h_dest); if (!orig_node) { if (!is_my_client(bat_priv, ethhdr->h_dest)) diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index 58d14472068c..57ae80936911 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -135,7 +135,7 @@ static void send_packet_to_if(struct forw_packet *forw_packet, "Forwarding")); bat_dbg(DBG_BATMAN, bat_priv, "%s %spacket (originator %pM, seqno %d, TQ %d, TTL %d," - " IDF %s, hvn %d) on interface %s [%pM]\n", + " IDF %s, ttvn %d) on interface %s [%pM]\n", fwd_str, (packet_num > 0 ? "aggregated " : ""), batman_packet->orig, ntohl(batman_packet->seqno), batman_packet->tq, batman_packet->ttl, @@ -313,7 +313,7 @@ void schedule_own_packet(struct hard_iface *hard_iface) prepare_packet_buffer(bat_priv, hard_iface); } - /* if the changes have been sent enough times */ + /* if the changes have been sent often enough */ if (!atomic_dec_not_zero(&bat_priv->tt_ogm_append_cnt)) reset_packet_buffer(bat_priv, hard_iface); } @@ -454,7 +454,7 @@ static void _add_bcast_packet_to_list(struct bat_priv *bat_priv, } /* add a broadcast packet to the queue and setup timers. broadcast packets - * are sent multiple times to increase probability for beeing received. + * are sent multiple times to increase probability for being received. * * This function returns NETDEV_TX_OK on success and NETDEV_TX_BUSY on * errors. @@ -612,7 +612,7 @@ void purge_outstanding_packets(struct bat_priv *bat_priv, &bat_priv->forw_bcast_list, list) { /** - * if purge_outstanding_packets() was called with an argmument + * if purge_outstanding_packets() was called with an argument * we delete only packets belonging to the given interface */ if ((hard_iface) && @@ -641,7 +641,7 @@ void purge_outstanding_packets(struct bat_priv *bat_priv, &bat_priv->forw_bat_list, list) { /** - * if purge_outstanding_packets() was called with an argmument + * if purge_outstanding_packets() was called with an argument * we delete only packets belonging to the given interface */ if ((hard_iface) && diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 3e2f91ffa4e2..402fd96239d8 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -532,11 +532,11 @@ static int interface_set_mac_addr(struct net_device *dev, void *p) if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; - /* only modify transtable if it has been initialised before */ + /* only modify transtable if it has been initialized before */ if (atomic_read(&bat_priv->mesh_state) == MESH_ACTIVE) { tt_local_remove(bat_priv, dev->dev_addr, "mac address changed", false); - tt_local_add(dev, addr->sa_data); + tt_local_add(dev, addr->sa_data, NULL_IFINDEX); } memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN); @@ -595,9 +595,10 @@ static int interface_tx(struct sk_buff *skb, struct net_device *soft_iface) goto dropped; /* Register the client MAC in the transtable */ - tt_local_add(soft_iface, ethhdr->h_source); + tt_local_add(soft_iface, ethhdr->h_source, skb->skb_iif); - orig_node = transtable_search(bat_priv, ethhdr->h_dest); + orig_node = transtable_search(bat_priv, ethhdr->h_source, + ethhdr->h_dest); if (is_multicast_ether_addr(ethhdr->h_dest) || (orig_node && orig_node->gw_flags)) { ret = gw_is_target(bat_priv, skb, orig_node); @@ -739,6 +740,9 @@ void interface_rx(struct net_device *soft_iface, soft_iface->last_rx = jiffies; + if (is_ap_isolated(bat_priv, ethhdr->h_source, ethhdr->h_dest)) + goto dropped; + netif_rx(skb); goto out; @@ -812,6 +816,7 @@ struct net_device *softif_create(const char *name) atomic_set(&bat_priv->aggregated_ogms, 1); atomic_set(&bat_priv->bonding, 0); + atomic_set(&bat_priv->ap_isolation, 0); atomic_set(&bat_priv->vis_mode, VIS_TYPE_CLIENT_UPDATE); atomic_set(&bat_priv->gw_mode, GW_MODE_OFF); atomic_set(&bat_priv->gw_sel_class, 20); diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index fb6931d00cd7..cc53f78e448c 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -183,7 +183,8 @@ static int tt_local_init(struct bat_priv *bat_priv) return 1; } -void tt_local_add(struct net_device *soft_iface, const uint8_t *addr) +void tt_local_add(struct net_device *soft_iface, const uint8_t *addr, + int ifindex) { struct bat_priv *bat_priv = netdev_priv(soft_iface); struct tt_local_entry *tt_local_entry = NULL; @@ -207,6 +208,8 @@ void tt_local_add(struct net_device *soft_iface, const uint8_t *addr) memcpy(tt_local_entry->addr, addr, ETH_ALEN); tt_local_entry->last_seen = jiffies; tt_local_entry->flags = NO_FLAGS; + if (is_wifi_iface(ifindex)) + tt_local_entry->flags |= TT_CLIENT_WIFI; atomic_set(&tt_local_entry->refcount, 2); /* the batman interface mac address should never be purged */ @@ -329,7 +332,7 @@ int tt_local_seq_print_text(struct seq_file *seq, void *offset) rcu_read_lock(); __hlist_for_each_rcu(node, head) - buf_size += 21; + buf_size += 29; rcu_read_unlock(); } @@ -348,8 +351,19 @@ int tt_local_seq_print_text(struct seq_file *seq, void *offset) rcu_read_lock(); hlist_for_each_entry_rcu(tt_local_entry, node, head, hash_entry) { - pos += snprintf(buff + pos, 22, " * %pM\n", - tt_local_entry->addr); + pos += snprintf(buff + pos, 30, " * %pM " + "[%c%c%c%c%c]\n", + tt_local_entry->addr, + (tt_local_entry->flags & + TT_CLIENT_ROAM ? 'R' : '.'), + (tt_local_entry->flags & + TT_CLIENT_NOPURGE ? 'P' : '.'), + (tt_local_entry->flags & + TT_CLIENT_NEW ? 'N' : '.'), + (tt_local_entry->flags & + TT_CLIENT_PENDING ? 'X' : '.'), + (tt_local_entry->flags & + TT_CLIENT_WIFI ? 'W' : '.')); } rcu_read_unlock(); } @@ -369,8 +383,8 @@ static void tt_local_set_pending(struct bat_priv *bat_priv, tt_local_event(bat_priv, tt_local_entry->addr, tt_local_entry->flags | flags); - /* The local client has to be merked as "pending to be removed" but has - * to be kept in the table in order to send it in an full tables + /* The local client has to be marked as "pending to be removed" but has + * to be kept in the table in order to send it in a full table * response issued before the net ttvn increment (consistency check) */ tt_local_entry->flags |= TT_CLIENT_PENDING; } @@ -495,7 +509,8 @@ static void tt_changes_list_free(struct bat_priv *bat_priv) /* caller must hold orig_node refcount */ int tt_global_add(struct bat_priv *bat_priv, struct orig_node *orig_node, - const unsigned char *tt_addr, uint8_t ttvn, bool roaming) + const unsigned char *tt_addr, uint8_t ttvn, bool roaming, + bool wifi) { struct tt_global_entry *tt_global_entry; struct orig_node *orig_node_tmp; @@ -537,6 +552,9 @@ int tt_global_add(struct bat_priv *bat_priv, struct orig_node *orig_node, tt_global_entry->roam_at = 0; } + if (wifi) + tt_global_entry->flags |= TT_CLIENT_WIFI; + bat_dbg(DBG_TT, bat_priv, "Creating new global tt entry: %pM (via %pM)\n", tt_global_entry->addr, orig_node->orig); @@ -582,8 +600,8 @@ int tt_global_seq_print_text(struct seq_file *seq, void *offset) seq_printf(seq, "Globally announced TT entries received via the mesh %s\n", net_dev->name); - seq_printf(seq, " %-13s %s %-15s %s\n", - "Client", "(TTVN)", "Originator", "(Curr TTVN)"); + seq_printf(seq, " %-13s %s %-15s %s %s\n", + "Client", "(TTVN)", "Originator", "(Curr TTVN)", "Flags"); buf_size = 1; /* Estimate length for: " * xx:xx:xx:xx:xx:xx (ttvn) via @@ -593,7 +611,7 @@ int tt_global_seq_print_text(struct seq_file *seq, void *offset) rcu_read_lock(); __hlist_for_each_rcu(node, head) - buf_size += 59; + buf_size += 67; rcu_read_unlock(); } @@ -612,14 +630,20 @@ int tt_global_seq_print_text(struct seq_file *seq, void *offset) rcu_read_lock(); hlist_for_each_entry_rcu(tt_global_entry, node, head, hash_entry) { - pos += snprintf(buff + pos, 61, - " * %pM (%3u) via %pM (%3u)\n", - tt_global_entry->addr, + pos += snprintf(buff + pos, 69, + " * %pM (%3u) via %pM (%3u) " + "[%c%c%c]\n", tt_global_entry->addr, tt_global_entry->ttvn, tt_global_entry->orig_node->orig, (uint8_t) atomic_read( &tt_global_entry->orig_node-> - last_ttvn)); + last_ttvn), + (tt_global_entry->flags & + TT_CLIENT_ROAM ? 'R' : '.'), + (tt_global_entry->flags & + TT_CLIENT_PENDING ? 'X' : '.'), + (tt_global_entry->flags & + TT_CLIENT_WIFI ? 'W' : '.')); } rcu_read_unlock(); } @@ -774,30 +798,56 @@ static void tt_global_table_free(struct bat_priv *bat_priv) bat_priv->tt_global_hash = NULL; } +static bool _is_ap_isolated(struct tt_local_entry *tt_local_entry, + struct tt_global_entry *tt_global_entry) +{ + bool ret = false; + + if (tt_local_entry->flags & TT_CLIENT_WIFI && + tt_global_entry->flags & TT_CLIENT_WIFI) + ret = true; + + return ret; +} + struct orig_node *transtable_search(struct bat_priv *bat_priv, - const uint8_t *addr) + const uint8_t *src, const uint8_t *addr) { - struct tt_global_entry *tt_global_entry; + struct tt_local_entry *tt_local_entry = NULL; + struct tt_global_entry *tt_global_entry = NULL; struct orig_node *orig_node = NULL; - tt_global_entry = tt_global_hash_find(bat_priv, addr); + if (src && atomic_read(&bat_priv->ap_isolation)) { + tt_local_entry = tt_local_hash_find(bat_priv, src); + if (!tt_local_entry) + goto out; + } + tt_global_entry = tt_global_hash_find(bat_priv, addr); if (!tt_global_entry) goto out; + /* check whether the clients should not communicate due to AP + * isolation */ + if (tt_local_entry && _is_ap_isolated(tt_local_entry, tt_global_entry)) + goto out; + if (!atomic_inc_not_zero(&tt_global_entry->orig_node->refcount)) - goto free_tt; + goto out; /* A global client marked as PENDING has already moved from that * originator */ if (tt_global_entry->flags & TT_CLIENT_PENDING) - goto free_tt; + goto out; orig_node = tt_global_entry->orig_node; -free_tt: - tt_global_entry_free_ref(tt_global_entry); out: + if (tt_global_entry) + tt_global_entry_free_ref(tt_global_entry); + if (tt_local_entry) + tt_local_entry_free_ref(tt_local_entry); + return orig_node; } @@ -1029,8 +1079,9 @@ out: return skb; } -int send_tt_request(struct bat_priv *bat_priv, struct orig_node *dst_orig_node, - uint8_t ttvn, uint16_t tt_crc, bool full_table) +static int send_tt_request(struct bat_priv *bat_priv, + struct orig_node *dst_orig_node, + uint8_t ttvn, uint16_t tt_crc, bool full_table) { struct sk_buff *skb = NULL; struct tt_query_packet *tt_request; @@ -1137,12 +1188,12 @@ static bool send_other_tt_response(struct bat_priv *bat_priv, orig_ttvn = (uint8_t)atomic_read(&req_dst_orig_node->last_ttvn); req_ttvn = tt_request->ttvn; - /* I have not the requested data */ + /* I don't have the requested data */ if (orig_ttvn != req_ttvn || tt_request->tt_data != req_dst_orig_node->tt_crc) goto out; - /* If it has explicitly been requested the full table */ + /* If the full table has been explicitly requested */ if (tt_request->flags & TT_FULL_TABLE || !req_dst_orig_node->tt_buff) full_table = true; @@ -1363,7 +1414,9 @@ static void _tt_update_changes(struct bat_priv *bat_priv, (tt_change + i)->flags & TT_CLIENT_ROAM); else if (!tt_global_add(bat_priv, orig_node, - (tt_change + i)->addr, ttvn, false)) + (tt_change + i)->addr, ttvn, false, + (tt_change + i)->flags & + TT_CLIENT_WIFI)) /* In case of problem while storing a * global_entry, we stop the updating * procedure without committing the @@ -1403,9 +1456,10 @@ out: orig_node_free_ref(orig_node); } -void tt_update_changes(struct bat_priv *bat_priv, struct orig_node *orig_node, - uint16_t tt_num_changes, uint8_t ttvn, - struct tt_change *tt_change) +static void tt_update_changes(struct bat_priv *bat_priv, + struct orig_node *orig_node, + uint16_t tt_num_changes, uint8_t ttvn, + struct tt_change *tt_change) { _tt_update_changes(bat_priv, orig_node, tt_change, tt_num_changes, ttvn); @@ -1720,3 +1774,90 @@ void tt_commit_changes(struct bat_priv *bat_priv) atomic_inc(&bat_priv->ttvn); bat_priv->tt_poss_change = false; } + +bool is_ap_isolated(struct bat_priv *bat_priv, uint8_t *src, uint8_t *dst) +{ + struct tt_local_entry *tt_local_entry = NULL; + struct tt_global_entry *tt_global_entry = NULL; + bool ret = true; + + if (!atomic_read(&bat_priv->ap_isolation)) + return false; + + tt_local_entry = tt_local_hash_find(bat_priv, dst); + if (!tt_local_entry) + goto out; + + tt_global_entry = tt_global_hash_find(bat_priv, src); + if (!tt_global_entry) + goto out; + + if (_is_ap_isolated(tt_local_entry, tt_global_entry)) + goto out; + + ret = false; + +out: + if (tt_global_entry) + tt_global_entry_free_ref(tt_global_entry); + if (tt_local_entry) + tt_local_entry_free_ref(tt_local_entry); + return ret; +} + +void tt_update_orig(struct bat_priv *bat_priv, struct orig_node *orig_node, + const unsigned char *tt_buff, uint8_t tt_num_changes, + uint8_t ttvn, uint16_t tt_crc) +{ + uint8_t orig_ttvn = (uint8_t)atomic_read(&orig_node->last_ttvn); + bool full_table = true; + + /* the ttvn increased by one -> we can apply the attached changes */ + if (ttvn - orig_ttvn == 1) { + /* the OGM could not contain the changes due to their size or + * because they have already been sent TT_OGM_APPEND_MAX times. + * In this case send a tt request */ + if (!tt_num_changes) { + full_table = false; + goto request_table; + } + + tt_update_changes(bat_priv, orig_node, tt_num_changes, ttvn, + (struct tt_change *)tt_buff); + + /* Even if we received the precomputed crc with the OGM, we + * prefer to recompute it to spot any possible inconsistency + * in the global table */ + orig_node->tt_crc = tt_global_crc(bat_priv, orig_node); + + /* The ttvn alone is not enough to guarantee consistency + * because a single value could represent different states + * (due to the wrap around). Thus a node has to check whether + * the resulting table (after applying the changes) is still + * consistent or not. E.g. a node could disconnect while its + * ttvn is X and reconnect on ttvn = X + TTVN_MAX: in this case + * checking the CRC value is mandatory to detect the + * inconsistency */ + if (orig_node->tt_crc != tt_crc) + goto request_table; + + /* Roaming phase is over: tables are in sync again. I can + * unset the flag */ + orig_node->tt_poss_change = false; + } else { + /* if we missed more than one change or our tables are not + * in sync anymore -> request fresh tt data */ + if (ttvn != orig_ttvn || orig_node->tt_crc != tt_crc) { +request_table: + bat_dbg(DBG_TT, bat_priv, "TT inconsistency for %pM. " + "Need to retrieve the correct information " + "(ttvn: %u last_ttvn: %u crc: %u last_crc: " + "%u num_changes: %u)\n", orig_node->orig, ttvn, + orig_ttvn, tt_crc, orig_node->tt_crc, + tt_num_changes); + send_tt_request(bat_priv, orig_node, ttvn, tt_crc, + full_table); + return; + } + } +} diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h index d4122cba53b8..30efd49881a3 100644 --- a/net/batman-adv/translation-table.h +++ b/net/batman-adv/translation-table.h @@ -26,15 +26,16 @@ int tt_len(int changes_num); int tt_changes_fill_buffer(struct bat_priv *bat_priv, unsigned char *buff, int buff_len); int tt_init(struct bat_priv *bat_priv); -void tt_local_add(struct net_device *soft_iface, const uint8_t *addr); +void tt_local_add(struct net_device *soft_iface, const uint8_t *addr, + int ifindex); void tt_local_remove(struct bat_priv *bat_priv, const uint8_t *addr, const char *message, bool roaming); int tt_local_seq_print_text(struct seq_file *seq, void *offset); void tt_global_add_orig(struct bat_priv *bat_priv, struct orig_node *orig_node, const unsigned char *tt_buff, int tt_buff_len); -int tt_global_add(struct bat_priv *bat_priv, - struct orig_node *orig_node, const unsigned char *addr, - uint8_t ttvn, bool roaming); +int tt_global_add(struct bat_priv *bat_priv, struct orig_node *orig_node, + const unsigned char *addr, uint8_t ttvn, bool roaming, + bool wifi); int tt_global_seq_print_text(struct seq_file *seq, void *offset); void tt_global_del_orig(struct bat_priv *bat_priv, struct orig_node *orig_node, const char *message); @@ -42,25 +43,23 @@ void tt_global_del(struct bat_priv *bat_priv, struct orig_node *orig_node, const unsigned char *addr, const char *message, bool roaming); struct orig_node *transtable_search(struct bat_priv *bat_priv, - const uint8_t *addr); + const uint8_t *src, const uint8_t *addr); void tt_save_orig_buffer(struct bat_priv *bat_priv, struct orig_node *orig_node, const unsigned char *tt_buff, uint8_t tt_num_changes); uint16_t tt_local_crc(struct bat_priv *bat_priv); uint16_t tt_global_crc(struct bat_priv *bat_priv, struct orig_node *orig_node); void tt_free(struct bat_priv *bat_priv); -int send_tt_request(struct bat_priv *bat_priv, - struct orig_node *dst_orig_node, uint8_t hvn, - uint16_t tt_crc, bool full_table); bool send_tt_response(struct bat_priv *bat_priv, struct tt_query_packet *tt_request); -void tt_update_changes(struct bat_priv *bat_priv, struct orig_node *orig_node, - uint16_t tt_num_changes, uint8_t ttvn, - struct tt_change *tt_change); bool is_my_client(struct bat_priv *bat_priv, const uint8_t *addr); void handle_tt_response(struct bat_priv *bat_priv, struct tt_query_packet *tt_response); void send_roam_adv(struct bat_priv *bat_priv, uint8_t *client, struct orig_node *orig_node); void tt_commit_changes(struct bat_priv *bat_priv); +bool is_ap_isolated(struct bat_priv *bat_priv, uint8_t *src, uint8_t *dst); +void tt_update_orig(struct bat_priv *bat_priv, struct orig_node *orig_node, + const unsigned char *tt_buff, uint8_t tt_num_changes, + uint8_t ttvn, uint16_t tt_crc); #endif /* _NET_BATMAN_ADV_TRANSLATION_TABLE_H_ */ diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 25bd1db35370..1ae355750511 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -57,7 +57,7 @@ struct hard_iface { * @batman_seqno_reset: time when the batman seqno window was reset * @gw_flags: flags related to gateway class * @flags: for now only VIS_SERVER flag - * @last_real_seqno: last and best known squence number + * @last_real_seqno: last and best known sequence number * @last_ttl: ttl of last received packet * @last_bcast_seqno: last broadcast sequence number received by this host * @@ -146,6 +146,7 @@ struct bat_priv { atomic_t aggregated_ogms; /* boolean */ atomic_t bonding; /* boolean */ atomic_t fragmentation; /* boolean */ + atomic_t ap_isolation; /* boolean */ atomic_t vis_mode; /* VIS_TYPE_* */ atomic_t gw_mode; /* GW_MODE_* */ atomic_t gw_sel_class; /* uint */ @@ -156,7 +157,7 @@ struct bat_priv { atomic_t bcast_seqno; atomic_t bcast_queue_left; atomic_t batman_queue_left; - atomic_t ttvn; /* tranlation table version number */ + atomic_t ttvn; /* translation table version number */ atomic_t tt_ogm_append_cnt; atomic_t tt_local_changes; /* changes registered in a OGM interval */ /* The tt_poss_change flag is used to detect an ongoing roaming phase. diff --git a/net/batman-adv/unicast.c b/net/batman-adv/unicast.c index 32b125fb3d3b..07d1c1da89dd 100644 --- a/net/batman-adv/unicast.c +++ b/net/batman-adv/unicast.c @@ -299,8 +299,10 @@ int unicast_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv) goto find_router; } - /* check for tt host - increases orig_node refcount */ - orig_node = transtable_search(bat_priv, ethhdr->h_dest); + /* check for tt host - increases orig_node refcount. + * returns NULL in case of AP isolation */ + orig_node = transtable_search(bat_priv, ethhdr->h_source, + ethhdr->h_dest); find_router: /** diff --git a/net/batman-adv/unicast.h b/net/batman-adv/unicast.h index 62f54b954625..8fd5535544b9 100644 --- a/net/batman-adv/unicast.h +++ b/net/batman-adv/unicast.h @@ -24,7 +24,7 @@ #include "packet.h" -#define FRAG_TIMEOUT 10000 /* purge frag list entrys after time in ms */ +#define FRAG_TIMEOUT 10000 /* purge frag list entries after time in ms */ #define FRAG_BUFFER_SIZE 6 /* number of list elements in buffer */ int frag_reassemble_skb(struct sk_buff *skb, struct bat_priv *bat_priv, diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c index 8a1b98589d76..fb9b19fc638d 100644 --- a/net/batman-adv/vis.c +++ b/net/batman-adv/vis.c @@ -131,7 +131,7 @@ static void vis_data_insert_interface(const uint8_t *interface, return; } - /* its a new address, add it to the list */ + /* it's a new address, add it to the list */ entry = kmalloc(sizeof(*entry), GFP_ATOMIC); if (!entry) return; @@ -465,7 +465,7 @@ static struct vis_info *add_packet(struct bat_priv *bat_priv, /* try to add it */ hash_added = hash_add(bat_priv->vis_hash, vis_info_cmp, vis_info_choose, info, &info->hash_entry); - if (hash_added < 0) { + if (hash_added != 0) { /* did not work (for some reason) */ kref_put(&info->refcount, free_info); info = NULL; @@ -920,7 +920,7 @@ int vis_init(struct bat_priv *bat_priv) hash_added = hash_add(bat_priv->vis_hash, vis_info_cmp, vis_info_choose, bat_priv->my_vis_info, &bat_priv->my_vis_info->hash_entry); - if (hash_added < 0) { + if (hash_added != 0) { pr_err("Can't add own vis packet into hash\n"); /* not in hash, need to remove it manually. */ kref_put(&bat_priv->my_vis_info->refcount, free_info); diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c index d4f5dff7c955..bc4086480d97 100644 --- a/net/bluetooth/bnep/netdev.c +++ b/net/bluetooth/bnep/netdev.c @@ -217,7 +217,7 @@ static const struct net_device_ops bnep_netdev_ops = { .ndo_stop = bnep_net_close, .ndo_start_xmit = bnep_net_xmit, .ndo_validate_addr = eth_validate_addr, - .ndo_set_multicast_list = bnep_net_set_mc_list, + .ndo_set_rx_mode = bnep_net_set_mc_list, .ndo_set_mac_address = bnep_net_set_mac_addr, .ndo_tx_timeout = bnep_net_timeout, .ndo_change_mtu = eth_change_mtu, diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 32b8f9f7f79e..ee68eee79e52 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -304,7 +304,7 @@ static const struct net_device_ops br_netdev_ops = { .ndo_start_xmit = br_dev_xmit, .ndo_get_stats64 = br_get_stats64, .ndo_set_mac_address = br_set_mac_address, - .ndo_set_multicast_list = br_dev_set_multicast_list, + .ndo_set_rx_mode = br_dev_set_multicast_list, .ndo_change_mtu = br_change_mtu, .ndo_do_ioctl = br_dev_ioctl, #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 3176e2e13d9b..043a5eb8cafc 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -33,20 +33,18 @@ */ static int port_cost(struct net_device *dev) { - if (dev->ethtool_ops && dev->ethtool_ops->get_settings) { - struct ethtool_cmd ecmd = { .cmd = ETHTOOL_GSET, }; - - if (!dev_ethtool_get_settings(dev, &ecmd)) { - switch (ethtool_cmd_speed(&ecmd)) { - case SPEED_10000: - return 2; - case SPEED_1000: - return 4; - case SPEED_100: - return 19; - case SPEED_10: - return 100; - } + struct ethtool_cmd ecmd; + + if (!__ethtool_get_settings(dev, &ecmd)) { + switch (ethtool_cmd_speed(&ecmd)) { + case SPEED_10000: + return 2; + case SPEED_1000: + return 4; + case SPEED_100: + return 19; + case SPEED_10: + return 100; } } @@ -417,6 +415,7 @@ put_back: int br_del_if(struct net_bridge *br, struct net_device *dev) { struct net_bridge_port *p; + bool changed_addr; p = br_port_get_rtnl(dev); if (!p || p->br != br) @@ -425,9 +424,12 @@ int br_del_if(struct net_bridge *br, struct net_device *dev) del_nbp(p); spin_lock_bh(&br->lock); - br_stp_recalculate_bridge_id(br); + changed_addr = br_stp_recalculate_bridge_id(br); spin_unlock_bh(&br->lock); + if (changed_addr) + call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev); + netdev_update_features(br->dev); return 0; diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c index 6545ee9591d1..a76b62135558 100644 --- a/net/bridge/br_notify.c +++ b/net/bridge/br_notify.c @@ -34,6 +34,7 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v struct net_device *dev = ptr; struct net_bridge_port *p; struct net_bridge *br; + bool changed_addr; int err; /* register of bridge completed, add sysfs entries */ @@ -57,8 +58,12 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v case NETDEV_CHANGEADDR: spin_lock_bh(&br->lock); br_fdb_changeaddr(p, dev->dev_addr); - br_stp_recalculate_bridge_id(br); + changed_addr = br_stp_recalculate_bridge_id(br); spin_unlock_bh(&br->lock); + + if (changed_addr) + call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev); + break; case NETDEV_CHANGE: diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c index 1bcaf36ad612..40d8258bf74f 100644 --- a/net/bridge/netfilter/ebtable_broute.c +++ b/net/bridge/netfilter/ebtable_broute.c @@ -87,14 +87,14 @@ static int __init ebtable_broute_init(void) if (ret < 0) return ret; /* see br_input.c */ - rcu_assign_pointer(br_should_route_hook, + RCU_INIT_POINTER(br_should_route_hook, (br_should_route_hook_t *)ebt_broute); return 0; } static void __exit ebtable_broute_fini(void) { - rcu_assign_pointer(br_should_route_hook, NULL); + RCU_INIT_POINTER(br_should_route_hook, NULL); synchronize_net(); unregister_pernet_subsys(&broute_net_ops); } diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 2b5ca1a0054d..5864cc491369 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1198,7 +1198,8 @@ ebt_register_table(struct net *net, const struct ebt_table *input_table) if (table->check && table->check(newinfo, table->valid_hooks)) { BUGPRINT("The table doesn't like its own initial data, lol\n"); - return ERR_PTR(-EINVAL); + ret = -EINVAL; + goto free_chainstack; } table->private = newinfo; diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c index 52fe33bee029..f07ab8c22224 100644 --- a/net/caif/cfcnfg.c +++ b/net/caif/cfcnfg.c @@ -78,10 +78,8 @@ struct cfcnfg *cfcnfg_create(void) /* Initiate this layer */ this = kzalloc(sizeof(struct cfcnfg), GFP_ATOMIC); - if (!this) { - pr_warn("Out of memory\n"); + if (!this) return NULL; - } this->mux = cfmuxl_create(); if (!this->mux) goto out_of_mem; @@ -108,8 +106,6 @@ struct cfcnfg *cfcnfg_create(void) return this; out_of_mem: - pr_warn("Out of memory\n"); - synchronize_rcu(); kfree(this->mux); @@ -448,10 +444,8 @@ cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv, "- unknown channel type\n"); goto unlock; } - if (!servicel) { - pr_warn("Out of memory\n"); + if (!servicel) goto unlock; - } layer_set_dn(servicel, cnfg->mux); cfmuxl_set_uplayer(cnfg->mux, servicel, channel_id); layer_set_up(servicel, adapt_layer); @@ -497,10 +491,8 @@ got_phyid: case CFPHYTYPE_FRAG: phy_driver = cfserl_create(CFPHYTYPE_FRAG, phyid, stx); - if (!phy_driver) { - pr_warn("Out of memory\n"); + if (!phy_driver) goto out; - } break; case CFPHYTYPE_CAIF: phy_driver = NULL; @@ -521,7 +513,6 @@ got_phyid: frml = cffrml_create(phyid, fcs); if (!frml) { - pr_warn("Out of memory\n"); kfree(phyinfo); goto out; } diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c index e22671bed669..5cf52225692e 100644 --- a/net/caif/cfctrl.c +++ b/net/caif/cfctrl.c @@ -35,15 +35,12 @@ struct cflayer *cfctrl_create(void) { struct dev_info dev_info; struct cfctrl *this = - kmalloc(sizeof(struct cfctrl), GFP_ATOMIC); - if (!this) { - pr_warn("Out of memory\n"); + kzalloc(sizeof(struct cfctrl), GFP_ATOMIC); + if (!this) return NULL; - } caif_assert(offsetof(struct cfctrl, serv.layer) == 0); memset(&dev_info, 0, sizeof(dev_info)); dev_info.id = 0xff; - memset(this, 0, sizeof(*this)); cfsrvl_init(&this->serv, 0, &dev_info, false); atomic_set(&this->req_seq_no, 1); atomic_set(&this->rsp_seq_no, 1); @@ -180,10 +177,8 @@ void cfctrl_enum_req(struct cflayer *layer, u8 physlinkid) struct cfctrl *cfctrl = container_obj(layer); struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN); struct cflayer *dn = cfctrl->serv.layer.dn; - if (!pkt) { - pr_warn("Out of memory\n"); + if (!pkt) return; - } if (!dn) { pr_debug("not able to send enum request\n"); return; @@ -224,10 +219,8 @@ int cfctrl_linkup_request(struct cflayer *layer, } pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN); - if (!pkt) { - pr_warn("Out of memory\n"); + if (!pkt) return -ENOMEM; - } cfpkt_addbdy(pkt, CFCTRL_CMD_LINK_SETUP); cfpkt_addbdy(pkt, (param->chtype << 4) | param->linktype); cfpkt_addbdy(pkt, (param->priority << 3) | param->phyid); @@ -275,10 +268,8 @@ int cfctrl_linkup_request(struct cflayer *layer, return -EINVAL; } req = kzalloc(sizeof(*req), GFP_KERNEL); - if (!req) { - pr_warn("Out of memory\n"); + if (!req) return -ENOMEM; - } req->client_layer = user_layer; req->cmd = CFCTRL_CMD_LINK_SETUP; req->param = *param; @@ -312,10 +303,8 @@ int cfctrl_linkdown_req(struct cflayer *layer, u8 channelid, struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN); struct cflayer *dn = cfctrl->serv.layer.dn; - if (!pkt) { - pr_warn("Out of memory\n"); + if (!pkt) return -ENOMEM; - } if (!dn) { pr_debug("not able to send link-down request\n"); diff --git a/net/caif/cfdbgl.c b/net/caif/cfdbgl.c index 11a2af4c162a..65d6ef3cf9aa 100644 --- a/net/caif/cfdbgl.c +++ b/net/caif/cfdbgl.c @@ -19,13 +19,10 @@ static int cfdbgl_transmit(struct cflayer *layr, struct cfpkt *pkt); struct cflayer *cfdbgl_create(u8 channel_id, struct dev_info *dev_info) { - struct cfsrvl *dbg = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC); - if (!dbg) { - pr_warn("Out of memory\n"); + struct cfsrvl *dbg = kzalloc(sizeof(struct cfsrvl), GFP_ATOMIC); + if (!dbg) return NULL; - } caif_assert(offsetof(struct cfsrvl, layer) == 0); - memset(dbg, 0, sizeof(struct cfsrvl)); cfsrvl_init(dbg, channel_id, dev_info, false); dbg->layer.receive = cfdbgl_receive; dbg->layer.transmit = cfdbgl_transmit; diff --git a/net/caif/cfdgml.c b/net/caif/cfdgml.c index 0382dec84fdc..0f5ff27aa41c 100644 --- a/net/caif/cfdgml.c +++ b/net/caif/cfdgml.c @@ -26,13 +26,10 @@ static int cfdgml_transmit(struct cflayer *layr, struct cfpkt *pkt); struct cflayer *cfdgml_create(u8 channel_id, struct dev_info *dev_info) { - struct cfsrvl *dgm = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC); - if (!dgm) { - pr_warn("Out of memory\n"); + struct cfsrvl *dgm = kzalloc(sizeof(struct cfsrvl), GFP_ATOMIC); + if (!dgm) return NULL; - } caif_assert(offsetof(struct cfsrvl, layer) == 0); - memset(dgm, 0, sizeof(struct cfsrvl)); cfsrvl_init(dgm, channel_id, dev_info, true); dgm->layer.receive = cfdgml_receive; dgm->layer.transmit = cfdgml_transmit; diff --git a/net/caif/cffrml.c b/net/caif/cffrml.c index 04204b202718..f39921171d0d 100644 --- a/net/caif/cffrml.c +++ b/net/caif/cffrml.c @@ -34,11 +34,9 @@ static u32 cffrml_rcv_error; static u32 cffrml_rcv_checsum_error; struct cflayer *cffrml_create(u16 phyid, bool use_fcs) { - struct cffrml *this = kmalloc(sizeof(struct cffrml), GFP_ATOMIC); - if (!this) { - pr_warn("Out of memory\n"); + struct cffrml *this = kzalloc(sizeof(struct cffrml), GFP_ATOMIC); + if (!this) return NULL; - } this->pcpu_refcnt = alloc_percpu(int); if (this->pcpu_refcnt == NULL) { kfree(this); @@ -47,7 +45,6 @@ struct cflayer *cffrml_create(u16 phyid, bool use_fcs) caif_assert(offsetof(struct cffrml, layer) == 0); - memset(this, 0, sizeof(struct cflayer)); this->layer.receive = cffrml_receive; this->layer.transmit = cffrml_transmit; this->layer.ctrlcmd = cffrml_ctrlcmd; diff --git a/net/caif/cfmuxl.c b/net/caif/cfmuxl.c index c23979e79dfa..b36f24a4c8e7 100644 --- a/net/caif/cfmuxl.c +++ b/net/caif/cfmuxl.c @@ -108,7 +108,7 @@ struct cflayer *cfmuxl_remove_dnlayer(struct cflayer *layr, u8 phyid) int idx = phyid % DN_CACHE_SIZE; spin_lock_bh(&muxl->transmit_lock); - rcu_assign_pointer(muxl->dn_cache[idx], NULL); + RCU_INIT_POINTER(muxl->dn_cache[idx], NULL); dn = get_from_id(&muxl->frml_list, phyid); if (dn == NULL) goto out; @@ -164,7 +164,7 @@ struct cflayer *cfmuxl_remove_uplayer(struct cflayer *layr, u8 id) if (up == NULL) goto out; - rcu_assign_pointer(muxl->up_cache[idx], NULL); + RCU_INIT_POINTER(muxl->up_cache[idx], NULL); list_del_rcu(&up->node); out: spin_unlock_bh(&muxl->receive_lock); @@ -261,7 +261,7 @@ static void cfmuxl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, idx = layer->id % UP_CACHE_SIZE; spin_lock_bh(&muxl->receive_lock); - rcu_assign_pointer(muxl->up_cache[idx], NULL); + RCU_INIT_POINTER(muxl->up_cache[idx], NULL); list_del_rcu(&layer->node); spin_unlock_bh(&muxl->receive_lock); } diff --git a/net/caif/cfrfml.c b/net/caif/cfrfml.c index 0deabb440051..81660f809713 100644 --- a/net/caif/cfrfml.c +++ b/net/caif/cfrfml.c @@ -46,13 +46,10 @@ struct cflayer *cfrfml_create(u8 channel_id, struct dev_info *dev_info, int mtu_size) { int tmp; - struct cfrfml *this = - kzalloc(sizeof(struct cfrfml), GFP_ATOMIC); + struct cfrfml *this = kzalloc(sizeof(struct cfrfml), GFP_ATOMIC); - if (!this) { - pr_warn("Out of memory\n"); + if (!this) return NULL; - } cfsrvl_init(&this->serv, channel_id, dev_info, false); this->serv.release = cfrfml_release; diff --git a/net/caif/cfserl.c b/net/caif/cfserl.c index 2715c84cfa87..797c8d165993 100644 --- a/net/caif/cfserl.c +++ b/net/caif/cfserl.c @@ -33,13 +33,10 @@ static void cfserl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, struct cflayer *cfserl_create(int type, int instance, bool use_stx) { - struct cfserl *this = kmalloc(sizeof(struct cfserl), GFP_ATOMIC); - if (!this) { - pr_warn("Out of memory\n"); + struct cfserl *this = kzalloc(sizeof(struct cfserl), GFP_ATOMIC); + if (!this) return NULL; - } caif_assert(offsetof(struct cfserl, layer) == 0); - memset(this, 0, sizeof(struct cfserl)); this->layer.receive = cfserl_receive; this->layer.transmit = cfserl_transmit; this->layer.ctrlcmd = cfserl_ctrlcmd; diff --git a/net/caif/cfsrvl.c b/net/caif/cfsrvl.c index 535a1e72b366..b99f5b22689d 100644 --- a/net/caif/cfsrvl.c +++ b/net/caif/cfsrvl.c @@ -108,10 +108,8 @@ static int cfservl_modemcmd(struct cflayer *layr, enum caif_modemcmd ctrl) struct caif_payload_info *info; u8 flow_on = SRVL_FLOW_ON; pkt = cfpkt_create(SRVL_CTRL_PKT_SIZE); - if (!pkt) { - pr_warn("Out of memory\n"); + if (!pkt) return -ENOMEM; - } if (cfpkt_add_head(pkt, &flow_on, 1) < 0) { pr_err("Packet is erroneous!\n"); @@ -130,10 +128,8 @@ static int cfservl_modemcmd(struct cflayer *layr, enum caif_modemcmd ctrl) struct caif_payload_info *info; u8 flow_off = SRVL_FLOW_OFF; pkt = cfpkt_create(SRVL_CTRL_PKT_SIZE); - if (!pkt) { - pr_warn("Out of memory\n"); + if (!pkt) return -ENOMEM; - } if (cfpkt_add_head(pkt, &flow_off, 1) < 0) { pr_err("Packet is erroneous!\n"); diff --git a/net/caif/cfutill.c b/net/caif/cfutill.c index 98e027db18ed..53e49f3e3af3 100644 --- a/net/caif/cfutill.c +++ b/net/caif/cfutill.c @@ -26,13 +26,10 @@ static int cfutill_transmit(struct cflayer *layr, struct cfpkt *pkt); struct cflayer *cfutill_create(u8 channel_id, struct dev_info *dev_info) { - struct cfsrvl *util = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC); - if (!util) { - pr_warn("Out of memory\n"); + struct cfsrvl *util = kzalloc(sizeof(struct cfsrvl), GFP_ATOMIC); + if (!util) return NULL; - } caif_assert(offsetof(struct cfsrvl, layer) == 0); - memset(util, 0, sizeof(struct cfsrvl)); cfsrvl_init(util, channel_id, dev_info, true); util->layer.receive = cfutill_receive; util->layer.transmit = cfutill_transmit; diff --git a/net/caif/cfveil.c b/net/caif/cfveil.c index 3ec83fbc2887..910ab0661f66 100644 --- a/net/caif/cfveil.c +++ b/net/caif/cfveil.c @@ -25,13 +25,10 @@ static int cfvei_transmit(struct cflayer *layr, struct cfpkt *pkt); struct cflayer *cfvei_create(u8 channel_id, struct dev_info *dev_info) { - struct cfsrvl *vei = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC); - if (!vei) { - pr_warn("Out of memory\n"); + struct cfsrvl *vei = kzalloc(sizeof(struct cfsrvl), GFP_ATOMIC); + if (!vei) return NULL; - } caif_assert(offsetof(struct cfsrvl, layer) == 0); - memset(vei, 0, sizeof(struct cfsrvl)); cfsrvl_init(vei, channel_id, dev_info, true); vei->layer.receive = cfvei_receive; vei->layer.transmit = cfvei_transmit; diff --git a/net/caif/cfvidl.c b/net/caif/cfvidl.c index b2f5989ad455..e3f37db40ac3 100644 --- a/net/caif/cfvidl.c +++ b/net/caif/cfvidl.c @@ -21,14 +21,11 @@ static int cfvidl_transmit(struct cflayer *layr, struct cfpkt *pkt); struct cflayer *cfvidl_create(u8 channel_id, struct dev_info *dev_info) { - struct cfsrvl *vid = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC); - if (!vid) { - pr_warn("Out of memory\n"); + struct cfsrvl *vid = kzalloc(sizeof(struct cfsrvl), GFP_ATOMIC); + if (!vid) return NULL; - } caif_assert(offsetof(struct cfsrvl, layer) == 0); - memset(vid, 0, sizeof(struct cfsrvl)); cfsrvl_init(vid, channel_id, dev_info, false); vid->layer.receive = cfvidl_receive; vid->layer.transmit = cfvidl_transmit; diff --git a/net/can/Kconfig b/net/can/Kconfig index 89395b2c8bca..03200699d274 100644 --- a/net/can/Kconfig +++ b/net/can/Kconfig @@ -40,5 +40,16 @@ config CAN_BCM CAN messages are used on the bus (e.g. in automotive environments). To use the Broadcast Manager, use AF_CAN with protocol CAN_BCM. +config CAN_GW + tristate "CAN Gateway/Router (with netlink configuration)" + depends on CAN + default N + ---help--- + The CAN Gateway/Router is used to route (and modify) CAN frames. + It is based on the PF_CAN core infrastructure for msg filtering and + msg sending and can optionally modify routed CAN frames on the fly. + CAN frames can be routed between CAN network interfaces (one hop). + They can be modified with AND/OR/XOR/SET operations as configured + by the netlink configuration interface known e.g. from iptables. source "drivers/net/can/Kconfig" diff --git a/net/can/Makefile b/net/can/Makefile index 2d3894b32742..cef49eb1f5c7 100644 --- a/net/can/Makefile +++ b/net/can/Makefile @@ -10,3 +10,6 @@ can-raw-y := raw.o obj-$(CONFIG_CAN_BCM) += can-bcm.o can-bcm-y := bcm.o + +obj-$(CONFIG_CAN_GW) += can-gw.o +can-gw-y := gw.o diff --git a/net/can/af_can.c b/net/can/af_can.c index 8ce926d3b2cb..b9efa944cab9 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -719,7 +719,7 @@ int can_proto_register(const struct can_proto *cp) proto); err = -EBUSY; } else - rcu_assign_pointer(proto_tab[proto], cp); + RCU_INIT_POINTER(proto_tab[proto], cp); mutex_unlock(&proto_tab_lock); @@ -740,7 +740,7 @@ void can_proto_unregister(const struct can_proto *cp) mutex_lock(&proto_tab_lock); BUG_ON(proto_tab[proto] != cp); - rcu_assign_pointer(proto_tab[proto], NULL); + RCU_INIT_POINTER(proto_tab[proto], NULL); mutex_unlock(&proto_tab_lock); synchronize_rcu(); diff --git a/net/can/gw.c b/net/can/gw.c new file mode 100644 index 000000000000..ac11407d3b54 --- /dev/null +++ b/net/can/gw.c @@ -0,0 +1,959 @@ +/* + * gw.c - CAN frame Gateway/Router/Bridge with netlink interface + * + * Copyright (c) 2011 Volkswagen Group Electronic Research + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Volkswagen nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * Alternatively, provided that this notice is retained in full, this + * software may be distributed under the terms of the GNU General + * Public License ("GPL") version 2, in which case the provisions of the + * GPL apply INSTEAD OF those given above. + * + * The provided data structures and external interfaces from this code + * are not restricted to be used by modules with a GPL compatible license. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * Send feedback to <socketcan-users@lists.berlios.de> + * + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/types.h> +#include <linux/list.h> +#include <linux/spinlock.h> +#include <linux/rcupdate.h> +#include <linux/rculist.h> +#include <linux/net.h> +#include <linux/netdevice.h> +#include <linux/if_arp.h> +#include <linux/skbuff.h> +#include <linux/can.h> +#include <linux/can/core.h> +#include <linux/can/gw.h> +#include <net/rtnetlink.h> +#include <net/net_namespace.h> +#include <net/sock.h> + +#define CAN_GW_VERSION "20101209" +static __initdata const char banner[] = + KERN_INFO "can: netlink gateway (rev " CAN_GW_VERSION ")\n"; + +MODULE_DESCRIPTION("PF_CAN netlink gateway"); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_AUTHOR("Oliver Hartkopp <oliver.hartkopp@volkswagen.de>"); +MODULE_ALIAS("can-gw"); + +HLIST_HEAD(cgw_list); +static struct notifier_block notifier; + +static struct kmem_cache *cgw_cache __read_mostly; + +/* structure that contains the (on-the-fly) CAN frame modifications */ +struct cf_mod { + struct { + struct can_frame and; + struct can_frame or; + struct can_frame xor; + struct can_frame set; + } modframe; + struct { + u8 and; + u8 or; + u8 xor; + u8 set; + } modtype; + void (*modfunc[MAX_MODFUNCTIONS])(struct can_frame *cf, + struct cf_mod *mod); + + /* CAN frame checksum calculation after CAN frame modifications */ + struct { + struct cgw_csum_xor xor; + struct cgw_csum_crc8 crc8; + } csum; + struct { + void (*xor)(struct can_frame *cf, struct cgw_csum_xor *xor); + void (*crc8)(struct can_frame *cf, struct cgw_csum_crc8 *crc8); + } csumfunc; +}; + + +/* + * So far we just support CAN -> CAN routing and frame modifications. + * + * The internal can_can_gw structure contains data and attributes for + * a CAN -> CAN gateway job. + */ +struct can_can_gw { + struct can_filter filter; + int src_idx; + int dst_idx; +}; + +/* list entry for CAN gateways jobs */ +struct cgw_job { + struct hlist_node list; + struct rcu_head rcu; + u32 handled_frames; + u32 dropped_frames; + struct cf_mod mod; + union { + /* CAN frame data source */ + struct net_device *dev; + } src; + union { + /* CAN frame data destination */ + struct net_device *dev; + } dst; + union { + struct can_can_gw ccgw; + /* tbc */ + }; + u8 gwtype; + u16 flags; +}; + +/* modification functions that are invoked in the hot path in can_can_gw_rcv */ + +#define MODFUNC(func, op) static void func(struct can_frame *cf, \ + struct cf_mod *mod) { op ; } + +MODFUNC(mod_and_id, cf->can_id &= mod->modframe.and.can_id) +MODFUNC(mod_and_dlc, cf->can_dlc &= mod->modframe.and.can_dlc) +MODFUNC(mod_and_data, *(u64 *)cf->data &= *(u64 *)mod->modframe.and.data) +MODFUNC(mod_or_id, cf->can_id |= mod->modframe.or.can_id) +MODFUNC(mod_or_dlc, cf->can_dlc |= mod->modframe.or.can_dlc) +MODFUNC(mod_or_data, *(u64 *)cf->data |= *(u64 *)mod->modframe.or.data) +MODFUNC(mod_xor_id, cf->can_id ^= mod->modframe.xor.can_id) +MODFUNC(mod_xor_dlc, cf->can_dlc ^= mod->modframe.xor.can_dlc) +MODFUNC(mod_xor_data, *(u64 *)cf->data ^= *(u64 *)mod->modframe.xor.data) +MODFUNC(mod_set_id, cf->can_id = mod->modframe.set.can_id) +MODFUNC(mod_set_dlc, cf->can_dlc = mod->modframe.set.can_dlc) +MODFUNC(mod_set_data, *(u64 *)cf->data = *(u64 *)mod->modframe.set.data) + +static inline void canframecpy(struct can_frame *dst, struct can_frame *src) +{ + /* + * Copy the struct members separately to ensure that no uninitialized + * data are copied in the 3 bytes hole of the struct. This is needed + * to make easy compares of the data in the struct cf_mod. + */ + + dst->can_id = src->can_id; + dst->can_dlc = src->can_dlc; + *(u64 *)dst->data = *(u64 *)src->data; +} + +static int cgw_chk_csum_parms(s8 fr, s8 to, s8 re) +{ + /* + * absolute dlc values 0 .. 7 => 0 .. 7, e.g. data [0] + * relative to received dlc -1 .. -8 : + * e.g. for received dlc = 8 + * -1 => index = 7 (data[7]) + * -3 => index = 5 (data[5]) + * -8 => index = 0 (data[0]) + */ + + if (fr > -9 && fr < 8 && + to > -9 && to < 8 && + re > -9 && re < 8) + return 0; + else + return -EINVAL; +} + +static inline int calc_idx(int idx, int rx_dlc) +{ + if (idx < 0) + return rx_dlc + idx; + else + return idx; +} + +static void cgw_csum_xor_rel(struct can_frame *cf, struct cgw_csum_xor *xor) +{ + int from = calc_idx(xor->from_idx, cf->can_dlc); + int to = calc_idx(xor->to_idx, cf->can_dlc); + int res = calc_idx(xor->result_idx, cf->can_dlc); + u8 val = xor->init_xor_val; + int i; + + if (from < 0 || to < 0 || res < 0) + return; + + if (from <= to) { + for (i = from; i <= to; i++) + val ^= cf->data[i]; + } else { + for (i = from; i >= to; i--) + val ^= cf->data[i]; + } + + cf->data[res] = val; +} + +static void cgw_csum_xor_pos(struct can_frame *cf, struct cgw_csum_xor *xor) +{ + u8 val = xor->init_xor_val; + int i; + + for (i = xor->from_idx; i <= xor->to_idx; i++) + val ^= cf->data[i]; + + cf->data[xor->result_idx] = val; +} + +static void cgw_csum_xor_neg(struct can_frame *cf, struct cgw_csum_xor *xor) +{ + u8 val = xor->init_xor_val; + int i; + + for (i = xor->from_idx; i >= xor->to_idx; i--) + val ^= cf->data[i]; + + cf->data[xor->result_idx] = val; +} + +static void cgw_csum_crc8_rel(struct can_frame *cf, struct cgw_csum_crc8 *crc8) +{ + int from = calc_idx(crc8->from_idx, cf->can_dlc); + int to = calc_idx(crc8->to_idx, cf->can_dlc); + int res = calc_idx(crc8->result_idx, cf->can_dlc); + u8 crc = crc8->init_crc_val; + int i; + + if (from < 0 || to < 0 || res < 0) + return; + + if (from <= to) { + for (i = crc8->from_idx; i <= crc8->to_idx; i++) + crc = crc8->crctab[crc^cf->data[i]]; + } else { + for (i = crc8->from_idx; i >= crc8->to_idx; i--) + crc = crc8->crctab[crc^cf->data[i]]; + } + + switch (crc8->profile) { + + case CGW_CRC8PRF_1U8: + crc = crc8->crctab[crc^crc8->profile_data[0]]; + break; + + case CGW_CRC8PRF_16U8: + crc = crc8->crctab[crc^crc8->profile_data[cf->data[1] & 0xF]]; + break; + + case CGW_CRC8PRF_SFFID_XOR: + crc = crc8->crctab[crc^(cf->can_id & 0xFF)^ + (cf->can_id >> 8 & 0xFF)]; + break; + + } + + cf->data[crc8->result_idx] = crc^crc8->final_xor_val; +} + +static void cgw_csum_crc8_pos(struct can_frame *cf, struct cgw_csum_crc8 *crc8) +{ + u8 crc = crc8->init_crc_val; + int i; + + for (i = crc8->from_idx; i <= crc8->to_idx; i++) + crc = crc8->crctab[crc^cf->data[i]]; + + switch (crc8->profile) { + + case CGW_CRC8PRF_1U8: + crc = crc8->crctab[crc^crc8->profile_data[0]]; + break; + + case CGW_CRC8PRF_16U8: + crc = crc8->crctab[crc^crc8->profile_data[cf->data[1] & 0xF]]; + break; + + case CGW_CRC8PRF_SFFID_XOR: + crc = crc8->crctab[crc^(cf->can_id & 0xFF)^ + (cf->can_id >> 8 & 0xFF)]; + break; + } + + cf->data[crc8->result_idx] = crc^crc8->final_xor_val; +} + +static void cgw_csum_crc8_neg(struct can_frame *cf, struct cgw_csum_crc8 *crc8) +{ + u8 crc = crc8->init_crc_val; + int i; + + for (i = crc8->from_idx; i >= crc8->to_idx; i--) + crc = crc8->crctab[crc^cf->data[i]]; + + switch (crc8->profile) { + + case CGW_CRC8PRF_1U8: + crc = crc8->crctab[crc^crc8->profile_data[0]]; + break; + + case CGW_CRC8PRF_16U8: + crc = crc8->crctab[crc^crc8->profile_data[cf->data[1] & 0xF]]; + break; + + case CGW_CRC8PRF_SFFID_XOR: + crc = crc8->crctab[crc^(cf->can_id & 0xFF)^ + (cf->can_id >> 8 & 0xFF)]; + break; + } + + cf->data[crc8->result_idx] = crc^crc8->final_xor_val; +} + +/* the receive & process & send function */ +static void can_can_gw_rcv(struct sk_buff *skb, void *data) +{ + struct cgw_job *gwj = (struct cgw_job *)data; + struct can_frame *cf; + struct sk_buff *nskb; + int modidx = 0; + + /* do not handle already routed frames - see comment below */ + if (skb_mac_header_was_set(skb)) + return; + + if (!(gwj->dst.dev->flags & IFF_UP)) { + gwj->dropped_frames++; + return; + } + + /* + * clone the given skb, which has not been done in can_rcv() + * + * When there is at least one modification function activated, + * we need to copy the skb as we want to modify skb->data. + */ + if (gwj->mod.modfunc[0]) + nskb = skb_copy(skb, GFP_ATOMIC); + else + nskb = skb_clone(skb, GFP_ATOMIC); + + if (!nskb) { + gwj->dropped_frames++; + return; + } + + /* + * Mark routed frames by setting some mac header length which is + * not relevant for the CAN frames located in the skb->data section. + * + * As dev->header_ops is not set in CAN netdevices no one is ever + * accessing the various header offsets in the CAN skbuffs anyway. + * E.g. using the packet socket to read CAN frames is still working. + */ + skb_set_mac_header(nskb, 8); + nskb->dev = gwj->dst.dev; + + /* pointer to modifiable CAN frame */ + cf = (struct can_frame *)nskb->data; + + /* perform preprocessed modification functions if there are any */ + while (modidx < MAX_MODFUNCTIONS && gwj->mod.modfunc[modidx]) + (*gwj->mod.modfunc[modidx++])(cf, &gwj->mod); + + /* check for checksum updates when the CAN frame has been modified */ + if (modidx) { + if (gwj->mod.csumfunc.crc8) + (*gwj->mod.csumfunc.crc8)(cf, &gwj->mod.csum.crc8); + + if (gwj->mod.csumfunc.xor) + (*gwj->mod.csumfunc.xor)(cf, &gwj->mod.csum.xor); + } + + /* clear the skb timestamp if not configured the other way */ + if (!(gwj->flags & CGW_FLAGS_CAN_SRC_TSTAMP)) + nskb->tstamp.tv64 = 0; + + /* send to netdevice */ + if (can_send(nskb, gwj->flags & CGW_FLAGS_CAN_ECHO)) + gwj->dropped_frames++; + else + gwj->handled_frames++; +} + +static inline int cgw_register_filter(struct cgw_job *gwj) +{ + return can_rx_register(gwj->src.dev, gwj->ccgw.filter.can_id, + gwj->ccgw.filter.can_mask, can_can_gw_rcv, + gwj, "gw"); +} + +static inline void cgw_unregister_filter(struct cgw_job *gwj) +{ + can_rx_unregister(gwj->src.dev, gwj->ccgw.filter.can_id, + gwj->ccgw.filter.can_mask, can_can_gw_rcv, gwj); +} + +static int cgw_notifier(struct notifier_block *nb, + unsigned long msg, void *data) +{ + struct net_device *dev = (struct net_device *)data; + + if (!net_eq(dev_net(dev), &init_net)) + return NOTIFY_DONE; + if (dev->type != ARPHRD_CAN) + return NOTIFY_DONE; + + if (msg == NETDEV_UNREGISTER) { + + struct cgw_job *gwj = NULL; + struct hlist_node *n, *nx; + + ASSERT_RTNL(); + + hlist_for_each_entry_safe(gwj, n, nx, &cgw_list, list) { + + if (gwj->src.dev == dev || gwj->dst.dev == dev) { + hlist_del(&gwj->list); + cgw_unregister_filter(gwj); + kfree(gwj); + } + } + } + + return NOTIFY_DONE; +} + +static int cgw_put_job(struct sk_buff *skb, struct cgw_job *gwj) +{ + struct cgw_frame_mod mb; + struct rtcanmsg *rtcan; + struct nlmsghdr *nlh = nlmsg_put(skb, 0, 0, 0, sizeof(*rtcan), 0); + if (!nlh) + return -EMSGSIZE; + + rtcan = nlmsg_data(nlh); + rtcan->can_family = AF_CAN; + rtcan->gwtype = gwj->gwtype; + rtcan->flags = gwj->flags; + + /* add statistics if available */ + + if (gwj->handled_frames) { + if (nla_put_u32(skb, CGW_HANDLED, gwj->handled_frames) < 0) + goto cancel; + else + nlh->nlmsg_len += NLA_HDRLEN + NLA_ALIGN(sizeof(u32)); + } + + if (gwj->dropped_frames) { + if (nla_put_u32(skb, CGW_DROPPED, gwj->dropped_frames) < 0) + goto cancel; + else + nlh->nlmsg_len += NLA_HDRLEN + NLA_ALIGN(sizeof(u32)); + } + + /* check non default settings of attributes */ + + if (gwj->mod.modtype.and) { + memcpy(&mb.cf, &gwj->mod.modframe.and, sizeof(mb.cf)); + mb.modtype = gwj->mod.modtype.and; + if (nla_put(skb, CGW_MOD_AND, sizeof(mb), &mb) < 0) + goto cancel; + else + nlh->nlmsg_len += NLA_HDRLEN + NLA_ALIGN(sizeof(mb)); + } + + if (gwj->mod.modtype.or) { + memcpy(&mb.cf, &gwj->mod.modframe.or, sizeof(mb.cf)); + mb.modtype = gwj->mod.modtype.or; + if (nla_put(skb, CGW_MOD_OR, sizeof(mb), &mb) < 0) + goto cancel; + else + nlh->nlmsg_len += NLA_HDRLEN + NLA_ALIGN(sizeof(mb)); + } + + if (gwj->mod.modtype.xor) { + memcpy(&mb.cf, &gwj->mod.modframe.xor, sizeof(mb.cf)); + mb.modtype = gwj->mod.modtype.xor; + if (nla_put(skb, CGW_MOD_XOR, sizeof(mb), &mb) < 0) + goto cancel; + else + nlh->nlmsg_len += NLA_HDRLEN + NLA_ALIGN(sizeof(mb)); + } + + if (gwj->mod.modtype.set) { + memcpy(&mb.cf, &gwj->mod.modframe.set, sizeof(mb.cf)); + mb.modtype = gwj->mod.modtype.set; + if (nla_put(skb, CGW_MOD_SET, sizeof(mb), &mb) < 0) + goto cancel; + else + nlh->nlmsg_len += NLA_HDRLEN + NLA_ALIGN(sizeof(mb)); + } + + if (gwj->mod.csumfunc.crc8) { + if (nla_put(skb, CGW_CS_CRC8, CGW_CS_CRC8_LEN, + &gwj->mod.csum.crc8) < 0) + goto cancel; + else + nlh->nlmsg_len += NLA_HDRLEN + \ + NLA_ALIGN(CGW_CS_CRC8_LEN); + } + + if (gwj->mod.csumfunc.xor) { + if (nla_put(skb, CGW_CS_XOR, CGW_CS_XOR_LEN, + &gwj->mod.csum.xor) < 0) + goto cancel; + else + nlh->nlmsg_len += NLA_HDRLEN + \ + NLA_ALIGN(CGW_CS_XOR_LEN); + } + + if (gwj->gwtype == CGW_TYPE_CAN_CAN) { + + if (gwj->ccgw.filter.can_id || gwj->ccgw.filter.can_mask) { + if (nla_put(skb, CGW_FILTER, sizeof(struct can_filter), + &gwj->ccgw.filter) < 0) + goto cancel; + else + nlh->nlmsg_len += NLA_HDRLEN + + NLA_ALIGN(sizeof(struct can_filter)); + } + + if (nla_put_u32(skb, CGW_SRC_IF, gwj->ccgw.src_idx) < 0) + goto cancel; + else + nlh->nlmsg_len += NLA_HDRLEN + NLA_ALIGN(sizeof(u32)); + + if (nla_put_u32(skb, CGW_DST_IF, gwj->ccgw.dst_idx) < 0) + goto cancel; + else + nlh->nlmsg_len += NLA_HDRLEN + NLA_ALIGN(sizeof(u32)); + } + + return skb->len; + +cancel: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} + +/* Dump information about all CAN gateway jobs, in response to RTM_GETROUTE */ +static int cgw_dump_jobs(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct cgw_job *gwj = NULL; + struct hlist_node *n; + int idx = 0; + int s_idx = cb->args[0]; + + rcu_read_lock(); + hlist_for_each_entry_rcu(gwj, n, &cgw_list, list) { + if (idx < s_idx) + goto cont; + + if (cgw_put_job(skb, gwj) < 0) + break; +cont: + idx++; + } + rcu_read_unlock(); + + cb->args[0] = idx; + + return skb->len; +} + +/* check for common and gwtype specific attributes */ +static int cgw_parse_attr(struct nlmsghdr *nlh, struct cf_mod *mod, + u8 gwtype, void *gwtypeattr) +{ + struct nlattr *tb[CGW_MAX+1]; + struct cgw_frame_mod mb; + int modidx = 0; + int err = 0; + + /* initialize modification & checksum data space */ + memset(mod, 0, sizeof(*mod)); + + err = nlmsg_parse(nlh, sizeof(struct rtcanmsg), tb, CGW_MAX, NULL); + if (err < 0) + return err; + + /* check for AND/OR/XOR/SET modifications */ + + if (tb[CGW_MOD_AND] && + nla_len(tb[CGW_MOD_AND]) == CGW_MODATTR_LEN) { + nla_memcpy(&mb, tb[CGW_MOD_AND], CGW_MODATTR_LEN); + + canframecpy(&mod->modframe.and, &mb.cf); + mod->modtype.and = mb.modtype; + + if (mb.modtype & CGW_MOD_ID) + mod->modfunc[modidx++] = mod_and_id; + + if (mb.modtype & CGW_MOD_DLC) + mod->modfunc[modidx++] = mod_and_dlc; + + if (mb.modtype & CGW_MOD_DATA) + mod->modfunc[modidx++] = mod_and_data; + } + + if (tb[CGW_MOD_OR] && + nla_len(tb[CGW_MOD_OR]) == CGW_MODATTR_LEN) { + nla_memcpy(&mb, tb[CGW_MOD_OR], CGW_MODATTR_LEN); + + canframecpy(&mod->modframe.or, &mb.cf); + mod->modtype.or = mb.modtype; + + if (mb.modtype & CGW_MOD_ID) + mod->modfunc[modidx++] = mod_or_id; + + if (mb.modtype & CGW_MOD_DLC) + mod->modfunc[modidx++] = mod_or_dlc; + + if (mb.modtype & CGW_MOD_DATA) + mod->modfunc[modidx++] = mod_or_data; + } + + if (tb[CGW_MOD_XOR] && + nla_len(tb[CGW_MOD_XOR]) == CGW_MODATTR_LEN) { + nla_memcpy(&mb, tb[CGW_MOD_XOR], CGW_MODATTR_LEN); + + canframecpy(&mod->modframe.xor, &mb.cf); + mod->modtype.xor = mb.modtype; + + if (mb.modtype & CGW_MOD_ID) + mod->modfunc[modidx++] = mod_xor_id; + + if (mb.modtype & CGW_MOD_DLC) + mod->modfunc[modidx++] = mod_xor_dlc; + + if (mb.modtype & CGW_MOD_DATA) + mod->modfunc[modidx++] = mod_xor_data; + } + + if (tb[CGW_MOD_SET] && + nla_len(tb[CGW_MOD_SET]) == CGW_MODATTR_LEN) { + nla_memcpy(&mb, tb[CGW_MOD_SET], CGW_MODATTR_LEN); + + canframecpy(&mod->modframe.set, &mb.cf); + mod->modtype.set = mb.modtype; + + if (mb.modtype & CGW_MOD_ID) + mod->modfunc[modidx++] = mod_set_id; + + if (mb.modtype & CGW_MOD_DLC) + mod->modfunc[modidx++] = mod_set_dlc; + + if (mb.modtype & CGW_MOD_DATA) + mod->modfunc[modidx++] = mod_set_data; + } + + /* check for checksum operations after CAN frame modifications */ + if (modidx) { + + if (tb[CGW_CS_CRC8] && + nla_len(tb[CGW_CS_CRC8]) == CGW_CS_CRC8_LEN) { + + struct cgw_csum_crc8 *c = (struct cgw_csum_crc8 *)\ + nla_data(tb[CGW_CS_CRC8]); + + err = cgw_chk_csum_parms(c->from_idx, c->to_idx, + c->result_idx); + if (err) + return err; + + nla_memcpy(&mod->csum.crc8, tb[CGW_CS_CRC8], + CGW_CS_CRC8_LEN); + + /* + * select dedicated processing function to reduce + * runtime operations in receive hot path. + */ + if (c->from_idx < 0 || c->to_idx < 0 || + c->result_idx < 0) + mod->csumfunc.crc8 = cgw_csum_crc8_rel; + else if (c->from_idx <= c->to_idx) + mod->csumfunc.crc8 = cgw_csum_crc8_pos; + else + mod->csumfunc.crc8 = cgw_csum_crc8_neg; + } + + if (tb[CGW_CS_XOR] && + nla_len(tb[CGW_CS_XOR]) == CGW_CS_XOR_LEN) { + + struct cgw_csum_xor *c = (struct cgw_csum_xor *)\ + nla_data(tb[CGW_CS_XOR]); + + err = cgw_chk_csum_parms(c->from_idx, c->to_idx, + c->result_idx); + if (err) + return err; + + nla_memcpy(&mod->csum.xor, tb[CGW_CS_XOR], + CGW_CS_XOR_LEN); + + /* + * select dedicated processing function to reduce + * runtime operations in receive hot path. + */ + if (c->from_idx < 0 || c->to_idx < 0 || + c->result_idx < 0) + mod->csumfunc.xor = cgw_csum_xor_rel; + else if (c->from_idx <= c->to_idx) + mod->csumfunc.xor = cgw_csum_xor_pos; + else + mod->csumfunc.xor = cgw_csum_xor_neg; + } + } + + if (gwtype == CGW_TYPE_CAN_CAN) { + + /* check CGW_TYPE_CAN_CAN specific attributes */ + + struct can_can_gw *ccgw = (struct can_can_gw *)gwtypeattr; + memset(ccgw, 0, sizeof(*ccgw)); + + /* check for can_filter in attributes */ + if (tb[CGW_FILTER] && + nla_len(tb[CGW_FILTER]) == sizeof(struct can_filter)) + nla_memcpy(&ccgw->filter, tb[CGW_FILTER], + sizeof(struct can_filter)); + + err = -ENODEV; + + /* specifying two interfaces is mandatory */ + if (!tb[CGW_SRC_IF] || !tb[CGW_DST_IF]) + return err; + + if (nla_len(tb[CGW_SRC_IF]) == sizeof(u32)) + nla_memcpy(&ccgw->src_idx, tb[CGW_SRC_IF], + sizeof(u32)); + + if (nla_len(tb[CGW_DST_IF]) == sizeof(u32)) + nla_memcpy(&ccgw->dst_idx, tb[CGW_DST_IF], + sizeof(u32)); + + /* both indices set to 0 for flushing all routing entries */ + if (!ccgw->src_idx && !ccgw->dst_idx) + return 0; + + /* only one index set to 0 is an error */ + if (!ccgw->src_idx || !ccgw->dst_idx) + return err; + } + + /* add the checks for other gwtypes here */ + + return 0; +} + +static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh, + void *arg) +{ + struct rtcanmsg *r; + struct cgw_job *gwj; + int err = 0; + + if (nlmsg_len(nlh) < sizeof(*r)) + return -EINVAL; + + r = nlmsg_data(nlh); + if (r->can_family != AF_CAN) + return -EPFNOSUPPORT; + + /* so far we only support CAN -> CAN routings */ + if (r->gwtype != CGW_TYPE_CAN_CAN) + return -EINVAL; + + gwj = kmem_cache_alloc(cgw_cache, GFP_KERNEL); + if (!gwj) + return -ENOMEM; + + gwj->handled_frames = 0; + gwj->dropped_frames = 0; + gwj->flags = r->flags; + gwj->gwtype = r->gwtype; + + err = cgw_parse_attr(nlh, &gwj->mod, CGW_TYPE_CAN_CAN, &gwj->ccgw); + if (err < 0) + goto out; + + err = -ENODEV; + + /* ifindex == 0 is not allowed for job creation */ + if (!gwj->ccgw.src_idx || !gwj->ccgw.dst_idx) + goto out; + + gwj->src.dev = dev_get_by_index(&init_net, gwj->ccgw.src_idx); + + if (!gwj->src.dev) + goto out; + + /* check for CAN netdev not using header_ops - see gw_rcv() */ + if (gwj->src.dev->type != ARPHRD_CAN || gwj->src.dev->header_ops) + goto put_src_out; + + gwj->dst.dev = dev_get_by_index(&init_net, gwj->ccgw.dst_idx); + + if (!gwj->dst.dev) + goto put_src_out; + + /* check for CAN netdev not using header_ops - see gw_rcv() */ + if (gwj->dst.dev->type != ARPHRD_CAN || gwj->dst.dev->header_ops) + goto put_src_dst_out; + + ASSERT_RTNL(); + + err = cgw_register_filter(gwj); + if (!err) + hlist_add_head_rcu(&gwj->list, &cgw_list); + +put_src_dst_out: + dev_put(gwj->dst.dev); +put_src_out: + dev_put(gwj->src.dev); +out: + if (err) + kmem_cache_free(cgw_cache, gwj); + + return err; +} + +static void cgw_remove_all_jobs(void) +{ + struct cgw_job *gwj = NULL; + struct hlist_node *n, *nx; + + ASSERT_RTNL(); + + hlist_for_each_entry_safe(gwj, n, nx, &cgw_list, list) { + hlist_del(&gwj->list); + cgw_unregister_filter(gwj); + kfree(gwj); + } +} + +static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +{ + struct cgw_job *gwj = NULL; + struct hlist_node *n, *nx; + struct rtcanmsg *r; + struct cf_mod mod; + struct can_can_gw ccgw; + int err = 0; + + if (nlmsg_len(nlh) < sizeof(*r)) + return -EINVAL; + + r = nlmsg_data(nlh); + if (r->can_family != AF_CAN) + return -EPFNOSUPPORT; + + /* so far we only support CAN -> CAN routings */ + if (r->gwtype != CGW_TYPE_CAN_CAN) + return -EINVAL; + + err = cgw_parse_attr(nlh, &mod, CGW_TYPE_CAN_CAN, &ccgw); + if (err < 0) + return err; + + /* two interface indices both set to 0 => remove all entries */ + if (!ccgw.src_idx && !ccgw.dst_idx) { + cgw_remove_all_jobs(); + return 0; + } + + err = -EINVAL; + + ASSERT_RTNL(); + + /* remove only the first matching entry */ + hlist_for_each_entry_safe(gwj, n, nx, &cgw_list, list) { + + if (gwj->flags != r->flags) + continue; + + if (memcmp(&gwj->mod, &mod, sizeof(mod))) + continue; + + /* if (r->gwtype == CGW_TYPE_CAN_CAN) - is made sure here */ + if (memcmp(&gwj->ccgw, &ccgw, sizeof(ccgw))) + continue; + + hlist_del(&gwj->list); + cgw_unregister_filter(gwj); + kfree(gwj); + err = 0; + break; + } + + return err; +} + +static __init int cgw_module_init(void) +{ + printk(banner); + + cgw_cache = kmem_cache_create("can_gw", sizeof(struct cgw_job), + 0, 0, NULL); + + if (!cgw_cache) + return -ENOMEM; + + /* set notifier */ + notifier.notifier_call = cgw_notifier; + register_netdevice_notifier(¬ifier); + + if (__rtnl_register(PF_CAN, RTM_GETROUTE, NULL, cgw_dump_jobs, NULL)) { + unregister_netdevice_notifier(¬ifier); + kmem_cache_destroy(cgw_cache); + return -ENOBUFS; + } + + /* Only the first call to __rtnl_register can fail */ + __rtnl_register(PF_CAN, RTM_NEWROUTE, cgw_create_job, NULL, NULL); + __rtnl_register(PF_CAN, RTM_DELROUTE, cgw_remove_job, NULL, NULL); + + return 0; +} + +static __exit void cgw_module_exit(void) +{ + rtnl_unregister_all(PF_CAN); + + unregister_netdevice_notifier(¬ifier); + + rtnl_lock(); + cgw_remove_all_jobs(); + rtnl_unlock(); + + rcu_barrier(); /* Wait for completion of call_rcu()'s */ + + kmem_cache_destroy(cgw_cache); +} + +module_init(cgw_module_init); +module_exit(cgw_module_exit); diff --git a/net/core/Makefile b/net/core/Makefile index 8a04dd22cf77..0d357b1c4e57 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -3,7 +3,7 @@ # obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \ - gen_stats.o gen_estimator.o net_namespace.o + gen_stats.o gen_estimator.o net_namespace.o secure_seq.o obj-$(CONFIG_SYSCTL) += sysctl_net_core.o diff --git a/net/core/datagram.c b/net/core/datagram.c index 18ac112ea7ae..6449bed457d4 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -332,7 +332,7 @@ int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset, int err; u8 *vaddr; skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - struct page *page = frag->page; + struct page *page = skb_frag_page(frag); if (copy > len) copy = len; @@ -418,7 +418,7 @@ int skb_copy_datagram_const_iovec(const struct sk_buff *skb, int offset, int err; u8 *vaddr; skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - struct page *page = frag->page; + struct page *page = skb_frag_page(frag); if (copy > len) copy = len; @@ -508,7 +508,7 @@ int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset, int err; u8 *vaddr; skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - struct page *page = frag->page; + struct page *page = skb_frag_page(frag); if (copy > len) copy = len; @@ -594,7 +594,7 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, int err = 0; u8 *vaddr; skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - struct page *page = frag->page; + struct page *page = skb_frag_page(frag); if (copy > len) copy = len; diff --git a/net/core/dev.c b/net/core/dev.c index 17d67b579beb..4b9981caf06f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -133,6 +133,8 @@ #include <linux/pci.h> #include <linux/inetdevice.h> #include <linux/cpu_rmap.h> +#include <linux/if_tunnel.h> +#include <linux/if_pppox.h> #include "net-sysfs.h" @@ -1947,9 +1949,11 @@ static int illegal_highdma(struct net_device *dev, struct sk_buff *skb) #ifdef CONFIG_HIGHMEM int i; if (!(dev->features & NETIF_F_HIGHDMA)) { - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) - if (PageHighMem(skb_shinfo(skb)->frags[i].page)) + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + if (PageHighMem(skb_frag_page(frag))) return 1; + } } if (PCI_DMA_BUS_IS_PHYS) { @@ -1958,7 +1962,8 @@ static int illegal_highdma(struct net_device *dev, struct sk_buff *skb) if (!pdev) return 0; for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - dma_addr_t addr = page_to_phys(skb_shinfo(skb)->frags[i].page); + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + dma_addr_t addr = page_to_phys(skb_frag_page(frag)); if (!pdev->dma_mask || addr + PAGE_SIZE - 1 > *pdev->dma_mask) return 1; } @@ -2519,24 +2524,29 @@ static inline void ____napi_schedule(struct softnet_data *sd, /* * __skb_get_rxhash: calculate a flow hash based on src/dst addresses - * and src/dst port numbers. Returns a non-zero hash number on success - * and 0 on failure. + * and src/dst port numbers. Sets rxhash in skb to non-zero hash value + * on success, zero indicates no valid hash. Also, sets l4_rxhash in skb + * if hash is a canonical 4-tuple hash over transport ports. */ -__u32 __skb_get_rxhash(struct sk_buff *skb) +void __skb_get_rxhash(struct sk_buff *skb) { int nhoff, hash = 0, poff; const struct ipv6hdr *ip6; const struct iphdr *ip; + const struct vlan_hdr *vlan; u8 ip_proto; - u32 addr1, addr2, ihl; + u32 addr1, addr2; + u16 proto; union { u32 v32; u16 v16[2]; } ports; nhoff = skb_network_offset(skb); + proto = skb->protocol; - switch (skb->protocol) { +again: + switch (proto) { case __constant_htons(ETH_P_IP): if (!pskb_may_pull(skb, sizeof(*ip) + nhoff)) goto done; @@ -2548,7 +2558,7 @@ __u32 __skb_get_rxhash(struct sk_buff *skb) ip_proto = ip->protocol; addr1 = (__force u32) ip->saddr; addr2 = (__force u32) ip->daddr; - ihl = ip->ihl; + nhoff += ip->ihl * 4; break; case __constant_htons(ETH_P_IPV6): if (!pskb_may_pull(skb, sizeof(*ip6) + nhoff)) @@ -2558,20 +2568,64 @@ __u32 __skb_get_rxhash(struct sk_buff *skb) ip_proto = ip6->nexthdr; addr1 = (__force u32) ip6->saddr.s6_addr32[3]; addr2 = (__force u32) ip6->daddr.s6_addr32[3]; - ihl = (40 >> 2); + nhoff += 40; break; + case __constant_htons(ETH_P_8021Q): + if (!pskb_may_pull(skb, sizeof(*vlan) + nhoff)) + goto done; + vlan = (const struct vlan_hdr *) (skb->data + nhoff); + proto = vlan->h_vlan_encapsulated_proto; + nhoff += sizeof(*vlan); + goto again; + case __constant_htons(ETH_P_PPP_SES): + if (!pskb_may_pull(skb, PPPOE_SES_HLEN + nhoff)) + goto done; + proto = *((__be16 *) (skb->data + nhoff + + sizeof(struct pppoe_hdr))); + nhoff += PPPOE_SES_HLEN; + goto again; default: goto done; } + switch (ip_proto) { + case IPPROTO_GRE: + if (pskb_may_pull(skb, nhoff + 16)) { + u8 *h = skb->data + nhoff; + __be16 flags = *(__be16 *)h; + + /* + * Only look inside GRE if version zero and no + * routing + */ + if (!(flags & (GRE_VERSION|GRE_ROUTING))) { + proto = *(__be16 *)(h + 2); + nhoff += 4; + if (flags & GRE_CSUM) + nhoff += 4; + if (flags & GRE_KEY) + nhoff += 4; + if (flags & GRE_SEQ) + nhoff += 4; + goto again; + } + } + break; + case IPPROTO_IPIP: + goto again; + default: + break; + } + ports.v32 = 0; poff = proto_ports_offset(ip_proto); if (poff >= 0) { - nhoff += ihl * 4 + poff; + nhoff += poff; if (pskb_may_pull(skb, nhoff + 4)) { ports.v32 = * (__force u32 *) (skb->data + nhoff); if (ports.v16[1] < ports.v16[0]) swap(ports.v16[0], ports.v16[1]); + skb->l4_rxhash = 1; } } @@ -2584,7 +2638,7 @@ __u32 __skb_get_rxhash(struct sk_buff *skb) hash = 1; done: - return hash; + skb->rxhash = hash; } EXPORT_SYMBOL(__skb_get_rxhash); @@ -2673,13 +2727,13 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, map = rcu_dereference(rxqueue->rps_map); if (map) { if (map->len == 1 && - !rcu_dereference_raw(rxqueue->rps_flow_table)) { + !rcu_access_pointer(rxqueue->rps_flow_table)) { tcpu = map->cpus[0]; if (cpu_online(tcpu)) cpu = tcpu; goto done; } - } else if (!rcu_dereference_raw(rxqueue->rps_flow_table)) { + } else if (!rcu_access_pointer(rxqueue->rps_flow_table)) { goto done; } @@ -3094,8 +3148,8 @@ void netdev_rx_handler_unregister(struct net_device *dev) { ASSERT_RTNL(); - rcu_assign_pointer(dev->rx_handler, NULL); - rcu_assign_pointer(dev->rx_handler_data, NULL); + RCU_INIT_POINTER(dev->rx_handler, NULL); + RCU_INIT_POINTER(dev->rx_handler_data, NULL); } EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister); @@ -3187,10 +3241,9 @@ ncls: ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = NULL; } - if (vlan_do_receive(&skb)) { - ret = __netif_receive_skb(skb); - goto out; - } else if (unlikely(!skb)) + if (vlan_do_receive(&skb)) + goto another_round; + else if (unlikely(!skb)) goto out; } @@ -3424,7 +3477,7 @@ pull: skb_shinfo(skb)->frags[0].size -= grow; if (unlikely(!skb_shinfo(skb)->frags[0].size)) { - put_page(skb_shinfo(skb)->frags[0].page); + skb_frag_unref(skb, 0); memmove(skb_shinfo(skb)->frags, skb_shinfo(skb)->frags + 1, --skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t)); @@ -3488,10 +3541,9 @@ void skb_gro_reset_offset(struct sk_buff *skb) NAPI_GRO_CB(skb)->frag0_len = 0; if (skb->mac_header == skb->tail && - !PageHighMem(skb_shinfo(skb)->frags[0].page)) { + !PageHighMem(skb_frag_page(&skb_shinfo(skb)->frags[0]))) { NAPI_GRO_CB(skb)->frag0 = - page_address(skb_shinfo(skb)->frags[0].page) + - skb_shinfo(skb)->frags[0].page_offset; + skb_frag_address(&skb_shinfo(skb)->frags[0]); NAPI_GRO_CB(skb)->frag0_len = skb_shinfo(skb)->frags[0].size; } } @@ -4489,9 +4541,7 @@ void __dev_set_rx_mode(struct net_device *dev) if (!netif_device_present(dev)) return; - if (ops->ndo_set_rx_mode) - ops->ndo_set_rx_mode(dev); - else { + if (!(dev->priv_flags & IFF_UNICAST_FLT)) { /* Unicast addresses changes may only happen under the rtnl, * therefore calling __dev_set_promiscuity here is safe. */ @@ -4502,10 +4552,10 @@ void __dev_set_rx_mode(struct net_device *dev) __dev_set_promiscuity(dev, -1); dev->uc_promisc = false; } - - if (ops->ndo_set_multicast_list) - ops->ndo_set_multicast_list(dev); } + + if (ops->ndo_set_rx_mode) + ops->ndo_set_rx_mode(dev); } void dev_set_rx_mode(struct net_device *dev) @@ -4516,30 +4566,6 @@ void dev_set_rx_mode(struct net_device *dev) } /** - * dev_ethtool_get_settings - call device's ethtool_ops::get_settings() - * @dev: device - * @cmd: memory area for ethtool_ops::get_settings() result - * - * The cmd arg is initialized properly (cleared and - * ethtool_cmd::cmd field set to ETHTOOL_GSET). - * - * Return device's ethtool_ops::get_settings() result value or - * -EOPNOTSUPP when device doesn't expose - * ethtool_ops::get_settings() operation. - */ -int dev_ethtool_get_settings(struct net_device *dev, - struct ethtool_cmd *cmd) -{ - if (!dev->ethtool_ops || !dev->ethtool_ops->get_settings) - return -EOPNOTSUPP; - - memset(cmd, 0, sizeof(struct ethtool_cmd)); - cmd->cmd = ETHTOOL_GSET; - return dev->ethtool_ops->get_settings(dev, cmd); -} -EXPORT_SYMBOL(dev_ethtool_get_settings); - -/** * dev_get_flags - get flags reported to userspace * @dev: device * @@ -4855,7 +4881,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) return -EOPNOTSUPP; case SIOCADDMULTI: - if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) || + if (!ops->ndo_set_rx_mode || ifr->ifr_hwaddr.sa_family != AF_UNSPEC) return -EINVAL; if (!netif_device_present(dev)) @@ -4863,7 +4889,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) return dev_mc_add_global(dev, ifr->ifr_hwaddr.sa_data); case SIOCDELMULTI: - if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) || + if (!ops->ndo_set_rx_mode || ifr->ifr_hwaddr.sa_family != AF_UNSPEC) return -EINVAL; if (!netif_device_present(dev)) @@ -5727,8 +5753,8 @@ void netdev_run_todo(void) /* paranoia */ BUG_ON(netdev_refcnt_read(dev)); - WARN_ON(rcu_dereference_raw(dev->ip_ptr)); - WARN_ON(rcu_dereference_raw(dev->ip6_ptr)); + WARN_ON(rcu_access_pointer(dev->ip_ptr)); + WARN_ON(rcu_access_pointer(dev->ip6_ptr)); WARN_ON(dev->dn_ptr); if (dev->destructor) @@ -5932,7 +5958,7 @@ void free_netdev(struct net_device *dev) kfree(dev->_rx); #endif - kfree(rcu_dereference_raw(dev->ingress_queue)); + kfree(rcu_dereference_protected(dev->ingress_queue, 1)); /* Flush device addresses */ dev_addr_flush(dev); diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c index e2e66939ed00..283d1b863876 100644 --- a/net/core/dev_addr_lists.c +++ b/net/core/dev_addr_lists.c @@ -591,8 +591,8 @@ EXPORT_SYMBOL(dev_mc_del_global); * addresses that have no users left. The source device must be * locked by netif_tx_lock_bh. * - * This function is intended to be called from the dev->set_multicast_list - * or dev->set_rx_mode function of layered software devices. + * This function is intended to be called from the ndo_set_rx_mode + * function of layered software devices. */ int dev_mc_sync(struct net_device *to, struct net_device *from) { diff --git a/net/core/dst.c b/net/core/dst.c index 14b33baf0733..d5e2c4c09107 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -171,7 +171,7 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev, dst_init_metrics(dst, dst_default_metrics, true); dst->expires = 0UL; dst->path = dst; - dst->_neighbour = NULL; + RCU_INIT_POINTER(dst->_neighbour, NULL); #ifdef CONFIG_XFRM dst->xfrm = NULL; #endif @@ -229,11 +229,11 @@ struct dst_entry *dst_destroy(struct dst_entry * dst) smp_rmb(); again: - neigh = dst->_neighbour; + neigh = rcu_dereference_protected(dst->_neighbour, 1); child = dst->child; if (neigh) { - dst->_neighbour = NULL; + RCU_INIT_POINTER(dst->_neighbour, NULL); neigh_release(neigh); } @@ -360,14 +360,19 @@ static void dst_ifdown(struct dst_entry *dst, struct net_device *dev, if (!unregister) { dst->input = dst->output = dst_discard; } else { + struct neighbour *neigh; + dst->dev = dev_net(dst->dev)->loopback_dev; dev_hold(dst->dev); dev_put(dev); - if (dst->_neighbour && dst->_neighbour->dev == dev) { - dst->_neighbour->dev = dst->dev; + rcu_read_lock(); + neigh = dst_get_neighbour(dst); + if (neigh && neigh->dev == dev) { + neigh->dev = dst->dev; dev_hold(dst->dev); dev_put(dev); } + rcu_read_unlock(); } } diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 6cdba5fc2bed..f44481707124 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -569,15 +569,25 @@ int __ethtool_set_flags(struct net_device *dev, u32 data) return 0; } -static int ethtool_get_settings(struct net_device *dev, void __user *useraddr) +int __ethtool_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) { - struct ethtool_cmd cmd = { .cmd = ETHTOOL_GSET }; - int err; + ASSERT_RTNL(); - if (!dev->ethtool_ops->get_settings) + if (!dev->ethtool_ops || !dev->ethtool_ops->get_settings) return -EOPNOTSUPP; - err = dev->ethtool_ops->get_settings(dev, &cmd); + memset(cmd, 0, sizeof(struct ethtool_cmd)); + cmd->cmd = ETHTOOL_GSET; + return dev->ethtool_ops->get_settings(dev, cmd); +} +EXPORT_SYMBOL(__ethtool_get_settings); + +static int ethtool_get_settings(struct net_device *dev, void __user *useraddr) +{ + int err; + struct ethtool_cmd cmd; + + err = __ethtool_get_settings(dev, &cmd); if (err < 0) return err; diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index e7ab0c0285b5..67c5c288cd80 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -487,7 +487,7 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) if (ops->nr_goto_rules > 0) { list_for_each_entry(tmp, &ops->rules_list, list) { if (rtnl_dereference(tmp->ctarget) == rule) { - rcu_assign_pointer(tmp->ctarget, NULL); + RCU_INIT_POINTER(tmp->ctarget, NULL); ops->unresolved_rules++; } } @@ -545,7 +545,7 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, frh->flags = rule->flags; if (rule->action == FR_ACT_GOTO && - rcu_dereference_raw(rule->ctarget) == NULL) + rcu_access_pointer(rule->ctarget) == NULL) frh->flags |= FIB_RULE_UNRESOLVED; if (rule->iifname[0]) { diff --git a/net/core/filter.c b/net/core/filter.c index 36f975fa87cb..8fcc2d776e09 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -645,7 +645,7 @@ int sk_detach_filter(struct sock *sk) filter = rcu_dereference_protected(sk->sk_filter, sock_owned_by_user(sk)); if (filter) { - rcu_assign_pointer(sk->sk_filter, NULL); + RCU_INIT_POINTER(sk->sk_filter, NULL); sk_filter_uncharge(sk, filter); ret = 0; } diff --git a/net/core/kmap_skb.h b/net/core/kmap_skb.h index 283c2b993fb8..81e1ed7c8383 100644 --- a/net/core/kmap_skb.h +++ b/net/core/kmap_skb.h @@ -7,7 +7,7 @@ static inline void *kmap_skb_frag(const skb_frag_t *frag) local_bh_disable(); #endif - return kmap_atomic(frag->page, KM_SKB_DATA_SOFTIRQ); + return kmap_atomic(skb_frag_page(frag), KM_SKB_DATA_SOFTIRQ); } static inline void kunmap_skb_frag(void *vaddr) diff --git a/net/core/link_watch.c b/net/core/link_watch.c index 357bd4ee4baa..c3519c6d1b16 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -78,8 +78,13 @@ static void rfc2863_policy(struct net_device *dev) static bool linkwatch_urgent_event(struct net_device *dev) { - return netif_running(dev) && netif_carrier_ok(dev) && - qdisc_tx_changing(dev); + if (!netif_running(dev)) + return false; + + if (dev->ifindex != dev->iflink) + return true; + + return netif_carrier_ok(dev) && qdisc_tx_changing(dev); } diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 8fab9b0bb203..4002261f20d1 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -844,6 +844,19 @@ static void neigh_invalidate(struct neighbour *neigh) skb_queue_purge(&neigh->arp_queue); } +static void neigh_probe(struct neighbour *neigh) + __releases(neigh->lock) +{ + struct sk_buff *skb = skb_peek(&neigh->arp_queue); + /* keep skb alive even if arp_queue overflows */ + if (skb) + skb = skb_copy(skb, GFP_ATOMIC); + write_unlock(&neigh->lock); + neigh->ops->solicit(neigh, skb); + atomic_inc(&neigh->probes); + kfree_skb(skb); +} + /* Called when a timer expires for a neighbour entry. */ static void neigh_timer_handler(unsigned long arg) @@ -920,14 +933,7 @@ static void neigh_timer_handler(unsigned long arg) neigh_hold(neigh); } if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) { - struct sk_buff *skb = skb_peek(&neigh->arp_queue); - /* keep skb alive even if arp_queue overflows */ - if (skb) - skb = skb_copy(skb, GFP_ATOMIC); - write_unlock(&neigh->lock); - neigh->ops->solicit(neigh, skb); - atomic_inc(&neigh->probes); - kfree_skb(skb); + neigh_probe(neigh); } else { out: write_unlock(&neigh->lock); @@ -942,7 +948,7 @@ out: int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) { int rc; - unsigned long now; + bool immediate_probe = false; write_lock_bh(&neigh->lock); @@ -950,14 +956,16 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE)) goto out_unlock_bh; - now = jiffies; - if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) { if (neigh->parms->mcast_probes + neigh->parms->app_probes) { + unsigned long next, now = jiffies; + atomic_set(&neigh->probes, neigh->parms->ucast_probes); neigh->nud_state = NUD_INCOMPLETE; - neigh->updated = jiffies; - neigh_add_timer(neigh, now + 1); + neigh->updated = now; + next = now + max(neigh->parms->retrans_time, HZ/2); + neigh_add_timer(neigh, next); + immediate_probe = true; } else { neigh->nud_state = NUD_FAILED; neigh->updated = jiffies; @@ -989,7 +997,11 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) rc = 1; } out_unlock_bh: - write_unlock_bh(&neigh->lock); + if (immediate_probe) + neigh_probe(neigh); + else + write_unlock(&neigh->lock); + local_bh_enable(); return rc; } EXPORT_SYMBOL(__neigh_event_send); diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 1683e5db2f27..7604a635376b 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -147,7 +147,7 @@ static ssize_t show_speed(struct device *dev, if (netif_running(netdev)) { struct ethtool_cmd cmd; - if (!dev_ethtool_get_settings(netdev, &cmd)) + if (!__ethtool_get_settings(netdev, &cmd)) ret = sprintf(buf, fmt_udec, ethtool_cmd_speed(&cmd)); } rtnl_unlock(); @@ -165,7 +165,7 @@ static ssize_t show_duplex(struct device *dev, if (netif_running(netdev)) { struct ethtool_cmd cmd; - if (!dev_ethtool_get_settings(netdev, &cmd)) + if (!__ethtool_get_settings(netdev, &cmd)) ret = sprintf(buf, "%s\n", cmd.duplex ? "full" : "half"); } @@ -712,13 +712,13 @@ static void rx_queue_release(struct kobject *kobj) struct rps_dev_flow_table *flow_table; - map = rcu_dereference_raw(queue->rps_map); + map = rcu_dereference_protected(queue->rps_map, 1); if (map) { RCU_INIT_POINTER(queue->rps_map, NULL); kfree_rcu(map, rcu); } - flow_table = rcu_dereference_raw(queue->rps_flow_table); + flow_table = rcu_dereference_protected(queue->rps_flow_table, 1); if (flow_table) { RCU_INIT_POINTER(queue->rps_flow_table, NULL); call_rcu(&flow_table->rcu, rps_dev_flow_table_release); @@ -987,10 +987,10 @@ static ssize_t store_xps_map(struct netdev_queue *queue, } if (nonempty) - rcu_assign_pointer(dev->xps_maps, new_dev_maps); + RCU_INIT_POINTER(dev->xps_maps, new_dev_maps); else { kfree(new_dev_maps); - rcu_assign_pointer(dev->xps_maps, NULL); + RCU_INIT_POINTER(dev->xps_maps, NULL); } if (dev_maps) diff --git a/net/core/netpoll.c b/net/core/netpoll.c index adf84dd8c7b5..d676a561d983 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -760,7 +760,7 @@ int __netpoll_setup(struct netpoll *np) } /* last thing to do is link it to the net device structure */ - rcu_assign_pointer(ndev->npinfo, npinfo); + RCU_INIT_POINTER(ndev->npinfo, npinfo); return 0; @@ -901,7 +901,7 @@ void __netpoll_cleanup(struct netpoll *np) if (ops->ndo_netpoll_cleanup) ops->ndo_netpoll_cleanup(np->dev); - rcu_assign_pointer(np->dev->npinfo, NULL); + RCU_INIT_POINTER(np->dev->npinfo, NULL); /* avoid racing with NAPI reading npinfo */ synchronize_rcu_bh(); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index e35a6fbb8110..796044ac0bf3 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2602,8 +2602,7 @@ static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb, if (!pkt_dev->page) break; } - skb_shinfo(skb)->frags[i].page = pkt_dev->page; - get_page(pkt_dev->page); + skb_frag_set_page(skb, i, pkt_dev->page); skb_shinfo(skb)->frags[i].page_offset = 0; /*last fragment, fill rest of data*/ if (i == (frags - 1)) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 99d9e953fe39..39f8dd6a2821 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1604,7 +1604,6 @@ struct net_device *rtnl_create_link(struct net *src_net, struct net *net, dev_net_set(dev, net); dev->rtnl_link_ops = ops; dev->rtnl_link_state = RTNL_LINK_INITIALIZING; - dev->real_num_tx_queues = real_num_queues; if (tb[IFLA_MTU]) dev->mtu = nla_get_u32(tb[IFLA_MTU]); diff --git a/net/core/scm.c b/net/core/scm.c index 4c1ef026d695..811b53fb330e 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -192,7 +192,7 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p) goto error; cred->uid = cred->euid = p->creds.uid; - cred->gid = cred->egid = p->creds.uid; + cred->gid = cred->egid = p->creds.gid; put_cred(p->cred); p->cred = cred; } diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c new file mode 100644 index 000000000000..45329d7c9dd9 --- /dev/null +++ b/net/core/secure_seq.c @@ -0,0 +1,184 @@ +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/cryptohash.h> +#include <linux/module.h> +#include <linux/cache.h> +#include <linux/random.h> +#include <linux/hrtimer.h> +#include <linux/ktime.h> +#include <linux/string.h> + +#include <net/secure_seq.h> + +static u32 net_secret[MD5_MESSAGE_BYTES / 4] ____cacheline_aligned; + +static int __init net_secret_init(void) +{ + get_random_bytes(net_secret, sizeof(net_secret)); + return 0; +} +late_initcall(net_secret_init); + +static u32 seq_scale(u32 seq) +{ + /* + * As close as possible to RFC 793, which + * suggests using a 250 kHz clock. + * Further reading shows this assumes 2 Mb/s networks. + * For 10 Mb/s Ethernet, a 1 MHz clock is appropriate. + * For 10 Gb/s Ethernet, a 1 GHz clock should be ok, but + * we also need to limit the resolution so that the u32 seq + * overlaps less than one time per MSL (2 minutes). + * Choosing a clock of 64 ns period is OK. (period of 274 s) + */ + return seq + (ktime_to_ns(ktime_get_real()) >> 6); +} + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +__u32 secure_tcpv6_sequence_number(__be32 *saddr, __be32 *daddr, + __be16 sport, __be16 dport) +{ + u32 secret[MD5_MESSAGE_BYTES / 4]; + u32 hash[MD5_DIGEST_WORDS]; + u32 i; + + memcpy(hash, saddr, 16); + for (i = 0; i < 4; i++) + secret[i] = net_secret[i] + daddr[i]; + secret[4] = net_secret[4] + + (((__force u16)sport << 16) + (__force u16)dport); + for (i = 5; i < MD5_MESSAGE_BYTES / 4; i++) + secret[i] = net_secret[i]; + + md5_transform(hash, secret); + + return seq_scale(hash[0]); +} +EXPORT_SYMBOL(secure_tcpv6_sequence_number); + +u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, + __be16 dport) +{ + u32 secret[MD5_MESSAGE_BYTES / 4]; + u32 hash[MD5_DIGEST_WORDS]; + u32 i; + + memcpy(hash, saddr, 16); + for (i = 0; i < 4; i++) + secret[i] = net_secret[i] + (__force u32) daddr[i]; + secret[4] = net_secret[4] + (__force u32)dport; + for (i = 5; i < MD5_MESSAGE_BYTES / 4; i++) + secret[i] = net_secret[i]; + + md5_transform(hash, secret); + + return hash[0]; +} +#endif + +#ifdef CONFIG_INET +__u32 secure_ip_id(__be32 daddr) +{ + u32 hash[MD5_DIGEST_WORDS]; + + hash[0] = (__force __u32) daddr; + hash[1] = net_secret[13]; + hash[2] = net_secret[14]; + hash[3] = net_secret[15]; + + md5_transform(hash, net_secret); + + return hash[0]; +} + +__u32 secure_ipv6_id(const __be32 daddr[4]) +{ + __u32 hash[4]; + + memcpy(hash, daddr, 16); + md5_transform(hash, net_secret); + + return hash[0]; +} + +__u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr, + __be16 sport, __be16 dport) +{ + u32 hash[MD5_DIGEST_WORDS]; + + hash[0] = (__force u32)saddr; + hash[1] = (__force u32)daddr; + hash[2] = ((__force u16)sport << 16) + (__force u16)dport; + hash[3] = net_secret[15]; + + md5_transform(hash, net_secret); + + return seq_scale(hash[0]); +} + +u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport) +{ + u32 hash[MD5_DIGEST_WORDS]; + + hash[0] = (__force u32)saddr; + hash[1] = (__force u32)daddr; + hash[2] = (__force u32)dport ^ net_secret[14]; + hash[3] = net_secret[15]; + + md5_transform(hash, net_secret); + + return hash[0]; +} +EXPORT_SYMBOL_GPL(secure_ipv4_port_ephemeral); +#endif + +#if defined(CONFIG_IP_DCCP) || defined(CONFIG_IP_DCCP_MODULE) +u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr, + __be16 sport, __be16 dport) +{ + u32 hash[MD5_DIGEST_WORDS]; + u64 seq; + + hash[0] = (__force u32)saddr; + hash[1] = (__force u32)daddr; + hash[2] = ((__force u16)sport << 16) + (__force u16)dport; + hash[3] = net_secret[15]; + + md5_transform(hash, net_secret); + + seq = hash[0] | (((u64)hash[1]) << 32); + seq += ktime_to_ns(ktime_get_real()); + seq &= (1ull << 48) - 1; + + return seq; +} +EXPORT_SYMBOL(secure_dccp_sequence_number); + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr, + __be16 sport, __be16 dport) +{ + u32 secret[MD5_MESSAGE_BYTES / 4]; + u32 hash[MD5_DIGEST_WORDS]; + u64 seq; + u32 i; + + memcpy(hash, saddr, 16); + for (i = 0; i < 4; i++) + secret[i] = net_secret[i] + daddr[i]; + secret[4] = net_secret[4] + + (((__force u16)sport << 16) + (__force u16)dport); + for (i = 5; i < MD5_MESSAGE_BYTES / 4; i++) + secret[i] = net_secret[i]; + + md5_transform(hash, secret); + + seq = hash[0] | (((u64)hash[1]) << 32); + seq += ktime_to_ns(ktime_get_real()); + seq &= (1ull << 48) - 1; + + return seq; +} +EXPORT_SYMBOL(secure_dccpv6_sequence_number); +#endif +#endif diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 2beda824636e..296afd0aa8d2 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -326,7 +326,7 @@ static void skb_release_data(struct sk_buff *skb) if (skb_shinfo(skb)->nr_frags) { int i; for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) - put_page(skb_shinfo(skb)->frags[i].page); + skb_frag_unref(skb, i); } /* @@ -529,6 +529,8 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->mac_header = old->mac_header; skb_dst_copy(new, old); new->rxhash = old->rxhash; + new->ooo_okay = old->ooo_okay; + new->l4_rxhash = old->l4_rxhash; #ifdef CONFIG_XFRM new->sp = secpath_get(old->sp); #endif @@ -807,7 +809,7 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask) } for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i]; - get_page(skb_shinfo(n)->frags[i].page); + skb_frag_ref(skb, i); } skb_shinfo(n)->nr_frags = i; } @@ -899,7 +901,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY; } for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) - get_page(skb_shinfo(skb)->frags[i].page); + skb_frag_ref(skb, i); if (skb_has_frag_list(skb)) skb_clone_fraglist(skb); @@ -1179,7 +1181,7 @@ drop_pages: skb_shinfo(skb)->nr_frags = i; for (; i < nfrags; i++) - put_page(skb_shinfo(skb)->frags[i].page); + skb_frag_unref(skb, i); if (skb_has_frag_list(skb)) skb_drop_fraglist(skb); @@ -1348,7 +1350,7 @@ pull_pages: k = 0; for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { if (skb_shinfo(skb)->frags[i].size <= eat) { - put_page(skb_shinfo(skb)->frags[i].page); + skb_frag_unref(skb, i); eat -= skb_shinfo(skb)->frags[i].size; } else { skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i]; @@ -1369,8 +1371,21 @@ pull_pages: } EXPORT_SYMBOL(__pskb_pull_tail); -/* Copy some data bits from skb to kernel buffer. */ - +/** + * skb_copy_bits - copy bits from skb to kernel buffer + * @skb: source skb + * @offset: offset in source + * @to: destination buffer + * @len: number of bytes to copy + * + * Copy the specified number of bytes from the source skb to the + * destination buffer. + * + * CAUTION ! : + * If its prototype is ever changed, + * check arch/{*}/net/{*}.S files, + * since it is called from BPF assembly code. + */ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len) { int start = skb_headlen(skb); @@ -1594,7 +1609,8 @@ static int __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe, for (seg = 0; seg < skb_shinfo(skb)->nr_frags; seg++) { const skb_frag_t *f = &skb_shinfo(skb)->frags[seg]; - if (__splice_segment(f->page, f->page_offset, f->size, + if (__splice_segment(skb_frag_page(f), + f->page_offset, f->size, offset, len, skb, spd, 0, sk, pipe)) return 1; } @@ -2139,7 +2155,7 @@ static inline void skb_split_no_header(struct sk_buff *skb, * where splitting is expensive. * 2. Split is accurately. We make this. */ - get_page(skb_shinfo(skb)->frags[i].page); + skb_frag_ref(skb, i); skb_shinfo(skb1)->frags[0].page_offset += len - pos; skb_shinfo(skb1)->frags[0].size -= len - pos; skb_shinfo(skb)->frags[i].size = len - pos; @@ -2214,7 +2230,8 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen) * commit all, so that we don't have to undo partial changes */ if (!to || - !skb_can_coalesce(tgt, to, fragfrom->page, fragfrom->page_offset)) { + !skb_can_coalesce(tgt, to, skb_frag_page(fragfrom), + fragfrom->page_offset)) { merge = -1; } else { merge = to - 1; @@ -2261,7 +2278,7 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen) to++; } else { - get_page(fragfrom->page); + __skb_frag_ref(fragfrom); fragto->page = fragfrom->page; fragto->page_offset = fragfrom->page_offset; fragto->size = todo; @@ -2283,7 +2300,7 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen) fragto = &skb_shinfo(tgt)->frags[merge]; fragto->size += fragfrom->size; - put_page(fragfrom->page); + __skb_frag_unref(fragfrom); } /* Reposition in the original skb */ @@ -2528,8 +2545,7 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb, left = PAGE_SIZE - frag->page_offset; copy = (length > left)? left : length; - ret = getfrag(from, (page_address(frag->page) + - frag->page_offset + frag->size), + ret = getfrag(from, skb_frag_address(frag) + frag->size, offset, copy, 0, skb); if (ret < 0) return -EFAULT; @@ -2681,7 +2697,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, u32 features) while (pos < offset + len && i < nfrags) { *frag = skb_shinfo(skb)->frags[i]; - get_page(frag->page); + __skb_frag_ref(frag); size = frag->size; if (pos < offset) { @@ -2904,7 +2920,7 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) if (copy > len) copy = len; - sg_set_page(&sg[elt], frag->page, copy, + sg_set_page(&sg[elt], skb_frag_page(frag), copy, frag->page_offset+offset-start); elt++; if (!(len -= copy)) diff --git a/net/core/sock.c b/net/core/sock.c index bc745d00ea4d..b29ab61b029c 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -387,7 +387,7 @@ struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie) if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) { sk_tx_queue_clear(sk); - rcu_assign_pointer(sk->sk_dst_cache, NULL); + RCU_INIT_POINTER(sk->sk_dst_cache, NULL); dst_release(dst); return NULL; } @@ -1158,7 +1158,7 @@ static void __sk_free(struct sock *sk) atomic_read(&sk->sk_wmem_alloc) == 0); if (filter) { sk_filter_uncharge(sk, filter); - rcu_assign_pointer(sk->sk_filter, NULL); + RCU_INIT_POINTER(sk->sk_filter, NULL); } sock_disable_timestamp(sk, SOCK_TIMESTAMP); @@ -1533,7 +1533,6 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len, skb_shinfo(skb)->nr_frags = npages; for (i = 0; i < npages; i++) { struct page *page; - skb_frag_t *frag; page = alloc_pages(sk->sk_allocation, 0); if (!page) { @@ -1543,12 +1542,11 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len, goto failure; } - frag = &skb_shinfo(skb)->frags[i]; - frag->page = page; - frag->page_offset = 0; - frag->size = (data_len >= PAGE_SIZE ? - PAGE_SIZE : - data_len); + __skb_fill_page_desc(skb, i, + page, 0, + (data_len >= PAGE_SIZE ? + PAGE_SIZE : + data_len)); data_len -= PAGE_SIZE; } diff --git a/net/core/user_dma.c b/net/core/user_dma.c index 25d717ebc92e..34e9664cae3b 100644 --- a/net/core/user_dma.c +++ b/net/core/user_dma.c @@ -78,7 +78,7 @@ int dma_skb_copy_datagram_iovec(struct dma_chan *chan, copy = end - offset; if (copy > 0) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - struct page *page = frag->page; + struct page *page = skb_frag_page(frag); if (copy > len) copy = len; diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index 0462040fc818..67164bb6ae4d 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -85,7 +85,6 @@ static int ccid2_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val) { - struct dccp_sock *dp = dccp_sk(sk); u32 max_ratio = DIV_ROUND_UP(ccid2_hc_tx_sk(sk)->tx_cwnd, 2); /* @@ -98,14 +97,33 @@ static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val) DCCP_WARN("Limiting Ack Ratio (%u) to %u\n", val, max_ratio); val = max_ratio; } - if (val > DCCPF_ACK_RATIO_MAX) - val = DCCPF_ACK_RATIO_MAX; + dccp_feat_signal_nn_change(sk, DCCPF_ACK_RATIO, + min_t(u32, val, DCCPF_ACK_RATIO_MAX)); +} - if (val == dp->dccps_l_ack_ratio) - return; +static void ccid2_check_l_ack_ratio(struct sock *sk) +{ + struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); - ccid2_pr_debug("changing local ack ratio to %u\n", val); - dp->dccps_l_ack_ratio = val; + /* + * After a loss, idle period, application limited period, or RTO we + * need to check that the ack ratio is still less than the congestion + * window. Otherwise, we will send an entire congestion window of + * packets and got no response because we haven't sent ack ratio + * packets yet. + * If the ack ratio does need to be reduced, we reduce it to half of + * the congestion window (or 1 if that's zero) instead of to the + * congestion window. This prevents problems if one ack is lost. + */ + if (dccp_feat_nn_get(sk, DCCPF_ACK_RATIO) > hc->tx_cwnd) + ccid2_change_l_ack_ratio(sk, hc->tx_cwnd/2 ? : 1U); +} + +static void ccid2_change_l_seq_window(struct sock *sk, u64 val) +{ + dccp_feat_signal_nn_change(sk, DCCPF_SEQUENCE_WINDOW, + clamp_val(val, DCCPF_SEQ_WMIN, + DCCPF_SEQ_WMAX)); } static void ccid2_hc_tx_rto_expire(unsigned long data) @@ -187,6 +205,8 @@ static void ccid2_cwnd_application_limited(struct sock *sk, const u32 now) } hc->tx_cwnd_used = 0; hc->tx_cwnd_stamp = now; + + ccid2_check_l_ack_ratio(sk); } /* This borrows the code of tcp_cwnd_restart() */ @@ -205,6 +225,8 @@ static void ccid2_cwnd_restart(struct sock *sk, const u32 now) hc->tx_cwnd_stamp = now; hc->tx_cwnd_used = 0; + + ccid2_check_l_ack_ratio(sk); } static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len) @@ -405,17 +427,37 @@ static void ccid2_new_ack(struct sock *sk, struct ccid2_seq *seqp, unsigned int *maxincr) { struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); - - if (hc->tx_cwnd < hc->tx_ssthresh) { - if (*maxincr > 0 && ++hc->tx_packets_acked == 2) { + struct dccp_sock *dp = dccp_sk(sk); + int r_seq_used = hc->tx_cwnd / dp->dccps_l_ack_ratio; + + if (hc->tx_cwnd < dp->dccps_l_seq_win && + r_seq_used < dp->dccps_r_seq_win) { + if (hc->tx_cwnd < hc->tx_ssthresh) { + if (*maxincr > 0 && ++hc->tx_packets_acked >= 2) { + hc->tx_cwnd += 1; + *maxincr -= 1; + hc->tx_packets_acked = 0; + } + } else if (++hc->tx_packets_acked >= hc->tx_cwnd) { hc->tx_cwnd += 1; - *maxincr -= 1; hc->tx_packets_acked = 0; } - } else if (++hc->tx_packets_acked >= hc->tx_cwnd) { - hc->tx_cwnd += 1; - hc->tx_packets_acked = 0; } + + /* + * Adjust the local sequence window and the ack ratio to allow about + * 5 times the number of packets in the network (RFC 4340 7.5.2) + */ + if (r_seq_used * CCID2_WIN_CHANGE_FACTOR >= dp->dccps_r_seq_win) + ccid2_change_l_ack_ratio(sk, dp->dccps_l_ack_ratio * 2); + else if (r_seq_used * CCID2_WIN_CHANGE_FACTOR < dp->dccps_r_seq_win/2) + ccid2_change_l_ack_ratio(sk, dp->dccps_l_ack_ratio / 2 ? : 1U); + + if (hc->tx_cwnd * CCID2_WIN_CHANGE_FACTOR >= dp->dccps_l_seq_win) + ccid2_change_l_seq_window(sk, dp->dccps_l_seq_win * 2); + else if (hc->tx_cwnd * CCID2_WIN_CHANGE_FACTOR < dp->dccps_l_seq_win/2) + ccid2_change_l_seq_window(sk, dp->dccps_l_seq_win / 2); + /* * FIXME: RTT is sampled several times per acknowledgment (for each * entry in the Ack Vector), instead of once per Ack (as in TCP SACK). @@ -441,9 +483,7 @@ static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp) hc->tx_cwnd = hc->tx_cwnd / 2 ? : 1U; hc->tx_ssthresh = max(hc->tx_cwnd, 2U); - /* Avoid spurious timeouts resulting from Ack Ratio > cwnd */ - if (dccp_sk(sk)->dccps_l_ack_ratio > hc->tx_cwnd) - ccid2_change_l_ack_ratio(sk, hc->tx_cwnd); + ccid2_check_l_ack_ratio(sk); } static int ccid2_hc_tx_parse_options(struct sock *sk, u8 packet_type, @@ -494,8 +534,16 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) if (hc->tx_rpdupack >= NUMDUPACK) { hc->tx_rpdupack = -1; /* XXX lame */ hc->tx_rpseq = 0; - +#ifdef __CCID2_COPES_GRACEFULLY_WITH_ACK_CONGESTION_CONTROL__ + /* + * FIXME: Ack Congestion Control is broken; in + * the current state instabilities occurred with + * Ack Ratios greater than 1; causing hang-ups + * and long RTO timeouts. This needs to be fixed + * before opening up dynamic changes. -- gerrit + */ ccid2_change_l_ack_ratio(sk, 2 * dp->dccps_l_ack_ratio); +#endif } } } diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h index f585d330e1e5..18c97543e522 100644 --- a/net/dccp/ccids/ccid2.h +++ b/net/dccp/ccids/ccid2.h @@ -43,6 +43,12 @@ struct ccid2_seq { #define CCID2_SEQBUF_LEN 1024 #define CCID2_SEQBUF_MAX 128 +/* + * Multiple of congestion window to keep the sequence window at + * (RFC 4340 7.5.2) + */ +#define CCID2_WIN_CHANGE_FACTOR 5 + /** * struct ccid2_hc_tx_sock - CCID2 TX half connection * @tx_{cwnd,ssthresh,pipe}: as per RFC 4341, section 5 diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 5fdb07229017..583490aaf56f 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -474,6 +474,7 @@ static inline int dccp_ack_pending(const struct sock *sk) return dccp_ackvec_pending(sk) || inet_csk_ack_scheduled(sk); } +extern int dccp_feat_signal_nn_change(struct sock *sk, u8 feat, u64 nn_val); extern int dccp_feat_finalise_settings(struct dccp_sock *dp); extern int dccp_feat_server_ccid_dependencies(struct dccp_request_sock *dreq); extern int dccp_feat_insert_opts(struct dccp_sock*, struct dccp_request_sock*, diff --git a/net/dccp/feat.c b/net/dccp/feat.c index 568def952722..23cea0ee3101 100644 --- a/net/dccp/feat.c +++ b/net/dccp/feat.c @@ -12,6 +12,7 @@ * ----------- * o Feature negotiation is coordinated with connection setup (as in TCP), wild * changes of parameters of an established connection are not supported. + * o Changing non-negotiable (NN) values is supported in state OPEN/PARTOPEN. * o All currently known SP features have 1-byte quantities. If in the future * extensions of RFCs 4340..42 define features with item lengths larger than * one byte, a feature-specific extension of the code will be required. @@ -343,6 +344,20 @@ static int __dccp_feat_activate(struct sock *sk, const int idx, return dccp_feat_table[idx].activation_hdlr(sk, val, rx); } +/** + * dccp_feat_activate - Activate feature value on socket + * @sk: fully connected DCCP socket (after handshake is complete) + * @feat_num: feature to activate, one of %dccp_feature_numbers + * @local: whether local (1) or remote (0) @feat_num is meant + * @fval: the value (SP or NN) to activate, or NULL to use the default value + * For general use this function is preferable over __dccp_feat_activate(). + */ +static int dccp_feat_activate(struct sock *sk, u8 feat_num, bool local, + dccp_feat_val const *fval) +{ + return __dccp_feat_activate(sk, dccp_feat_index(feat_num), local, fval); +} + /* Test for "Req'd" feature (RFC 4340, 6.4) */ static inline int dccp_feat_must_be_understood(u8 feat_num) { @@ -650,11 +665,22 @@ int dccp_feat_insert_opts(struct dccp_sock *dp, struct dccp_request_sock *dreq, return -1; if (pos->needs_mandatory && dccp_insert_option_mandatory(skb)) return -1; - /* - * Enter CHANGING after transmitting the Change option (6.6.2). - */ - if (pos->state == FEAT_INITIALISING) - pos->state = FEAT_CHANGING; + + if (skb->sk->sk_state == DCCP_OPEN && + (opt == DCCPO_CONFIRM_R || opt == DCCPO_CONFIRM_L)) { + /* + * Confirms don't get retransmitted (6.6.3) once the + * connection is in state OPEN + */ + dccp_feat_list_pop(pos); + } else { + /* + * Enter CHANGING after transmitting the Change + * option (6.6.2). + */ + if (pos->state == FEAT_INITIALISING) + pos->state = FEAT_CHANGING; + } } return 0; } @@ -730,6 +756,70 @@ int dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local, 0, list, len); } +/** + * dccp_feat_nn_get - Query current/pending value of NN feature + * @sk: DCCP socket of an established connection + * @feat: NN feature number from %dccp_feature_numbers + * For a known NN feature, returns value currently being negotiated, or + * current (confirmed) value if no negotiation is going on. + */ +u64 dccp_feat_nn_get(struct sock *sk, u8 feat) +{ + if (dccp_feat_type(feat) == FEAT_NN) { + struct dccp_sock *dp = dccp_sk(sk); + struct dccp_feat_entry *entry; + + entry = dccp_feat_list_lookup(&dp->dccps_featneg, feat, 1); + if (entry != NULL) + return entry->val.nn; + + switch (feat) { + case DCCPF_ACK_RATIO: + return dp->dccps_l_ack_ratio; + case DCCPF_SEQUENCE_WINDOW: + return dp->dccps_l_seq_win; + } + } + DCCP_BUG("attempt to look up unsupported feature %u", feat); + return 0; +} +EXPORT_SYMBOL_GPL(dccp_feat_nn_get); + +/** + * dccp_feat_signal_nn_change - Update NN values for an established connection + * @sk: DCCP socket of an established connection + * @feat: NN feature number from %dccp_feature_numbers + * @nn_val: the new value to use + * This function is used to communicate NN updates out-of-band. + */ +int dccp_feat_signal_nn_change(struct sock *sk, u8 feat, u64 nn_val) +{ + struct list_head *fn = &dccp_sk(sk)->dccps_featneg; + dccp_feat_val fval = { .nn = nn_val }; + struct dccp_feat_entry *entry; + + if (sk->sk_state != DCCP_OPEN && sk->sk_state != DCCP_PARTOPEN) + return 0; + + if (dccp_feat_type(feat) != FEAT_NN || + !dccp_feat_is_valid_nn_val(feat, nn_val)) + return -EINVAL; + + if (nn_val == dccp_feat_nn_get(sk, feat)) + return 0; /* already set or negotiation under way */ + + entry = dccp_feat_list_lookup(fn, feat, 1); + if (entry != NULL) { + dccp_pr_debug("Clobbering existing NN entry %llu -> %llu\n", + (unsigned long long)entry->val.nn, + (unsigned long long)nn_val); + dccp_feat_list_pop(entry); + } + + inet_csk_schedule_ack(sk); + return dccp_feat_push_change(fn, feat, 1, 0, &fval); +} +EXPORT_SYMBOL_GPL(dccp_feat_signal_nn_change); /* * Tracking features whose value depend on the choice of CCID @@ -1187,6 +1277,100 @@ confirmation_failed: } /** + * dccp_feat_handle_nn_established - Fast-path reception of NN options + * @sk: socket of an established DCCP connection + * @mandatory: whether @opt was preceded by a Mandatory option + * @opt: %DCCPO_CHANGE_L | %DCCPO_CONFIRM_R (NN only) + * @feat: NN number, one of %dccp_feature_numbers + * @val: NN value + * @len: length of @val in bytes + * This function combines the functionality of change_recv/confirm_recv, with + * the following differences (reset codes are the same): + * - cleanup after receiving the Confirm; + * - values are directly activated after successful parsing; + * - deliberately restricted to NN features. + * The restriction to NN features is essential since SP features can have non- + * predictable outcomes (depending on the remote configuration), and are inter- + * dependent (CCIDs for instance cause further dependencies). + */ +static u8 dccp_feat_handle_nn_established(struct sock *sk, u8 mandatory, u8 opt, + u8 feat, u8 *val, u8 len) +{ + struct list_head *fn = &dccp_sk(sk)->dccps_featneg; + const bool local = (opt == DCCPO_CONFIRM_R); + struct dccp_feat_entry *entry; + u8 type = dccp_feat_type(feat); + dccp_feat_val fval; + + dccp_feat_print_opt(opt, feat, val, len, mandatory); + + /* Ignore non-mandatory unknown and non-NN features */ + if (type == FEAT_UNKNOWN) { + if (local && !mandatory) + return 0; + goto fast_path_unknown; + } else if (type != FEAT_NN) { + return 0; + } + + /* + * We don't accept empty Confirms, since in fast-path feature + * negotiation the values are enabled immediately after sending + * the Change option. + * Empty Changes on the other hand are invalid (RFC 4340, 6.1). + */ + if (len == 0 || len > sizeof(fval.nn)) + goto fast_path_unknown; + + if (opt == DCCPO_CHANGE_L) { + fval.nn = dccp_decode_value_var(val, len); + if (!dccp_feat_is_valid_nn_val(feat, fval.nn)) + goto fast_path_unknown; + + if (dccp_feat_push_confirm(fn, feat, local, &fval) || + dccp_feat_activate(sk, feat, local, &fval)) + return DCCP_RESET_CODE_TOO_BUSY; + + /* set the `Ack Pending' flag to piggyback a Confirm */ + inet_csk_schedule_ack(sk); + + } else if (opt == DCCPO_CONFIRM_R) { + entry = dccp_feat_list_lookup(fn, feat, local); + if (entry == NULL || entry->state != FEAT_CHANGING) + return 0; + + fval.nn = dccp_decode_value_var(val, len); + /* + * Just ignore a value that doesn't match our current value. + * If the option changes twice within two RTTs, then at least + * one CONFIRM will be received for the old value after a + * new CHANGE was sent. + */ + if (fval.nn != entry->val.nn) + return 0; + + /* Only activate after receiving the Confirm option (6.6.1). */ + dccp_feat_activate(sk, feat, local, &fval); + + /* It has been confirmed - so remove the entry */ + dccp_feat_list_pop(entry); + + } else { + DCCP_WARN("Received illegal option %u\n", opt); + goto fast_path_failed; + } + return 0; + +fast_path_unknown: + if (!mandatory) + return dccp_push_empty_confirm(fn, feat, local); + +fast_path_failed: + return mandatory ? DCCP_RESET_CODE_MANDATORY_ERROR + : DCCP_RESET_CODE_OPTION_ERROR; +} + +/** * dccp_feat_parse_options - Process Feature-Negotiation Options * @sk: for general use and used by the client during connection setup * @dreq: used by the server during connection setup @@ -1221,6 +1405,14 @@ int dccp_feat_parse_options(struct sock *sk, struct dccp_request_sock *dreq, return dccp_feat_confirm_recv(fn, mandatory, opt, feat, val, len, server); } + break; + /* + * Support for exchanging NN options on an established connection. + */ + case DCCP_OPEN: + case DCCP_PARTOPEN: + return dccp_feat_handle_nn_established(sk, mandatory, opt, feat, + val, len); } return 0; /* ignore FN options in all other states */ } diff --git a/net/dccp/feat.h b/net/dccp/feat.h index e56a4e5e634e..90b957d34d26 100644 --- a/net/dccp/feat.h +++ b/net/dccp/feat.h @@ -129,6 +129,7 @@ extern int dccp_feat_clone_list(struct list_head const *, struct list_head *); extern void dccp_encode_value_var(const u64 value, u8 *to, const u8 len); extern u64 dccp_decode_value_var(const u8 *bf, const u8 len); +extern u64 dccp_feat_nn_get(struct sock *sk, u8 feat); extern int dccp_insert_option_mandatory(struct sk_buff *skb); extern int dccp_insert_fn_opt(struct sk_buff *skb, u8 type, u8 feat, diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 8c36adfd1919..332639b56f4d 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -26,6 +26,7 @@ #include <net/timewait_sock.h> #include <net/tcp_states.h> #include <net/xfrm.h> +#include <net/secure_seq.h> #include "ackvec.h" #include "ccid.h" diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 8dc4348774a5..b74f76117dcf 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -29,6 +29,7 @@ #include <net/transp_v6.h> #include <net/ip6_checksum.h> #include <net/xfrm.h> +#include <net/secure_seq.h> #include "dccp.h" #include "ipv6.h" @@ -69,13 +70,7 @@ static inline void dccp_v6_send_check(struct sock *sk, struct sk_buff *skb) dh->dccph_checksum = dccp_v6_csum_finish(skb, &np->saddr, &np->daddr); } -static inline __u32 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr, - __be16 sport, __be16 dport ) -{ - return secure_tcpv6_sequence_number(saddr, daddr, sport, dport); -} - -static inline __u32 dccp_v6_init_sequence(struct sk_buff *skb) +static inline __u64 dccp_v6_init_sequence(struct sk_buff *skb) { return secure_dccpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32, ipv6_hdr(skb)->saddr.s6_addr32, diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 152975d942d9..e742f90a6858 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -184,7 +184,6 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized) dp->dccps_rate_last = jiffies; dp->dccps_role = DCCP_ROLE_UNDEFINED; dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT; - dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1; dp->dccps_tx_qlen = sysctl_dccp_tx_qlen; dccp_init_xmit_timers(sk); diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index ba4faceec405..2ab16e12520c 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -388,7 +388,7 @@ static int dn_dev_insert_ifa(struct dn_dev *dn_db, struct dn_ifaddr *ifa) } ifa->ifa_next = dn_db->ifa_list; - rcu_assign_pointer(dn_db->ifa_list, ifa); + RCU_INIT_POINTER(dn_db->ifa_list, ifa); dn_ifaddr_notify(RTM_NEWADDR, ifa); blocking_notifier_call_chain(&dnaddr_chain, NETDEV_UP, ifa); @@ -1093,7 +1093,7 @@ static struct dn_dev *dn_dev_create(struct net_device *dev, int *err) memcpy(&dn_db->parms, p, sizeof(struct dn_dev_parms)); - rcu_assign_pointer(dev->dn_ptr, dn_db); + RCU_INIT_POINTER(dev->dn_ptr, dn_db); dn_db->dev = dev; init_timer(&dn_db->timer); @@ -1101,7 +1101,7 @@ static struct dn_dev *dn_dev_create(struct net_device *dev, int *err) dn_db->neigh_parms = neigh_parms_alloc(dev, &dn_neigh_table); if (!dn_db->neigh_parms) { - rcu_assign_pointer(dev->dn_ptr, NULL); + RCU_INIT_POINTER(dev->dn_ptr, NULL); kfree(dn_db); return NULL; } diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 0a47b6c37038..56cf9b8e1c7c 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -301,7 +301,6 @@ static const struct net_device_ops dsa_netdev_ops = { .ndo_start_xmit = dsa_xmit, .ndo_change_rx_flags = dsa_slave_change_rx_flags, .ndo_set_rx_mode = dsa_slave_set_rx_mode, - .ndo_set_multicast_list = dsa_slave_set_rx_mode, .ndo_set_mac_address = dsa_slave_set_mac_address, .ndo_do_ioctl = dsa_slave_ioctl, }; @@ -314,7 +313,6 @@ static const struct net_device_ops edsa_netdev_ops = { .ndo_start_xmit = edsa_xmit, .ndo_change_rx_flags = dsa_slave_change_rx_flags, .ndo_set_rx_mode = dsa_slave_set_rx_mode, - .ndo_set_multicast_list = dsa_slave_set_rx_mode, .ndo_set_mac_address = dsa_slave_set_mac_address, .ndo_do_ioctl = dsa_slave_ioctl, }; @@ -327,7 +325,6 @@ static const struct net_device_ops trailer_netdev_ops = { .ndo_start_xmit = trailer_xmit, .ndo_change_rx_flags = dsa_slave_change_rx_flags, .ndo_set_rx_mode = dsa_slave_set_rx_mode, - .ndo_set_multicast_list = dsa_slave_set_rx_mode, .ndo_set_mac_address = dsa_slave_set_mac_address, .ndo_do_ioctl = dsa_slave_ioctl, }; diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c new file mode 100644 index 000000000000..19d6aefe97d4 --- /dev/null +++ b/net/ieee802154/6lowpan.c @@ -0,0 +1,891 @@ +/* + * Copyright 2011, Siemens AG + * written by Alexander Smirnov <alex.bluesman.smirnov@gmail.com> + */ + +/* + * Based on patches from Jon Smirl <jonsmirl@gmail.com> + * Copyright (c) 2011 Jon Smirl <jonsmirl@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +/* Jon's code is based on 6lowpan implementation for Contiki which is: + * Copyright (c) 2008, Swedish Institute of Computer Science. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the Institute nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#define DEBUG + +#include <linux/bitops.h> +#include <linux/if_arp.h> +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/netdevice.h> +#include <net/af_ieee802154.h> +#include <net/ieee802154.h> +#include <net/ieee802154_netdev.h> +#include <net/ipv6.h> + +#include "6lowpan.h" + +/* TTL uncompression values */ +static const u8 lowpan_ttl_values[] = {0, 1, 64, 255}; + +static LIST_HEAD(lowpan_devices); + +/* + * Uncompression of linklocal: + * 0 -> 16 bytes from packet + * 1 -> 2 bytes from prefix - bunch of zeroes and 8 from packet + * 2 -> 2 bytes from prefix - zeroes + 2 from packet + * 3 -> 2 bytes from prefix - infer 8 bytes from lladdr + * + * NOTE: => the uncompress function does change 0xf to 0x10 + * NOTE: 0x00 => no-autoconfig => unspecified + */ +static const u8 lowpan_unc_llconf[] = {0x0f, 0x28, 0x22, 0x20}; + +/* + * Uncompression of ctx-based: + * 0 -> 0 bits from packet [unspecified / reserved] + * 1 -> 8 bytes from prefix - bunch of zeroes and 8 from packet + * 2 -> 8 bytes from prefix - zeroes + 2 from packet + * 3 -> 8 bytes from prefix - infer 8 bytes from lladdr + */ +static const u8 lowpan_unc_ctxconf[] = {0x00, 0x88, 0x82, 0x80}; + +/* + * Uncompression of ctx-base + * 0 -> 0 bits from packet + * 1 -> 2 bytes from prefix - bunch of zeroes 5 from packet + * 2 -> 2 bytes from prefix - zeroes + 3 from packet + * 3 -> 2 bytes from prefix - infer 1 bytes from lladdr + */ +static const u8 lowpan_unc_mxconf[] = {0x0f, 0x25, 0x23, 0x21}; + +/* Link local prefix */ +static const u8 lowpan_llprefix[] = {0xfe, 0x80}; + +/* private device info */ +struct lowpan_dev_info { + struct net_device *real_dev; /* real WPAN device ptr */ + struct mutex dev_list_mtx; /* mutex for list ops */ +}; + +struct lowpan_dev_record { + struct net_device *ldev; + struct list_head list; +}; + +static inline struct +lowpan_dev_info *lowpan_dev_info(const struct net_device *dev) +{ + return netdev_priv(dev); +} + +static inline void lowpan_address_flip(u8 *src, u8 *dest) +{ + int i; + for (i = 0; i < IEEE802154_ADDR_LEN; i++) + (dest)[IEEE802154_ADDR_LEN - i - 1] = (src)[i]; +} + +/* list of all 6lowpan devices, uses for package delivering */ +/* print data in line */ +static inline void lowpan_raw_dump_inline(const char *caller, char *msg, + unsigned char *buf, int len) +{ +#ifdef DEBUG + if (msg) + pr_debug("(%s) %s: ", caller, msg); + print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_NONE, + 16, 1, buf, len, false); +#endif /* DEBUG */ +} + +/* + * print data in a table format: + * + * addr: xx xx xx xx xx xx + * addr: xx xx xx xx xx xx + * ... + */ +static inline void lowpan_raw_dump_table(const char *caller, char *msg, + unsigned char *buf, int len) +{ +#ifdef DEBUG + if (msg) + pr_debug("(%s) %s:\n", caller, msg); + print_hex_dump(KERN_DEBUG, "\t", DUMP_PREFIX_OFFSET, + 16, 1, buf, len, false); +#endif /* DEBUG */ +} + +static u8 +lowpan_compress_addr_64(u8 **hc06_ptr, u8 shift, const struct in6_addr *ipaddr, + const unsigned char *lladdr) +{ + u8 val = 0; + + if (is_addr_mac_addr_based(ipaddr, lladdr)) + val = 3; /* 0-bits */ + else if (lowpan_is_iid_16_bit_compressable(ipaddr)) { + /* compress IID to 16 bits xxxx::XXXX */ + memcpy(*hc06_ptr, &ipaddr->s6_addr16[7], 2); + *hc06_ptr += 2; + val = 2; /* 16-bits */ + } else { + /* do not compress IID => xxxx::IID */ + memcpy(*hc06_ptr, &ipaddr->s6_addr16[4], 8); + *hc06_ptr += 8; + val = 1; /* 64-bits */ + } + + return rol8(val, shift); +} + +static void +lowpan_uip_ds6_set_addr_iid(struct in6_addr *ipaddr, unsigned char *lladdr) +{ + memcpy(&ipaddr->s6_addr[8], lladdr, IEEE802154_ALEN); + /* second bit-flip (Universe/Local) is done according RFC2464 */ + ipaddr->s6_addr[8] ^= 0x02; +} + +/* + * Uncompress addresses based on a prefix and a postfix with zeroes in + * between. If the postfix is zero in length it will use the link address + * to configure the IP address (autoconf style). + * pref_post_count takes a byte where the first nibble specify prefix count + * and the second postfix count (NOTE: 15/0xf => 16 bytes copy). + */ +static int +lowpan_uncompress_addr(struct sk_buff *skb, struct in6_addr *ipaddr, + u8 const *prefix, u8 pref_post_count, unsigned char *lladdr) +{ + u8 prefcount = pref_post_count >> 4; + u8 postcount = pref_post_count & 0x0f; + + /* full nibble 15 => 16 */ + prefcount = (prefcount == 15 ? 16 : prefcount); + postcount = (postcount == 15 ? 16 : postcount); + + if (lladdr) + lowpan_raw_dump_inline(__func__, "linklocal address", + lladdr, IEEE802154_ALEN); + if (prefcount > 0) + memcpy(ipaddr, prefix, prefcount); + + if (prefcount + postcount < 16) + memset(&ipaddr->s6_addr[prefcount], 0, + 16 - (prefcount + postcount)); + + if (postcount > 0) { + memcpy(&ipaddr->s6_addr[16 - postcount], skb->data, postcount); + skb_pull(skb, postcount); + } else if (prefcount > 0) { + if (lladdr == NULL) + return -EINVAL; + + /* no IID based configuration if no prefix and no data */ + lowpan_uip_ds6_set_addr_iid(ipaddr, lladdr); + } + + pr_debug("(%s): uncompressing %d + %d => ", __func__, prefcount, + postcount); + lowpan_raw_dump_inline(NULL, NULL, ipaddr->s6_addr, 16); + + return 0; +} + +static u8 lowpan_fetch_skb_u8(struct sk_buff *skb) +{ + u8 ret; + + ret = skb->data[0]; + skb_pull(skb, 1); + + return ret; +} + +static int lowpan_header_create(struct sk_buff *skb, + struct net_device *dev, + unsigned short type, const void *_daddr, + const void *_saddr, unsigned len) +{ + u8 tmp, iphc0, iphc1, *hc06_ptr; + struct ipv6hdr *hdr; + const u8 *saddr = _saddr; + const u8 *daddr = _daddr; + u8 *head; + struct ieee802154_addr sa, da; + + if (type != ETH_P_IPV6) + return 0; + /* TODO: + * if this package isn't ipv6 one, where should it be routed? + */ + head = kzalloc(100, GFP_KERNEL); + if (head == NULL) + return -ENOMEM; + + hdr = ipv6_hdr(skb); + hc06_ptr = head + 2; + + pr_debug("(%s): IPv6 header dump:\n\tversion = %d\n\tlength = %d\n" + "\tnexthdr = 0x%02x\n\thop_lim = %d\n", __func__, + hdr->version, ntohs(hdr->payload_len), hdr->nexthdr, + hdr->hop_limit); + + lowpan_raw_dump_table(__func__, "raw skb network header dump", + skb_network_header(skb), sizeof(struct ipv6hdr)); + + if (!saddr) + saddr = dev->dev_addr; + + lowpan_raw_dump_inline(__func__, "saddr", (unsigned char *)saddr, 8); + + /* + * As we copy some bit-length fields, in the IPHC encoding bytes, + * we sometimes use |= + * If the field is 0, and the current bit value in memory is 1, + * this does not work. We therefore reset the IPHC encoding here + */ + iphc0 = LOWPAN_DISPATCH_IPHC; + iphc1 = 0; + + /* TODO: context lookup */ + + lowpan_raw_dump_inline(__func__, "daddr", (unsigned char *)daddr, 8); + + /* + * Traffic class, flow label + * If flow label is 0, compress it. If traffic class is 0, compress it + * We have to process both in the same time as the offset of traffic + * class depends on the presence of version and flow label + */ + + /* hc06 format of TC is ECN | DSCP , original one is DSCP | ECN */ + tmp = (hdr->priority << 4) | (hdr->flow_lbl[0] >> 4); + tmp = ((tmp & 0x03) << 6) | (tmp >> 2); + + if (((hdr->flow_lbl[0] & 0x0F) == 0) && + (hdr->flow_lbl[1] == 0) && (hdr->flow_lbl[2] == 0)) { + /* flow label can be compressed */ + iphc0 |= LOWPAN_IPHC_FL_C; + if ((hdr->priority == 0) && + ((hdr->flow_lbl[0] & 0xF0) == 0)) { + /* compress (elide) all */ + iphc0 |= LOWPAN_IPHC_TC_C; + } else { + /* compress only the flow label */ + *hc06_ptr = tmp; + hc06_ptr += 1; + } + } else { + /* Flow label cannot be compressed */ + if ((hdr->priority == 0) && + ((hdr->flow_lbl[0] & 0xF0) == 0)) { + /* compress only traffic class */ + iphc0 |= LOWPAN_IPHC_TC_C; + *hc06_ptr = (tmp & 0xc0) | (hdr->flow_lbl[0] & 0x0F); + memcpy(hc06_ptr + 1, &hdr->flow_lbl[1], 2); + hc06_ptr += 3; + } else { + /* compress nothing */ + memcpy(hc06_ptr, &hdr, 4); + /* replace the top byte with new ECN | DSCP format */ + *hc06_ptr = tmp; + hc06_ptr += 4; + } + } + + /* NOTE: payload length is always compressed */ + + /* Next Header is compress if UDP */ + if (hdr->nexthdr == UIP_PROTO_UDP) + iphc0 |= LOWPAN_IPHC_NH_C; + +/* TODO: next header compression */ + + if ((iphc0 & LOWPAN_IPHC_NH_C) == 0) { + *hc06_ptr = hdr->nexthdr; + hc06_ptr += 1; + } + + /* + * Hop limit + * if 1: compress, encoding is 01 + * if 64: compress, encoding is 10 + * if 255: compress, encoding is 11 + * else do not compress + */ + switch (hdr->hop_limit) { + case 1: + iphc0 |= LOWPAN_IPHC_TTL_1; + break; + case 64: + iphc0 |= LOWPAN_IPHC_TTL_64; + break; + case 255: + iphc0 |= LOWPAN_IPHC_TTL_255; + break; + default: + *hc06_ptr = hdr->hop_limit; + break; + } + + /* source address compression */ + if (is_addr_unspecified(&hdr->saddr)) { + pr_debug("(%s): source address is unspecified, setting SAC\n", + __func__); + iphc1 |= LOWPAN_IPHC_SAC; + /* TODO: context lookup */ + } else if (is_addr_link_local(&hdr->saddr)) { + pr_debug("(%s): source address is link-local\n", __func__); + iphc1 |= lowpan_compress_addr_64(&hc06_ptr, + LOWPAN_IPHC_SAM_BIT, &hdr->saddr, saddr); + } else { + pr_debug("(%s): send the full source address\n", __func__); + memcpy(hc06_ptr, &hdr->saddr.s6_addr16[0], 16); + hc06_ptr += 16; + } + + /* destination address compression */ + if (is_addr_mcast(&hdr->daddr)) { + pr_debug("(%s): destination address is multicast", __func__); + iphc1 |= LOWPAN_IPHC_M; + if (lowpan_is_mcast_addr_compressable8(&hdr->daddr)) { + pr_debug("compressed to 1 octet\n"); + iphc1 |= LOWPAN_IPHC_DAM_11; + /* use last byte */ + *hc06_ptr = hdr->daddr.s6_addr[15]; + hc06_ptr += 1; + } else if (lowpan_is_mcast_addr_compressable32(&hdr->daddr)) { + pr_debug("compressed to 4 octets\n"); + iphc1 |= LOWPAN_IPHC_DAM_10; + /* second byte + the last three */ + *hc06_ptr = hdr->daddr.s6_addr[1]; + memcpy(hc06_ptr + 1, &hdr->daddr.s6_addr[13], 3); + hc06_ptr += 4; + } else if (lowpan_is_mcast_addr_compressable48(&hdr->daddr)) { + pr_debug("compressed to 6 octets\n"); + iphc1 |= LOWPAN_IPHC_DAM_01; + /* second byte + the last five */ + *hc06_ptr = hdr->daddr.s6_addr[1]; + memcpy(hc06_ptr + 1, &hdr->daddr.s6_addr[11], 5); + hc06_ptr += 6; + } else { + pr_debug("using full address\n"); + iphc1 |= LOWPAN_IPHC_DAM_00; + memcpy(hc06_ptr, &hdr->daddr.s6_addr[0], 16); + hc06_ptr += 16; + } + } else { + pr_debug("(%s): destination address is unicast: ", __func__); + /* TODO: context lookup */ + if (is_addr_link_local(&hdr->daddr)) { + pr_debug("destination address is link-local\n"); + iphc1 |= lowpan_compress_addr_64(&hc06_ptr, + LOWPAN_IPHC_DAM_BIT, &hdr->daddr, daddr); + } else { + pr_debug("using full address\n"); + memcpy(hc06_ptr, &hdr->daddr.s6_addr16[0], 16); + hc06_ptr += 16; + } + } + + /* TODO: UDP header compression */ + /* TODO: Next Header compression */ + + head[0] = iphc0; + head[1] = iphc1; + + skb_pull(skb, sizeof(struct ipv6hdr)); + memcpy(skb_push(skb, hc06_ptr - head), head, hc06_ptr - head); + + kfree(head); + + lowpan_raw_dump_table(__func__, "raw skb data dump", skb->data, + skb->len); + + /* + * NOTE1: I'm still unsure about the fact that compression and WPAN + * header are created here and not later in the xmit. So wait for + * an opinion of net maintainers. + */ + /* + * NOTE2: to be absolutely correct, we must derive PANid information + * from MAC subif of the 'dev' and 'real_dev' network devices, but + * this isn't implemented in mainline yet, so currently we assign 0xff + */ + { + /* prepare wpan address data */ + sa.addr_type = IEEE802154_ADDR_LONG; + sa.pan_id = 0xff; + + da.addr_type = IEEE802154_ADDR_LONG; + da.pan_id = 0xff; + + memcpy(&(da.hwaddr), daddr, 8); + memcpy(&(sa.hwaddr), saddr, 8); + + mac_cb(skb)->flags = IEEE802154_FC_TYPE_DATA; + return dev_hard_header(skb, lowpan_dev_info(dev)->real_dev, + type, (void *)&da, (void *)&sa, skb->len); + } +} + +static int lowpan_skb_deliver(struct sk_buff *skb, struct ipv6hdr *hdr) +{ + struct sk_buff *new; + struct lowpan_dev_record *entry; + int stat = NET_RX_SUCCESS; + + new = skb_copy_expand(skb, sizeof(struct ipv6hdr), skb_tailroom(skb), + GFP_ATOMIC); + kfree_skb(skb); + + if (!new) + return -ENOMEM; + + skb_push(new, sizeof(struct ipv6hdr)); + skb_reset_network_header(new); + skb_copy_to_linear_data(new, hdr, sizeof(struct ipv6hdr)); + + new->protocol = htons(ETH_P_IPV6); + new->pkt_type = PACKET_HOST; + + rcu_read_lock(); + list_for_each_entry_rcu(entry, &lowpan_devices, list) + if (lowpan_dev_info(entry->ldev)->real_dev == new->dev) { + skb = skb_copy(new, GFP_ATOMIC); + if (!skb) { + stat = -ENOMEM; + break; + } + + skb->dev = entry->ldev; + stat = netif_rx(skb); + } + rcu_read_unlock(); + + kfree_skb(new); + + return stat; +} + +static int +lowpan_process_data(struct sk_buff *skb) +{ + struct ipv6hdr hdr; + u8 tmp, iphc0, iphc1, num_context = 0; + u8 *_saddr, *_daddr; + int err; + + lowpan_raw_dump_table(__func__, "raw skb data dump", skb->data, + skb->len); + /* at least two bytes will be used for the encoding */ + if (skb->len < 2) + goto drop; + iphc0 = lowpan_fetch_skb_u8(skb); + iphc1 = lowpan_fetch_skb_u8(skb); + + _saddr = mac_cb(skb)->sa.hwaddr; + _daddr = mac_cb(skb)->da.hwaddr; + + pr_debug("(%s): iphc0 = %02x, iphc1 = %02x\n", __func__, iphc0, iphc1); + + /* another if the CID flag is set */ + if (iphc1 & LOWPAN_IPHC_CID) { + pr_debug("(%s): CID flag is set, increase header with one\n", + __func__); + if (!skb->len) + goto drop; + num_context = lowpan_fetch_skb_u8(skb); + } + + hdr.version = 6; + + /* Traffic Class and Flow Label */ + switch ((iphc0 & LOWPAN_IPHC_TF) >> 3) { + /* + * Traffic Class and FLow Label carried in-line + * ECN + DSCP + 4-bit Pad + Flow Label (4 bytes) + */ + case 0: /* 00b */ + if (!skb->len) + goto drop; + tmp = lowpan_fetch_skb_u8(skb); + memcpy(&hdr.flow_lbl, &skb->data[0], 3); + skb_pull(skb, 3); + hdr.priority = ((tmp >> 2) & 0x0f); + hdr.flow_lbl[0] = ((tmp >> 2) & 0x30) | (tmp << 6) | + (hdr.flow_lbl[0] & 0x0f); + break; + /* + * Traffic class carried in-line + * ECN + DSCP (1 byte), Flow Label is elided + */ + case 1: /* 10b */ + if (!skb->len) + goto drop; + tmp = lowpan_fetch_skb_u8(skb); + hdr.priority = ((tmp >> 2) & 0x0f); + hdr.flow_lbl[0] = ((tmp << 6) & 0xC0) | ((tmp >> 2) & 0x30); + hdr.flow_lbl[1] = 0; + hdr.flow_lbl[2] = 0; + break; + /* + * Flow Label carried in-line + * ECN + 2-bit Pad + Flow Label (3 bytes), DSCP is elided + */ + case 2: /* 01b */ + if (!skb->len) + goto drop; + tmp = lowpan_fetch_skb_u8(skb); + hdr.flow_lbl[0] = (skb->data[0] & 0x0F) | ((tmp >> 2) & 0x30); + memcpy(&hdr.flow_lbl[1], &skb->data[0], 2); + skb_pull(skb, 2); + break; + /* Traffic Class and Flow Label are elided */ + case 3: /* 11b */ + hdr.priority = 0; + hdr.flow_lbl[0] = 0; + hdr.flow_lbl[1] = 0; + hdr.flow_lbl[2] = 0; + break; + default: + break; + } + + /* Next Header */ + if ((iphc0 & LOWPAN_IPHC_NH_C) == 0) { + /* Next header is carried inline */ + if (!skb->len) + goto drop; + hdr.nexthdr = lowpan_fetch_skb_u8(skb); + pr_debug("(%s): NH flag is set, next header is carried " + "inline: %02x\n", __func__, hdr.nexthdr); + } + + /* Hop Limit */ + if ((iphc0 & 0x03) != LOWPAN_IPHC_TTL_I) + hdr.hop_limit = lowpan_ttl_values[iphc0 & 0x03]; + else { + if (!skb->len) + goto drop; + hdr.hop_limit = lowpan_fetch_skb_u8(skb); + } + + /* Extract SAM to the tmp variable */ + tmp = ((iphc1 & LOWPAN_IPHC_SAM) >> LOWPAN_IPHC_SAM_BIT) & 0x03; + + /* Source address uncompression */ + pr_debug("(%s): source address stateless compression\n", __func__); + err = lowpan_uncompress_addr(skb, &hdr.saddr, lowpan_llprefix, + lowpan_unc_llconf[tmp], skb->data); + if (err) + goto drop; + + /* Extract DAM to the tmp variable */ + tmp = ((iphc1 & LOWPAN_IPHC_DAM_11) >> LOWPAN_IPHC_DAM_BIT) & 0x03; + + /* check for Multicast Compression */ + if (iphc1 & LOWPAN_IPHC_M) { + if (iphc1 & LOWPAN_IPHC_DAC) { + pr_debug("(%s): destination address context-based " + "multicast compression\n", __func__); + /* TODO: implement this */ + } else { + u8 prefix[] = {0xff, 0x02}; + + pr_debug("(%s): destination address non-context-based" + " multicast compression\n", __func__); + if (0 < tmp && tmp < 3) { + if (!skb->len) + goto drop; + else + prefix[1] = lowpan_fetch_skb_u8(skb); + } + + err = lowpan_uncompress_addr(skb, &hdr.daddr, prefix, + lowpan_unc_mxconf[tmp], NULL); + if (err) + goto drop; + } + } else { + pr_debug("(%s): destination address stateless compression\n", + __func__); + err = lowpan_uncompress_addr(skb, &hdr.daddr, lowpan_llprefix, + lowpan_unc_llconf[tmp], skb->data); + if (err) + goto drop; + } + + /* TODO: UDP header parse */ + + /* Not fragmented package */ + hdr.payload_len = htons(skb->len); + + pr_debug("(%s): skb headroom size = %d, data length = %d\n", __func__, + skb_headroom(skb), skb->len); + + pr_debug("(%s): IPv6 header dump:\n\tversion = %d\n\tlength = %d\n\t" + "nexthdr = 0x%02x\n\thop_lim = %d\n", __func__, hdr.version, + ntohs(hdr.payload_len), hdr.nexthdr, hdr.hop_limit); + + lowpan_raw_dump_table(__func__, "raw header dump", (u8 *)&hdr, + sizeof(hdr)); + return lowpan_skb_deliver(skb, &hdr); +drop: + kfree_skb(skb); + return -EINVAL; +} + +static int lowpan_set_address(struct net_device *dev, void *p) +{ + struct sockaddr *sa = p; + + if (netif_running(dev)) + return -EBUSY; + + /* TODO: validate addr */ + memcpy(dev->dev_addr, sa->sa_data, dev->addr_len); + + return 0; +} + +static netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *dev) +{ + int err = 0; + + pr_debug("(%s): package xmit\n", __func__); + + skb->dev = lowpan_dev_info(dev)->real_dev; + if (skb->dev == NULL) { + pr_debug("(%s) ERROR: no real wpan device found\n", __func__); + dev_kfree_skb(skb); + } else + err = dev_queue_xmit(skb); + + return (err < 0 ? NETDEV_TX_BUSY : NETDEV_TX_OK); +} + +static void lowpan_dev_free(struct net_device *dev) +{ + dev_put(lowpan_dev_info(dev)->real_dev); + free_netdev(dev); +} + +static struct header_ops lowpan_header_ops = { + .create = lowpan_header_create, +}; + +static const struct net_device_ops lowpan_netdev_ops = { + .ndo_start_xmit = lowpan_xmit, + .ndo_set_mac_address = lowpan_set_address, +}; + +static void lowpan_setup(struct net_device *dev) +{ + pr_debug("(%s)\n", __func__); + + dev->addr_len = IEEE802154_ADDR_LEN; + memset(dev->broadcast, 0xff, IEEE802154_ADDR_LEN); + dev->type = ARPHRD_IEEE802154; + dev->features = NETIF_F_NO_CSUM; + /* Frame Control + Sequence Number + Address fields + Security Header */ + dev->hard_header_len = 2 + 1 + 20 + 14; + dev->needed_tailroom = 2; /* FCS */ + dev->mtu = 1281; + dev->tx_queue_len = 0; + dev->flags = IFF_NOARP | IFF_BROADCAST; + dev->watchdog_timeo = 0; + + dev->netdev_ops = &lowpan_netdev_ops; + dev->header_ops = &lowpan_header_ops; + dev->destructor = lowpan_dev_free; +} + +static int lowpan_validate(struct nlattr *tb[], struct nlattr *data[]) +{ + pr_debug("(%s)\n", __func__); + + if (tb[IFLA_ADDRESS]) { + if (nla_len(tb[IFLA_ADDRESS]) != IEEE802154_ADDR_LEN) + return -EINVAL; + } + return 0; +} + +static int lowpan_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev) +{ + if (!netif_running(dev)) + goto drop; + + if (dev->type != ARPHRD_IEEE802154) + goto drop; + + /* check that it's our buffer */ + if ((skb->data[0] & 0xe0) == 0x60) + lowpan_process_data(skb); + + return NET_RX_SUCCESS; + +drop: + kfree_skb(skb); + return NET_RX_DROP; +} + +static int lowpan_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) +{ + struct net_device *real_dev; + struct lowpan_dev_record *entry; + + pr_debug("(%s)\n", __func__); + + if (!tb[IFLA_LINK]) + return -EINVAL; + /* find and hold real wpan device */ + real_dev = dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK])); + if (!real_dev) + return -ENODEV; + + lowpan_dev_info(dev)->real_dev = real_dev; + mutex_init(&lowpan_dev_info(dev)->dev_list_mtx); + + entry = kzalloc(sizeof(struct lowpan_dev_record), GFP_KERNEL); + if (!entry) { + dev_put(real_dev); + lowpan_dev_info(dev)->real_dev = NULL; + return -ENOMEM; + } + + entry->ldev = dev; + + mutex_lock(&lowpan_dev_info(dev)->dev_list_mtx); + INIT_LIST_HEAD(&entry->list); + list_add_tail(&entry->list, &lowpan_devices); + mutex_unlock(&lowpan_dev_info(dev)->dev_list_mtx); + + register_netdevice(dev); + + return 0; +} + +static void lowpan_dellink(struct net_device *dev, struct list_head *head) +{ + struct lowpan_dev_info *lowpan_dev = lowpan_dev_info(dev); + struct net_device *real_dev = lowpan_dev->real_dev; + struct lowpan_dev_record *entry; + struct lowpan_dev_record *tmp; + + ASSERT_RTNL(); + + mutex_lock(&lowpan_dev_info(dev)->dev_list_mtx); + list_for_each_entry_safe(entry, tmp, &lowpan_devices, list) { + if (entry->ldev == dev) { + list_del(&entry->list); + kfree(entry); + } + } + mutex_unlock(&lowpan_dev_info(dev)->dev_list_mtx); + + mutex_destroy(&lowpan_dev_info(dev)->dev_list_mtx); + + unregister_netdevice_queue(dev, head); + + dev_put(real_dev); +} + +static struct rtnl_link_ops lowpan_link_ops __read_mostly = { + .kind = "lowpan", + .priv_size = sizeof(struct lowpan_dev_info), + .setup = lowpan_setup, + .newlink = lowpan_newlink, + .dellink = lowpan_dellink, + .validate = lowpan_validate, +}; + +static inline int __init lowpan_netlink_init(void) +{ + return rtnl_link_register(&lowpan_link_ops); +} + +static inline void __init lowpan_netlink_fini(void) +{ + rtnl_link_unregister(&lowpan_link_ops); +} + +static struct packet_type lowpan_packet_type = { + .type = __constant_htons(ETH_P_IEEE802154), + .func = lowpan_rcv, +}; + +static int __init lowpan_init_module(void) +{ + int err = 0; + + pr_debug("(%s)\n", __func__); + + err = lowpan_netlink_init(); + if (err < 0) + goto out; + + dev_add_pack(&lowpan_packet_type); +out: + return err; +} + +static void __exit lowpan_cleanup_module(void) +{ + pr_debug("(%s)\n", __func__); + + lowpan_netlink_fini(); + + dev_remove_pack(&lowpan_packet_type); +} + +module_init(lowpan_init_module); +module_exit(lowpan_cleanup_module); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_RTNL_LINK("lowpan"); diff --git a/net/ieee802154/6lowpan.h b/net/ieee802154/6lowpan.h new file mode 100644 index 000000000000..5d8cf80b930d --- /dev/null +++ b/net/ieee802154/6lowpan.h @@ -0,0 +1,212 @@ +/* + * Copyright 2011, Siemens AG + * written by Alexander Smirnov <alex.bluesman.smirnov@gmail.com> + */ + +/* + * Based on patches from Jon Smirl <jonsmirl@gmail.com> + * Copyright (c) 2011 Jon Smirl <jonsmirl@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +/* Jon's code is based on 6lowpan implementation for Contiki which is: + * Copyright (c) 2008, Swedish Institute of Computer Science. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the Institute nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef __6LOWPAN_H__ +#define __6LOWPAN_H__ + +/* need to know address length to manipulate with it */ +#define IEEE802154_ALEN 8 + +#define UIP_802154_SHORTADDR_LEN 2 /* compressed ipv6 address length */ +#define UIP_IPH_LEN 40 /* ipv6 fixed header size */ +#define UIP_PROTO_UDP 17 /* ipv6 next header value for UDP */ +#define UIP_FRAGH_LEN 8 /* ipv6 fragment header size */ + +/* + * ipv6 address based on mac + * second bit-flip (Universe/Local) is done according RFC2464 + */ +#define is_addr_mac_addr_based(a, m) \ + ((((a)->s6_addr[8]) == (((m)[0]) ^ 0x02)) && \ + (((a)->s6_addr[9]) == (m)[1]) && \ + (((a)->s6_addr[10]) == (m)[2]) && \ + (((a)->s6_addr[11]) == (m)[3]) && \ + (((a)->s6_addr[12]) == (m)[4]) && \ + (((a)->s6_addr[13]) == (m)[5]) && \ + (((a)->s6_addr[14]) == (m)[6]) && \ + (((a)->s6_addr[15]) == (m)[7])) + +/* ipv6 address is unspecified */ +#define is_addr_unspecified(a) \ + ((((a)->s6_addr32[0]) == 0) && \ + (((a)->s6_addr32[1]) == 0) && \ + (((a)->s6_addr32[2]) == 0) && \ + (((a)->s6_addr32[3]) == 0)) + +/* compare ipv6 addresses prefixes */ +#define ipaddr_prefixcmp(addr1, addr2, length) \ + (memcmp(addr1, addr2, length >> 3) == 0) + +/* local link, i.e. FE80::/10 */ +#define is_addr_link_local(a) (((a)->s6_addr16[0]) == 0x80FE) + +/* + * check whether we can compress the IID to 16 bits, + * it's possible for unicast adresses with first 49 bits are zero only. + */ +#define lowpan_is_iid_16_bit_compressable(a) \ + ((((a)->s6_addr16[4]) == 0) && \ + (((a)->s6_addr16[5]) == 0) && \ + (((a)->s6_addr16[6]) == 0) && \ + ((((a)->s6_addr[14]) & 0x80) == 0)) + +/* multicast address */ +#define is_addr_mcast(a) (((a)->s6_addr[0]) == 0xFF) + +/* check whether the 112-bit gid of the multicast address is mappable to: */ + +/* 9 bits, for FF02::1 (all nodes) and FF02::2 (all routers) addresses only. */ +#define lowpan_is_mcast_addr_compressable(a) \ + ((((a)->s6_addr16[1]) == 0) && \ + (((a)->s6_addr16[2]) == 0) && \ + (((a)->s6_addr16[3]) == 0) && \ + (((a)->s6_addr16[4]) == 0) && \ + (((a)->s6_addr16[5]) == 0) && \ + (((a)->s6_addr16[6]) == 0) && \ + (((a)->s6_addr[14]) == 0) && \ + ((((a)->s6_addr[15]) == 1) || (((a)->s6_addr[15]) == 2))) + +/* 48 bits, FFXX::00XX:XXXX:XXXX */ +#define lowpan_is_mcast_addr_compressable48(a) \ + ((((a)->s6_addr16[1]) == 0) && \ + (((a)->s6_addr16[2]) == 0) && \ + (((a)->s6_addr16[3]) == 0) && \ + (((a)->s6_addr16[4]) == 0) && \ + (((a)->s6_addr[10]) == 0)) + +/* 32 bits, FFXX::00XX:XXXX */ +#define lowpan_is_mcast_addr_compressable32(a) \ + ((((a)->s6_addr16[1]) == 0) && \ + (((a)->s6_addr16[2]) == 0) && \ + (((a)->s6_addr16[3]) == 0) && \ + (((a)->s6_addr16[4]) == 0) && \ + (((a)->s6_addr16[5]) == 0) && \ + (((a)->s6_addr[12]) == 0)) + +/* 8 bits, FF02::00XX */ +#define lowpan_is_mcast_addr_compressable8(a) \ + ((((a)->s6_addr[1]) == 2) && \ + (((a)->s6_addr16[1]) == 0) && \ + (((a)->s6_addr16[2]) == 0) && \ + (((a)->s6_addr16[3]) == 0) && \ + (((a)->s6_addr16[4]) == 0) && \ + (((a)->s6_addr16[5]) == 0) && \ + (((a)->s6_addr16[6]) == 0) && \ + (((a)->s6_addr[14]) == 0)) + +#define lowpan_is_addr_broadcast(a) \ + ((((a)[0]) == 0xFF) && \ + (((a)[1]) == 0xFF) && \ + (((a)[2]) == 0xFF) && \ + (((a)[3]) == 0xFF) && \ + (((a)[4]) == 0xFF) && \ + (((a)[5]) == 0xFF) && \ + (((a)[6]) == 0xFF) && \ + (((a)[7]) == 0xFF)) + +#define LOWPAN_DISPATCH_IPV6 0x41 /* 01000001 = 65 */ +#define LOWPAN_DISPATCH_HC1 0x42 /* 01000010 = 66 */ +#define LOWPAN_DISPATCH_IPHC 0x60 /* 011xxxxx = ... */ +#define LOWPAN_DISPATCH_FRAG1 0xc0 /* 11000xxx */ +#define LOWPAN_DISPATCH_FRAGN 0xe0 /* 11100xxx */ + +/* + * Values of fields within the IPHC encoding first byte + * (C stands for compressed and I for inline) + */ +#define LOWPAN_IPHC_TF 0x18 + +#define LOWPAN_IPHC_FL_C 0x10 +#define LOWPAN_IPHC_TC_C 0x08 +#define LOWPAN_IPHC_NH_C 0x04 +#define LOWPAN_IPHC_TTL_1 0x01 +#define LOWPAN_IPHC_TTL_64 0x02 +#define LOWPAN_IPHC_TTL_255 0x03 +#define LOWPAN_IPHC_TTL_I 0x00 + + +/* Values of fields within the IPHC encoding second byte */ +#define LOWPAN_IPHC_CID 0x80 + +#define LOWPAN_IPHC_SAC 0x40 +#define LOWPAN_IPHC_SAM_00 0x00 +#define LOWPAN_IPHC_SAM_01 0x10 +#define LOWPAN_IPHC_SAM_10 0x20 +#define LOWPAN_IPHC_SAM 0x30 + +#define LOWPAN_IPHC_SAM_BIT 4 + +#define LOWPAN_IPHC_M 0x08 +#define LOWPAN_IPHC_DAC 0x04 +#define LOWPAN_IPHC_DAM_00 0x00 +#define LOWPAN_IPHC_DAM_01 0x01 +#define LOWPAN_IPHC_DAM_10 0x02 +#define LOWPAN_IPHC_DAM_11 0x03 + +#define LOWPAN_IPHC_DAM_BIT 0 +/* + * LOWPAN_UDP encoding (works together with IPHC) + */ +#define LOWPAN_NHC_UDP_MASK 0xF8 +#define LOWPAN_NHC_UDP_ID 0xF0 +#define LOWPAN_NHC_UDP_CHECKSUMC 0x04 +#define LOWPAN_NHC_UDP_CHECKSUMI 0x00 + +/* values for port compression, _with checksum_ ie bit 5 set to 0 */ +#define LOWPAN_NHC_UDP_CS_P_00 0xF0 /* all inline */ +#define LOWPAN_NHC_UDP_CS_P_01 0xF1 /* source 16bit inline, + dest = 0xF0 + 8 bit inline */ +#define LOWPAN_NHC_UDP_CS_P_10 0xF2 /* source = 0xF0 + 8bit inline, + dest = 16 bit inline */ +#define LOWPAN_NHC_UDP_CS_P_11 0xF3 /* source & dest = 0xF0B + 4bit inline */ + +#endif /* __6LOWPAN_H__ */ diff --git a/net/ieee802154/Kconfig b/net/ieee802154/Kconfig index 1c1de97d264a..7dee65052925 100644 --- a/net/ieee802154/Kconfig +++ b/net/ieee802154/Kconfig @@ -10,3 +10,9 @@ config IEEE802154 Say Y here to compile LR-WPAN support into the kernel or say M to compile it as modules. + +config IEEE802154_6LOWPAN + tristate "6lowpan support over IEEE 802.15.4" + depends on IEEE802154 && IPV6 + ---help--- + IPv6 compression over IEEE 802.15.4. diff --git a/net/ieee802154/Makefile b/net/ieee802154/Makefile index 5761185f884e..d7716d64c6bb 100644 --- a/net/ieee802154/Makefile +++ b/net/ieee802154/Makefile @@ -1,3 +1,5 @@ -obj-$(CONFIG_IEEE802154) += ieee802154.o af_802154.o -ieee802154-y := netlink.o nl-mac.o nl-phy.o nl_policy.o wpan-class.o -af_802154-y := af_ieee802154.o raw.o dgram.o +obj-$(CONFIG_IEEE802154) += ieee802154.o af_802154.o +obj-$(CONFIG_IEEE802154_6LOWPAN) += 6lowpan.o + +ieee802154-y := netlink.o nl-mac.o nl-phy.o nl_policy.o wpan-class.o +af_802154-y := af_ieee802154.o raw.o dgram.o diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index bc19bd06dd00..c6b5092f29a1 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -258,7 +258,7 @@ static struct in_device *inetdev_init(struct net_device *dev) ip_mc_up(in_dev); /* we can receive as soon as ip_ptr is set -- do this last */ - rcu_assign_pointer(dev->ip_ptr, in_dev); + RCU_INIT_POINTER(dev->ip_ptr, in_dev); out: return in_dev; out_kfree: @@ -291,7 +291,7 @@ static void inetdev_destroy(struct in_device *in_dev) inet_free_ifa(ifa); } - rcu_assign_pointer(dev->ip_ptr, NULL); + RCU_INIT_POINTER(dev->ip_ptr, NULL); devinet_sysctl_unregister(in_dev); neigh_parms_release(&arp_tbl, in_dev->arp_parms); @@ -1175,7 +1175,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, switch (event) { case NETDEV_REGISTER: printk(KERN_DEBUG "inetdev_event: bug\n"); - rcu_assign_pointer(dev->ip_ptr, NULL); + RCU_INIT_POINTER(dev->ip_ptr, NULL); break; case NETDEV_UP: if (!inetdev_valid_mtu(dev->mtu)) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index de9e2978476f..89d6f71a6a99 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -204,7 +204,7 @@ static inline struct tnode *node_parent_rcu(const struct rt_trie_node *node) return (struct tnode *)(parent & ~NODE_TYPE_MASK); } -/* Same as rcu_assign_pointer +/* Same as RCU_INIT_POINTER * but that macro() assumes that value is a pointer. */ static inline void node_set_parent(struct rt_trie_node *node, struct tnode *ptr) @@ -528,7 +528,7 @@ static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node * if (n) node_set_parent(n, tn); - rcu_assign_pointer(tn->child[i], n); + RCU_INIT_POINTER(tn->child[i], n); } #define MAX_WORK 10 @@ -1014,7 +1014,7 @@ static void trie_rebalance(struct trie *t, struct tnode *tn) tp = node_parent((struct rt_trie_node *) tn); if (!tp) - rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); + RCU_INIT_POINTER(t->trie, (struct rt_trie_node *)tn); tnode_free_flush(); if (!tp) @@ -1026,7 +1026,7 @@ static void trie_rebalance(struct trie *t, struct tnode *tn) if (IS_TNODE(tn)) tn = (struct tnode *)resize(t, (struct tnode *)tn); - rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); + RCU_INIT_POINTER(t->trie, (struct rt_trie_node *)tn); tnode_free_flush(); } @@ -1163,7 +1163,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) put_child(t, (struct tnode *)tp, cindex, (struct rt_trie_node *)tn); } else { - rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); + RCU_INIT_POINTER(t->trie, (struct rt_trie_node *)tn); tp = tn; } } @@ -1621,7 +1621,7 @@ static void trie_leaf_remove(struct trie *t, struct leaf *l) put_child(t, (struct tnode *)tp, cindex, NULL); trie_rebalance(t, tp); } else - rcu_assign_pointer(t->trie, NULL); + RCU_INIT_POINTER(t->trie, NULL); free_leaf(l); } diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c index dbfc21de3479..8cb1ebb7cd74 100644 --- a/net/ipv4/gre.c +++ b/net/ipv4/gre.c @@ -34,7 +34,7 @@ int gre_add_protocol(const struct gre_protocol *proto, u8 version) if (gre_proto[version]) goto err_out_unlock; - rcu_assign_pointer(gre_proto[version], proto); + RCU_INIT_POINTER(gre_proto[version], proto); spin_unlock(&gre_proto_lock); return 0; @@ -54,7 +54,7 @@ int gre_del_protocol(const struct gre_protocol *proto, u8 version) if (rcu_dereference_protected(gre_proto[version], lockdep_is_held(&gre_proto_lock)) != proto) goto err_out_unlock; - rcu_assign_pointer(gre_proto[version], NULL); + RCU_INIT_POINTER(gre_proto[version], NULL); spin_unlock(&gre_proto_lock); synchronize_rcu(); return 0; diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index f1d27f6c9351..ce57bdee14cb 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1009,7 +1009,7 @@ static void ip_mc_filter_add(struct in_device *in_dev, __be32 addr) /* Checking for IFF_MULTICAST here is WRONG-WRONG-WRONG. We will get multicast token leakage, when IFF_MULTICAST - is changed. This check should be done in dev->set_multicast_list + is changed. This check should be done in ndo_set_rx_mode routine. Something sort of: if (dev->mc_list && dev->flags&IFF_MULTICAST) { do it; } --ANK @@ -1242,7 +1242,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) im->next_rcu = in_dev->mc_list; in_dev->mc_count++; - rcu_assign_pointer(in_dev->mc_list, im); + RCU_INIT_POINTER(in_dev->mc_list, im); #ifdef CONFIG_IP_MULTICAST igmpv3_del_delrec(in_dev, im->multiaddr); @@ -1718,7 +1718,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, pmc->sfcount[sfmode]--; for (j=0; j<i; j++) - (void) ip_mc_del1_src(pmc, sfmode, &psfsrc[i]); + (void) ip_mc_del1_src(pmc, sfmode, &psfsrc[j]); } else if (isexclude != (pmc->sfcount[MCAST_EXCLUDE] != 0)) { #ifdef CONFIG_IP_MULTICAST struct ip_sf_list *psf; @@ -1813,7 +1813,7 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr) iml->next_rcu = inet->mc_list; iml->sflist = NULL; iml->sfmode = MCAST_EXCLUDE; - rcu_assign_pointer(inet->mc_list, iml); + RCU_INIT_POINTER(inet->mc_list, iml); ip_mc_inc_group(in_dev, addr); err = 0; done: @@ -1835,7 +1835,7 @@ static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml, } err = ip_mc_del_src(in_dev, &iml->multi.imr_multiaddr.s_addr, iml->sfmode, psf->sl_count, psf->sl_addr, 0); - rcu_assign_pointer(iml->sflist, NULL); + RCU_INIT_POINTER(iml->sflist, NULL); /* decrease mem now to avoid the memleak warning */ atomic_sub(IP_SFLSIZE(psf->sl_max), &sk->sk_omem_alloc); kfree_rcu(psf, rcu); @@ -2000,7 +2000,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc); kfree_rcu(psl, rcu); } - rcu_assign_pointer(pmc->sflist, newpsl); + RCU_INIT_POINTER(pmc->sflist, newpsl); psl = newpsl; } rv = 1; /* > 0 for insert logic below if sl_count is 0 */ @@ -2103,7 +2103,7 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex) } else (void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode, 0, NULL, 0); - rcu_assign_pointer(pmc->sflist, newpsl); + RCU_INIT_POINTER(pmc->sflist, newpsl); pmc->sfmode = msf->imsf_fmode; err = 0; done: diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 3c0369a3a663..984ec656b03b 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -21,6 +21,7 @@ #include <net/inet_connection_sock.h> #include <net/inet_hashtables.h> +#include <net/secure_seq.h> #include <net/ip.h> /* diff --git a/net/ipv4/inet_lro.c b/net/ipv4/inet_lro.c index ef7ae6049a51..8e6be5aad115 100644 --- a/net/ipv4/inet_lro.c +++ b/net/ipv4/inet_lro.c @@ -433,7 +433,7 @@ static struct sk_buff *__lro_proc_segment(struct net_lro_mgr *lro_mgr, if (!lro_mgr->get_frag_header || lro_mgr->get_frag_header(frags, (void *)&mac_hdr, (void *)&iph, (void *)&tcph, &flags, priv)) { - mac_hdr = page_address(frags->page) + frags->page_offset; + mac_hdr = skb_frag_address(frags); goto out1; } diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index e38213817d0a..86f13c67ea85 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -19,6 +19,7 @@ #include <linux/net.h> #include <net/ip.h> #include <net/inetpeer.h> +#include <net/secure_seq.h> /* * Theory of operations. diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index ccaaa851ab42..ae3bb147affd 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -122,6 +122,7 @@ static int ip_dev_loopback_xmit(struct sk_buff *newskb) newskb->pkt_type = PACKET_LOOPBACK; newskb->ip_summed = CHECKSUM_UNNECESSARY; WARN_ON(!skb_dst(newskb)); + skb_dst_force(newskb); netif_rx_ni(newskb); return 0; } @@ -204,9 +205,15 @@ static inline int ip_finish_output2(struct sk_buff *skb) skb = skb2; } + rcu_read_lock(); neigh = dst_get_neighbour(dst); - if (neigh) - return neigh_output(neigh, skb); + if (neigh) { + int res = neigh_output(neigh, skb); + + rcu_read_unlock(); + return res; + } + rcu_read_unlock(); if (net_ratelimit()) printk(KERN_DEBUG "ip_finish_output2: No header cache and no neighbour!\n"); @@ -982,13 +989,13 @@ alloc_new_skb: if (page && (left = PAGE_SIZE - off) > 0) { if (copy >= left) copy = left; - if (page != frag->page) { + if (page != skb_frag_page(frag)) { if (i == MAX_SKB_FRAGS) { err = -EMSGSIZE; goto error; } - get_page(page); skb_fill_page_desc(skb, i, page, off, 0); + skb_frag_ref(skb, i); frag = &skb_shinfo(skb)->frags[i]; } } else if (i < MAX_SKB_FRAGS) { @@ -1008,7 +1015,8 @@ alloc_new_skb: err = -EMSGSIZE; goto error; } - if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) { + if (getfrag(from, skb_frag_address(frag)+frag->size, + offset, copy, skb->len, skb) < 0) { err = -EFAULT; goto error; } diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index ab0c9efd1efa..8905e92f896a 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -1067,7 +1067,7 @@ EXPORT_SYMBOL(compat_ip_setsockopt); */ static int do_ip_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen) + char __user *optval, int __user *optlen, unsigned flags) { struct inet_sock *inet = inet_sk(sk); int val; @@ -1240,7 +1240,7 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, msg.msg_control = optval; msg.msg_controllen = len; - msg.msg_flags = 0; + msg.msg_flags = flags; if (inet->cmsg_flags & IP_CMSG_PKTINFO) { struct in_pktinfo info; @@ -1294,7 +1294,7 @@ int ip_getsockopt(struct sock *sk, int level, { int err; - err = do_ip_getsockopt(sk, level, optname, optval, optlen); + err = do_ip_getsockopt(sk, level, optname, optval, optlen, 0); #ifdef CONFIG_NETFILTER /* we need to exclude all possible ENOPROTOOPTs except default case */ if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS && @@ -1327,7 +1327,8 @@ int compat_ip_getsockopt(struct sock *sk, int level, int optname, return compat_mc_getsockopt(sk, level, optname, optval, optlen, ip_getsockopt); - err = do_ip_getsockopt(sk, level, optname, optval, optlen); + err = do_ip_getsockopt(sk, level, optname, optval, optlen, + MSG_CMSG_COMPAT); #ifdef CONFIG_NETFILTER /* we need to exclude all possible ENOPROTOOPTs except default case */ diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 378b20b7ca6e..065effd8349a 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -231,7 +231,7 @@ static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t) (iter = rtnl_dereference(*tp)) != NULL; tp = &iter->next) { if (t == iter) { - rcu_assign_pointer(*tp, t->next); + RCU_INIT_POINTER(*tp, t->next); break; } } @@ -241,8 +241,8 @@ static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t) { struct ip_tunnel __rcu **tp = ipip_bucket(ipn, t); - rcu_assign_pointer(t->next, rtnl_dereference(*tp)); - rcu_assign_pointer(*tp, t); + RCU_INIT_POINTER(t->next, rtnl_dereference(*tp)); + RCU_INIT_POINTER(*tp, t); } static struct ip_tunnel * ipip_tunnel_locate(struct net *net, @@ -301,7 +301,7 @@ static void ipip_tunnel_uninit(struct net_device *dev) struct ipip_net *ipn = net_generic(net, ipip_net_id); if (dev == ipn->fb_tunnel_dev) - rcu_assign_pointer(ipn->tunnels_wc[0], NULL); + RCU_INIT_POINTER(ipn->tunnels_wc[0], NULL); else ipip_tunnel_unlink(ipn, netdev_priv(dev)); dev_put(dev); @@ -791,7 +791,7 @@ static int __net_init ipip_fb_tunnel_init(struct net_device *dev) return -ENOMEM; dev_hold(dev); - rcu_assign_pointer(ipn->tunnels_wc[0], tunnel); + RCU_INIT_POINTER(ipn->tunnels_wc[0], tunnel); return 0; } diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 58e879157976..6164e982e0ef 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1176,7 +1176,7 @@ static void mrtsock_destruct(struct sock *sk) ipmr_for_each_table(mrt, net) { if (sk == rtnl_dereference(mrt->mroute_sk)) { IPV4_DEVCONF_ALL(net, MC_FORWARDING)--; - rcu_assign_pointer(mrt->mroute_sk, NULL); + RCU_INIT_POINTER(mrt->mroute_sk, NULL); mroute_clean_tables(mrt); } } @@ -1203,7 +1203,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi return -ENOENT; if (optname != MRT_INIT) { - if (sk != rcu_dereference_raw(mrt->mroute_sk) && + if (sk != rcu_access_pointer(mrt->mroute_sk) && !capable(CAP_NET_ADMIN)) return -EACCES; } @@ -1224,13 +1224,13 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi ret = ip_ra_control(sk, 1, mrtsock_destruct); if (ret == 0) { - rcu_assign_pointer(mrt->mroute_sk, sk); + RCU_INIT_POINTER(mrt->mroute_sk, sk); IPV4_DEVCONF_ALL(net, MC_FORWARDING)++; } rtnl_unlock(); return ret; case MRT_DONE: - if (sk != rcu_dereference_raw(mrt->mroute_sk)) + if (sk != rcu_access_pointer(mrt->mroute_sk)) return -EACCES; return ip_ra_control(sk, 0, NULL); case MRT_ADD_VIF: diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 2e97e3ec1eb7..929b27bdeb79 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -18,17 +18,15 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) struct rtable *rt; struct flowi4 fl4 = {}; __be32 saddr = iph->saddr; - __u8 flags = 0; + __u8 flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : 0; unsigned int hh_len; - if (!skb->sk && addr_type != RTN_LOCAL) { - if (addr_type == RTN_UNSPEC) - addr_type = inet_addr_type(net, saddr); - if (addr_type == RTN_LOCAL || addr_type == RTN_UNICAST) - flags |= FLOWI_FLAG_ANYSRC; - else - saddr = 0; - } + if (addr_type == RTN_UNSPEC) + addr_type = inet_addr_type(net, saddr); + if (addr_type == RTN_LOCAL || addr_type == RTN_UNICAST) + flags |= FLOWI_FLAG_ANYSRC; + else + saddr = 0; /* some non-standard hacks like ipt_REJECT.c:send_reset() can cause * packets with foreign saddr to appear on the NF_INET_LOCAL_OUT hook. @@ -38,7 +36,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) fl4.flowi4_tos = RT_TOS(iph->tos); fl4.flowi4_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0; fl4.flowi4_mark = skb->mark; - fl4.flowi4_flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : flags; + fl4.flowi4_flags = flags; rt = ip_route_output_key(net, &fl4); if (IS_ERR(rt)) return -1; diff --git a/net/ipv4/netfilter/nf_nat_amanda.c b/net/ipv4/netfilter/nf_nat_amanda.c index 703f366fd235..7b22382ff0e9 100644 --- a/net/ipv4/netfilter/nf_nat_amanda.c +++ b/net/ipv4/netfilter/nf_nat_amanda.c @@ -70,14 +70,14 @@ static unsigned int help(struct sk_buff *skb, static void __exit nf_nat_amanda_fini(void) { - rcu_assign_pointer(nf_nat_amanda_hook, NULL); + RCU_INIT_POINTER(nf_nat_amanda_hook, NULL); synchronize_rcu(); } static int __init nf_nat_amanda_init(void) { BUG_ON(nf_nat_amanda_hook != NULL); - rcu_assign_pointer(nf_nat_amanda_hook, help); + RCU_INIT_POINTER(nf_nat_amanda_hook, help); return 0; } diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index 3346de5d94d0..447bc5cfdc6c 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -514,7 +514,7 @@ int nf_nat_protocol_register(const struct nf_nat_protocol *proto) ret = -EBUSY; goto out; } - rcu_assign_pointer(nf_nat_protos[proto->protonum], proto); + RCU_INIT_POINTER(nf_nat_protos[proto->protonum], proto); out: spin_unlock_bh(&nf_nat_lock); return ret; @@ -525,7 +525,7 @@ EXPORT_SYMBOL(nf_nat_protocol_register); void nf_nat_protocol_unregister(const struct nf_nat_protocol *proto) { spin_lock_bh(&nf_nat_lock); - rcu_assign_pointer(nf_nat_protos[proto->protonum], + RCU_INIT_POINTER(nf_nat_protos[proto->protonum], &nf_nat_unknown_protocol); spin_unlock_bh(&nf_nat_lock); synchronize_rcu(); @@ -736,10 +736,10 @@ static int __init nf_nat_init(void) /* Sew in builtin protocols. */ spin_lock_bh(&nf_nat_lock); for (i = 0; i < MAX_IP_NAT_PROTO; i++) - rcu_assign_pointer(nf_nat_protos[i], &nf_nat_unknown_protocol); - rcu_assign_pointer(nf_nat_protos[IPPROTO_TCP], &nf_nat_protocol_tcp); - rcu_assign_pointer(nf_nat_protos[IPPROTO_UDP], &nf_nat_protocol_udp); - rcu_assign_pointer(nf_nat_protos[IPPROTO_ICMP], &nf_nat_protocol_icmp); + RCU_INIT_POINTER(nf_nat_protos[i], &nf_nat_unknown_protocol); + RCU_INIT_POINTER(nf_nat_protos[IPPROTO_TCP], &nf_nat_protocol_tcp); + RCU_INIT_POINTER(nf_nat_protos[IPPROTO_UDP], &nf_nat_protocol_udp); + RCU_INIT_POINTER(nf_nat_protos[IPPROTO_ICMP], &nf_nat_protocol_icmp); spin_unlock_bh(&nf_nat_lock); /* Initialize fake conntrack so that NAT will skip it */ @@ -748,12 +748,12 @@ static int __init nf_nat_init(void) l3proto = nf_ct_l3proto_find_get((u_int16_t)AF_INET); BUG_ON(nf_nat_seq_adjust_hook != NULL); - rcu_assign_pointer(nf_nat_seq_adjust_hook, nf_nat_seq_adjust); + RCU_INIT_POINTER(nf_nat_seq_adjust_hook, nf_nat_seq_adjust); BUG_ON(nfnetlink_parse_nat_setup_hook != NULL); - rcu_assign_pointer(nfnetlink_parse_nat_setup_hook, + RCU_INIT_POINTER(nfnetlink_parse_nat_setup_hook, nfnetlink_parse_nat_setup); BUG_ON(nf_ct_nat_offset != NULL); - rcu_assign_pointer(nf_ct_nat_offset, nf_nat_get_offset); + RCU_INIT_POINTER(nf_ct_nat_offset, nf_nat_get_offset); return 0; cleanup_extend: @@ -766,9 +766,9 @@ static void __exit nf_nat_cleanup(void) unregister_pernet_subsys(&nf_nat_net_ops); nf_ct_l3proto_put(l3proto); nf_ct_extend_unregister(&nat_extend); - rcu_assign_pointer(nf_nat_seq_adjust_hook, NULL); - rcu_assign_pointer(nfnetlink_parse_nat_setup_hook, NULL); - rcu_assign_pointer(nf_ct_nat_offset, NULL); + RCU_INIT_POINTER(nf_nat_seq_adjust_hook, NULL); + RCU_INIT_POINTER(nfnetlink_parse_nat_setup_hook, NULL); + RCU_INIT_POINTER(nf_ct_nat_offset, NULL); synchronize_net(); } diff --git a/net/ipv4/netfilter/nf_nat_ftp.c b/net/ipv4/netfilter/nf_nat_ftp.c index dc73abb3fe27..e462a957d080 100644 --- a/net/ipv4/netfilter/nf_nat_ftp.c +++ b/net/ipv4/netfilter/nf_nat_ftp.c @@ -113,14 +113,14 @@ out: static void __exit nf_nat_ftp_fini(void) { - rcu_assign_pointer(nf_nat_ftp_hook, NULL); + RCU_INIT_POINTER(nf_nat_ftp_hook, NULL); synchronize_rcu(); } static int __init nf_nat_ftp_init(void) { BUG_ON(nf_nat_ftp_hook != NULL); - rcu_assign_pointer(nf_nat_ftp_hook, nf_nat_ftp); + RCU_INIT_POINTER(nf_nat_ftp_hook, nf_nat_ftp); return 0; } diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c index 790f3160e012..b9a1136addbd 100644 --- a/net/ipv4/netfilter/nf_nat_h323.c +++ b/net/ipv4/netfilter/nf_nat_h323.c @@ -581,30 +581,30 @@ static int __init init(void) BUG_ON(nat_callforwarding_hook != NULL); BUG_ON(nat_q931_hook != NULL); - rcu_assign_pointer(set_h245_addr_hook, set_h245_addr); - rcu_assign_pointer(set_h225_addr_hook, set_h225_addr); - rcu_assign_pointer(set_sig_addr_hook, set_sig_addr); - rcu_assign_pointer(set_ras_addr_hook, set_ras_addr); - rcu_assign_pointer(nat_rtp_rtcp_hook, nat_rtp_rtcp); - rcu_assign_pointer(nat_t120_hook, nat_t120); - rcu_assign_pointer(nat_h245_hook, nat_h245); - rcu_assign_pointer(nat_callforwarding_hook, nat_callforwarding); - rcu_assign_pointer(nat_q931_hook, nat_q931); + RCU_INIT_POINTER(set_h245_addr_hook, set_h245_addr); + RCU_INIT_POINTER(set_h225_addr_hook, set_h225_addr); + RCU_INIT_POINTER(set_sig_addr_hook, set_sig_addr); + RCU_INIT_POINTER(set_ras_addr_hook, set_ras_addr); + RCU_INIT_POINTER(nat_rtp_rtcp_hook, nat_rtp_rtcp); + RCU_INIT_POINTER(nat_t120_hook, nat_t120); + RCU_INIT_POINTER(nat_h245_hook, nat_h245); + RCU_INIT_POINTER(nat_callforwarding_hook, nat_callforwarding); + RCU_INIT_POINTER(nat_q931_hook, nat_q931); return 0; } /****************************************************************************/ static void __exit fini(void) { - rcu_assign_pointer(set_h245_addr_hook, NULL); - rcu_assign_pointer(set_h225_addr_hook, NULL); - rcu_assign_pointer(set_sig_addr_hook, NULL); - rcu_assign_pointer(set_ras_addr_hook, NULL); - rcu_assign_pointer(nat_rtp_rtcp_hook, NULL); - rcu_assign_pointer(nat_t120_hook, NULL); - rcu_assign_pointer(nat_h245_hook, NULL); - rcu_assign_pointer(nat_callforwarding_hook, NULL); - rcu_assign_pointer(nat_q931_hook, NULL); + RCU_INIT_POINTER(set_h245_addr_hook, NULL); + RCU_INIT_POINTER(set_h225_addr_hook, NULL); + RCU_INIT_POINTER(set_sig_addr_hook, NULL); + RCU_INIT_POINTER(set_ras_addr_hook, NULL); + RCU_INIT_POINTER(nat_rtp_rtcp_hook, NULL); + RCU_INIT_POINTER(nat_t120_hook, NULL); + RCU_INIT_POINTER(nat_h245_hook, NULL); + RCU_INIT_POINTER(nat_callforwarding_hook, NULL); + RCU_INIT_POINTER(nat_q931_hook, NULL); synchronize_rcu(); } diff --git a/net/ipv4/netfilter/nf_nat_irc.c b/net/ipv4/netfilter/nf_nat_irc.c index 535e1a802356..979ae165f4ef 100644 --- a/net/ipv4/netfilter/nf_nat_irc.c +++ b/net/ipv4/netfilter/nf_nat_irc.c @@ -75,14 +75,14 @@ static unsigned int help(struct sk_buff *skb, static void __exit nf_nat_irc_fini(void) { - rcu_assign_pointer(nf_nat_irc_hook, NULL); + RCU_INIT_POINTER(nf_nat_irc_hook, NULL); synchronize_rcu(); } static int __init nf_nat_irc_init(void) { BUG_ON(nf_nat_irc_hook != NULL); - rcu_assign_pointer(nf_nat_irc_hook, help); + RCU_INIT_POINTER(nf_nat_irc_hook, help); return 0; } diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c index 4c060038d29f..3e8284ba46b8 100644 --- a/net/ipv4/netfilter/nf_nat_pptp.c +++ b/net/ipv4/netfilter/nf_nat_pptp.c @@ -282,25 +282,25 @@ static int __init nf_nat_helper_pptp_init(void) nf_nat_need_gre(); BUG_ON(nf_nat_pptp_hook_outbound != NULL); - rcu_assign_pointer(nf_nat_pptp_hook_outbound, pptp_outbound_pkt); + RCU_INIT_POINTER(nf_nat_pptp_hook_outbound, pptp_outbound_pkt); BUG_ON(nf_nat_pptp_hook_inbound != NULL); - rcu_assign_pointer(nf_nat_pptp_hook_inbound, pptp_inbound_pkt); + RCU_INIT_POINTER(nf_nat_pptp_hook_inbound, pptp_inbound_pkt); BUG_ON(nf_nat_pptp_hook_exp_gre != NULL); - rcu_assign_pointer(nf_nat_pptp_hook_exp_gre, pptp_exp_gre); + RCU_INIT_POINTER(nf_nat_pptp_hook_exp_gre, pptp_exp_gre); BUG_ON(nf_nat_pptp_hook_expectfn != NULL); - rcu_assign_pointer(nf_nat_pptp_hook_expectfn, pptp_nat_expected); + RCU_INIT_POINTER(nf_nat_pptp_hook_expectfn, pptp_nat_expected); return 0; } static void __exit nf_nat_helper_pptp_fini(void) { - rcu_assign_pointer(nf_nat_pptp_hook_expectfn, NULL); - rcu_assign_pointer(nf_nat_pptp_hook_exp_gre, NULL); - rcu_assign_pointer(nf_nat_pptp_hook_inbound, NULL); - rcu_assign_pointer(nf_nat_pptp_hook_outbound, NULL); + RCU_INIT_POINTER(nf_nat_pptp_hook_expectfn, NULL); + RCU_INIT_POINTER(nf_nat_pptp_hook_exp_gre, NULL); + RCU_INIT_POINTER(nf_nat_pptp_hook_inbound, NULL); + RCU_INIT_POINTER(nf_nat_pptp_hook_outbound, NULL); synchronize_rcu(); } diff --git a/net/ipv4/netfilter/nf_nat_proto_common.c b/net/ipv4/netfilter/nf_nat_proto_common.c index 3e61faf23a9a..f52d41ea0690 100644 --- a/net/ipv4/netfilter/nf_nat_proto_common.c +++ b/net/ipv4/netfilter/nf_nat_proto_common.c @@ -12,6 +12,7 @@ #include <linux/ip.h> #include <linux/netfilter.h> +#include <net/secure_seq.h> #include <net/netfilter/nf_nat.h> #include <net/netfilter/nf_nat_core.h> #include <net/netfilter/nf_nat_rule.h> diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c index e40cf7816fdb..78844d9208f1 100644 --- a/net/ipv4/netfilter/nf_nat_sip.c +++ b/net/ipv4/netfilter/nf_nat_sip.c @@ -528,13 +528,13 @@ err1: static void __exit nf_nat_sip_fini(void) { - rcu_assign_pointer(nf_nat_sip_hook, NULL); - rcu_assign_pointer(nf_nat_sip_seq_adjust_hook, NULL); - rcu_assign_pointer(nf_nat_sip_expect_hook, NULL); - rcu_assign_pointer(nf_nat_sdp_addr_hook, NULL); - rcu_assign_pointer(nf_nat_sdp_port_hook, NULL); - rcu_assign_pointer(nf_nat_sdp_session_hook, NULL); - rcu_assign_pointer(nf_nat_sdp_media_hook, NULL); + RCU_INIT_POINTER(nf_nat_sip_hook, NULL); + RCU_INIT_POINTER(nf_nat_sip_seq_adjust_hook, NULL); + RCU_INIT_POINTER(nf_nat_sip_expect_hook, NULL); + RCU_INIT_POINTER(nf_nat_sdp_addr_hook, NULL); + RCU_INIT_POINTER(nf_nat_sdp_port_hook, NULL); + RCU_INIT_POINTER(nf_nat_sdp_session_hook, NULL); + RCU_INIT_POINTER(nf_nat_sdp_media_hook, NULL); synchronize_rcu(); } @@ -547,13 +547,13 @@ static int __init nf_nat_sip_init(void) BUG_ON(nf_nat_sdp_port_hook != NULL); BUG_ON(nf_nat_sdp_session_hook != NULL); BUG_ON(nf_nat_sdp_media_hook != NULL); - rcu_assign_pointer(nf_nat_sip_hook, ip_nat_sip); - rcu_assign_pointer(nf_nat_sip_seq_adjust_hook, ip_nat_sip_seq_adjust); - rcu_assign_pointer(nf_nat_sip_expect_hook, ip_nat_sip_expect); - rcu_assign_pointer(nf_nat_sdp_addr_hook, ip_nat_sdp_addr); - rcu_assign_pointer(nf_nat_sdp_port_hook, ip_nat_sdp_port); - rcu_assign_pointer(nf_nat_sdp_session_hook, ip_nat_sdp_session); - rcu_assign_pointer(nf_nat_sdp_media_hook, ip_nat_sdp_media); + RCU_INIT_POINTER(nf_nat_sip_hook, ip_nat_sip); + RCU_INIT_POINTER(nf_nat_sip_seq_adjust_hook, ip_nat_sip_seq_adjust); + RCU_INIT_POINTER(nf_nat_sip_expect_hook, ip_nat_sip_expect); + RCU_INIT_POINTER(nf_nat_sdp_addr_hook, ip_nat_sdp_addr); + RCU_INIT_POINTER(nf_nat_sdp_port_hook, ip_nat_sdp_port); + RCU_INIT_POINTER(nf_nat_sdp_session_hook, ip_nat_sdp_session); + RCU_INIT_POINTER(nf_nat_sdp_media_hook, ip_nat_sdp_media); return 0; } diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index 076b7c8c4aa4..d1cb412c18e0 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c @@ -1310,7 +1310,7 @@ static int __init nf_nat_snmp_basic_init(void) int ret = 0; BUG_ON(nf_nat_snmp_hook != NULL); - rcu_assign_pointer(nf_nat_snmp_hook, help); + RCU_INIT_POINTER(nf_nat_snmp_hook, help); ret = nf_conntrack_helper_register(&snmp_trap_helper); if (ret < 0) { @@ -1322,7 +1322,7 @@ static int __init nf_nat_snmp_basic_init(void) static void __exit nf_nat_snmp_basic_fini(void) { - rcu_assign_pointer(nf_nat_snmp_hook, NULL); + RCU_INIT_POINTER(nf_nat_snmp_hook, NULL); nf_conntrack_helper_unregister(&snmp_trap_helper); } diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c index a6e606e84820..92900482edea 100644 --- a/net/ipv4/netfilter/nf_nat_standalone.c +++ b/net/ipv4/netfilter/nf_nat_standalone.c @@ -284,7 +284,7 @@ static int __init nf_nat_standalone_init(void) #ifdef CONFIG_XFRM BUG_ON(ip_nat_decode_session != NULL); - rcu_assign_pointer(ip_nat_decode_session, nat_decode_session); + RCU_INIT_POINTER(ip_nat_decode_session, nat_decode_session); #endif ret = nf_nat_rule_init(); if (ret < 0) { @@ -302,7 +302,7 @@ static int __init nf_nat_standalone_init(void) nf_nat_rule_cleanup(); cleanup_decode_session: #ifdef CONFIG_XFRM - rcu_assign_pointer(ip_nat_decode_session, NULL); + RCU_INIT_POINTER(ip_nat_decode_session, NULL); synchronize_net(); #endif return ret; @@ -313,7 +313,7 @@ static void __exit nf_nat_standalone_fini(void) nf_unregister_hooks(nf_nat_ops, ARRAY_SIZE(nf_nat_ops)); nf_nat_rule_cleanup(); #ifdef CONFIG_XFRM - rcu_assign_pointer(ip_nat_decode_session, NULL); + RCU_INIT_POINTER(ip_nat_decode_session, NULL); synchronize_net(); #endif /* Conntrack caches are unregistered in nf_conntrack_cleanup */ diff --git a/net/ipv4/netfilter/nf_nat_tftp.c b/net/ipv4/netfilter/nf_nat_tftp.c index 7274a43c7a12..a2901bf829c0 100644 --- a/net/ipv4/netfilter/nf_nat_tftp.c +++ b/net/ipv4/netfilter/nf_nat_tftp.c @@ -36,14 +36,14 @@ static unsigned int help(struct sk_buff *skb, static void __exit nf_nat_tftp_fini(void) { - rcu_assign_pointer(nf_nat_tftp_hook, NULL); + RCU_INIT_POINTER(nf_nat_tftp_hook, NULL); synchronize_rcu(); } static int __init nf_nat_tftp_init(void) { BUG_ON(nf_nat_tftp_hook != NULL); - rcu_assign_pointer(nf_nat_tftp_hook, help); + RCU_INIT_POINTER(nf_nat_tftp_hook, help); return 0; } diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 1457acb39cec..61714bd52925 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -563,7 +563,8 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, - FLOWI_FLAG_CAN_SLEEP, daddr, saddr, 0, 0); + inet_sk_flowi_flags(sk) | FLOWI_FLAG_CAN_SLEEP, + daddr, saddr, 0, 0); if (!inet->hdrincl) { err = raw_probe_proto_opt(&fl4, msg); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 1730689f560e..2c21d3be891b 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -109,6 +109,7 @@ #include <linux/sysctl.h> #endif #include <net/atmclip.h> +#include <net/secure_seq.h> #define RT_FL_TOS(oldflp4) \ ((u32)(oldflp4->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))) @@ -323,7 +324,7 @@ static struct rtable *rt_cache_get_first(struct seq_file *seq) struct rtable *r = NULL; for (st->bucket = rt_hash_mask; st->bucket >= 0; --st->bucket) { - if (!rcu_dereference_raw(rt_hash_table[st->bucket].chain)) + if (!rcu_access_pointer(rt_hash_table[st->bucket].chain)) continue; rcu_read_lock_bh(); r = rcu_dereference_bh(rt_hash_table[st->bucket].chain); @@ -349,7 +350,7 @@ static struct rtable *__rt_cache_get_next(struct seq_file *seq, do { if (--st->bucket < 0) return NULL; - } while (!rcu_dereference_raw(rt_hash_table[st->bucket].chain)); + } while (!rcu_access_pointer(rt_hash_table[st->bucket].chain)); rcu_read_lock_bh(); r = rcu_dereference_bh(rt_hash_table[st->bucket].chain); } @@ -721,7 +722,7 @@ static inline bool compare_hash_inputs(const struct rtable *rt1, { return ((((__force u32)rt1->rt_key_dst ^ (__force u32)rt2->rt_key_dst) | ((__force u32)rt1->rt_key_src ^ (__force u32)rt2->rt_key_src) | - (rt1->rt_iif ^ rt2->rt_iif)) == 0); + (rt1->rt_route_iif ^ rt2->rt_route_iif)) == 0); } static inline int compare_keys(struct rtable *rt1, struct rtable *rt2) @@ -730,8 +731,8 @@ static inline int compare_keys(struct rtable *rt1, struct rtable *rt2) ((__force u32)rt1->rt_key_src ^ (__force u32)rt2->rt_key_src) | (rt1->rt_mark ^ rt2->rt_mark) | (rt1->rt_key_tos ^ rt2->rt_key_tos) | - (rt1->rt_oif ^ rt2->rt_oif) | - (rt1->rt_iif ^ rt2->rt_iif)) == 0; + (rt1->rt_route_iif ^ rt2->rt_route_iif) | + (rt1->rt_oif ^ rt2->rt_oif)) == 0; } static inline int compare_netns(struct rtable *rt1, struct rtable *rt2) @@ -760,7 +761,7 @@ static void rt_do_flush(struct net *net, int process_context) if (process_context && need_resched()) cond_resched(); - rth = rcu_dereference_raw(rt_hash_table[i].chain); + rth = rcu_access_pointer(rt_hash_table[i].chain); if (!rth) continue; @@ -1628,16 +1629,18 @@ static int check_peer_redir(struct dst_entry *dst, struct inet_peer *peer) { struct rtable *rt = (struct rtable *) dst; __be32 orig_gw = rt->rt_gateway; - struct neighbour *n; + struct neighbour *n, *old_n; dst_confirm(&rt->dst); - neigh_release(dst_get_neighbour(&rt->dst)); - dst_set_neighbour(&rt->dst, NULL); - rt->rt_gateway = peer->redirect_learned.a4; - rt_bind_neighbour(rt); - n = dst_get_neighbour(&rt->dst); + + n = ipv4_neigh_lookup(&rt->dst, &rt->rt_gateway); + if (IS_ERR(n)) + return PTR_ERR(n); + old_n = xchg(&rt->dst._neighbour, n); + if (old_n) + neigh_release(old_n); if (!n || !(n->nud_state & NUD_VALID)) { if (n) neigh_event_send(n, NULL); @@ -2317,8 +2320,7 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth = rcu_dereference(rth->dst.rt_next)) { if ((((__force u32)rth->rt_key_dst ^ (__force u32)daddr) | ((__force u32)rth->rt_key_src ^ (__force u32)saddr) | - (rth->rt_iif ^ iif) | - rth->rt_oif | + (rth->rt_route_iif ^ iif) | (rth->rt_key_tos ^ tos)) == 0 && rth->rt_mark == skb->mark && net_eq(dev_net(rth->dst.dev), net) && diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 92bb9434b338..3bc5c8f7c71b 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -276,7 +276,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, int mss; struct rtable *rt; __u8 rcv_wscale; - bool ecn_ok; + bool ecn_ok = false; if (!sysctl_tcp_syncookies || !th->ack || th->rst) goto out; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 46febcacb729..cc0d5dead30c 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2857,26 +2857,25 @@ EXPORT_SYMBOL(tcp_gro_complete); #ifdef CONFIG_TCP_MD5SIG static unsigned long tcp_md5sig_users; -static struct tcp_md5sig_pool * __percpu *tcp_md5sig_pool; +static struct tcp_md5sig_pool __percpu *tcp_md5sig_pool; static DEFINE_SPINLOCK(tcp_md5sig_pool_lock); -static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool * __percpu *pool) +static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool __percpu *pool) { int cpu; + for_each_possible_cpu(cpu) { - struct tcp_md5sig_pool *p = *per_cpu_ptr(pool, cpu); - if (p) { - if (p->md5_desc.tfm) - crypto_free_hash(p->md5_desc.tfm); - kfree(p); - } + struct tcp_md5sig_pool *p = per_cpu_ptr(pool, cpu); + + if (p->md5_desc.tfm) + crypto_free_hash(p->md5_desc.tfm); } free_percpu(pool); } void tcp_free_md5sig_pool(void) { - struct tcp_md5sig_pool * __percpu *pool = NULL; + struct tcp_md5sig_pool __percpu *pool = NULL; spin_lock_bh(&tcp_md5sig_pool_lock); if (--tcp_md5sig_users == 0) { @@ -2889,30 +2888,24 @@ void tcp_free_md5sig_pool(void) } EXPORT_SYMBOL(tcp_free_md5sig_pool); -static struct tcp_md5sig_pool * __percpu * +static struct tcp_md5sig_pool __percpu * __tcp_alloc_md5sig_pool(struct sock *sk) { int cpu; - struct tcp_md5sig_pool * __percpu *pool; + struct tcp_md5sig_pool __percpu *pool; - pool = alloc_percpu(struct tcp_md5sig_pool *); + pool = alloc_percpu(struct tcp_md5sig_pool); if (!pool) return NULL; for_each_possible_cpu(cpu) { - struct tcp_md5sig_pool *p; struct crypto_hash *hash; - p = kzalloc(sizeof(*p), sk->sk_allocation); - if (!p) - goto out_free; - *per_cpu_ptr(pool, cpu) = p; - hash = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC); if (!hash || IS_ERR(hash)) goto out_free; - p->md5_desc.tfm = hash; + per_cpu_ptr(pool, cpu)->md5_desc.tfm = hash; } return pool; out_free: @@ -2920,9 +2913,9 @@ out_free: return NULL; } -struct tcp_md5sig_pool * __percpu *tcp_alloc_md5sig_pool(struct sock *sk) +struct tcp_md5sig_pool __percpu *tcp_alloc_md5sig_pool(struct sock *sk) { - struct tcp_md5sig_pool * __percpu *pool; + struct tcp_md5sig_pool __percpu *pool; int alloc = 0; retry: @@ -2941,7 +2934,7 @@ retry: if (alloc) { /* we cannot hold spinlock here because this may sleep. */ - struct tcp_md5sig_pool * __percpu *p; + struct tcp_md5sig_pool __percpu *p; p = __tcp_alloc_md5sig_pool(sk); spin_lock_bh(&tcp_md5sig_pool_lock); @@ -2974,7 +2967,7 @@ EXPORT_SYMBOL(tcp_alloc_md5sig_pool); */ struct tcp_md5sig_pool *tcp_get_md5sig_pool(void) { - struct tcp_md5sig_pool * __percpu *p; + struct tcp_md5sig_pool __percpu *p; local_bh_disable(); @@ -2985,7 +2978,7 @@ struct tcp_md5sig_pool *tcp_get_md5sig_pool(void) spin_unlock(&tcp_md5sig_pool_lock); if (p) - return *this_cpu_ptr(p); + return this_cpu_ptr(p); local_bh_enable(); return NULL; @@ -3035,7 +3028,8 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, for (i = 0; i < shi->nr_frags; ++i) { const struct skb_frag_struct *f = &shi->frags[i]; - sg_set_page(&sg, f->page, f->size, f->page_offset); + struct page *page = skb_frag_page(f); + sg_set_page(&sg, page, f->size, f->page_offset); if (crypto_hash_update(desc, &sg, f->size)) return 1; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index ea0d2183df4b..385c470195eb 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2830,9 +2830,13 @@ static int tcp_try_undo_loss(struct sock *sk) static inline void tcp_complete_cwr(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - /* Do not moderate cwnd if it's already undone in cwr or recovery */ - if (tp->undo_marker && tp->snd_cwnd > tp->snd_ssthresh) { - tp->snd_cwnd = tp->snd_ssthresh; + + /* Do not moderate cwnd if it's already undone in cwr or recovery. */ + if (tp->undo_marker) { + if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR) + tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh); + else /* PRR */ + tp->snd_cwnd = tp->snd_ssthresh; tp->snd_cwnd_stamp = tcp_time_stamp; } tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR); @@ -2950,6 +2954,38 @@ void tcp_simple_retransmit(struct sock *sk) } EXPORT_SYMBOL(tcp_simple_retransmit); +/* This function implements the PRR algorithm, specifcally the PRR-SSRB + * (proportional rate reduction with slow start reduction bound) as described in + * http://www.ietf.org/id/draft-mathis-tcpm-proportional-rate-reduction-01.txt. + * It computes the number of packets to send (sndcnt) based on packets newly + * delivered: + * 1) If the packets in flight is larger than ssthresh, PRR spreads the + * cwnd reductions across a full RTT. + * 2) If packets in flight is lower than ssthresh (such as due to excess + * losses and/or application stalls), do not perform any further cwnd + * reductions, but instead slow start up to ssthresh. + */ +static void tcp_update_cwnd_in_recovery(struct sock *sk, int newly_acked_sacked, + int fast_rexmit, int flag) +{ + struct tcp_sock *tp = tcp_sk(sk); + int sndcnt = 0; + int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp); + + if (tcp_packets_in_flight(tp) > tp->snd_ssthresh) { + u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered + + tp->prior_cwnd - 1; + sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out; + } else { + sndcnt = min_t(int, delta, + max_t(int, tp->prr_delivered - tp->prr_out, + newly_acked_sacked) + 1); + } + + sndcnt = max(sndcnt, (fast_rexmit ? 1 : 0)); + tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt; +} + /* Process an event, which can update packets-in-flight not trivially. * Main goal of this function is to calculate new estimate for left_out, * taking into account both packets sitting in receiver's buffer and @@ -2961,7 +2997,8 @@ EXPORT_SYMBOL(tcp_simple_retransmit); * It does _not_ decide what to send, it is made in function * tcp_xmit_retransmit_queue(). */ -static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag) +static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, + int newly_acked_sacked, int flag) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); @@ -3111,13 +3148,17 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag) tp->bytes_acked = 0; tp->snd_cwnd_cnt = 0; + tp->prior_cwnd = tp->snd_cwnd; + tp->prr_delivered = 0; + tp->prr_out = 0; tcp_set_ca_state(sk, TCP_CA_Recovery); fast_rexmit = 1; } if (do_lost || (tcp_is_fack(tp) && tcp_head_timedout(sk))) tcp_update_scoreboard(sk, fast_rexmit); - tcp_cwnd_down(sk, flag); + tp->prr_delivered += newly_acked_sacked; + tcp_update_cwnd_in_recovery(sk, newly_acked_sacked, fast_rexmit, flag); tcp_xmit_retransmit_queue(sk); } @@ -3632,6 +3673,8 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) u32 prior_in_flight; u32 prior_fackets; int prior_packets; + int prior_sacked = tp->sacked_out; + int newly_acked_sacked = 0; int frto_cwnd = 0; /* If the ack is older than previous acks @@ -3703,6 +3746,9 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) /* See if we can take anything off of the retransmit queue. */ flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una); + newly_acked_sacked = (prior_packets - prior_sacked) - + (tp->packets_out - tp->sacked_out); + if (tp->frto_counter) frto_cwnd = tcp_process_frto(sk, flag); /* Guarantee sacktag reordering detection against wrap-arounds */ @@ -3715,7 +3761,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) tcp_may_raise_cwnd(sk, flag)) tcp_cong_avoid(sk, ack, prior_in_flight); tcp_fastretrans_alert(sk, prior_packets - tp->packets_out, - flag); + newly_acked_sacked, flag); } else { if ((flag & FLAG_DATA_ACKED) && !frto_cwnd) tcp_cong_avoid(sk, ack, prior_in_flight); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 955b8e65b69e..b3f26114b03e 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -72,6 +72,7 @@ #include <net/timewait_sock.h> #include <net/xfrm.h> #include <net/netdma.h> +#include <net/secure_seq.h> #include <linux/inet.h> #include <linux/ipv6.h> @@ -1577,7 +1578,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) #endif if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ - sock_rps_save_rxhash(sk, skb->rxhash); + sock_rps_save_rxhash(sk, skb); if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) { rsk = sk; goto reset; @@ -1594,7 +1595,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) goto discard; if (nsk != sk) { - sock_rps_save_rxhash(nsk, skb->rxhash); + sock_rps_save_rxhash(nsk, skb); if (tcp_child_process(sk, nsk, skb)) { rsk = nsk; goto reset; @@ -1602,7 +1603,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) return 0; } } else - sock_rps_save_rxhash(sk, skb->rxhash); + sock_rps_save_rxhash(sk, skb); if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) { rsk = sk; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 882e0b0964d0..081dcd6fd0c4 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1095,7 +1095,7 @@ static void __pskb_trim_head(struct sk_buff *skb, int len) k = 0; for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { if (skb_shinfo(skb)->frags[i].size <= eat) { - put_page(skb_shinfo(skb)->frags[i].page); + skb_frag_unref(skb, i); eat -= skb_shinfo(skb)->frags[i].size; } else { skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i]; @@ -1796,11 +1796,13 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, tcp_event_new_data_sent(sk, skb); tcp_minshall_update(tp, mss_now, skb); - sent_pkts++; + sent_pkts += tcp_skb_pcount(skb); if (push_one) break; } + if (inet_csk(sk)->icsk_ca_state == TCP_CA_Recovery) + tp->prr_out += sent_pkts; if (likely(sent_pkts)) { tcp_cwnd_validate(sk); @@ -2294,6 +2296,9 @@ begin_fwd: return; NET_INC_STATS_BH(sock_net(sk), mib_idx); + if (inet_csk(sk)->icsk_ca_state == TCP_CA_Recovery) + tp->prr_out += tcp_skb_pcount(skb); + if (skb == tcp_write_queue_head(sk)) inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto, diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 1b5a19340a95..ebaa96bd3464 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1267,7 +1267,7 @@ int udp_disconnect(struct sock *sk, int flags) sk->sk_state = TCP_CLOSE; inet->inet_daddr = 0; inet->inet_dport = 0; - sock_rps_save_rxhash(sk, 0); + sock_rps_reset_rxhash(sk); sk->sk_bound_dev_if = 0; if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) inet_reset_saddr(sk); @@ -1355,7 +1355,7 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) int rc; if (inet_sk(sk)->inet_daddr) - sock_rps_save_rxhash(sk, skb->rxhash); + sock_rps_save_rxhash(sk, skb); rc = ip_queue_rcv_skb(sk, skb); if (rc < 0) { @@ -1461,10 +1461,9 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) } } - if (rcu_dereference_raw(sk->sk_filter)) { - if (udp_lib_checksum_complete(skb)) - goto drop; - } + if (rcu_access_pointer(sk->sk_filter) && + udp_lib_checksum_complete(skb)) + goto drop; if (sk_rcvqueues_full(sk, skb)) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index a55500cc0b29..3053c685e249 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -428,7 +428,7 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev) ndev->tstamp = jiffies; addrconf_sysctl_register(ndev); /* protected by rtnl_lock */ - rcu_assign_pointer(dev->ip6_ptr, ndev); + RCU_INIT_POINTER(dev->ip6_ptr, ndev); /* Join all-node multicast group */ ipv6_dev_mc_inc(dev, &in6addr_linklocal_allnodes); @@ -656,7 +656,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, * layer address of our nexhop router */ - if (dst_get_neighbour(&rt->dst) == NULL) + if (dst_get_neighbour_raw(&rt->dst) == NULL) ifa->flags &= ~IFA_F_OPTIMISTIC; ifa->idev = idev; @@ -824,12 +824,13 @@ static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *i { struct inet6_dev *idev = ifp->idev; struct in6_addr addr, *tmpaddr; - unsigned long tmp_prefered_lft, tmp_valid_lft, tmp_cstamp, tmp_tstamp, age; + unsigned long tmp_prefered_lft, tmp_valid_lft, tmp_tstamp, age; unsigned long regen_advance; int tmp_plen; int ret = 0; int max_addresses; u32 addr_flags; + unsigned long now = jiffies; write_lock(&idev->lock); if (ift) { @@ -874,7 +875,7 @@ retry: goto out; } memcpy(&addr.s6_addr[8], idev->rndid, 8); - age = (jiffies - ifp->tstamp) / HZ; + age = (now - ifp->tstamp) / HZ; tmp_valid_lft = min_t(__u32, ifp->valid_lft, idev->cnf.temp_valid_lft + age); @@ -884,7 +885,6 @@ retry: idev->cnf.max_desync_factor); tmp_plen = ifp->prefix_len; max_addresses = idev->cnf.max_addresses; - tmp_cstamp = ifp->cstamp; tmp_tstamp = ifp->tstamp; spin_unlock_bh(&ifp->lock); @@ -929,7 +929,7 @@ retry: ift->ifpub = ifp; ift->valid_lft = tmp_valid_lft; ift->prefered_lft = tmp_prefered_lft; - ift->cstamp = tmp_cstamp; + ift->cstamp = now; ift->tstamp = tmp_tstamp; spin_unlock_bh(&ift->lock); @@ -1999,25 +1999,50 @@ ok: #ifdef CONFIG_IPV6_PRIVACY read_lock_bh(&in6_dev->lock); /* update all temporary addresses in the list */ - list_for_each_entry(ift, &in6_dev->tempaddr_list, tmp_list) { - /* - * When adjusting the lifetimes of an existing - * temporary address, only lower the lifetimes. - * Implementations must not increase the - * lifetimes of an existing temporary address - * when processing a Prefix Information Option. - */ + list_for_each_entry(ift, &in6_dev->tempaddr_list, + tmp_list) { + int age, max_valid, max_prefered; + if (ifp != ift->ifpub) continue; + /* + * RFC 4941 section 3.3: + * If a received option will extend the lifetime + * of a public address, the lifetimes of + * temporary addresses should be extended, + * subject to the overall constraint that no + * temporary addresses should ever remain + * "valid" or "preferred" for a time longer than + * (TEMP_VALID_LIFETIME) or + * (TEMP_PREFERRED_LIFETIME - DESYNC_FACTOR), + * respectively. + */ + age = (now - ift->cstamp) / HZ; + max_valid = in6_dev->cnf.temp_valid_lft - age; + if (max_valid < 0) + max_valid = 0; + + max_prefered = in6_dev->cnf.temp_prefered_lft - + in6_dev->cnf.max_desync_factor - + age; + if (max_prefered < 0) + max_prefered = 0; + + if (valid_lft > max_valid) + valid_lft = max_valid; + + if (prefered_lft > max_prefered) + prefered_lft = max_prefered; + spin_lock(&ift->lock); flags = ift->flags; - if (ift->valid_lft > valid_lft && - ift->valid_lft - valid_lft > (jiffies - ift->tstamp) / HZ) - ift->valid_lft = valid_lft + (jiffies - ift->tstamp) / HZ; - if (ift->prefered_lft > prefered_lft && - ift->prefered_lft - prefered_lft > (jiffies - ift->tstamp) / HZ) - ift->prefered_lft = prefered_lft + (jiffies - ift->tstamp) / HZ; + ift->valid_lft = valid_lft; + ift->prefered_lft = prefered_lft; + ift->tstamp = now; + if (prefered_lft > 0) + ift->flags &= ~IFA_F_DEPRECATED; + spin_unlock(&ift->lock); if (!(flags&IFA_F_TENTATIVE)) ipv6_ifa_notify(0, ift); @@ -2025,9 +2050,11 @@ ok: if ((create || list_empty(&in6_dev->tempaddr_list)) && in6_dev->cnf.use_tempaddr > 0) { /* - * When a new public address is created as described in [ADDRCONF], - * also create a new temporary address. Also create a temporary - * address if it's enabled but no temporary address currently exists. + * When a new public address is created as + * described in [ADDRCONF], also create a new + * temporary address. Also create a temporary + * address if it's enabled but no temporary + * address currently exists. */ read_unlock_bh(&in6_dev->lock); ipv6_create_tempaddr(ifp, NULL); @@ -2706,7 +2733,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) idev->dead = 1; /* protected by rtnl_lock */ - rcu_assign_pointer(dev->ip6_ptr, NULL); + RCU_INIT_POINTER(dev->ip6_ptr, NULL); /* Step 1.5: remove snmp6 entry */ snmp6_unregister_dev(idev); @@ -2969,12 +2996,12 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp) ipv6_ifa_notify(RTM_NEWADDR, ifp); - /* If added prefix is link local and forwarding is off, - start sending router solicitations. + /* If added prefix is link local and we are prepared to process + router advertisements, start sending router solicitations. */ - if ((ifp->idev->cnf.forwarding == 0 || - ifp->idev->cnf.forwarding == 2) && + if (((ifp->idev->cnf.accept_ra == 1 && !ifp->idev->cnf.forwarding) || + ifp->idev->cnf.accept_ra == 2) && ifp->idev->cnf.rtr_solicits > 0 && (dev->flags&IFF_LOOPBACK) == 0 && (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)) { diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 16560336eb72..9ef1831746ef 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -33,6 +33,11 @@ #include <linux/errqueue.h> #include <asm/uaccess.h> +static inline int ipv6_mapped_addr_any(const struct in6_addr *a) +{ + return (ipv6_addr_v4mapped(a) && (a->s6_addr32[3] == 0)); +} + int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; @@ -102,10 +107,12 @@ ipv4_connected: ipv6_addr_set_v4mapped(inet->inet_daddr, &np->daddr); - if (ipv6_addr_any(&np->saddr)) + if (ipv6_addr_any(&np->saddr) || + ipv6_mapped_addr_any(&np->saddr)) ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr); - if (ipv6_addr_any(&np->rcv_saddr)) { + if (ipv6_addr_any(&np->rcv_saddr) || + ipv6_mapped_addr_any(&np->rcv_saddr)) { ipv6_addr_set_v4mapped(inet->inet_rcv_saddr, &np->rcv_saddr); if (sk->sk_prot->rehash) diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 79a485e8a700..1318de4c3e8d 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -273,12 +273,12 @@ static int ipv6_destopt_rcv(struct sk_buff *skb) #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) __u16 dstbuf; #endif - struct dst_entry *dst; + struct dst_entry *dst = skb_dst(skb); if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) || !pskb_may_pull(skb, (skb_transport_offset(skb) + ((skb_transport_header(skb)[1] + 1) << 3)))) { - IP6_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)), + IP6_INC_STATS_BH(dev_net(dst->dev), ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS); kfree_skb(skb); return -1; @@ -289,9 +289,7 @@ static int ipv6_destopt_rcv(struct sk_buff *skb) dstbuf = opt->dst1; #endif - dst = dst_clone(skb_dst(skb)); if (ip6_parse_tlv(tlvprocdestopt_lst, skb)) { - dst_release(dst); skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3; opt = IP6CB(skb); #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) @@ -304,7 +302,6 @@ static int ipv6_destopt_rcv(struct sk_buff *skb) IP6_INC_STATS_BH(dev_net(dst->dev), ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS); - dst_release(dst); return -1; } diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 11900417b1cc..2b59154c65d3 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -490,7 +490,8 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) goto out_dst_release; } - idev = in6_dev_get(skb->dev); + rcu_read_lock(); + idev = __in6_dev_get(skb->dev); err = ip6_append_data(sk, icmpv6_getfrag, &msg, len + sizeof(struct icmp6hdr), @@ -500,19 +501,16 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) if (err) { ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS); ip6_flush_pending_frames(sk); - goto out_put; + } else { + err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr, + len + sizeof(struct icmp6hdr)); } - err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr, len + sizeof(struct icmp6hdr)); - -out_put: - if (likely(idev != NULL)) - in6_dev_put(idev); + rcu_read_unlock(); out_dst_release: dst_release(dst); out: icmpv6_xmit_unlock(sk); } - EXPORT_SYMBOL(icmpv6_send); static void icmpv6_echo_reply(struct sk_buff *skb) @@ -569,7 +567,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) if (hlimit < 0) hlimit = ip6_dst_hoplimit(dst); - idev = in6_dev_get(skb->dev); + idev = __in6_dev_get(skb->dev); msg.skb = skb; msg.offset = 0; @@ -583,13 +581,10 @@ static void icmpv6_echo_reply(struct sk_buff *skb) if (err) { ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS); ip6_flush_pending_frames(sk); - goto out_put; + } else { + err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr, + skb->len + sizeof(struct icmp6hdr)); } - err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr, skb->len + sizeof(struct icmp6hdr)); - -out_put: - if (likely(idev != NULL)) - in6_dev_put(idev); dst_release(dst); out: icmpv6_xmit_unlock(sk); diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 8a58e8cf6646..2916200f90c1 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -211,6 +211,7 @@ int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused) struct flowi6 fl6; struct dst_entry *dst; struct in6_addr *final_p, final; + int res; memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_proto = sk->sk_protocol; @@ -241,12 +242,14 @@ int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused) __inet6_csk_dst_store(sk, dst, NULL, NULL); } - skb_dst_set(skb, dst_clone(dst)); + rcu_read_lock(); + skb_dst_set_noref(skb, dst); /* Restore final destination back after routing done */ ipv6_addr_copy(&fl6.daddr, &np->daddr); - return ip6_xmit(sk, skb, &fl6, np->opt); + res = ip6_xmit(sk, skb, &fl6, np->opt); + rcu_read_unlock(); + return res; } - EXPORT_SYMBOL_GPL(inet6_csk_xmit); diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index b53197233709..73f1a00a96af 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -20,6 +20,7 @@ #include <net/inet_connection_sock.h> #include <net/inet_hashtables.h> #include <net/inet6_hashtables.h> +#include <net/secure_seq.h> #include <net/ip.h> int __inet6_hash(struct sock *sk, struct inet_timewait_sock *tw) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 54a4678955bf..320d91d20ad7 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1455,7 +1455,7 @@ static int fib6_age(struct rt6_info *rt, void *arg) RT6_TRACE("aging clone %p\n", rt); return -1; } else if ((rt->rt6i_flags & RTF_GATEWAY) && - (!(dst_get_neighbour(&rt->dst)->flags & NTF_ROUTER))) { + (!(dst_get_neighbour_raw(&rt->dst)->flags & NTF_ROUTER))) { RT6_TRACE("purging route %p via non-router but gateway\n", rt); return -1; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 32e5339db0c8..835c04b5239f 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -135,10 +135,15 @@ static int ip6_finish_output2(struct sk_buff *skb) skb->len); } + rcu_read_lock(); neigh = dst_get_neighbour(dst); - if (neigh) - return neigh_output(neigh, skb); + if (neigh) { + int res = neigh_output(neigh, skb); + rcu_read_unlock(); + return res; + } + rcu_read_unlock(); IP6_INC_STATS_BH(dev_net(dst->dev), ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); kfree_skb(skb); @@ -975,12 +980,14 @@ static int ip6_dst_lookup_tail(struct sock *sk, * dst entry and replace it instead with the * dst entry of the nexthop router */ + rcu_read_lock(); n = dst_get_neighbour(*dst); if (n && !(n->nud_state & NUD_VALID)) { struct inet6_ifaddr *ifp; struct flowi6 fl_gw6; int redirect; + rcu_read_unlock(); ifp = ipv6_get_ifaddr(net, &fl6->saddr, (*dst)->dev, 1); @@ -1000,6 +1007,8 @@ static int ip6_dst_lookup_tail(struct sock *sk, if ((err = (*dst)->error)) goto out_err_release; } + } else { + rcu_read_unlock(); } #endif @@ -1471,13 +1480,13 @@ alloc_new_skb: if (page && (left = PAGE_SIZE - off) > 0) { if (copy >= left) copy = left; - if (page != frag->page) { + if (page != skb_frag_page(frag)) { if (i == MAX_SKB_FRAGS) { err = -EMSGSIZE; goto error; } - get_page(page); skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0); + skb_frag_ref(skb, i); frag = &skb_shinfo(skb)->frags[i]; } } else if(i < MAX_SKB_FRAGS) { @@ -1497,7 +1506,8 @@ alloc_new_skb: err = -EMSGSIZE; goto error; } - if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) { + if (getfrag(from, skb_frag_address(frag)+frag->size, + offset, copy, skb->len, skb) < 0) { err = -EFAULT; goto error; } diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 0bc98886c383..694d70a8a0ee 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -218,8 +218,8 @@ ip6_tnl_link(struct ip6_tnl_net *ip6n, struct ip6_tnl *t) { struct ip6_tnl __rcu **tp = ip6_tnl_bucket(ip6n, &t->parms); - rcu_assign_pointer(t->next , rtnl_dereference(*tp)); - rcu_assign_pointer(*tp, t); + RCU_INIT_POINTER(t->next , rtnl_dereference(*tp)); + RCU_INIT_POINTER(*tp, t); } /** @@ -237,7 +237,7 @@ ip6_tnl_unlink(struct ip6_tnl_net *ip6n, struct ip6_tnl *t) (iter = rtnl_dereference(*tp)) != NULL; tp = &iter->next) { if (t == iter) { - rcu_assign_pointer(*tp, t->next); + RCU_INIT_POINTER(*tp, t->next); break; } } @@ -350,7 +350,7 @@ ip6_tnl_dev_uninit(struct net_device *dev) struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); if (dev == ip6n->fb_tnl_dev) - rcu_assign_pointer(ip6n->tnls_wc[0], NULL); + RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL); else ip6_tnl_unlink(ip6n, t); ip6_tnl_dst_reset(t); @@ -889,7 +889,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, struct net_device_stats *stats = &t->dev->stats; struct ipv6hdr *ipv6h = ipv6_hdr(skb); struct ipv6_tel_txoption opt; - struct dst_entry *dst; + struct dst_entry *dst, *ndst = NULL; struct net_device *tdev; int mtu; unsigned int max_headroom = sizeof(struct ipv6hdr); @@ -897,19 +897,19 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, int err = -1; int pkt_len; - if ((dst = ip6_tnl_dst_check(t)) != NULL) - dst_hold(dst); - else { - dst = ip6_route_output(net, NULL, fl6); + dst = ip6_tnl_dst_check(t); + if (!dst) { + ndst = ip6_route_output(net, NULL, fl6); - if (dst->error) + if (ndst->error) goto tx_err_link_failure; - dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), NULL, 0); - if (IS_ERR(dst)) { - err = PTR_ERR(dst); - dst = NULL; + ndst = xfrm_lookup(net, ndst, flowi6_to_flowi(fl6), NULL, 0); + if (IS_ERR(ndst)) { + err = PTR_ERR(ndst); + ndst = NULL; goto tx_err_link_failure; } + dst = ndst; } tdev = dst->dev; @@ -955,7 +955,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, skb = new_skb; } skb_dst_drop(skb); - skb_dst_set(skb, dst_clone(dst)); + skb_dst_set_noref(skb, dst); skb->transport_header = skb->network_header; @@ -987,13 +987,14 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, stats->tx_errors++; stats->tx_aborted_errors++; } - ip6_tnl_dst_store(t, dst); + if (ndst) + ip6_tnl_dst_store(t, ndst); return 0; tx_err_link_failure: stats->tx_carrier_errors++; dst_link_failure(skb); tx_err_dst_release: - dst_release(dst); + dst_release(ndst); return err; } @@ -1439,7 +1440,7 @@ static int __net_init ip6_fb_tnl_dev_init(struct net_device *dev) t->parms.proto = IPPROTO_IPV6; dev_hold(dev); - rcu_assign_pointer(ip6n->tnls_wc[0], t); + RCU_INIT_POINTER(ip6n->tnls_wc[0], t); return 0; } diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 9cb191ecaba8..147ede38ab48 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -913,7 +913,7 @@ static int ipv6_getsockopt_sticky(struct sock *sk, struct ipv6_txoptions *opt, } static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen) + char __user *optval, int __user *optlen, unsigned flags) { struct ipv6_pinfo *np = inet6_sk(sk); int len; @@ -962,7 +962,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, msg.msg_control = optval; msg.msg_controllen = len; - msg.msg_flags = 0; + msg.msg_flags = flags; lock_sock(sk); skb = np->pktoptions; @@ -1222,7 +1222,7 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname, if(level != SOL_IPV6) return -ENOPROTOOPT; - err = do_ipv6_getsockopt(sk, level, optname, optval, optlen); + err = do_ipv6_getsockopt(sk, level, optname, optval, optlen, 0); #ifdef CONFIG_NETFILTER /* we need to exclude all possible ENOPROTOOPTs except default case */ if (err == -ENOPROTOOPT && optname != IPV6_2292PKTOPTIONS) { @@ -1264,7 +1264,8 @@ int compat_ipv6_getsockopt(struct sock *sk, int level, int optname, return compat_mc_getsockopt(sk, level, optname, optval, optlen, ipv6_getsockopt); - err = do_ipv6_getsockopt(sk, level, optname, optval, optlen); + err = do_ipv6_getsockopt(sk, level, optname, optval, optlen, + MSG_CMSG_COMPAT); #ifdef CONFIG_NETFILTER /* we need to exclude all possible ENOPROTOOPTs except default case */ if (err == -ENOPROTOOPT && optname != IPV6_2292PKTOPTIONS) { diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 9da6e02eaaeb..1f52dd257631 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -533,7 +533,8 @@ void ndisc_send_skb(struct sk_buff *skb, skb_dst_set(skb, dst); - idev = in6_dev_get(dst->dev); + rcu_read_lock(); + idev = __in6_dev_get(dst->dev); IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev, @@ -543,8 +544,7 @@ void ndisc_send_skb(struct sk_buff *skb, ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); } - if (likely(idev != NULL)) - in6_dev_put(idev); + rcu_read_unlock(); } EXPORT_SYMBOL(ndisc_send_skb); @@ -1039,7 +1039,7 @@ static void ndisc_recv_rs(struct sk_buff *skb) if (skb->len < sizeof(*rs_msg)) return; - idev = in6_dev_get(skb->dev); + idev = __in6_dev_get(skb->dev); if (!idev) { if (net_ratelimit()) ND_PRINTK1("ICMP6 RS: can't find in6 device\n"); @@ -1080,7 +1080,7 @@ static void ndisc_recv_rs(struct sk_buff *skb) neigh_release(neigh); } out: - in6_dev_put(idev); + return; } static void ndisc_ra_useropt(struct sk_buff *ra, struct nd_opt_hdr *opt) @@ -1179,7 +1179,7 @@ static void ndisc_router_discovery(struct sk_buff *skb) * set the RA_RECV flag in the interface */ - in6_dev = in6_dev_get(skb->dev); + in6_dev = __in6_dev_get(skb->dev); if (in6_dev == NULL) { ND_PRINTK0(KERN_ERR "ICMPv6 RA: can't find inet6 device for %s.\n", @@ -1188,7 +1188,6 @@ static void ndisc_router_discovery(struct sk_buff *skb) } if (!ndisc_parse_options(opt, optlen, &ndopts)) { - in6_dev_put(in6_dev); ND_PRINTK2(KERN_WARNING "ICMP6 RA: invalid ND options\n"); return; @@ -1255,7 +1254,6 @@ static void ndisc_router_discovery(struct sk_buff *skb) ND_PRINTK0(KERN_ERR "ICMPv6 RA: %s() failed to add default route.\n", __func__); - in6_dev_put(in6_dev); return; } @@ -1265,7 +1263,6 @@ static void ndisc_router_discovery(struct sk_buff *skb) "ICMPv6 RA: %s() got default router without neighbour.\n", __func__); dst_release(&rt->dst); - in6_dev_put(in6_dev); return; } neigh->flags |= NTF_ROUTER; @@ -1422,7 +1419,6 @@ out: dst_release(&rt->dst); else if (neigh) neigh_release(neigh); - in6_dev_put(in6_dev); } static void ndisc_redirect_rcv(struct sk_buff *skb) @@ -1481,13 +1477,11 @@ static void ndisc_redirect_rcv(struct sk_buff *skb) return; } - in6_dev = in6_dev_get(skb->dev); + in6_dev = __in6_dev_get(skb->dev); if (!in6_dev) return; - if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects) { - in6_dev_put(in6_dev); + if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects) return; - } /* RFC2461 8.1: * The IP source address of the Redirect MUST be the same as the current @@ -1497,7 +1491,6 @@ static void ndisc_redirect_rcv(struct sk_buff *skb) if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) { ND_PRINTK2(KERN_WARNING "ICMPv6 Redirect: invalid ND options\n"); - in6_dev_put(in6_dev); return; } if (ndopts.nd_opts_tgt_lladdr) { @@ -1506,7 +1499,6 @@ static void ndisc_redirect_rcv(struct sk_buff *skb) if (!lladdr) { ND_PRINTK2(KERN_WARNING "ICMPv6 Redirect: invalid link-layer address length\n"); - in6_dev_put(in6_dev); return; } } @@ -1518,7 +1510,6 @@ static void ndisc_redirect_rcv(struct sk_buff *skb) on_link); neigh_release(neigh); } - in6_dev_put(in6_dev); } void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, @@ -1651,7 +1642,8 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, csum_partial(icmph, len, 0)); skb_dst_set(buff, dst); - idev = in6_dev_get(dst->dev); + rcu_read_lock(); + idev = __in6_dev_get(dst->dev); IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, buff, NULL, dst->dev, dst_output); @@ -1660,8 +1652,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); } - if (likely(idev != NULL)) - in6_dev_put(idev); + rcu_read_unlock(); return; release: diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 6a79f3081bdb..f34902f1ba33 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -130,14 +130,14 @@ static mh_filter_t __rcu *mh_filter __read_mostly; int rawv6_mh_filter_register(mh_filter_t filter) { - rcu_assign_pointer(mh_filter, filter); + RCU_INIT_POINTER(mh_filter, filter); return 0; } EXPORT_SYMBOL(rawv6_mh_filter_register); int rawv6_mh_filter_unregister(mh_filter_t filter) { - rcu_assign_pointer(mh_filter, NULL); + RCU_INIT_POINTER(mh_filter, NULL); synchronize_rcu(); return 0; } @@ -372,9 +372,9 @@ void raw6_icmp_error(struct sk_buff *skb, int nexthdr, read_unlock(&raw_v6_hashinfo.lock); } -static inline int rawv6_rcv_skb(struct sock * sk, struct sk_buff * skb) +static inline int rawv6_rcv_skb(struct sock *sk, struct sk_buff *skb) { - if ((raw6_sk(sk)->checksum || rcu_dereference_raw(sk->sk_filter)) && + if ((raw6_sk(sk)->checksum || rcu_access_pointer(sk->sk_filter)) && skb_checksum_complete(skb)) { atomic_inc(&sk->sk_drops); kfree_skb(skb); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index e8987da06667..9e69eb0ec6dd 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -364,7 +364,7 @@ out: #ifdef CONFIG_IPV6_ROUTER_PREF static void rt6_probe(struct rt6_info *rt) { - struct neighbour *neigh = rt ? dst_get_neighbour(&rt->dst) : NULL; + struct neighbour *neigh; /* * Okay, this does not seem to be appropriate * for now, however, we need to check if it @@ -373,8 +373,10 @@ static void rt6_probe(struct rt6_info *rt) * Router Reachability Probe MUST be rate-limited * to no more than one per minute. */ + rcu_read_lock(); + neigh = rt ? dst_get_neighbour(&rt->dst) : NULL; if (!neigh || (neigh->nud_state & NUD_VALID)) - return; + goto out; read_lock_bh(&neigh->lock); if (!(neigh->nud_state & NUD_VALID) && time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) { @@ -387,8 +389,11 @@ static void rt6_probe(struct rt6_info *rt) target = (struct in6_addr *)&neigh->primary_key; addrconf_addr_solict_mult(target, &mcaddr); ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL); - } else + } else { read_unlock_bh(&neigh->lock); + } +out: + rcu_read_unlock(); } #else static inline void rt6_probe(struct rt6_info *rt) @@ -412,8 +417,11 @@ static inline int rt6_check_dev(struct rt6_info *rt, int oif) static inline int rt6_check_neigh(struct rt6_info *rt) { - struct neighbour *neigh = dst_get_neighbour(&rt->dst); + struct neighbour *neigh; int m; + + rcu_read_lock(); + neigh = dst_get_neighbour(&rt->dst); if (rt->rt6i_flags & RTF_NONEXTHOP || !(rt->rt6i_flags & RTF_GATEWAY)) m = 1; @@ -430,6 +438,7 @@ static inline int rt6_check_neigh(struct rt6_info *rt) read_unlock_bh(&neigh->lock); } else m = 0; + rcu_read_unlock(); return m; } @@ -769,7 +778,7 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, rt->rt6i_dst.plen = 128; rt->rt6i_flags |= RTF_CACHE; rt->dst.flags |= DST_HOST; - dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour(&ort->dst))); + dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_raw(&ort->dst))); } return rt; } @@ -803,7 +812,7 @@ restart: dst_hold(&rt->dst); read_unlock_bh(&table->tb6_lock); - if (!dst_get_neighbour(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP)) + if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP)) nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr); else if (!(rt->dst.flags & DST_HOST)) nrt = rt6_alloc_clone(rt, &fl6->daddr); @@ -1587,7 +1596,7 @@ void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src, dst_confirm(&rt->dst); /* Duplicate redirect: silently ignore. */ - if (neigh == dst_get_neighbour(&rt->dst)) + if (neigh == dst_get_neighbour_raw(&rt->dst)) goto out; nrt = ip6_rt_copy(rt, dest); @@ -1682,7 +1691,7 @@ again: 1. It is connected route. Action: COW 2. It is gatewayed route or NONEXTHOP route. Action: clone it. */ - if (!dst_get_neighbour(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP)) + if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP)) nrt = rt6_alloc_cow(rt, daddr, saddr); else nrt = rt6_alloc_clone(rt, daddr); @@ -2326,6 +2335,7 @@ static int rt6_fill_node(struct net *net, struct nlmsghdr *nlh; long expires; u32 table; + struct neighbour *n; if (prefix) { /* user wants prefix routes only */ if (!(rt->rt6i_flags & RTF_PREFIX_RT)) { @@ -2414,8 +2424,11 @@ static int rt6_fill_node(struct net *net, if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) goto nla_put_failure; - if (dst_get_neighbour(&rt->dst)) - NLA_PUT(skb, RTA_GATEWAY, 16, &dst_get_neighbour(&rt->dst)->primary_key); + rcu_read_lock(); + n = dst_get_neighbour(&rt->dst); + if (n) + NLA_PUT(skb, RTA_GATEWAY, 16, &n->primary_key); + rcu_read_unlock(); if (rt->dst.dev) NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex); @@ -2608,12 +2621,14 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg) #else seq_puts(m, "00000000000000000000000000000000 00 "); #endif + rcu_read_lock(); n = dst_get_neighbour(&rt->dst); if (n) { seq_printf(m, "%pi6", n->primary_key); } else { seq_puts(m, "00000000000000000000000000000000"); } + rcu_read_unlock(); seq_printf(m, " %08x %08x %08x %08x %8s\n", rt->rt6i_metric, atomic_read(&rt->dst.__refcnt), rt->dst.__use, rt->rt6i_flags, diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 07bf1085458f..a7a18602a046 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -182,7 +182,7 @@ static void ipip6_tunnel_unlink(struct sit_net *sitn, struct ip_tunnel *t) (iter = rtnl_dereference(*tp)) != NULL; tp = &iter->next) { if (t == iter) { - rcu_assign_pointer(*tp, t->next); + RCU_INIT_POINTER(*tp, t->next); break; } } @@ -192,8 +192,8 @@ static void ipip6_tunnel_link(struct sit_net *sitn, struct ip_tunnel *t) { struct ip_tunnel __rcu **tp = ipip6_bucket(sitn, t); - rcu_assign_pointer(t->next, rtnl_dereference(*tp)); - rcu_assign_pointer(*tp, t); + RCU_INIT_POINTER(t->next, rtnl_dereference(*tp)); + RCU_INIT_POINTER(*tp, t); } static void ipip6_tunnel_clone_6rd(struct net_device *dev, struct sit_net *sitn) @@ -391,7 +391,7 @@ ipip6_tunnel_add_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a, int chg) p->addr = a->addr; p->flags = a->flags; t->prl_count++; - rcu_assign_pointer(t->prl, p); + RCU_INIT_POINTER(t->prl, p); out: return err; } @@ -474,7 +474,7 @@ static void ipip6_tunnel_uninit(struct net_device *dev) struct sit_net *sitn = net_generic(net, sit_net_id); if (dev == sitn->fb_tunnel_dev) { - rcu_assign_pointer(sitn->tunnels_wc[0], NULL); + RCU_INIT_POINTER(sitn->tunnels_wc[0], NULL); } else { ipip6_tunnel_unlink(sitn, netdev_priv(dev)); ipip6_tunnel_del_prl(netdev_priv(dev), NULL); @@ -672,6 +672,9 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, if (skb->protocol != htons(ETH_P_IPV6)) goto tx_error; + if (tos == 1) + tos = ipv6_get_dsfield(iph6); + /* ISATAP (RFC4214) - must come before 6to4 */ if (dev->priv_flags & IFF_ISATAP) { struct neighbour *neigh = NULL; @@ -1173,7 +1176,7 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev) if (!dev->tstats) return -ENOMEM; dev_hold(dev); - rcu_assign_pointer(sitn->tunnels_wc[0], tunnel); + RCU_INIT_POINTER(sitn->tunnels_wc[0], tunnel); return 0; } diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 89d5bf806222..ac838965ff34 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -165,7 +165,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) int mss; struct dst_entry *dst; __u8 rcv_wscale; - bool ecn_ok; + bool ecn_ok = false; if (!sysctl_tcp_syncookies || !th->ack || th->rst) goto out; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 78aa53492b3e..44a5859535b5 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -61,6 +61,7 @@ #include <net/timewait_sock.h> #include <net/netdma.h> #include <net/inet_common.h> +#include <net/secure_seq.h> #include <asm/uaccess.h> @@ -1627,7 +1628,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) opt_skb = skb_clone(skb, GFP_ATOMIC); if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ - sock_rps_save_rxhash(sk, skb->rxhash); + sock_rps_save_rxhash(sk, skb); if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) goto reset; if (opt_skb) @@ -1649,7 +1650,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) * the new socket.. */ if(nsk != sk) { - sock_rps_save_rxhash(nsk, skb->rxhash); + sock_rps_save_rxhash(nsk, skb); if (tcp_child_process(sk, nsk, skb)) goto reset; if (opt_skb) @@ -1657,7 +1658,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) return 0; } } else - sock_rps_save_rxhash(sk, skb->rxhash); + sock_rps_save_rxhash(sk, skb); if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) goto reset; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 29213b51c499..35bbdc42241e 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -509,7 +509,7 @@ int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) int is_udplite = IS_UDPLITE(sk); if (!ipv6_addr_any(&inet6_sk(sk)->daddr)) - sock_rps_save_rxhash(sk, skb->rxhash); + sock_rps_save_rxhash(sk, skb); if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) goto drop; @@ -533,7 +533,7 @@ int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) } } - if (rcu_dereference_raw(sk->sk_filter)) { + if (rcu_access_pointer(sk->sk_filter)) { if (udp_lib_checksum_complete(skb)) goto drop; } diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c index e8d5f4405d68..d14152e866d9 100644 --- a/net/irda/irlan/irlan_eth.c +++ b/net/irda/irlan/irlan_eth.c @@ -50,7 +50,7 @@ static const struct net_device_ops irlan_eth_netdev_ops = { .ndo_open = irlan_eth_open, .ndo_stop = irlan_eth_close, .ndo_start_xmit = irlan_eth_xmit, - .ndo_set_multicast_list = irlan_eth_set_multicast_list, + .ndo_set_rx_mode = irlan_eth_set_multicast_list, .ndo_change_mtu = eth_change_mtu, .ndo_validate_addr = eth_validate_addr, }; diff --git a/net/iucv/Kconfig b/net/iucv/Kconfig index 16ce9cd4f39e..497fbe732def 100644 --- a/net/iucv/Kconfig +++ b/net/iucv/Kconfig @@ -1,15 +1,17 @@ config IUCV - tristate "IUCV support (S390 - z/VM only)" depends on S390 + def_tristate y if S390 + prompt "IUCV support (S390 - z/VM only)" help Select this option if you want to use inter-user communication under VM or VIF. If you run on z/VM, say "Y" to enable a fast communication link between VM guests. config AFIUCV - tristate "AF_IUCV support (S390 - z/VM only)" - depends on IUCV + depends on S390 + def_tristate m if QETH_L3 || IUCV + prompt "AF_IUCV Socket support (S390 - z/VM and HiperSockets transport)" help - Select this option if you want to use inter-user communication under - VM or VIF sockets. If you run on z/VM, say "Y" to enable a fast - communication link between VM guests. + Select this option if you want to use AF_IUCV socket applications + based on z/VM inter-user communication vehicle or based on + HiperSockets. diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index e2013e434d03..c39f3a43cd80 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -27,10 +27,9 @@ #include <asm/cpcmd.h> #include <linux/kmod.h> -#include <net/iucv/iucv.h> #include <net/iucv/af_iucv.h> -#define VERSION "1.1" +#define VERSION "1.2" static char iucv_userid[80]; @@ -42,6 +41,8 @@ static struct proto iucv_proto = { .obj_size = sizeof(struct iucv_sock), }; +static struct iucv_interface *pr_iucv; + /* special AF_IUCV IPRM messages */ static const u8 iprm_shutdown[8] = {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}; @@ -90,6 +91,12 @@ do { \ static void iucv_sock_kill(struct sock *sk); static void iucv_sock_close(struct sock *sk); +static int afiucv_hs_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev); +static int afiucv_hs_send(struct iucv_message *imsg, struct sock *sock, + struct sk_buff *skb, u8 flags); +static void afiucv_hs_callback_txnotify(struct sk_buff *, enum iucv_tx_notify); + /* Call Back functions */ static void iucv_callback_rx(struct iucv_path *, struct iucv_message *); static void iucv_callback_txdone(struct iucv_path *, struct iucv_message *); @@ -165,7 +172,7 @@ static int afiucv_pm_freeze(struct device *dev) case IUCV_CLOSING: case IUCV_CONNECTED: if (iucv->path) { - err = iucv_path_sever(iucv->path, NULL); + err = pr_iucv->path_sever(iucv->path, NULL); iucv_path_free(iucv->path); iucv->path = NULL; } @@ -229,7 +236,7 @@ static const struct dev_pm_ops afiucv_pm_ops = { static struct device_driver af_iucv_driver = { .owner = THIS_MODULE, .name = "afiucv", - .bus = &iucv_bus, + .bus = NULL, .pm = &afiucv_pm_ops, }; @@ -294,7 +301,11 @@ static inline int iucv_below_msglim(struct sock *sk) if (sk->sk_state != IUCV_CONNECTED) return 1; - return (skb_queue_len(&iucv->send_skb_q) < iucv->path->msglim); + if (iucv->transport == AF_IUCV_TRANS_IUCV) + return (skb_queue_len(&iucv->send_skb_q) < iucv->path->msglim); + else + return ((atomic_read(&iucv->msg_sent) < iucv->msglimit_peer) && + (atomic_read(&iucv->pendings) <= 0)); } /** @@ -312,6 +323,79 @@ static void iucv_sock_wake_msglim(struct sock *sk) rcu_read_unlock(); } +/** + * afiucv_hs_send() - send a message through HiperSockets transport + */ +static int afiucv_hs_send(struct iucv_message *imsg, struct sock *sock, + struct sk_buff *skb, u8 flags) +{ + struct net *net = sock_net(sock); + struct iucv_sock *iucv = iucv_sk(sock); + struct af_iucv_trans_hdr *phs_hdr; + struct sk_buff *nskb; + int err, confirm_recv = 0; + + memset(skb->head, 0, ETH_HLEN); + phs_hdr = (struct af_iucv_trans_hdr *)skb_push(skb, + sizeof(struct af_iucv_trans_hdr)); + skb_reset_mac_header(skb); + skb_reset_network_header(skb); + skb_push(skb, ETH_HLEN); + skb_reset_mac_header(skb); + memset(phs_hdr, 0, sizeof(struct af_iucv_trans_hdr)); + + phs_hdr->magic = ETH_P_AF_IUCV; + phs_hdr->version = 1; + phs_hdr->flags = flags; + if (flags == AF_IUCV_FLAG_SYN) + phs_hdr->window = iucv->msglimit; + else if ((flags == AF_IUCV_FLAG_WIN) || !flags) { + confirm_recv = atomic_read(&iucv->msg_recv); + phs_hdr->window = confirm_recv; + if (confirm_recv) + phs_hdr->flags = phs_hdr->flags | AF_IUCV_FLAG_WIN; + } + memcpy(phs_hdr->destUserID, iucv->dst_user_id, 8); + memcpy(phs_hdr->destAppName, iucv->dst_name, 8); + memcpy(phs_hdr->srcUserID, iucv->src_user_id, 8); + memcpy(phs_hdr->srcAppName, iucv->src_name, 8); + ASCEBC(phs_hdr->destUserID, sizeof(phs_hdr->destUserID)); + ASCEBC(phs_hdr->destAppName, sizeof(phs_hdr->destAppName)); + ASCEBC(phs_hdr->srcUserID, sizeof(phs_hdr->srcUserID)); + ASCEBC(phs_hdr->srcAppName, sizeof(phs_hdr->srcAppName)); + if (imsg) + memcpy(&phs_hdr->iucv_hdr, imsg, sizeof(struct iucv_message)); + + rcu_read_lock(); + skb->dev = dev_get_by_index_rcu(net, sock->sk_bound_dev_if); + rcu_read_unlock(); + if (!skb->dev) + return -ENODEV; + if (!(skb->dev->flags & IFF_UP)) + return -ENETDOWN; + if (skb->len > skb->dev->mtu) { + if (sock->sk_type == SOCK_SEQPACKET) + return -EMSGSIZE; + else + skb_trim(skb, skb->dev->mtu); + } + skb->protocol = ETH_P_AF_IUCV; + skb_shinfo(skb)->tx_flags |= SKBTX_DRV_NEEDS_SK_REF; + nskb = skb_clone(skb, GFP_ATOMIC); + if (!nskb) + return -ENOMEM; + skb_queue_tail(&iucv->send_skb_q, nskb); + err = dev_queue_xmit(skb); + if (err) { + skb_unlink(nskb, &iucv->send_skb_q); + kfree_skb(nskb); + } else { + atomic_sub(confirm_recv, &iucv->msg_recv); + WARN_ON(atomic_read(&iucv->msg_recv) < 0); + } + return err; +} + /* Timers */ static void iucv_sock_timeout(unsigned long arg) { @@ -380,6 +464,8 @@ static void iucv_sock_close(struct sock *sk) unsigned char user_data[16]; struct iucv_sock *iucv = iucv_sk(sk); unsigned long timeo; + int err, blen; + struct sk_buff *skb; iucv_sock_clear_timer(sk); lock_sock(sk); @@ -390,6 +476,20 @@ static void iucv_sock_close(struct sock *sk) break; case IUCV_CONNECTED: + if (iucv->transport == AF_IUCV_TRANS_HIPER) { + /* send fin */ + blen = sizeof(struct af_iucv_trans_hdr) + ETH_HLEN; + skb = sock_alloc_send_skb(sk, blen, 1, &err); + if (skb) { + skb_reserve(skb, + sizeof(struct af_iucv_trans_hdr) + + ETH_HLEN); + err = afiucv_hs_send(NULL, sk, skb, + AF_IUCV_FLAG_FIN); + } + sk->sk_state = IUCV_DISCONN; + sk->sk_state_change(sk); + } case IUCV_DISCONN: sk->sk_state = IUCV_CLOSING; sk->sk_state_change(sk); @@ -412,7 +512,7 @@ static void iucv_sock_close(struct sock *sk) low_nmcpy(user_data, iucv->src_name); high_nmcpy(user_data, iucv->dst_name); ASCEBC(user_data, sizeof(user_data)); - iucv_path_sever(iucv->path, user_data); + pr_iucv->path_sever(iucv->path, user_data); iucv_path_free(iucv->path); iucv->path = NULL; } @@ -444,23 +544,33 @@ static void iucv_sock_init(struct sock *sk, struct sock *parent) static struct sock *iucv_sock_alloc(struct socket *sock, int proto, gfp_t prio) { struct sock *sk; + struct iucv_sock *iucv; sk = sk_alloc(&init_net, PF_IUCV, prio, &iucv_proto); if (!sk) return NULL; + iucv = iucv_sk(sk); sock_init_data(sock, sk); - INIT_LIST_HEAD(&iucv_sk(sk)->accept_q); - spin_lock_init(&iucv_sk(sk)->accept_q_lock); - skb_queue_head_init(&iucv_sk(sk)->send_skb_q); - INIT_LIST_HEAD(&iucv_sk(sk)->message_q.list); - spin_lock_init(&iucv_sk(sk)->message_q.lock); - skb_queue_head_init(&iucv_sk(sk)->backlog_skb_q); - iucv_sk(sk)->send_tag = 0; - iucv_sk(sk)->flags = 0; - iucv_sk(sk)->msglimit = IUCV_QUEUELEN_DEFAULT; - iucv_sk(sk)->path = NULL; - memset(&iucv_sk(sk)->src_user_id , 0, 32); + INIT_LIST_HEAD(&iucv->accept_q); + spin_lock_init(&iucv->accept_q_lock); + skb_queue_head_init(&iucv->send_skb_q); + INIT_LIST_HEAD(&iucv->message_q.list); + spin_lock_init(&iucv->message_q.lock); + skb_queue_head_init(&iucv->backlog_skb_q); + iucv->send_tag = 0; + atomic_set(&iucv->pendings, 0); + iucv->flags = 0; + iucv->msglimit = 0; + atomic_set(&iucv->msg_sent, 0); + atomic_set(&iucv->msg_recv, 0); + iucv->path = NULL; + iucv->sk_txnotify = afiucv_hs_callback_txnotify; + memset(&iucv->src_user_id , 0, 32); + if (pr_iucv) + iucv->transport = AF_IUCV_TRANS_IUCV; + else + iucv->transport = AF_IUCV_TRANS_HIPER; sk->sk_destruct = iucv_sock_destruct; sk->sk_sndtimeo = IUCV_CONN_TIMEOUT; @@ -591,7 +701,9 @@ static int iucv_sock_bind(struct socket *sock, struct sockaddr *addr, struct sockaddr_iucv *sa = (struct sockaddr_iucv *) addr; struct sock *sk = sock->sk; struct iucv_sock *iucv; - int err; + int err = 0; + struct net_device *dev; + char uid[9]; /* Verify the input sockaddr */ if (!addr || addr->sa_family != AF_IUCV) @@ -610,19 +722,46 @@ static int iucv_sock_bind(struct socket *sock, struct sockaddr *addr, err = -EADDRINUSE; goto done_unlock; } - if (iucv->path) { - err = 0; + if (iucv->path) goto done_unlock; - } /* Bind the socket */ - memcpy(iucv->src_name, sa->siucv_name, 8); - /* Copy the user id */ - memcpy(iucv->src_user_id, iucv_userid, 8); - sk->sk_state = IUCV_BOUND; - err = 0; + if (pr_iucv) + if (!memcmp(sa->siucv_user_id, iucv_userid, 8)) + goto vm_bind; /* VM IUCV transport */ + /* try hiper transport */ + memcpy(uid, sa->siucv_user_id, sizeof(uid)); + ASCEBC(uid, 8); + rcu_read_lock(); + for_each_netdev_rcu(&init_net, dev) { + if (!memcmp(dev->perm_addr, uid, 8)) { + memcpy(iucv->src_name, sa->siucv_name, 8); + memcpy(iucv->src_user_id, sa->siucv_user_id, 8); + sock->sk->sk_bound_dev_if = dev->ifindex; + sk->sk_state = IUCV_BOUND; + iucv->transport = AF_IUCV_TRANS_HIPER; + if (!iucv->msglimit) + iucv->msglimit = IUCV_HIPER_MSGLIM_DEFAULT; + rcu_read_unlock(); + goto done_unlock; + } + } + rcu_read_unlock(); +vm_bind: + if (pr_iucv) { + /* use local userid for backward compat */ + memcpy(iucv->src_name, sa->siucv_name, 8); + memcpy(iucv->src_user_id, iucv_userid, 8); + sk->sk_state = IUCV_BOUND; + iucv->transport = AF_IUCV_TRANS_IUCV; + if (!iucv->msglimit) + iucv->msglimit = IUCV_QUEUELEN_DEFAULT; + goto done_unlock; + } + /* found no dev to bind */ + err = -ENODEV; done_unlock: /* Release the socket list lock */ write_unlock_bh(&iucv_sk_list.lock); @@ -658,45 +797,44 @@ static int iucv_sock_autobind(struct sock *sk) memcpy(&iucv->src_name, name, 8); + if (!iucv->msglimit) + iucv->msglimit = IUCV_QUEUELEN_DEFAULT; + return err; } -/* Connect an unconnected socket */ -static int iucv_sock_connect(struct socket *sock, struct sockaddr *addr, - int alen, int flags) +static int afiucv_hs_connect(struct socket *sock) { - struct sockaddr_iucv *sa = (struct sockaddr_iucv *) addr; struct sock *sk = sock->sk; - struct iucv_sock *iucv; - unsigned char user_data[16]; - int err; - - if (addr->sa_family != AF_IUCV || alen < sizeof(struct sockaddr_iucv)) - return -EINVAL; - - if (sk->sk_state != IUCV_OPEN && sk->sk_state != IUCV_BOUND) - return -EBADFD; - - if (sk->sk_type != SOCK_STREAM && sk->sk_type != SOCK_SEQPACKET) - return -EINVAL; + struct sk_buff *skb; + int blen = sizeof(struct af_iucv_trans_hdr) + ETH_HLEN; + int err = 0; - if (sk->sk_state == IUCV_OPEN) { - err = iucv_sock_autobind(sk); - if (unlikely(err)) - return err; + /* send syn */ + skb = sock_alloc_send_skb(sk, blen, 1, &err); + if (!skb) { + err = -ENOMEM; + goto done; } + skb->dev = NULL; + skb_reserve(skb, blen); + err = afiucv_hs_send(NULL, sk, skb, AF_IUCV_FLAG_SYN); +done: + return err; +} - lock_sock(sk); - - /* Set the destination information */ - memcpy(iucv_sk(sk)->dst_user_id, sa->siucv_user_id, 8); - memcpy(iucv_sk(sk)->dst_name, sa->siucv_name, 8); +static int afiucv_path_connect(struct socket *sock, struct sockaddr *addr) +{ + struct sockaddr_iucv *sa = (struct sockaddr_iucv *) addr; + struct sock *sk = sock->sk; + struct iucv_sock *iucv = iucv_sk(sk); + unsigned char user_data[16]; + int err; high_nmcpy(user_data, sa->siucv_name); - low_nmcpy(user_data, iucv_sk(sk)->src_name); + low_nmcpy(user_data, iucv->src_name); ASCEBC(user_data, sizeof(user_data)); - iucv = iucv_sk(sk); /* Create path. */ iucv->path = iucv_path_alloc(iucv->msglimit, IUCV_IPRMDATA, GFP_KERNEL); @@ -704,8 +842,9 @@ static int iucv_sock_connect(struct socket *sock, struct sockaddr *addr, err = -ENOMEM; goto done; } - err = iucv_path_connect(iucv->path, &af_iucv_handler, - sa->siucv_user_id, NULL, user_data, sk); + err = pr_iucv->path_connect(iucv->path, &af_iucv_handler, + sa->siucv_user_id, NULL, user_data, + sk); if (err) { iucv_path_free(iucv->path); iucv->path = NULL; @@ -724,21 +863,62 @@ static int iucv_sock_connect(struct socket *sock, struct sockaddr *addr, err = -ECONNREFUSED; break; } - goto done; } +done: + return err; +} - if (sk->sk_state != IUCV_CONNECTED) { +/* Connect an unconnected socket */ +static int iucv_sock_connect(struct socket *sock, struct sockaddr *addr, + int alen, int flags) +{ + struct sockaddr_iucv *sa = (struct sockaddr_iucv *) addr; + struct sock *sk = sock->sk; + struct iucv_sock *iucv = iucv_sk(sk); + int err; + + if (addr->sa_family != AF_IUCV || alen < sizeof(struct sockaddr_iucv)) + return -EINVAL; + + if (sk->sk_state != IUCV_OPEN && sk->sk_state != IUCV_BOUND) + return -EBADFD; + + if (sk->sk_state == IUCV_OPEN && + iucv->transport == AF_IUCV_TRANS_HIPER) + return -EBADFD; /* explicit bind required */ + + if (sk->sk_type != SOCK_STREAM && sk->sk_type != SOCK_SEQPACKET) + return -EINVAL; + + if (sk->sk_state == IUCV_OPEN) { + err = iucv_sock_autobind(sk); + if (unlikely(err)) + return err; + } + + lock_sock(sk); + + /* Set the destination information */ + memcpy(iucv->dst_user_id, sa->siucv_user_id, 8); + memcpy(iucv->dst_name, sa->siucv_name, 8); + + if (iucv->transport == AF_IUCV_TRANS_HIPER) + err = afiucv_hs_connect(sock); + else + err = afiucv_path_connect(sock, addr); + if (err) + goto done; + + if (sk->sk_state != IUCV_CONNECTED) err = iucv_sock_wait(sk, iucv_sock_in_state(sk, IUCV_CONNECTED, IUCV_DISCONN), sock_sndtimeo(sk, flags & O_NONBLOCK)); - } - if (sk->sk_state == IUCV_DISCONN) { + if (sk->sk_state == IUCV_DISCONN || sk->sk_state == IUCV_CLOSED) err = -ECONNREFUSED; - } - if (err) { - iucv_path_sever(iucv->path, NULL); + if (err && iucv->transport == AF_IUCV_TRANS_IUCV) { + pr_iucv->path_sever(iucv->path, NULL); iucv_path_free(iucv->path); iucv->path = NULL; } @@ -833,20 +1013,21 @@ static int iucv_sock_getname(struct socket *sock, struct sockaddr *addr, { struct sockaddr_iucv *siucv = (struct sockaddr_iucv *) addr; struct sock *sk = sock->sk; + struct iucv_sock *iucv = iucv_sk(sk); addr->sa_family = AF_IUCV; *len = sizeof(struct sockaddr_iucv); if (peer) { - memcpy(siucv->siucv_user_id, iucv_sk(sk)->dst_user_id, 8); - memcpy(siucv->siucv_name, &iucv_sk(sk)->dst_name, 8); + memcpy(siucv->siucv_user_id, iucv->dst_user_id, 8); + memcpy(siucv->siucv_name, iucv->dst_name, 8); } else { - memcpy(siucv->siucv_user_id, iucv_sk(sk)->src_user_id, 8); - memcpy(siucv->siucv_name, iucv_sk(sk)->src_name, 8); + memcpy(siucv->siucv_user_id, iucv->src_user_id, 8); + memcpy(siucv->siucv_name, iucv->src_name, 8); } memset(&siucv->siucv_port, 0, sizeof(siucv->siucv_port)); memset(&siucv->siucv_addr, 0, sizeof(siucv->siucv_addr)); - memset(siucv->siucv_nodeid, 0, sizeof(siucv->siucv_nodeid)); + memset(&siucv->siucv_nodeid, 0, sizeof(siucv->siucv_nodeid)); return 0; } @@ -871,7 +1052,7 @@ static int iucv_send_iprm(struct iucv_path *path, struct iucv_message *msg, memcpy(prmdata, (void *) skb->data, skb->len); prmdata[7] = 0xff - (u8) skb->len; - return iucv_message_send(path, msg, IUCV_IPRMDATA, 0, + return pr_iucv->message_send(path, msg, IUCV_IPRMDATA, 0, (void *) prmdata, 8); } @@ -960,9 +1141,16 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock, * this is fine for SOCK_SEQPACKET (unless we want to support * segmented records using the MSG_EOR flag), but * for SOCK_STREAM we might want to improve it in future */ - skb = sock_alloc_send_skb(sk, len, noblock, &err); + if (iucv->transport == AF_IUCV_TRANS_HIPER) + skb = sock_alloc_send_skb(sk, + len + sizeof(struct af_iucv_trans_hdr) + ETH_HLEN, + noblock, &err); + else + skb = sock_alloc_send_skb(sk, len, noblock, &err); if (!skb) goto out; + if (iucv->transport == AF_IUCV_TRANS_HIPER) + skb_reserve(skb, sizeof(struct af_iucv_trans_hdr) + ETH_HLEN); if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) { err = -EFAULT; goto fail; @@ -983,6 +1171,15 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock, /* increment and save iucv message tag for msg_completion cbk */ txmsg.tag = iucv->send_tag++; memcpy(CB_TAG(skb), &txmsg.tag, CB_TAG_LEN); + if (iucv->transport == AF_IUCV_TRANS_HIPER) { + atomic_inc(&iucv->msg_sent); + err = afiucv_hs_send(&txmsg, sk, skb, 0); + if (err) { + atomic_dec(&iucv->msg_sent); + goto fail; + } + goto release; + } skb_queue_tail(&iucv->send_skb_q, skb); if (((iucv->path->flags & IUCV_IPRMDATA) & iucv->flags) @@ -999,13 +1196,13 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock, /* this error should never happen since the * IUCV_IPRMDATA path flag is set... sever path */ if (err == 0x15) { - iucv_path_sever(iucv->path, NULL); + pr_iucv->path_sever(iucv->path, NULL); skb_unlink(skb, &iucv->send_skb_q); err = -EPIPE; goto fail; } } else - err = iucv_message_send(iucv->path, &txmsg, 0, 0, + err = pr_iucv->message_send(iucv->path, &txmsg, 0, 0, (void *) skb->data, skb->len); if (err) { if (err == 3) { @@ -1023,6 +1220,7 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock, goto fail; } +release: release_sock(sk); return len; @@ -1095,8 +1293,9 @@ static void iucv_process_message(struct sock *sk, struct sk_buff *skb, skb->len = 0; } } else { - rc = iucv_message_receive(path, msg, msg->flags & IUCV_IPRMDATA, - skb->data, len, NULL); + rc = pr_iucv->message_receive(path, msg, + msg->flags & IUCV_IPRMDATA, + skb->data, len, NULL); if (rc) { kfree_skb(skb); return; @@ -1110,7 +1309,7 @@ static void iucv_process_message(struct sock *sk, struct sk_buff *skb, kfree_skb(skb); skb = NULL; if (rc) { - iucv_path_sever(path, NULL); + pr_iucv->path_sever(path, NULL); return; } skb = skb_dequeue(&iucv_sk(sk)->backlog_skb_q); @@ -1154,7 +1353,8 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, struct sock *sk = sock->sk; struct iucv_sock *iucv = iucv_sk(sk); unsigned int copied, rlen; - struct sk_buff *skb, *rskb, *cskb; + struct sk_buff *skb, *rskb, *cskb, *sskb; + int blen; int err = 0; if ((sk->sk_state == IUCV_DISCONN || sk->sk_state == IUCV_SEVERED) && @@ -1179,7 +1379,7 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, copied = min_t(unsigned int, rlen, len); cskb = skb; - if (memcpy_toiovec(msg->msg_iov, cskb->data, copied)) { + if (skb_copy_datagram_iovec(cskb, 0, msg->msg_iov, copied)) { if (!(flags & MSG_PEEK)) skb_queue_head(&sk->sk_receive_queue, skb); return -EFAULT; @@ -1217,6 +1417,7 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, } kfree_skb(skb); + atomic_inc(&iucv->msg_recv); /* Queue backlog skbs */ spin_lock_bh(&iucv->message_q.lock); @@ -1233,6 +1434,24 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, if (skb_queue_empty(&iucv->backlog_skb_q)) { if (!list_empty(&iucv->message_q.list)) iucv_process_message_q(sk); + if (atomic_read(&iucv->msg_recv) >= + iucv->msglimit / 2) { + /* send WIN to peer */ + blen = sizeof(struct af_iucv_trans_hdr) + + ETH_HLEN; + sskb = sock_alloc_send_skb(sk, blen, 1, &err); + if (sskb) { + skb_reserve(sskb, + sizeof(struct af_iucv_trans_hdr) + + ETH_HLEN); + err = afiucv_hs_send(NULL, sk, sskb, + AF_IUCV_FLAG_WIN); + } + if (err) { + sk->sk_state = IUCV_DISCONN; + sk->sk_state_change(sk); + } + } } spin_unlock_bh(&iucv->message_q.lock); } @@ -1327,8 +1546,8 @@ static int iucv_sock_shutdown(struct socket *sock, int how) if (how == SEND_SHUTDOWN || how == SHUTDOWN_MASK) { txmsg.class = 0; txmsg.tag = 0; - err = iucv_message_send(iucv->path, &txmsg, IUCV_IPRMDATA, 0, - (void *) iprm_shutdown, 8); + err = pr_iucv->message_send(iucv->path, &txmsg, IUCV_IPRMDATA, + 0, (void *) iprm_shutdown, 8); if (err) { switch (err) { case 1: @@ -1345,7 +1564,7 @@ static int iucv_sock_shutdown(struct socket *sock, int how) } if (how == RCV_SHUTDOWN || how == SHUTDOWN_MASK) { - err = iucv_path_quiesce(iucv_sk(sk)->path, NULL); + err = pr_iucv->path_quiesce(iucv->path, NULL); if (err) err = -ENOTCONN; @@ -1372,7 +1591,7 @@ static int iucv_sock_release(struct socket *sock) /* Unregister with IUCV base support */ if (iucv_sk(sk)->path) { - iucv_path_sever(iucv_sk(sk)->path, NULL); + pr_iucv->path_sever(iucv_sk(sk)->path, NULL); iucv_path_free(iucv_sk(sk)->path); iucv_sk(sk)->path = NULL; } @@ -1514,14 +1733,14 @@ static int iucv_callback_connreq(struct iucv_path *path, high_nmcpy(user_data, iucv->dst_name); ASCEBC(user_data, sizeof(user_data)); if (sk->sk_state != IUCV_LISTEN) { - err = iucv_path_sever(path, user_data); + err = pr_iucv->path_sever(path, user_data); iucv_path_free(path); goto fail; } /* Check for backlog size */ if (sk_acceptq_is_full(sk)) { - err = iucv_path_sever(path, user_data); + err = pr_iucv->path_sever(path, user_data); iucv_path_free(path); goto fail; } @@ -1529,7 +1748,7 @@ static int iucv_callback_connreq(struct iucv_path *path, /* Create the new socket */ nsk = iucv_sock_alloc(NULL, sk->sk_type, GFP_ATOMIC); if (!nsk) { - err = iucv_path_sever(path, user_data); + err = pr_iucv->path_sever(path, user_data); iucv_path_free(path); goto fail; } @@ -1553,9 +1772,9 @@ static int iucv_callback_connreq(struct iucv_path *path, /* set message limit for path based on msglimit of accepting socket */ niucv->msglimit = iucv->msglimit; path->msglim = iucv->msglimit; - err = iucv_path_accept(path, &af_iucv_handler, nuser_data, nsk); + err = pr_iucv->path_accept(path, &af_iucv_handler, nuser_data, nsk); if (err) { - err = iucv_path_sever(path, user_data); + err = pr_iucv->path_sever(path, user_data); iucv_path_free(path); iucv_sock_kill(nsk); goto fail; @@ -1589,7 +1808,7 @@ static void iucv_callback_rx(struct iucv_path *path, struct iucv_message *msg) int len; if (sk->sk_shutdown & RCV_SHUTDOWN) { - iucv_message_reject(path, msg); + pr_iucv->message_reject(path, msg); return; } @@ -1692,6 +1911,389 @@ static void iucv_callback_shutdown(struct iucv_path *path, u8 ipuser[16]) bh_unlock_sock(sk); } +/***************** HiperSockets transport callbacks ********************/ +static void afiucv_swap_src_dest(struct sk_buff *skb) +{ + struct af_iucv_trans_hdr *trans_hdr = + (struct af_iucv_trans_hdr *)skb->data; + char tmpID[8]; + char tmpName[8]; + + ASCEBC(trans_hdr->destUserID, sizeof(trans_hdr->destUserID)); + ASCEBC(trans_hdr->destAppName, sizeof(trans_hdr->destAppName)); + ASCEBC(trans_hdr->srcUserID, sizeof(trans_hdr->srcUserID)); + ASCEBC(trans_hdr->srcAppName, sizeof(trans_hdr->srcAppName)); + memcpy(tmpID, trans_hdr->srcUserID, 8); + memcpy(tmpName, trans_hdr->srcAppName, 8); + memcpy(trans_hdr->srcUserID, trans_hdr->destUserID, 8); + memcpy(trans_hdr->srcAppName, trans_hdr->destAppName, 8); + memcpy(trans_hdr->destUserID, tmpID, 8); + memcpy(trans_hdr->destAppName, tmpName, 8); + skb_push(skb, ETH_HLEN); + memset(skb->data, 0, ETH_HLEN); +} + +/** + * afiucv_hs_callback_syn - react on received SYN + **/ +static int afiucv_hs_callback_syn(struct sock *sk, struct sk_buff *skb) +{ + struct sock *nsk; + struct iucv_sock *iucv, *niucv; + struct af_iucv_trans_hdr *trans_hdr; + int err; + + iucv = iucv_sk(sk); + trans_hdr = (struct af_iucv_trans_hdr *)skb->data; + if (!iucv) { + /* no sock - connection refused */ + afiucv_swap_src_dest(skb); + trans_hdr->flags = AF_IUCV_FLAG_SYN | AF_IUCV_FLAG_FIN; + err = dev_queue_xmit(skb); + goto out; + } + + nsk = iucv_sock_alloc(NULL, sk->sk_type, GFP_ATOMIC); + bh_lock_sock(sk); + if ((sk->sk_state != IUCV_LISTEN) || + sk_acceptq_is_full(sk) || + !nsk) { + /* error on server socket - connection refused */ + if (nsk) + sk_free(nsk); + afiucv_swap_src_dest(skb); + trans_hdr->flags = AF_IUCV_FLAG_SYN | AF_IUCV_FLAG_FIN; + err = dev_queue_xmit(skb); + bh_unlock_sock(sk); + goto out; + } + + niucv = iucv_sk(nsk); + iucv_sock_init(nsk, sk); + niucv->transport = AF_IUCV_TRANS_HIPER; + niucv->msglimit = iucv->msglimit; + if (!trans_hdr->window) + niucv->msglimit_peer = IUCV_HIPER_MSGLIM_DEFAULT; + else + niucv->msglimit_peer = trans_hdr->window; + memcpy(niucv->dst_name, trans_hdr->srcAppName, 8); + memcpy(niucv->dst_user_id, trans_hdr->srcUserID, 8); + memcpy(niucv->src_name, iucv->src_name, 8); + memcpy(niucv->src_user_id, iucv->src_user_id, 8); + nsk->sk_bound_dev_if = sk->sk_bound_dev_if; + afiucv_swap_src_dest(skb); + trans_hdr->flags = AF_IUCV_FLAG_SYN | AF_IUCV_FLAG_ACK; + trans_hdr->window = niucv->msglimit; + /* if receiver acks the xmit connection is established */ + err = dev_queue_xmit(skb); + if (!err) { + iucv_accept_enqueue(sk, nsk); + nsk->sk_state = IUCV_CONNECTED; + sk->sk_data_ready(sk, 1); + } else + iucv_sock_kill(nsk); + bh_unlock_sock(sk); + +out: + return NET_RX_SUCCESS; +} + +/** + * afiucv_hs_callback_synack() - react on received SYN-ACK + **/ +static int afiucv_hs_callback_synack(struct sock *sk, struct sk_buff *skb) +{ + struct iucv_sock *iucv = iucv_sk(sk); + struct af_iucv_trans_hdr *trans_hdr = + (struct af_iucv_trans_hdr *)skb->data; + + if (!iucv) + goto out; + if (sk->sk_state != IUCV_BOUND) + goto out; + bh_lock_sock(sk); + iucv->msglimit_peer = trans_hdr->window; + sk->sk_state = IUCV_CONNECTED; + sk->sk_state_change(sk); + bh_unlock_sock(sk); +out: + kfree_skb(skb); + return NET_RX_SUCCESS; +} + +/** + * afiucv_hs_callback_synfin() - react on received SYN_FIN + **/ +static int afiucv_hs_callback_synfin(struct sock *sk, struct sk_buff *skb) +{ + struct iucv_sock *iucv = iucv_sk(sk); + + if (!iucv) + goto out; + if (sk->sk_state != IUCV_BOUND) + goto out; + bh_lock_sock(sk); + sk->sk_state = IUCV_DISCONN; + sk->sk_state_change(sk); + bh_unlock_sock(sk); +out: + kfree_skb(skb); + return NET_RX_SUCCESS; +} + +/** + * afiucv_hs_callback_fin() - react on received FIN + **/ +static int afiucv_hs_callback_fin(struct sock *sk, struct sk_buff *skb) +{ + struct iucv_sock *iucv = iucv_sk(sk); + + /* other end of connection closed */ + if (iucv) { + bh_lock_sock(sk); + if (!list_empty(&iucv->accept_q)) + sk->sk_state = IUCV_SEVERED; + else + sk->sk_state = IUCV_DISCONN; + sk->sk_state_change(sk); + bh_unlock_sock(sk); + } + kfree_skb(skb); + return NET_RX_SUCCESS; +} + +/** + * afiucv_hs_callback_win() - react on received WIN + **/ +static int afiucv_hs_callback_win(struct sock *sk, struct sk_buff *skb) +{ + struct iucv_sock *iucv = iucv_sk(sk); + struct af_iucv_trans_hdr *trans_hdr = + (struct af_iucv_trans_hdr *)skb->data; + + if (!iucv) + return NET_RX_SUCCESS; + + if (sk->sk_state != IUCV_CONNECTED) + return NET_RX_SUCCESS; + + atomic_sub(trans_hdr->window, &iucv->msg_sent); + iucv_sock_wake_msglim(sk); + return NET_RX_SUCCESS; +} + +/** + * afiucv_hs_callback_rx() - react on received data + **/ +static int afiucv_hs_callback_rx(struct sock *sk, struct sk_buff *skb) +{ + struct iucv_sock *iucv = iucv_sk(sk); + + if (!iucv) { + kfree_skb(skb); + return NET_RX_SUCCESS; + } + + if (sk->sk_state != IUCV_CONNECTED) { + kfree_skb(skb); + return NET_RX_SUCCESS; + } + + /* write stuff from iucv_msg to skb cb */ + if (skb->len <= sizeof(struct af_iucv_trans_hdr)) { + kfree_skb(skb); + return NET_RX_SUCCESS; + } + skb_pull(skb, sizeof(struct af_iucv_trans_hdr)); + skb_reset_transport_header(skb); + skb_reset_network_header(skb); + spin_lock(&iucv->message_q.lock); + if (skb_queue_empty(&iucv->backlog_skb_q)) { + if (sock_queue_rcv_skb(sk, skb)) { + /* handle rcv queue full */ + skb_queue_tail(&iucv->backlog_skb_q, skb); + } + } else + skb_queue_tail(&iucv_sk(sk)->backlog_skb_q, skb); + spin_unlock(&iucv->message_q.lock); + return NET_RX_SUCCESS; +} + +/** + * afiucv_hs_rcv() - base function for arriving data through HiperSockets + * transport + * called from netif RX softirq + **/ +static int afiucv_hs_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev) +{ + struct hlist_node *node; + struct sock *sk; + struct iucv_sock *iucv; + struct af_iucv_trans_hdr *trans_hdr; + char nullstring[8]; + int err = 0; + + skb_pull(skb, ETH_HLEN); + trans_hdr = (struct af_iucv_trans_hdr *)skb->data; + EBCASC(trans_hdr->destAppName, sizeof(trans_hdr->destAppName)); + EBCASC(trans_hdr->destUserID, sizeof(trans_hdr->destUserID)); + EBCASC(trans_hdr->srcAppName, sizeof(trans_hdr->srcAppName)); + EBCASC(trans_hdr->srcUserID, sizeof(trans_hdr->srcUserID)); + memset(nullstring, 0, sizeof(nullstring)); + iucv = NULL; + sk = NULL; + read_lock(&iucv_sk_list.lock); + sk_for_each(sk, node, &iucv_sk_list.head) { + if (trans_hdr->flags == AF_IUCV_FLAG_SYN) { + if ((!memcmp(&iucv_sk(sk)->src_name, + trans_hdr->destAppName, 8)) && + (!memcmp(&iucv_sk(sk)->src_user_id, + trans_hdr->destUserID, 8)) && + (!memcmp(&iucv_sk(sk)->dst_name, nullstring, 8)) && + (!memcmp(&iucv_sk(sk)->dst_user_id, + nullstring, 8))) { + iucv = iucv_sk(sk); + break; + } + } else { + if ((!memcmp(&iucv_sk(sk)->src_name, + trans_hdr->destAppName, 8)) && + (!memcmp(&iucv_sk(sk)->src_user_id, + trans_hdr->destUserID, 8)) && + (!memcmp(&iucv_sk(sk)->dst_name, + trans_hdr->srcAppName, 8)) && + (!memcmp(&iucv_sk(sk)->dst_user_id, + trans_hdr->srcUserID, 8))) { + iucv = iucv_sk(sk); + break; + } + } + } + read_unlock(&iucv_sk_list.lock); + if (!iucv) + sk = NULL; + + /* no sock + how should we send with no sock + 1) send without sock no send rc checking? + 2) introduce default sock to handle this cases + + SYN -> send SYN|ACK in good case, send SYN|FIN in bad case + data -> send FIN + SYN|ACK, SYN|FIN, FIN -> no action? */ + + switch (trans_hdr->flags) { + case AF_IUCV_FLAG_SYN: + /* connect request */ + err = afiucv_hs_callback_syn(sk, skb); + break; + case (AF_IUCV_FLAG_SYN | AF_IUCV_FLAG_ACK): + /* connect request confirmed */ + err = afiucv_hs_callback_synack(sk, skb); + break; + case (AF_IUCV_FLAG_SYN | AF_IUCV_FLAG_FIN): + /* connect request refused */ + err = afiucv_hs_callback_synfin(sk, skb); + break; + case (AF_IUCV_FLAG_FIN): + /* close request */ + err = afiucv_hs_callback_fin(sk, skb); + break; + case (AF_IUCV_FLAG_WIN): + err = afiucv_hs_callback_win(sk, skb); + if (skb->len > sizeof(struct af_iucv_trans_hdr)) + err = afiucv_hs_callback_rx(sk, skb); + else + kfree(skb); + break; + case 0: + /* plain data frame */ + err = afiucv_hs_callback_rx(sk, skb); + break; + default: + ; + } + + return err; +} + +/** + * afiucv_hs_callback_txnotify() - handle send notifcations from HiperSockets + * transport + **/ +static void afiucv_hs_callback_txnotify(struct sk_buff *skb, + enum iucv_tx_notify n) +{ + struct sock *isk = skb->sk; + struct sock *sk = NULL; + struct iucv_sock *iucv = NULL; + struct sk_buff_head *list; + struct sk_buff *list_skb; + struct sk_buff *this = NULL; + unsigned long flags; + struct hlist_node *node; + + read_lock(&iucv_sk_list.lock); + sk_for_each(sk, node, &iucv_sk_list.head) + if (sk == isk) { + iucv = iucv_sk(sk); + break; + } + read_unlock(&iucv_sk_list.lock); + + if (!iucv) + return; + + bh_lock_sock(sk); + list = &iucv->send_skb_q; + list_skb = list->next; + if (skb_queue_empty(list)) + goto out_unlock; + + spin_lock_irqsave(&list->lock, flags); + while (list_skb != (struct sk_buff *)list) { + if (skb_shinfo(list_skb) == skb_shinfo(skb)) { + this = list_skb; + switch (n) { + case TX_NOTIFY_OK: + __skb_unlink(this, list); + iucv_sock_wake_msglim(sk); + kfree_skb(this); + break; + case TX_NOTIFY_PENDING: + atomic_inc(&iucv->pendings); + break; + case TX_NOTIFY_DELAYED_OK: + __skb_unlink(this, list); + atomic_dec(&iucv->pendings); + if (atomic_read(&iucv->pendings) <= 0) + iucv_sock_wake_msglim(sk); + kfree_skb(this); + break; + case TX_NOTIFY_UNREACHABLE: + case TX_NOTIFY_DELAYED_UNREACHABLE: + case TX_NOTIFY_TPQFULL: /* not yet used */ + case TX_NOTIFY_GENERALERROR: + case TX_NOTIFY_DELAYED_GENERALERROR: + __skb_unlink(this, list); + kfree_skb(this); + if (!list_empty(&iucv->accept_q)) + sk->sk_state = IUCV_SEVERED; + else + sk->sk_state = IUCV_DISCONN; + sk->sk_state_change(sk); + break; + } + break; + } + list_skb = list_skb->next; + } + spin_unlock_irqrestore(&list->lock, flags); + +out_unlock: + bh_unlock_sock(sk); +} static const struct proto_ops iucv_sock_ops = { .family = PF_IUCV, .owner = THIS_MODULE, @@ -1718,71 +2320,104 @@ static const struct net_proto_family iucv_sock_family_ops = { .create = iucv_sock_create, }; -static int __init afiucv_init(void) +static struct packet_type iucv_packet_type = { + .type = cpu_to_be16(ETH_P_AF_IUCV), + .func = afiucv_hs_rcv, +}; + +static int afiucv_iucv_init(void) { int err; - if (!MACHINE_IS_VM) { - pr_err("The af_iucv module cannot be loaded" - " without z/VM\n"); - err = -EPROTONOSUPPORT; - goto out; - } - cpcmd("QUERY USERID", iucv_userid, sizeof(iucv_userid), &err); - if (unlikely(err)) { - WARN_ON(err); - err = -EPROTONOSUPPORT; - goto out; - } - - err = iucv_register(&af_iucv_handler, 0); + err = pr_iucv->iucv_register(&af_iucv_handler, 0); if (err) goto out; - err = proto_register(&iucv_proto, 0); - if (err) - goto out_iucv; - err = sock_register(&iucv_sock_family_ops); - if (err) - goto out_proto; /* establish dummy device */ + af_iucv_driver.bus = pr_iucv->bus; err = driver_register(&af_iucv_driver); if (err) - goto out_sock; + goto out_iucv; af_iucv_dev = kzalloc(sizeof(struct device), GFP_KERNEL); if (!af_iucv_dev) { err = -ENOMEM; goto out_driver; } dev_set_name(af_iucv_dev, "af_iucv"); - af_iucv_dev->bus = &iucv_bus; - af_iucv_dev->parent = iucv_root; + af_iucv_dev->bus = pr_iucv->bus; + af_iucv_dev->parent = pr_iucv->root; af_iucv_dev->release = (void (*)(struct device *))kfree; af_iucv_dev->driver = &af_iucv_driver; err = device_register(af_iucv_dev); if (err) goto out_driver; - return 0; out_driver: driver_unregister(&af_iucv_driver); +out_iucv: + pr_iucv->iucv_unregister(&af_iucv_handler, 0); +out: + return err; +} + +static int __init afiucv_init(void) +{ + int err; + + if (MACHINE_IS_VM) { + cpcmd("QUERY USERID", iucv_userid, sizeof(iucv_userid), &err); + if (unlikely(err)) { + WARN_ON(err); + err = -EPROTONOSUPPORT; + goto out; + } + + pr_iucv = try_then_request_module(symbol_get(iucv_if), "iucv"); + if (!pr_iucv) { + printk(KERN_WARNING "iucv_if lookup failed\n"); + memset(&iucv_userid, 0, sizeof(iucv_userid)); + } + } else { + memset(&iucv_userid, 0, sizeof(iucv_userid)); + pr_iucv = NULL; + } + + err = proto_register(&iucv_proto, 0); + if (err) + goto out; + err = sock_register(&iucv_sock_family_ops); + if (err) + goto out_proto; + + if (pr_iucv) { + err = afiucv_iucv_init(); + if (err) + goto out_sock; + } + dev_add_pack(&iucv_packet_type); + return 0; + out_sock: sock_unregister(PF_IUCV); out_proto: proto_unregister(&iucv_proto); -out_iucv: - iucv_unregister(&af_iucv_handler, 0); out: + if (pr_iucv) + symbol_put(iucv_if); return err; } static void __exit afiucv_exit(void) { - device_unregister(af_iucv_dev); - driver_unregister(&af_iucv_driver); + if (pr_iucv) { + device_unregister(af_iucv_dev); + driver_unregister(&af_iucv_driver); + pr_iucv->iucv_unregister(&af_iucv_handler, 0); + symbol_put(iucv_if); + } + dev_remove_pack(&iucv_packet_type); sock_unregister(PF_IUCV); proto_unregister(&iucv_proto); - iucv_unregister(&af_iucv_handler, 0); } module_init(afiucv_init); @@ -1793,3 +2428,4 @@ MODULE_DESCRIPTION("IUCV Sockets ver " VERSION); MODULE_VERSION(VERSION); MODULE_LICENSE("GPL"); MODULE_ALIAS_NETPROTO(PF_IUCV); + diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index 075a3808aa40..403be43b793d 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -1974,6 +1974,27 @@ out: return rc; } +struct iucv_interface iucv_if = { + .message_receive = iucv_message_receive, + .__message_receive = __iucv_message_receive, + .message_reply = iucv_message_reply, + .message_reject = iucv_message_reject, + .message_send = iucv_message_send, + .__message_send = __iucv_message_send, + .message_send2way = iucv_message_send2way, + .message_purge = iucv_message_purge, + .path_accept = iucv_path_accept, + .path_connect = iucv_path_connect, + .path_quiesce = iucv_path_quiesce, + .path_resume = iucv_path_resume, + .path_sever = iucv_path_sever, + .iucv_register = iucv_register, + .iucv_unregister = iucv_unregister, + .bus = NULL, + .root = NULL, +}; +EXPORT_SYMBOL(iucv_if); + /** * iucv_init * @@ -2038,6 +2059,8 @@ static int __init iucv_init(void) rc = bus_register(&iucv_bus); if (rc) goto out_reboot; + iucv_if.root = iucv_root; + iucv_if.bus = &iucv_bus; return 0; out_reboot: diff --git a/net/lapb/lapb_iface.c b/net/lapb/lapb_iface.c index 956b7e47dc52..8d0324bac01c 100644 --- a/net/lapb/lapb_iface.c +++ b/net/lapb/lapb_iface.c @@ -139,7 +139,8 @@ out: return lapb; } -int lapb_register(struct net_device *dev, struct lapb_register_struct *callbacks) +int lapb_register(struct net_device *dev, + const struct lapb_register_struct *callbacks) { struct lapb_cb *lapb; int rc = LAPB_BADTOKEN; @@ -158,7 +159,7 @@ int lapb_register(struct net_device *dev, struct lapb_register_struct *callbacks goto out; lapb->dev = dev; - lapb->callbacks = *callbacks; + lapb->callbacks = callbacks; __lapb_insert_cb(lapb); @@ -380,32 +381,32 @@ int lapb_data_received(struct net_device *dev, struct sk_buff *skb) void lapb_connect_confirmation(struct lapb_cb *lapb, int reason) { - if (lapb->callbacks.connect_confirmation) - lapb->callbacks.connect_confirmation(lapb->dev, reason); + if (lapb->callbacks->connect_confirmation) + lapb->callbacks->connect_confirmation(lapb->dev, reason); } void lapb_connect_indication(struct lapb_cb *lapb, int reason) { - if (lapb->callbacks.connect_indication) - lapb->callbacks.connect_indication(lapb->dev, reason); + if (lapb->callbacks->connect_indication) + lapb->callbacks->connect_indication(lapb->dev, reason); } void lapb_disconnect_confirmation(struct lapb_cb *lapb, int reason) { - if (lapb->callbacks.disconnect_confirmation) - lapb->callbacks.disconnect_confirmation(lapb->dev, reason); + if (lapb->callbacks->disconnect_confirmation) + lapb->callbacks->disconnect_confirmation(lapb->dev, reason); } void lapb_disconnect_indication(struct lapb_cb *lapb, int reason) { - if (lapb->callbacks.disconnect_indication) - lapb->callbacks.disconnect_indication(lapb->dev, reason); + if (lapb->callbacks->disconnect_indication) + lapb->callbacks->disconnect_indication(lapb->dev, reason); } int lapb_data_indication(struct lapb_cb *lapb, struct sk_buff *skb) { - if (lapb->callbacks.data_indication) - return lapb->callbacks.data_indication(lapb->dev, skb); + if (lapb->callbacks->data_indication) + return lapb->callbacks->data_indication(lapb->dev, skb); kfree_skb(skb); return NET_RX_SUCCESS; /* For now; must be != NET_RX_DROP */ @@ -415,8 +416,8 @@ int lapb_data_transmit(struct lapb_cb *lapb, struct sk_buff *skb) { int used = 0; - if (lapb->callbacks.data_transmit) { - lapb->callbacks.data_transmit(lapb->dev, skb); + if (lapb->callbacks->data_transmit) { + lapb->callbacks->data_transmit(lapb->dev, skb); used = 1; } diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index e6cab51dceb0..7c366dfe8da9 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -69,7 +69,7 @@ void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, if (!tid_rx) return; - rcu_assign_pointer(sta->ampdu_mlme.tid_rx[tid], NULL); + RCU_INIT_POINTER(sta->ampdu_mlme.tid_rx[tid], NULL); #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "Rx BA session stop requested for %pM tid %u\n", @@ -325,7 +325,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, status = WLAN_STATUS_SUCCESS; /* activate it for RX */ - rcu_assign_pointer(sta->ampdu_mlme.tid_rx[tid], tid_agg_rx); + RCU_INIT_POINTER(sta->ampdu_mlme.tid_rx[tid], tid_agg_rx); if (timeout) mod_timer(&tid_agg_rx->session_timer, TU_TO_EXP_TIME(timeout)); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index b57ddf941e59..567e3e54685a 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -62,7 +62,7 @@ static int ieee80211_change_iface(struct wiphy *wiphy, if (type == NL80211_IFTYPE_AP_VLAN && params && params->use_4addr == 0) - rcu_assign_pointer(sdata->u.vlan.sta, NULL); + RCU_INIT_POINTER(sdata->u.vlan.sta, NULL); else if (type == NL80211_IFTYPE_STATION && params && params->use_4addr >= 0) sdata->u.mgd.use_4addr = params->use_4addr; @@ -556,7 +556,7 @@ static int ieee80211_config_beacon(struct ieee80211_sub_if_data *sdata, sdata->vif.bss_conf.dtim_period = new->dtim_period; - rcu_assign_pointer(sdata->u.ap.beacon, new); + RCU_INIT_POINTER(sdata->u.ap.beacon, new); synchronize_rcu(); @@ -611,7 +611,7 @@ static int ieee80211_del_beacon(struct wiphy *wiphy, struct net_device *dev) if (!old) return -ENOENT; - rcu_assign_pointer(sdata->u.ap.beacon, NULL); + RCU_INIT_POINTER(sdata->u.ap.beacon, NULL); synchronize_rcu(); kfree(old); @@ -877,7 +877,7 @@ static int ieee80211_change_station(struct wiphy *wiphy, return -EBUSY; } - rcu_assign_pointer(vlansdata->u.vlan.sta, sta); + RCU_INIT_POINTER(vlansdata->u.vlan.sta, sta); } sta->sdata = vlansdata; diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 56c24cabf26d..4f9235b18a03 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -84,7 +84,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, drv_reset_tsf(local); skb = ifibss->skb; - rcu_assign_pointer(ifibss->presp, NULL); + RCU_INIT_POINTER(ifibss->presp, NULL); synchronize_rcu(); skb->data = skb->head; skb->len = 0; @@ -184,7 +184,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, *pos++ = 0; /* U-APSD no in use */ } - rcu_assign_pointer(ifibss->presp, skb); + RCU_INIT_POINTER(ifibss->presp, skb); sdata->vif.bss_conf.beacon_int = beacon_int; sdata->vif.bss_conf.basic_rates = basic_rates; @@ -995,7 +995,7 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata) kfree(sdata->u.ibss.ie); skb = rcu_dereference_protected(sdata->u.ibss.presp, lockdep_is_held(&sdata->u.ibss.mtx)); - rcu_assign_pointer(sdata->u.ibss.presp, NULL); + RCU_INIT_POINTER(sdata->u.ibss.presp, NULL); sdata->vif.bss_conf.ibss_joined = false; ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED | BSS_CHANGED_IBSS); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index eaa80a3d412b..a33c58f5137c 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -456,7 +456,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, BSS_CHANGED_BEACON_ENABLED); /* remove beacon */ - rcu_assign_pointer(sdata->u.ap.beacon, NULL); + RCU_INIT_POINTER(sdata->u.ap.beacon, NULL); synchronize_rcu(); kfree(old_beacon); @@ -645,7 +645,7 @@ static const struct net_device_ops ieee80211_dataif_ops = { .ndo_stop = ieee80211_stop, .ndo_uninit = ieee80211_teardown_sdata, .ndo_start_xmit = ieee80211_subif_start_xmit, - .ndo_set_multicast_list = ieee80211_set_multicast_list, + .ndo_set_rx_mode = ieee80211_set_multicast_list, .ndo_change_mtu = ieee80211_change_mtu, .ndo_set_mac_address = ieee80211_change_mac, .ndo_select_queue = ieee80211_netdev_select_queue, @@ -689,7 +689,7 @@ static const struct net_device_ops ieee80211_monitorif_ops = { .ndo_stop = ieee80211_stop, .ndo_uninit = ieee80211_teardown_sdata, .ndo_start_xmit = ieee80211_monitor_start_xmit, - .ndo_set_multicast_list = ieee80211_set_multicast_list, + .ndo_set_rx_mode = ieee80211_set_multicast_list, .ndo_change_mtu = ieee80211_change_mtu, .ndo_set_mac_address = eth_mac_addr, .ndo_select_queue = ieee80211_monitor_select_queue, diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 332b5ff1e885..7f54c5042235 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -1168,6 +1168,6 @@ void mesh_path_expire(struct ieee80211_sub_if_data *sdata) void mesh_pathtbl_unregister(void) { /* no need for locking during exit path */ - mesh_table_free(rcu_dereference_raw(mesh_paths), true); - mesh_table_free(rcu_dereference_raw(mpp_paths), true); + mesh_table_free(rcu_dereference_protected(mesh_paths, 1), true); + mesh_table_free(rcu_dereference_protected(mpp_paths, 1), true); } diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 42c196e86115..695447e988cb 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -72,7 +72,7 @@ static int sta_info_hash_del(struct ieee80211_local *local, if (!s) return -ENOENT; if (s == sta) { - rcu_assign_pointer(local->sta_hash[STA_HASH(sta->sta.addr)], + RCU_INIT_POINTER(local->sta_hash[STA_HASH(sta->sta.addr)], s->hnext); return 0; } @@ -82,7 +82,7 @@ static int sta_info_hash_del(struct ieee80211_local *local, s = rcu_dereference_protected(s->hnext, lockdep_is_held(&local->sta_lock)); if (rcu_access_pointer(s->hnext)) { - rcu_assign_pointer(s->hnext, sta->hnext); + RCU_INIT_POINTER(s->hnext, sta->hnext); return 0; } @@ -231,7 +231,7 @@ static void sta_info_hash_add(struct ieee80211_local *local, struct sta_info *sta) { sta->hnext = local->sta_hash[STA_HASH(sta->sta.addr)]; - rcu_assign_pointer(local->sta_hash[STA_HASH(sta->sta.addr)], sta); + RCU_INIT_POINTER(local->sta_hash[STA_HASH(sta->sta.addr)], sta); } static void sta_unblock(struct work_struct *wk) @@ -799,7 +799,7 @@ static int __must_check __sta_info_destroy(struct sta_info *sta) local->sta_generation++; if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) - rcu_assign_pointer(sdata->u.vlan.sta, NULL); + RCU_INIT_POINTER(sdata->u.vlan.sta, NULL); if (sta->uploaded) { if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 899b71c0ff5d..3346829ea07f 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -37,7 +37,7 @@ int nf_register_afinfo(const struct nf_afinfo *afinfo) err = mutex_lock_interruptible(&afinfo_mutex); if (err < 0) return err; - rcu_assign_pointer(nf_afinfo[afinfo->family], afinfo); + RCU_INIT_POINTER(nf_afinfo[afinfo->family], afinfo); mutex_unlock(&afinfo_mutex); return 0; } @@ -46,7 +46,7 @@ EXPORT_SYMBOL_GPL(nf_register_afinfo); void nf_unregister_afinfo(const struct nf_afinfo *afinfo) { mutex_lock(&afinfo_mutex); - rcu_assign_pointer(nf_afinfo[afinfo->family], NULL); + RCU_INIT_POINTER(nf_afinfo[afinfo->family], NULL); mutex_unlock(&afinfo_mutex); synchronize_rcu(); } diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index be43fd805bd0..2b771dc708a3 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -3771,6 +3771,7 @@ err_sock: void ip_vs_control_cleanup(void) { EnterFunction(2); + unregister_netdevice_notifier(&ip_vs_dst_notifier); ip_vs_genl_unregister(); nf_unregister_sockopt(&ip_vs_sockopts); LeaveFunction(2); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index f7af8b866017..5acfaf59a9c3 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -779,7 +779,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, if (exp->helper) { help = nf_ct_helper_ext_add(ct, GFP_ATOMIC); if (help) - rcu_assign_pointer(help->helper, exp->helper); + RCU_INIT_POINTER(help->helper, exp->helper); } #ifdef CONFIG_NF_CONNTRACK_MARK @@ -1317,7 +1317,7 @@ static void nf_conntrack_cleanup_net(struct net *net) void nf_conntrack_cleanup(struct net *net) { if (net_eq(net, &init_net)) - rcu_assign_pointer(ip_ct_attach, NULL); + RCU_INIT_POINTER(ip_ct_attach, NULL); /* This makes sure all current packets have passed through netfilter framework. Roll on, two-stage module @@ -1327,7 +1327,7 @@ void nf_conntrack_cleanup(struct net *net) nf_conntrack_cleanup_net(net); if (net_eq(net, &init_net)) { - rcu_assign_pointer(nf_ct_destroy, NULL); + RCU_INIT_POINTER(nf_ct_destroy, NULL); nf_conntrack_cleanup_init_net(); } } @@ -1576,11 +1576,11 @@ int nf_conntrack_init(struct net *net) if (net_eq(net, &init_net)) { /* For use by REJECT target */ - rcu_assign_pointer(ip_ct_attach, nf_conntrack_attach); - rcu_assign_pointer(nf_ct_destroy, destroy_conntrack); + RCU_INIT_POINTER(ip_ct_attach, nf_conntrack_attach); + RCU_INIT_POINTER(nf_ct_destroy, destroy_conntrack); /* Howto get NAT offsets */ - rcu_assign_pointer(nf_ct_nat_offset, NULL); + RCU_INIT_POINTER(nf_ct_nat_offset, NULL); } return 0; diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index 63a1b915a7e4..3add99439059 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -94,7 +94,7 @@ int nf_conntrack_register_notifier(struct nf_ct_event_notifier *new) ret = -EBUSY; goto out_unlock; } - rcu_assign_pointer(nf_conntrack_event_cb, new); + RCU_INIT_POINTER(nf_conntrack_event_cb, new); mutex_unlock(&nf_ct_ecache_mutex); return ret; @@ -112,7 +112,7 @@ void nf_conntrack_unregister_notifier(struct nf_ct_event_notifier *new) notify = rcu_dereference_protected(nf_conntrack_event_cb, lockdep_is_held(&nf_ct_ecache_mutex)); BUG_ON(notify != new); - rcu_assign_pointer(nf_conntrack_event_cb, NULL); + RCU_INIT_POINTER(nf_conntrack_event_cb, NULL); mutex_unlock(&nf_ct_ecache_mutex); } EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier); @@ -129,7 +129,7 @@ int nf_ct_expect_register_notifier(struct nf_exp_event_notifier *new) ret = -EBUSY; goto out_unlock; } - rcu_assign_pointer(nf_expect_event_cb, new); + RCU_INIT_POINTER(nf_expect_event_cb, new); mutex_unlock(&nf_ct_ecache_mutex); return ret; @@ -147,7 +147,7 @@ void nf_ct_expect_unregister_notifier(struct nf_exp_event_notifier *new) notify = rcu_dereference_protected(nf_expect_event_cb, lockdep_is_held(&nf_ct_ecache_mutex)); BUG_ON(notify != new); - rcu_assign_pointer(nf_expect_event_cb, NULL); + RCU_INIT_POINTER(nf_expect_event_cb, NULL); mutex_unlock(&nf_ct_ecache_mutex); } EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier); diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c index 05ecdc281a53..4605c947dcc4 100644 --- a/net/netfilter/nf_conntrack_extend.c +++ b/net/netfilter/nf_conntrack_extend.c @@ -169,7 +169,7 @@ int nf_ct_extend_register(struct nf_ct_ext_type *type) before updating alloc_size */ type->alloc_size = ALIGN(sizeof(struct nf_ct_ext), type->align) + type->len; - rcu_assign_pointer(nf_ct_ext_types[type->id], type); + RCU_INIT_POINTER(nf_ct_ext_types[type->id], type); update_alloc_size(type); out: mutex_unlock(&nf_ct_ext_type_mutex); @@ -181,7 +181,7 @@ EXPORT_SYMBOL_GPL(nf_ct_extend_register); void nf_ct_extend_unregister(struct nf_ct_ext_type *type) { mutex_lock(&nf_ct_ext_type_mutex); - rcu_assign_pointer(nf_ct_ext_types[type->id], NULL); + RCU_INIT_POINTER(nf_ct_ext_types[type->id], NULL); update_alloc_size(type); mutex_unlock(&nf_ct_ext_type_mutex); rcu_barrier(); /* Wait for completion of call_rcu()'s */ diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 1bdfea357955..93c4bdbfc1ae 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -131,7 +131,7 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl, helper = __nf_ct_helper_find(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); if (helper == NULL) { if (help) - rcu_assign_pointer(help->helper, NULL); + RCU_INIT_POINTER(help->helper, NULL); goto out; } @@ -145,7 +145,7 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl, memset(&help->help, 0, sizeof(help->help)); } - rcu_assign_pointer(help->helper, helper); + RCU_INIT_POINTER(help->helper, helper); out: return ret; } @@ -162,7 +162,7 @@ static inline int unhelp(struct nf_conntrack_tuple_hash *i, lockdep_is_held(&nf_conntrack_lock) ) == me) { nf_conntrack_event(IPCT_HELPER, ct); - rcu_assign_pointer(help->helper, NULL); + RCU_INIT_POINTER(help->helper, NULL); } return 0; } diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 7dec88a1755b..e58aa9b1fe8a 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1125,7 +1125,7 @@ ctnetlink_change_helper(struct nf_conn *ct, const struct nlattr * const cda[]) if (help && help->helper) { /* we had a helper before ... */ nf_ct_remove_expectations(ct); - rcu_assign_pointer(help->helper, NULL); + RCU_INIT_POINTER(help->helper, NULL); } return 0; @@ -1163,7 +1163,7 @@ ctnetlink_change_helper(struct nf_conn *ct, const struct nlattr * const cda[]) return -EOPNOTSUPP; } - rcu_assign_pointer(help->helper, helper); + RCU_INIT_POINTER(help->helper, helper); return 0; } @@ -1386,7 +1386,7 @@ ctnetlink_create_conntrack(struct net *net, u16 zone, } /* not in hash table yet so not strictly necessary */ - rcu_assign_pointer(help->helper, helper); + RCU_INIT_POINTER(help->helper, helper); } } else { /* try an implicit helper assignation */ diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 20714edf6cd2..ce0c406f58a8 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -55,7 +55,7 @@ int nf_log_register(u_int8_t pf, struct nf_logger *logger) llog = rcu_dereference_protected(nf_loggers[pf], lockdep_is_held(&nf_log_mutex)); if (llog == NULL) - rcu_assign_pointer(nf_loggers[pf], logger); + RCU_INIT_POINTER(nf_loggers[pf], logger); } mutex_unlock(&nf_log_mutex); @@ -74,7 +74,7 @@ void nf_log_unregister(struct nf_logger *logger) c_logger = rcu_dereference_protected(nf_loggers[i], lockdep_is_held(&nf_log_mutex)); if (c_logger == logger) - rcu_assign_pointer(nf_loggers[i], NULL); + RCU_INIT_POINTER(nf_loggers[i], NULL); list_del(&logger->list[i]); } mutex_unlock(&nf_log_mutex); @@ -92,7 +92,7 @@ int nf_log_bind_pf(u_int8_t pf, const struct nf_logger *logger) mutex_unlock(&nf_log_mutex); return -ENOENT; } - rcu_assign_pointer(nf_loggers[pf], logger); + RCU_INIT_POINTER(nf_loggers[pf], logger); mutex_unlock(&nf_log_mutex); return 0; } @@ -103,7 +103,7 @@ void nf_log_unbind_pf(u_int8_t pf) if (pf >= ARRAY_SIZE(nf_loggers)) return; mutex_lock(&nf_log_mutex); - rcu_assign_pointer(nf_loggers[pf], NULL); + RCU_INIT_POINTER(nf_loggers[pf], NULL); mutex_unlock(&nf_log_mutex); } EXPORT_SYMBOL(nf_log_unbind_pf); @@ -250,7 +250,7 @@ static int nf_log_proc_dostring(ctl_table *table, int write, mutex_unlock(&nf_log_mutex); return -ENOENT; } - rcu_assign_pointer(nf_loggers[tindex], logger); + RCU_INIT_POINTER(nf_loggers[tindex], logger); mutex_unlock(&nf_log_mutex); } else { mutex_lock(&nf_log_mutex); diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 5b466cd1272f..99ffd2885088 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -40,7 +40,7 @@ int nf_register_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh) else if (old) ret = -EBUSY; else { - rcu_assign_pointer(queue_handler[pf], qh); + RCU_INIT_POINTER(queue_handler[pf], qh); ret = 0; } mutex_unlock(&queue_handler_mutex); @@ -65,7 +65,7 @@ int nf_unregister_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh) return -EINVAL; } - rcu_assign_pointer(queue_handler[pf], NULL); + RCU_INIT_POINTER(queue_handler[pf], NULL); mutex_unlock(&queue_handler_mutex); synchronize_rcu(); @@ -84,7 +84,7 @@ void nf_unregister_queue_handlers(const struct nf_queue_handler *qh) queue_handler[pf], lockdep_is_held(&queue_handler_mutex) ) == qh) - rcu_assign_pointer(queue_handler[pf], NULL); + RCU_INIT_POINTER(queue_handler[pf], NULL); } mutex_unlock(&queue_handler_mutex); @@ -312,6 +312,7 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) } break; case NF_STOLEN: + break; default: kfree_skb(skb); } diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 1905976b5135..c879c1a2370e 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -59,7 +59,7 @@ int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n) nfnl_unlock(); return -EBUSY; } - rcu_assign_pointer(subsys_table[n->subsys_id], n); + RCU_INIT_POINTER(subsys_table[n->subsys_id], n); nfnl_unlock(); return 0; @@ -210,7 +210,7 @@ static int __net_init nfnetlink_net_init(struct net *net) if (!nfnl) return -ENOMEM; net->nfnl_stash = nfnl; - rcu_assign_pointer(net->nfnl, nfnl); + RCU_INIT_POINTER(net->nfnl, nfnl); return 0; } @@ -219,7 +219,7 @@ static void __net_exit nfnetlink_net_exit_batch(struct list_head *net_exit_list) struct net *net; list_for_each_entry(net, net_exit_list, exit_list) - rcu_assign_pointer(net->nfnl, NULL); + RCU_INIT_POINTER(net->nfnl, NULL); synchronize_net(); list_for_each_entry(net, net_exit_list, exit_list) netlink_kernel_release(net->nfnl_stash); diff --git a/net/netlabel/Makefile b/net/netlabel/Makefile index ea750e9df65f..d2732fc952e2 100644 --- a/net/netlabel/Makefile +++ b/net/netlabel/Makefile @@ -1,8 +1,6 @@ # # Makefile for the NetLabel subsystem. # -# Feb 9, 2006, Paul Moore <paul.moore@hp.com> -# # base objects obj-y := netlabel_user.o netlabel_kapi.o diff --git a/net/netlabel/netlabel_addrlist.c b/net/netlabel/netlabel_addrlist.c index c0519139679e..96b749dacc34 100644 --- a/net/netlabel/netlabel_addrlist.c +++ b/net/netlabel/netlabel_addrlist.c @@ -6,7 +6,7 @@ * system manages static and dynamic label mappings for network protocols such * as CIPSO and RIPSO. * - * Author: Paul Moore <paul.moore@hp.com> + * Author: Paul Moore <paul@paul-moore.com> * */ diff --git a/net/netlabel/netlabel_addrlist.h b/net/netlabel/netlabel_addrlist.h index 2b9644e19de0..fdbc1d2c7352 100644 --- a/net/netlabel/netlabel_addrlist.h +++ b/net/netlabel/netlabel_addrlist.h @@ -6,7 +6,7 @@ * system manages static and dynamic label mappings for network protocols such * as CIPSO and RIPSO. * - * Author: Paul Moore <paul.moore@hp.com> + * Author: Paul Moore <paul@paul-moore.com> * */ diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index dd53a36d89af..6bf878335d94 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c @@ -5,7 +5,7 @@ * NetLabel system manages static and dynamic label mappings for network * protocols such as CIPSO and RIPSO. * - * Author: Paul Moore <paul.moore@hp.com> + * Author: Paul Moore <paul@paul-moore.com> * */ diff --git a/net/netlabel/netlabel_cipso_v4.h b/net/netlabel/netlabel_cipso_v4.h index af7f3355103e..d24d774bfd62 100644 --- a/net/netlabel/netlabel_cipso_v4.h +++ b/net/netlabel/netlabel_cipso_v4.h @@ -5,7 +5,7 @@ * NetLabel system manages static and dynamic label mappings for network * protocols such as CIPSO and RIPSO. * - * Author: Paul Moore <paul.moore@hp.com> + * Author: Paul Moore <paul@paul-moore.com> * */ diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c index 2aa975e5452d..3f905e5370c2 100644 --- a/net/netlabel/netlabel_domainhash.c +++ b/net/netlabel/netlabel_domainhash.c @@ -6,7 +6,7 @@ * system manages static and dynamic label mappings for network protocols such * as CIPSO and RIPSO. * - * Author: Paul Moore <paul.moore@hp.com> + * Author: Paul Moore <paul@paul-moore.com> * */ @@ -282,7 +282,7 @@ int __init netlbl_domhsh_init(u32 size) INIT_LIST_HEAD(&hsh_tbl->tbl[iter]); spin_lock(&netlbl_domhsh_lock); - rcu_assign_pointer(netlbl_domhsh, hsh_tbl); + RCU_INIT_POINTER(netlbl_domhsh, hsh_tbl); spin_unlock(&netlbl_domhsh_lock); return 0; @@ -330,7 +330,7 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry, &rcu_dereference(netlbl_domhsh)->tbl[bkt]); } else { INIT_LIST_HEAD(&entry->list); - rcu_assign_pointer(netlbl_domhsh_def, entry); + RCU_INIT_POINTER(netlbl_domhsh_def, entry); } if (entry->type == NETLBL_NLTYPE_ADDRSELECT) { @@ -451,7 +451,7 @@ int netlbl_domhsh_remove_entry(struct netlbl_dom_map *entry, if (entry != rcu_dereference(netlbl_domhsh_def)) list_del_rcu(&entry->list); else - rcu_assign_pointer(netlbl_domhsh_def, NULL); + RCU_INIT_POINTER(netlbl_domhsh_def, NULL); } else ret_val = -ENOENT; spin_unlock(&netlbl_domhsh_lock); diff --git a/net/netlabel/netlabel_domainhash.h b/net/netlabel/netlabel_domainhash.h index 0261dda3f2d2..bfcc0f7024c5 100644 --- a/net/netlabel/netlabel_domainhash.h +++ b/net/netlabel/netlabel_domainhash.h @@ -6,7 +6,7 @@ * system manages static and dynamic label mappings for network protocols such * as CIPSO and RIPSO. * - * Author: Paul Moore <paul.moore@hp.com> + * Author: Paul Moore <paul@paul-moore.com> * */ diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index b528dd928d3c..9c24de10a657 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -5,7 +5,7 @@ * system manages static and dynamic label mappings for network protocols such * as CIPSO and RIPSO. * - * Author: Paul Moore <paul.moore@hp.com> + * Author: Paul Moore <paul@paul-moore.com> * */ @@ -341,11 +341,11 @@ int netlbl_cfg_cipsov4_map_add(u32 doi, entry = kzalloc(sizeof(*entry), GFP_ATOMIC); if (entry == NULL) - return -ENOMEM; + goto out_entry; if (domain != NULL) { entry->domain = kstrdup(domain, GFP_ATOMIC); if (entry->domain == NULL) - goto cfg_cipsov4_map_add_failure; + goto out_domain; } if (addr == NULL && mask == NULL) { @@ -354,13 +354,13 @@ int netlbl_cfg_cipsov4_map_add(u32 doi, } else if (addr != NULL && mask != NULL) { addrmap = kzalloc(sizeof(*addrmap), GFP_ATOMIC); if (addrmap == NULL) - goto cfg_cipsov4_map_add_failure; + goto out_addrmap; INIT_LIST_HEAD(&addrmap->list4); INIT_LIST_HEAD(&addrmap->list6); addrinfo = kzalloc(sizeof(*addrinfo), GFP_ATOMIC); if (addrinfo == NULL) - goto cfg_cipsov4_map_add_failure; + goto out_addrinfo; addrinfo->type_def.cipsov4 = doi_def; addrinfo->type = NETLBL_NLTYPE_CIPSOV4; addrinfo->list.addr = addr->s_addr & mask->s_addr; @@ -374,7 +374,7 @@ int netlbl_cfg_cipsov4_map_add(u32 doi, entry->type = NETLBL_NLTYPE_ADDRSELECT; } else { ret_val = -EINVAL; - goto cfg_cipsov4_map_add_failure; + goto out_addrmap; } ret_val = netlbl_domhsh_add(entry, audit_info); @@ -384,11 +384,15 @@ int netlbl_cfg_cipsov4_map_add(u32 doi, return 0; cfg_cipsov4_map_add_failure: - cipso_v4_doi_putdef(doi_def); + kfree(addrinfo); +out_addrinfo: + kfree(addrmap); +out_addrmap: kfree(entry->domain); +out_domain: kfree(entry); - kfree(addrmap); - kfree(addrinfo); +out_entry: + cipso_v4_doi_putdef(doi_def); return ret_val; } diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index dff8a0809245..bfa555869775 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -5,7 +5,7 @@ * NetLabel system manages static and dynamic label mappings for network * protocols such as CIPSO and RIPSO. * - * Author: Paul Moore <paul.moore@hp.com> + * Author: Paul Moore <paul@paul-moore.com> * */ diff --git a/net/netlabel/netlabel_mgmt.h b/net/netlabel/netlabel_mgmt.h index 8db37f4c10f7..5a9f31ce5799 100644 --- a/net/netlabel/netlabel_mgmt.h +++ b/net/netlabel/netlabel_mgmt.h @@ -5,7 +5,7 @@ * NetLabel system manages static and dynamic label mappings for network * protocols such as CIPSO and RIPSO. * - * Author: Paul Moore <paul.moore@hp.com> + * Author: Paul Moore <paul@paul-moore.com> * */ diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index f1ecf848e3ac..e251c2c88521 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -5,7 +5,7 @@ * NetLabel system. The NetLabel system manages static and dynamic label * mappings for network protocols such as CIPSO and RIPSO. * - * Author: Paul Moore <paul.moore@hp.com> + * Author: Paul Moore <paul@paul-moore.com> * */ @@ -354,7 +354,7 @@ static struct netlbl_unlhsh_iface *netlbl_unlhsh_add_iface(int ifindex) INIT_LIST_HEAD(&iface->list); if (netlbl_unlhsh_rcu_deref(netlbl_unlhsh_def) != NULL) goto add_iface_failure; - rcu_assign_pointer(netlbl_unlhsh_def, iface); + RCU_INIT_POINTER(netlbl_unlhsh_def, iface); } spin_unlock(&netlbl_unlhsh_lock); @@ -621,7 +621,7 @@ static void netlbl_unlhsh_condremove_iface(struct netlbl_unlhsh_iface *iface) if (iface->ifindex > 0) list_del_rcu(&iface->list); else - rcu_assign_pointer(netlbl_unlhsh_def, NULL); + RCU_INIT_POINTER(netlbl_unlhsh_def, NULL); spin_unlock(&netlbl_unlhsh_lock); call_rcu(&iface->rcu, netlbl_unlhsh_free_iface); @@ -1449,7 +1449,7 @@ int __init netlbl_unlabel_init(u32 size) rcu_read_lock(); spin_lock(&netlbl_unlhsh_lock); - rcu_assign_pointer(netlbl_unlhsh, hsh_tbl); + RCU_INIT_POINTER(netlbl_unlhsh, hsh_tbl); spin_unlock(&netlbl_unlhsh_lock); rcu_read_unlock(); diff --git a/net/netlabel/netlabel_unlabeled.h b/net/netlabel/netlabel_unlabeled.h index 0bc8dc3f9e3c..700af49022a0 100644 --- a/net/netlabel/netlabel_unlabeled.h +++ b/net/netlabel/netlabel_unlabeled.h @@ -5,7 +5,7 @@ * NetLabel system. The NetLabel system manages static and dynamic label * mappings for network protocols such as CIPSO and RIPSO. * - * Author: Paul Moore <paul.moore@hp.com> + * Author: Paul Moore <paul@paul-moore.com> * */ diff --git a/net/netlabel/netlabel_user.c b/net/netlabel/netlabel_user.c index a3fd75ac3fa5..9fae63f10298 100644 --- a/net/netlabel/netlabel_user.c +++ b/net/netlabel/netlabel_user.c @@ -5,7 +5,7 @@ * NetLabel system manages static and dynamic label mappings for network * protocols such as CIPSO and RIPSO. * - * Author: Paul Moore <paul.moore@hp.com> + * Author: Paul Moore <paul@paul-moore.com> * */ diff --git a/net/netlabel/netlabel_user.h b/net/netlabel/netlabel_user.h index f4fc4c9ad567..81969785e279 100644 --- a/net/netlabel/netlabel_user.h +++ b/net/netlabel/netlabel_user.h @@ -5,7 +5,7 @@ * NetLabel system manages static and dynamic label mappings for network * protocols such as CIPSO and RIPSO. * - * Author: Paul Moore <paul.moore@hp.com> + * Author: Paul Moore <paul@paul-moore.com> * */ diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 0a4db0211da0..4330db99fabf 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1578,7 +1578,7 @@ int __netlink_change_ngroups(struct sock *sk, unsigned int groups) new = kzalloc(sizeof(*new) + NLGRPSZ(groups), GFP_ATOMIC); if (!new) return -ENOMEM; - old = rcu_dereference_raw(tbl->listeners); + old = rcu_dereference_protected(tbl->listeners, 1); memcpy(new->masks, old->masks, NLGRPSZ(tbl->groups)); rcu_assign_pointer(tbl->listeners, new); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index c698cec0a445..25e68f56b4ba 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -40,6 +40,10 @@ * byte arrays at the end of sockaddr_ll * and packet_mreq. * Johann Baudy : Added TX RING. + * Chetan Loke : Implemented TPACKET_V3 block abstraction + * layer. + * Copyright (C) 2011, <lokec@ccs.neu.edu> + * * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -161,9 +165,56 @@ struct packet_mreq_max { unsigned char mr_address[MAX_ADDR_LEN]; }; -static int packet_set_ring(struct sock *sk, struct tpacket_req *req, +static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, int closing, int tx_ring); + +#define V3_ALIGNMENT (8) + +#define BLK_HDR_LEN (ALIGN(sizeof(struct tpacket_block_desc), V3_ALIGNMENT)) + +#define BLK_PLUS_PRIV(sz_of_priv) \ + (BLK_HDR_LEN + ALIGN((sz_of_priv), V3_ALIGNMENT)) + +/* kbdq - kernel block descriptor queue */ +struct tpacket_kbdq_core { + struct pgv *pkbdq; + unsigned int feature_req_word; + unsigned int hdrlen; + unsigned char reset_pending_on_curr_blk; + unsigned char delete_blk_timer; + unsigned short kactive_blk_num; + unsigned short blk_sizeof_priv; + + /* last_kactive_blk_num: + * trick to see if user-space has caught up + * in order to avoid refreshing timer when every single pkt arrives. + */ + unsigned short last_kactive_blk_num; + + char *pkblk_start; + char *pkblk_end; + int kblk_size; + unsigned int knum_blocks; + uint64_t knxt_seq_num; + char *prev; + char *nxt_offset; + struct sk_buff *skb; + + atomic_t blk_fill_in_prog; + + /* Default is set to 8ms */ +#define DEFAULT_PRB_RETIRE_TOV (8) + + unsigned short retire_blk_tov; + unsigned short version; + unsigned long tov_in_jiffies; + + /* timer to retire an outstanding block */ + struct timer_list retire_blk_timer; +}; + +#define PGV_FROM_VMALLOC 1 struct pgv { char *buffer; }; @@ -179,12 +230,44 @@ struct packet_ring_buffer { unsigned int pg_vec_pages; unsigned int pg_vec_len; + struct tpacket_kbdq_core prb_bdqc; atomic_t pending; }; +#define BLOCK_STATUS(x) ((x)->hdr.bh1.block_status) +#define BLOCK_NUM_PKTS(x) ((x)->hdr.bh1.num_pkts) +#define BLOCK_O2FP(x) ((x)->hdr.bh1.offset_to_first_pkt) +#define BLOCK_LEN(x) ((x)->hdr.bh1.blk_len) +#define BLOCK_SNUM(x) ((x)->hdr.bh1.seq_num) +#define BLOCK_O2PRIV(x) ((x)->offset_to_priv) +#define BLOCK_PRIV(x) ((void *)((char *)(x) + BLOCK_O2PRIV(x))) + struct packet_sock; static int tpacket_snd(struct packet_sock *po, struct msghdr *msg); +static void *packet_previous_frame(struct packet_sock *po, + struct packet_ring_buffer *rb, + int status); +static void packet_increment_head(struct packet_ring_buffer *buff); +static int prb_curr_blk_in_use(struct tpacket_kbdq_core *, + struct tpacket_block_desc *); +static void *prb_dispatch_next_block(struct tpacket_kbdq_core *, + struct packet_sock *); +static void prb_retire_current_block(struct tpacket_kbdq_core *, + struct packet_sock *, unsigned int status); +static int prb_queue_frozen(struct tpacket_kbdq_core *); +static void prb_open_block(struct tpacket_kbdq_core *, + struct tpacket_block_desc *); +static void prb_retire_rx_blk_timer_expired(unsigned long); +static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *); +static void prb_init_blk_timer(struct packet_sock *, + struct tpacket_kbdq_core *, + void (*func) (unsigned long)); +static void prb_fill_rxhash(struct tpacket_kbdq_core *, struct tpacket3_hdr *); +static void prb_clear_rxhash(struct tpacket_kbdq_core *, + struct tpacket3_hdr *); +static void prb_fill_vlan_info(struct tpacket_kbdq_core *, + struct tpacket3_hdr *); static void packet_flush_mclist(struct sock *sk); struct packet_fanout; @@ -193,6 +276,7 @@ struct packet_sock { struct sock sk; struct packet_fanout *fanout; struct tpacket_stats stats; + union tpacket_stats_u stats_u; struct packet_ring_buffer rx_ring; struct packet_ring_buffer tx_ring; int copy_thresh; @@ -242,6 +326,15 @@ struct packet_skb_cb { #define PACKET_SKB_CB(__skb) ((struct packet_skb_cb *)((__skb)->cb)) +#define GET_PBDQC_FROM_RB(x) ((struct tpacket_kbdq_core *)(&(x)->prb_bdqc)) +#define GET_PBLOCK_DESC(x, bid) \ + ((struct tpacket_block_desc *)((x)->pkbdq[(bid)].buffer)) +#define GET_CURR_PBLOCK_DESC_FROM_CORE(x) \ + ((struct tpacket_block_desc *)((x)->pkbdq[(x)->kactive_blk_num].buffer)) +#define GET_NEXT_PRB_BLK_NUM(x) \ + (((x)->kactive_blk_num < ((x)->knum_blocks-1)) ? \ + ((x)->kactive_blk_num+1) : 0) + static inline struct packet_sock *pkt_sk(struct sock *sk) { return (struct packet_sock *)sk; @@ -325,8 +418,9 @@ static void __packet_set_status(struct packet_sock *po, void *frame, int status) h.h2->tp_status = status; flush_dcache_page(pgv_to_page(&h.h2->tp_status)); break; + case TPACKET_V3: default: - pr_err("TPACKET version not supported\n"); + WARN(1, "TPACKET version not supported.\n"); BUG(); } @@ -351,8 +445,9 @@ static int __packet_get_status(struct packet_sock *po, void *frame) case TPACKET_V2: flush_dcache_page(pgv_to_page(&h.h2->tp_status)); return h.h2->tp_status; + case TPACKET_V3: default: - pr_err("TPACKET version not supported\n"); + WARN(1, "TPACKET version not supported.\n"); BUG(); return 0; } @@ -389,6 +484,670 @@ static inline void *packet_current_frame(struct packet_sock *po, return packet_lookup_frame(po, rb, rb->head, status); } +static void prb_del_retire_blk_timer(struct tpacket_kbdq_core *pkc) +{ + del_timer_sync(&pkc->retire_blk_timer); +} + +static void prb_shutdown_retire_blk_timer(struct packet_sock *po, + int tx_ring, + struct sk_buff_head *rb_queue) +{ + struct tpacket_kbdq_core *pkc; + + pkc = tx_ring ? &po->tx_ring.prb_bdqc : &po->rx_ring.prb_bdqc; + + spin_lock(&rb_queue->lock); + pkc->delete_blk_timer = 1; + spin_unlock(&rb_queue->lock); + + prb_del_retire_blk_timer(pkc); +} + +static void prb_init_blk_timer(struct packet_sock *po, + struct tpacket_kbdq_core *pkc, + void (*func) (unsigned long)) +{ + init_timer(&pkc->retire_blk_timer); + pkc->retire_blk_timer.data = (long)po; + pkc->retire_blk_timer.function = func; + pkc->retire_blk_timer.expires = jiffies; +} + +static void prb_setup_retire_blk_timer(struct packet_sock *po, int tx_ring) +{ + struct tpacket_kbdq_core *pkc; + + if (tx_ring) + BUG(); + + pkc = tx_ring ? &po->tx_ring.prb_bdqc : &po->rx_ring.prb_bdqc; + prb_init_blk_timer(po, pkc, prb_retire_rx_blk_timer_expired); +} + +static int prb_calc_retire_blk_tmo(struct packet_sock *po, + int blk_size_in_bytes) +{ + struct net_device *dev; + unsigned int mbits = 0, msec = 0, div = 0, tmo = 0; + struct ethtool_cmd ecmd; + int err; + + rtnl_lock(); + dev = __dev_get_by_index(sock_net(&po->sk), po->ifindex); + if (unlikely(!dev)) { + rtnl_unlock(); + return DEFAULT_PRB_RETIRE_TOV; + } + err = __ethtool_get_settings(dev, &ecmd); + rtnl_unlock(); + if (!err) { + switch (ecmd.speed) { + case SPEED_10000: + msec = 1; + div = 10000/1000; + break; + case SPEED_1000: + msec = 1; + div = 1000/1000; + break; + /* + * If the link speed is so slow you don't really + * need to worry about perf anyways + */ + case SPEED_100: + case SPEED_10: + default: + return DEFAULT_PRB_RETIRE_TOV; + } + } + + mbits = (blk_size_in_bytes * 8) / (1024 * 1024); + + if (div) + mbits /= div; + + tmo = mbits * msec; + + if (div) + return tmo+1; + return tmo; +} + +static void prb_init_ft_ops(struct tpacket_kbdq_core *p1, + union tpacket_req_u *req_u) +{ + p1->feature_req_word = req_u->req3.tp_feature_req_word; +} + +static void init_prb_bdqc(struct packet_sock *po, + struct packet_ring_buffer *rb, + struct pgv *pg_vec, + union tpacket_req_u *req_u, int tx_ring) +{ + struct tpacket_kbdq_core *p1 = &rb->prb_bdqc; + struct tpacket_block_desc *pbd; + + memset(p1, 0x0, sizeof(*p1)); + + p1->knxt_seq_num = 1; + p1->pkbdq = pg_vec; + pbd = (struct tpacket_block_desc *)pg_vec[0].buffer; + p1->pkblk_start = (char *)pg_vec[0].buffer; + p1->kblk_size = req_u->req3.tp_block_size; + p1->knum_blocks = req_u->req3.tp_block_nr; + p1->hdrlen = po->tp_hdrlen; + p1->version = po->tp_version; + p1->last_kactive_blk_num = 0; + po->stats_u.stats3.tp_freeze_q_cnt = 0; + if (req_u->req3.tp_retire_blk_tov) + p1->retire_blk_tov = req_u->req3.tp_retire_blk_tov; + else + p1->retire_blk_tov = prb_calc_retire_blk_tmo(po, + req_u->req3.tp_block_size); + p1->tov_in_jiffies = msecs_to_jiffies(p1->retire_blk_tov); + p1->blk_sizeof_priv = req_u->req3.tp_sizeof_priv; + + prb_init_ft_ops(p1, req_u); + prb_setup_retire_blk_timer(po, tx_ring); + prb_open_block(p1, pbd); +} + +/* Do NOT update the last_blk_num first. + * Assumes sk_buff_head lock is held. + */ +static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *pkc) +{ + mod_timer(&pkc->retire_blk_timer, + jiffies + pkc->tov_in_jiffies); + pkc->last_kactive_blk_num = pkc->kactive_blk_num; +} + +/* + * Timer logic: + * 1) We refresh the timer only when we open a block. + * By doing this we don't waste cycles refreshing the timer + * on packet-by-packet basis. + * + * With a 1MB block-size, on a 1Gbps line, it will take + * i) ~8 ms to fill a block + ii) memcpy etc. + * In this cut we are not accounting for the memcpy time. + * + * So, if the user sets the 'tmo' to 10ms then the timer + * will never fire while the block is still getting filled + * (which is what we want). However, the user could choose + * to close a block early and that's fine. + * + * But when the timer does fire, we check whether or not to refresh it. + * Since the tmo granularity is in msecs, it is not too expensive + * to refresh the timer, lets say every '8' msecs. + * Either the user can set the 'tmo' or we can derive it based on + * a) line-speed and b) block-size. + * prb_calc_retire_blk_tmo() calculates the tmo. + * + */ +static void prb_retire_rx_blk_timer_expired(unsigned long data) +{ + struct packet_sock *po = (struct packet_sock *)data; + struct tpacket_kbdq_core *pkc = &po->rx_ring.prb_bdqc; + unsigned int frozen; + struct tpacket_block_desc *pbd; + + spin_lock(&po->sk.sk_receive_queue.lock); + + frozen = prb_queue_frozen(pkc); + pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc); + + if (unlikely(pkc->delete_blk_timer)) + goto out; + + /* We only need to plug the race when the block is partially filled. + * tpacket_rcv: + * lock(); increment BLOCK_NUM_PKTS; unlock() + * copy_bits() is in progress ... + * timer fires on other cpu: + * we can't retire the current block because copy_bits + * is in progress. + * + */ + if (BLOCK_NUM_PKTS(pbd)) { + while (atomic_read(&pkc->blk_fill_in_prog)) { + /* Waiting for skb_copy_bits to finish... */ + cpu_relax(); + } + } + + if (pkc->last_kactive_blk_num == pkc->kactive_blk_num) { + if (!frozen) { + prb_retire_current_block(pkc, po, TP_STATUS_BLK_TMO); + if (!prb_dispatch_next_block(pkc, po)) + goto refresh_timer; + else + goto out; + } else { + /* Case 1. Queue was frozen because user-space was + * lagging behind. + */ + if (prb_curr_blk_in_use(pkc, pbd)) { + /* + * Ok, user-space is still behind. + * So just refresh the timer. + */ + goto refresh_timer; + } else { + /* Case 2. queue was frozen,user-space caught up, + * now the link went idle && the timer fired. + * We don't have a block to close.So we open this + * block and restart the timer. + * opening a block thaws the queue,restarts timer + * Thawing/timer-refresh is a side effect. + */ + prb_open_block(pkc, pbd); + goto out; + } + } + } + +refresh_timer: + _prb_refresh_rx_retire_blk_timer(pkc); + +out: + spin_unlock(&po->sk.sk_receive_queue.lock); +} + +static inline void prb_flush_block(struct tpacket_kbdq_core *pkc1, + struct tpacket_block_desc *pbd1, __u32 status) +{ + /* Flush everything minus the block header */ + +#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1 + u8 *start, *end; + + start = (u8 *)pbd1; + + /* Skip the block header(we know header WILL fit in 4K) */ + start += PAGE_SIZE; + + end = (u8 *)PAGE_ALIGN((unsigned long)pkc1->pkblk_end); + for (; start < end; start += PAGE_SIZE) + flush_dcache_page(pgv_to_page(start)); + + smp_wmb(); +#endif + + /* Now update the block status. */ + + BLOCK_STATUS(pbd1) = status; + + /* Flush the block header */ + +#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1 + start = (u8 *)pbd1; + flush_dcache_page(pgv_to_page(start)); + + smp_wmb(); +#endif +} + +/* + * Side effect: + * + * 1) flush the block + * 2) Increment active_blk_num + * + * Note:We DONT refresh the timer on purpose. + * Because almost always the next block will be opened. + */ +static void prb_close_block(struct tpacket_kbdq_core *pkc1, + struct tpacket_block_desc *pbd1, + struct packet_sock *po, unsigned int stat) +{ + __u32 status = TP_STATUS_USER | stat; + + struct tpacket3_hdr *last_pkt; + struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1; + + if (po->stats.tp_drops) + status |= TP_STATUS_LOSING; + + last_pkt = (struct tpacket3_hdr *)pkc1->prev; + last_pkt->tp_next_offset = 0; + + /* Get the ts of the last pkt */ + if (BLOCK_NUM_PKTS(pbd1)) { + h1->ts_last_pkt.ts_sec = last_pkt->tp_sec; + h1->ts_last_pkt.ts_nsec = last_pkt->tp_nsec; + } else { + /* Ok, we tmo'd - so get the current time */ + struct timespec ts; + getnstimeofday(&ts); + h1->ts_last_pkt.ts_sec = ts.tv_sec; + h1->ts_last_pkt.ts_nsec = ts.tv_nsec; + } + + smp_wmb(); + + /* Flush the block */ + prb_flush_block(pkc1, pbd1, status); + + pkc1->kactive_blk_num = GET_NEXT_PRB_BLK_NUM(pkc1); +} + +static inline void prb_thaw_queue(struct tpacket_kbdq_core *pkc) +{ + pkc->reset_pending_on_curr_blk = 0; +} + +/* + * Side effect of opening a block: + * + * 1) prb_queue is thawed. + * 2) retire_blk_timer is refreshed. + * + */ +static void prb_open_block(struct tpacket_kbdq_core *pkc1, + struct tpacket_block_desc *pbd1) +{ + struct timespec ts; + struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1; + + smp_rmb(); + + if (likely(TP_STATUS_KERNEL == BLOCK_STATUS(pbd1))) { + + /* We could have just memset this but we will lose the + * flexibility of making the priv area sticky + */ + BLOCK_SNUM(pbd1) = pkc1->knxt_seq_num++; + BLOCK_NUM_PKTS(pbd1) = 0; + BLOCK_LEN(pbd1) = BLK_PLUS_PRIV(pkc1->blk_sizeof_priv); + getnstimeofday(&ts); + h1->ts_first_pkt.ts_sec = ts.tv_sec; + h1->ts_first_pkt.ts_nsec = ts.tv_nsec; + pkc1->pkblk_start = (char *)pbd1; + pkc1->nxt_offset = (char *)(pkc1->pkblk_start + + BLK_PLUS_PRIV(pkc1->blk_sizeof_priv)); + BLOCK_O2FP(pbd1) = (__u32)BLK_PLUS_PRIV(pkc1->blk_sizeof_priv); + BLOCK_O2PRIV(pbd1) = BLK_HDR_LEN; + pbd1->version = pkc1->version; + pkc1->prev = pkc1->nxt_offset; + pkc1->pkblk_end = pkc1->pkblk_start + pkc1->kblk_size; + prb_thaw_queue(pkc1); + _prb_refresh_rx_retire_blk_timer(pkc1); + + smp_wmb(); + + return; + } + + WARN(1, "ERROR block:%p is NOT FREE status:%d kactive_blk_num:%d\n", + pbd1, BLOCK_STATUS(pbd1), pkc1->kactive_blk_num); + dump_stack(); + BUG(); +} + +/* + * Queue freeze logic: + * 1) Assume tp_block_nr = 8 blocks. + * 2) At time 't0', user opens Rx ring. + * 3) Some time past 't0', kernel starts filling blocks starting from 0 .. 7 + * 4) user-space is either sleeping or processing block '0'. + * 5) tpacket_rcv is currently filling block '7', since there is no space left, + * it will close block-7,loop around and try to fill block '0'. + * call-flow: + * __packet_lookup_frame_in_block + * prb_retire_current_block() + * prb_dispatch_next_block() + * |->(BLOCK_STATUS == USER) evaluates to true + * 5.1) Since block-0 is currently in-use, we just freeze the queue. + * 6) Now there are two cases: + * 6.1) Link goes idle right after the queue is frozen. + * But remember, the last open_block() refreshed the timer. + * When this timer expires,it will refresh itself so that we can + * re-open block-0 in near future. + * 6.2) Link is busy and keeps on receiving packets. This is a simple + * case and __packet_lookup_frame_in_block will check if block-0 + * is free and can now be re-used. + */ +static inline void prb_freeze_queue(struct tpacket_kbdq_core *pkc, + struct packet_sock *po) +{ + pkc->reset_pending_on_curr_blk = 1; + po->stats_u.stats3.tp_freeze_q_cnt++; +} + +#define TOTAL_PKT_LEN_INCL_ALIGN(length) (ALIGN((length), V3_ALIGNMENT)) + +/* + * If the next block is free then we will dispatch it + * and return a good offset. + * Else, we will freeze the queue. + * So, caller must check the return value. + */ +static void *prb_dispatch_next_block(struct tpacket_kbdq_core *pkc, + struct packet_sock *po) +{ + struct tpacket_block_desc *pbd; + + smp_rmb(); + + /* 1. Get current block num */ + pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc); + + /* 2. If this block is currently in_use then freeze the queue */ + if (TP_STATUS_USER & BLOCK_STATUS(pbd)) { + prb_freeze_queue(pkc, po); + return NULL; + } + + /* + * 3. + * open this block and return the offset where the first packet + * needs to get stored. + */ + prb_open_block(pkc, pbd); + return (void *)pkc->nxt_offset; +} + +static void prb_retire_current_block(struct tpacket_kbdq_core *pkc, + struct packet_sock *po, unsigned int status) +{ + struct tpacket_block_desc *pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc); + + /* retire/close the current block */ + if (likely(TP_STATUS_KERNEL == BLOCK_STATUS(pbd))) { + /* + * Plug the case where copy_bits() is in progress on + * cpu-0 and tpacket_rcv() got invoked on cpu-1, didn't + * have space to copy the pkt in the current block and + * called prb_retire_current_block() + * + * We don't need to worry about the TMO case because + * the timer-handler already handled this case. + */ + if (!(status & TP_STATUS_BLK_TMO)) { + while (atomic_read(&pkc->blk_fill_in_prog)) { + /* Waiting for skb_copy_bits to finish... */ + cpu_relax(); + } + } + prb_close_block(pkc, pbd, po, status); + return; + } + + WARN(1, "ERROR-pbd[%d]:%p\n", pkc->kactive_blk_num, pbd); + dump_stack(); + BUG(); +} + +static inline int prb_curr_blk_in_use(struct tpacket_kbdq_core *pkc, + struct tpacket_block_desc *pbd) +{ + return TP_STATUS_USER & BLOCK_STATUS(pbd); +} + +static inline int prb_queue_frozen(struct tpacket_kbdq_core *pkc) +{ + return pkc->reset_pending_on_curr_blk; +} + +static inline void prb_clear_blk_fill_status(struct packet_ring_buffer *rb) +{ + struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb); + atomic_dec(&pkc->blk_fill_in_prog); +} + +static inline void prb_fill_rxhash(struct tpacket_kbdq_core *pkc, + struct tpacket3_hdr *ppd) +{ + ppd->hv1.tp_rxhash = skb_get_rxhash(pkc->skb); +} + +static inline void prb_clear_rxhash(struct tpacket_kbdq_core *pkc, + struct tpacket3_hdr *ppd) +{ + ppd->hv1.tp_rxhash = 0; +} + +static inline void prb_fill_vlan_info(struct tpacket_kbdq_core *pkc, + struct tpacket3_hdr *ppd) +{ + if (vlan_tx_tag_present(pkc->skb)) { + ppd->hv1.tp_vlan_tci = vlan_tx_tag_get(pkc->skb); + ppd->tp_status = TP_STATUS_VLAN_VALID; + } else { + ppd->hv1.tp_vlan_tci = ppd->tp_status = 0; + } +} + +static void prb_run_all_ft_ops(struct tpacket_kbdq_core *pkc, + struct tpacket3_hdr *ppd) +{ + prb_fill_vlan_info(pkc, ppd); + + if (pkc->feature_req_word & TP_FT_REQ_FILL_RXHASH) + prb_fill_rxhash(pkc, ppd); + else + prb_clear_rxhash(pkc, ppd); +} + +static inline void prb_fill_curr_block(char *curr, + struct tpacket_kbdq_core *pkc, + struct tpacket_block_desc *pbd, + unsigned int len) +{ + struct tpacket3_hdr *ppd; + + ppd = (struct tpacket3_hdr *)curr; + ppd->tp_next_offset = TOTAL_PKT_LEN_INCL_ALIGN(len); + pkc->prev = curr; + pkc->nxt_offset += TOTAL_PKT_LEN_INCL_ALIGN(len); + BLOCK_LEN(pbd) += TOTAL_PKT_LEN_INCL_ALIGN(len); + BLOCK_NUM_PKTS(pbd) += 1; + atomic_inc(&pkc->blk_fill_in_prog); + prb_run_all_ft_ops(pkc, ppd); +} + +/* Assumes caller has the sk->rx_queue.lock */ +static void *__packet_lookup_frame_in_block(struct packet_sock *po, + struct sk_buff *skb, + int status, + unsigned int len + ) +{ + struct tpacket_kbdq_core *pkc; + struct tpacket_block_desc *pbd; + char *curr, *end; + + pkc = GET_PBDQC_FROM_RB(((struct packet_ring_buffer *)&po->rx_ring)); + pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc); + + /* Queue is frozen when user space is lagging behind */ + if (prb_queue_frozen(pkc)) { + /* + * Check if that last block which caused the queue to freeze, + * is still in_use by user-space. + */ + if (prb_curr_blk_in_use(pkc, pbd)) { + /* Can't record this packet */ + return NULL; + } else { + /* + * Ok, the block was released by user-space. + * Now let's open that block. + * opening a block also thaws the queue. + * Thawing is a side effect. + */ + prb_open_block(pkc, pbd); + } + } + + smp_mb(); + curr = pkc->nxt_offset; + pkc->skb = skb; + end = (char *) ((char *)pbd + pkc->kblk_size); + + /* first try the current block */ + if (curr+TOTAL_PKT_LEN_INCL_ALIGN(len) < end) { + prb_fill_curr_block(curr, pkc, pbd, len); + return (void *)curr; + } + + /* Ok, close the current block */ + prb_retire_current_block(pkc, po, 0); + + /* Now, try to dispatch the next block */ + curr = (char *)prb_dispatch_next_block(pkc, po); + if (curr) { + pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc); + prb_fill_curr_block(curr, pkc, pbd, len); + return (void *)curr; + } + + /* + * No free blocks are available.user_space hasn't caught up yet. + * Queue was just frozen and now this packet will get dropped. + */ + return NULL; +} + +static inline void *packet_current_rx_frame(struct packet_sock *po, + struct sk_buff *skb, + int status, unsigned int len) +{ + char *curr = NULL; + switch (po->tp_version) { + case TPACKET_V1: + case TPACKET_V2: + curr = packet_lookup_frame(po, &po->rx_ring, + po->rx_ring.head, status); + return curr; + case TPACKET_V3: + return __packet_lookup_frame_in_block(po, skb, status, len); + default: + WARN(1, "TPACKET version not supported\n"); + BUG(); + return 0; + } +} + +static inline void *prb_lookup_block(struct packet_sock *po, + struct packet_ring_buffer *rb, + unsigned int previous, + int status) +{ + struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb); + struct tpacket_block_desc *pbd = GET_PBLOCK_DESC(pkc, previous); + + if (status != BLOCK_STATUS(pbd)) + return NULL; + return pbd; +} + +static inline int prb_previous_blk_num(struct packet_ring_buffer *rb) +{ + unsigned int prev; + if (rb->prb_bdqc.kactive_blk_num) + prev = rb->prb_bdqc.kactive_blk_num-1; + else + prev = rb->prb_bdqc.knum_blocks-1; + return prev; +} + +/* Assumes caller has held the rx_queue.lock */ +static inline void *__prb_previous_block(struct packet_sock *po, + struct packet_ring_buffer *rb, + int status) +{ + unsigned int previous = prb_previous_blk_num(rb); + return prb_lookup_block(po, rb, previous, status); +} + +static inline void *packet_previous_rx_frame(struct packet_sock *po, + struct packet_ring_buffer *rb, + int status) +{ + if (po->tp_version <= TPACKET_V2) + return packet_previous_frame(po, rb, status); + + return __prb_previous_block(po, rb, status); +} + +static inline void packet_increment_rx_head(struct packet_sock *po, + struct packet_ring_buffer *rb) +{ + switch (po->tp_version) { + case TPACKET_V1: + case TPACKET_V2: + return packet_increment_head(rb); + case TPACKET_V3: + default: + WARN(1, "TPACKET version not supported.\n"); + BUG(); + return; + } +} + static inline void *packet_previous_frame(struct packet_sock *po, struct packet_ring_buffer *rb, int status) @@ -982,12 +1741,13 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, union { struct tpacket_hdr *h1; struct tpacket2_hdr *h2; + struct tpacket3_hdr *h3; void *raw; } h; u8 *skb_head = skb->data; int skb_len = skb->len; unsigned int snaplen, res; - unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER; + unsigned long status = TP_STATUS_USER; unsigned short macoff, netoff, hdrlen; struct sk_buff *copy_skb = NULL; struct timeval tv; @@ -1033,37 +1793,46 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, po->tp_reserve; macoff = netoff - maclen; } - - if (macoff + snaplen > po->rx_ring.frame_size) { - if (po->copy_thresh && - atomic_read(&sk->sk_rmem_alloc) + skb->truesize < - (unsigned)sk->sk_rcvbuf) { - if (skb_shared(skb)) { - copy_skb = skb_clone(skb, GFP_ATOMIC); - } else { - copy_skb = skb_get(skb); - skb_head = skb->data; + if (po->tp_version <= TPACKET_V2) { + if (macoff + snaplen > po->rx_ring.frame_size) { + if (po->copy_thresh && + atomic_read(&sk->sk_rmem_alloc) + skb->truesize + < (unsigned)sk->sk_rcvbuf) { + if (skb_shared(skb)) { + copy_skb = skb_clone(skb, GFP_ATOMIC); + } else { + copy_skb = skb_get(skb); + skb_head = skb->data; + } + if (copy_skb) + skb_set_owner_r(copy_skb, sk); } - if (copy_skb) - skb_set_owner_r(copy_skb, sk); + snaplen = po->rx_ring.frame_size - macoff; + if ((int)snaplen < 0) + snaplen = 0; } - snaplen = po->rx_ring.frame_size - macoff; - if ((int)snaplen < 0) - snaplen = 0; } - spin_lock(&sk->sk_receive_queue.lock); - h.raw = packet_current_frame(po, &po->rx_ring, TP_STATUS_KERNEL); + h.raw = packet_current_rx_frame(po, skb, + TP_STATUS_KERNEL, (macoff+snaplen)); if (!h.raw) goto ring_is_full; - packet_increment_head(&po->rx_ring); + if (po->tp_version <= TPACKET_V2) { + packet_increment_rx_head(po, &po->rx_ring); + /* + * LOSING will be reported till you read the stats, + * because it's COR - Clear On Read. + * Anyways, moving it for V1/V2 only as V3 doesn't need this + * at packet level. + */ + if (po->stats.tp_drops) + status |= TP_STATUS_LOSING; + } po->stats.tp_packets++; if (copy_skb) { status |= TP_STATUS_COPY; __skb_queue_tail(&sk->sk_receive_queue, copy_skb); } - if (!po->stats.tp_drops) - status &= ~TP_STATUS_LOSING; spin_unlock(&sk->sk_receive_queue.lock); skb_copy_bits(skb, 0, h.raw + macoff, snaplen); @@ -1114,6 +1883,29 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, h.h2->tp_padding = 0; hdrlen = sizeof(*h.h2); break; + case TPACKET_V3: + /* tp_nxt_offset,vlan are already populated above. + * So DONT clear those fields here + */ + h.h3->tp_status |= status; + h.h3->tp_len = skb->len; + h.h3->tp_snaplen = snaplen; + h.h3->tp_mac = macoff; + h.h3->tp_net = netoff; + if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE) + && shhwtstamps->syststamp.tv64) + ts = ktime_to_timespec(shhwtstamps->syststamp); + else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE) + && shhwtstamps->hwtstamp.tv64) + ts = ktime_to_timespec(shhwtstamps->hwtstamp); + else if (skb->tstamp.tv64) + ts = ktime_to_timespec(skb->tstamp); + else + getnstimeofday(&ts); + h.h3->tp_sec = ts.tv_sec; + h.h3->tp_nsec = ts.tv_nsec; + hdrlen = sizeof(*h.h3); + break; default: BUG(); } @@ -1134,13 +1926,19 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, { u8 *start, *end; - end = (u8 *)PAGE_ALIGN((unsigned long)h.raw + macoff + snaplen); - for (start = h.raw; start < end; start += PAGE_SIZE) - flush_dcache_page(pgv_to_page(start)); + if (po->tp_version <= TPACKET_V2) { + end = (u8 *)PAGE_ALIGN((unsigned long)h.raw + + macoff + snaplen); + for (start = h.raw; start < end; start += PAGE_SIZE) + flush_dcache_page(pgv_to_page(start)); + } smp_wmb(); } #endif - __packet_set_status(po, h.raw, status); + if (po->tp_version <= TPACKET_V2) + __packet_set_status(po, h.raw, status); + else + prb_clear_blk_fill_status(&po->rx_ring); sk->sk_data_ready(sk, 0); @@ -1631,7 +2429,7 @@ static int packet_release(struct socket *sock) struct sock *sk = sock->sk; struct packet_sock *po; struct net *net; - struct tpacket_req req; + union tpacket_req_u req_u; if (!sk) return 0; @@ -1654,13 +2452,13 @@ static int packet_release(struct socket *sock) packet_flush_mclist(sk); - memset(&req, 0, sizeof(req)); + memset(&req_u, 0, sizeof(req_u)); if (po->rx_ring.pg_vec) - packet_set_ring(sk, &req, 1, 0); + packet_set_ring(sk, &req_u, 1, 0); if (po->tx_ring.pg_vec) - packet_set_ring(sk, &req, 1, 1); + packet_set_ring(sk, &req_u, 1, 1); fanout_release(sk); @@ -2280,15 +3078,27 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv case PACKET_RX_RING: case PACKET_TX_RING: { - struct tpacket_req req; + union tpacket_req_u req_u; + int len; - if (optlen < sizeof(req)) + switch (po->tp_version) { + case TPACKET_V1: + case TPACKET_V2: + len = sizeof(req_u.req); + break; + case TPACKET_V3: + default: + len = sizeof(req_u.req3); + break; + } + if (optlen < len) return -EINVAL; if (pkt_sk(sk)->has_vnet_hdr) return -EINVAL; - if (copy_from_user(&req, optval, sizeof(req))) + if (copy_from_user(&req_u.req, optval, len)) return -EFAULT; - return packet_set_ring(sk, &req, 0, optname == PACKET_TX_RING); + return packet_set_ring(sk, &req_u, 0, + optname == PACKET_TX_RING); } case PACKET_COPY_THRESH: { @@ -2315,6 +3125,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv switch (val) { case TPACKET_V1: case TPACKET_V2: + case TPACKET_V3: po->tp_version = val; return 0; default: @@ -2424,6 +3235,7 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, struct packet_sock *po = pkt_sk(sk); void *data; struct tpacket_stats st; + union tpacket_stats_u st_u; if (level != SOL_PACKET) return -ENOPROTOOPT; @@ -2436,15 +3248,27 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, switch (optname) { case PACKET_STATISTICS: - if (len > sizeof(struct tpacket_stats)) - len = sizeof(struct tpacket_stats); + if (po->tp_version == TPACKET_V3) { + len = sizeof(struct tpacket_stats_v3); + } else { + if (len > sizeof(struct tpacket_stats)) + len = sizeof(struct tpacket_stats); + } spin_lock_bh(&sk->sk_receive_queue.lock); - st = po->stats; + if (po->tp_version == TPACKET_V3) { + memcpy(&st_u.stats3, &po->stats, + sizeof(struct tpacket_stats)); + st_u.stats3.tp_freeze_q_cnt = + po->stats_u.stats3.tp_freeze_q_cnt; + st_u.stats3.tp_packets += po->stats.tp_drops; + data = &st_u.stats3; + } else { + st = po->stats; + st.tp_packets += st.tp_drops; + data = &st; + } memset(&po->stats, 0, sizeof(st)); spin_unlock_bh(&sk->sk_receive_queue.lock); - st.tp_packets += st.tp_drops; - - data = &st; break; case PACKET_AUXDATA: if (len > sizeof(int)) @@ -2485,6 +3309,9 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, case TPACKET_V2: val = sizeof(struct tpacket2_hdr); break; + case TPACKET_V3: + val = sizeof(struct tpacket3_hdr); + break; default: return -EINVAL; } @@ -2641,7 +3468,8 @@ static unsigned int packet_poll(struct file *file, struct socket *sock, spin_lock_bh(&sk->sk_receive_queue.lock); if (po->rx_ring.pg_vec) { - if (!packet_previous_frame(po, &po->rx_ring, TP_STATUS_KERNEL)) + if (!packet_previous_rx_frame(po, &po->rx_ring, + TP_STATUS_KERNEL)) mask |= POLLIN | POLLRDNORM; } spin_unlock_bh(&sk->sk_receive_queue.lock); @@ -2760,7 +3588,7 @@ out_free_pgvec: goto out; } -static int packet_set_ring(struct sock *sk, struct tpacket_req *req, +static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, int closing, int tx_ring) { struct pgv *pg_vec = NULL; @@ -2769,7 +3597,15 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, struct packet_ring_buffer *rb; struct sk_buff_head *rb_queue; __be16 num; - int err; + int err = -EINVAL; + /* Added to avoid minimal code churn */ + struct tpacket_req *req = &req_u->req; + + /* Opening a Tx-ring is NOT supported in TPACKET_V3 */ + if (!closing && tx_ring && (po->tp_version > TPACKET_V2)) { + WARN(1, "Tx-ring is not supported.\n"); + goto out; + } rb = tx_ring ? &po->tx_ring : &po->rx_ring; rb_queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue; @@ -2795,6 +3631,9 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, case TPACKET_V2: po->tp_hdrlen = TPACKET2_HDRLEN; break; + case TPACKET_V3: + po->tp_hdrlen = TPACKET3_HDRLEN; + break; } err = -EINVAL; @@ -2820,6 +3659,17 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, pg_vec = alloc_pg_vec(req, order); if (unlikely(!pg_vec)) goto out; + switch (po->tp_version) { + case TPACKET_V3: + /* Transmit path is not supported. We checked + * it above but just being paranoid + */ + if (!tx_ring) + init_prb_bdqc(po, rb, pg_vec, req_u, tx_ring); + break; + default: + break; + } } /* Done */ else { @@ -2872,7 +3722,11 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, register_prot_hook(sk); } spin_unlock(&po->bind_lock); - + if (closing && (po->tp_version > TPACKET_V2)) { + /* Because we don't support block-based V3 on tx-ring */ + if (!tx_ring) + prb_shutdown_retire_blk_timer(po, tx_ring, rb_queue); + } release_sock(sk); if (pg_vec) diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c index c6fffd946d42..bf10ea8fbbf9 100644 --- a/net/phonet/af_phonet.c +++ b/net/phonet/af_phonet.c @@ -480,7 +480,7 @@ int __init_or_module phonet_proto_register(unsigned int protocol, if (proto_tab[protocol]) err = -EBUSY; else - rcu_assign_pointer(proto_tab[protocol], pp); + RCU_INIT_POINTER(proto_tab[protocol], pp); mutex_unlock(&proto_tab_lock); return err; @@ -491,7 +491,7 @@ void phonet_proto_unregister(unsigned int protocol, struct phonet_protocol *pp) { mutex_lock(&proto_tab_lock); BUG_ON(proto_tab[protocol] != pp); - rcu_assign_pointer(proto_tab[protocol], NULL); + RCU_INIT_POINTER(proto_tab[protocol], NULL); mutex_unlock(&proto_tab_lock); synchronize_rcu(); proto_unregister(pp->prot); diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c index d2df8f33160b..c5827614376b 100644 --- a/net/phonet/pn_dev.c +++ b/net/phonet/pn_dev.c @@ -276,7 +276,7 @@ static void phonet_route_autodel(struct net_device *dev) mutex_lock(&pnn->routes.lock); for (i = 0; i < 64; i++) if (dev == pnn->routes.table[i]) { - rcu_assign_pointer(pnn->routes.table[i], NULL); + RCU_INIT_POINTER(pnn->routes.table[i], NULL); set_bit(i, deleted); } mutex_unlock(&pnn->routes.lock); @@ -390,7 +390,7 @@ int phonet_route_add(struct net_device *dev, u8 daddr) daddr = daddr >> 2; mutex_lock(&routes->lock); if (routes->table[daddr] == NULL) { - rcu_assign_pointer(routes->table[daddr], dev); + RCU_INIT_POINTER(routes->table[daddr], dev); dev_hold(dev); err = 0; } @@ -406,7 +406,7 @@ int phonet_route_del(struct net_device *dev, u8 daddr) daddr = daddr >> 2; mutex_lock(&routes->lock); if (dev == routes->table[daddr]) - rcu_assign_pointer(routes->table[daddr], NULL); + RCU_INIT_POINTER(routes->table[daddr], NULL); else dev = NULL; mutex_unlock(&routes->lock); diff --git a/net/phonet/socket.c b/net/phonet/socket.c index ab07711cf2f4..676d18dc75b7 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -679,7 +679,7 @@ int pn_sock_bind_res(struct sock *sk, u8 res) mutex_lock(&resource_mutex); if (pnres.sk[res] == NULL) { sock_hold(sk); - rcu_assign_pointer(pnres.sk[res], sk); + RCU_INIT_POINTER(pnres.sk[res], sk); ret = 0; } mutex_unlock(&resource_mutex); @@ -695,7 +695,7 @@ int pn_sock_unbind_res(struct sock *sk, u8 res) mutex_lock(&resource_mutex); if (pnres.sk[res] == sk) { - rcu_assign_pointer(pnres.sk[res], NULL); + RCU_INIT_POINTER(pnres.sk[res], NULL); ret = 0; } mutex_unlock(&resource_mutex); @@ -714,7 +714,7 @@ void pn_sock_unbind_all_res(struct sock *sk) mutex_lock(&resource_mutex); for (res = 0; res < 256; res++) { if (pnres.sk[res] == sk) { - rcu_assign_pointer(pnres.sk[res], NULL); + RCU_INIT_POINTER(pnres.sk[res], NULL); match++; } } diff --git a/net/rds/Kconfig b/net/rds/Kconfig index ec753b3ae72a..4cf6dc7910e4 100644 --- a/net/rds/Kconfig +++ b/net/rds/Kconfig @@ -9,6 +9,7 @@ config RDS config RDS_RDMA tristate "RDS over Infiniband and iWARP" + select LLIST depends on RDS && INFINIBAND && INFINIBAND_ADDR_TRANS ---help--- Allow RDS to use Infiniband and iWARP as a transport. diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c index 819c35a0d9cb..e8fdb172adbb 100644 --- a/net/rds/ib_rdma.c +++ b/net/rds/ib_rdma.c @@ -33,10 +33,10 @@ #include <linux/kernel.h> #include <linux/slab.h> #include <linux/rculist.h> +#include <linux/llist.h> #include "rds.h" #include "ib.h" -#include "xlist.h" static DEFINE_PER_CPU(unsigned long, clean_list_grace); #define CLEAN_LIST_BUSY_BIT 0 @@ -49,7 +49,7 @@ struct rds_ib_mr { struct rds_ib_mr_pool *pool; struct ib_fmr *fmr; - struct xlist_head xlist; + struct llist_node llnode; /* unmap_list is for freeing */ struct list_head unmap_list; @@ -71,9 +71,9 @@ struct rds_ib_mr_pool { atomic_t item_count; /* total # of MRs */ atomic_t dirty_count; /* # dirty of MRs */ - struct xlist_head drop_list; /* MRs that have reached their max_maps limit */ - struct xlist_head free_list; /* unused MRs */ - struct xlist_head clean_list; /* global unused & unamapped MRs */ + struct llist_head drop_list; /* MRs that have reached their max_maps limit */ + struct llist_head free_list; /* unused MRs */ + struct llist_head clean_list; /* global unused & unamapped MRs */ wait_queue_head_t flush_wait; atomic_t free_pinned; /* memory pinned by free MRs */ @@ -220,9 +220,9 @@ struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_ibdev) if (!pool) return ERR_PTR(-ENOMEM); - INIT_XLIST_HEAD(&pool->free_list); - INIT_XLIST_HEAD(&pool->drop_list); - INIT_XLIST_HEAD(&pool->clean_list); + init_llist_head(&pool->free_list); + init_llist_head(&pool->drop_list); + init_llist_head(&pool->clean_list); mutex_init(&pool->flush_lock); init_waitqueue_head(&pool->flush_wait); INIT_DELAYED_WORK(&pool->flush_worker, rds_ib_mr_pool_flush_worker); @@ -260,26 +260,18 @@ void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *pool) kfree(pool); } -static void refill_local(struct rds_ib_mr_pool *pool, struct xlist_head *xl, - struct rds_ib_mr **ibmr_ret) -{ - struct xlist_head *ibmr_xl; - ibmr_xl = xlist_del_head_fast(xl); - *ibmr_ret = list_entry(ibmr_xl, struct rds_ib_mr, xlist); -} - static inline struct rds_ib_mr *rds_ib_reuse_fmr(struct rds_ib_mr_pool *pool) { struct rds_ib_mr *ibmr = NULL; - struct xlist_head *ret; + struct llist_node *ret; unsigned long *flag; preempt_disable(); flag = &__get_cpu_var(clean_list_grace); set_bit(CLEAN_LIST_BUSY_BIT, flag); - ret = xlist_del_head(&pool->clean_list); + ret = llist_del_first(&pool->clean_list); if (ret) - ibmr = list_entry(ret, struct rds_ib_mr, xlist); + ibmr = llist_entry(ret, struct rds_ib_mr, llnode); clear_bit(CLEAN_LIST_BUSY_BIT, flag); preempt_enable(); @@ -529,46 +521,44 @@ static inline unsigned int rds_ib_flush_goal(struct rds_ib_mr_pool *pool, int fr } /* - * given an xlist of mrs, put them all into the list_head for more processing + * given an llist of mrs, put them all into the list_head for more processing */ -static void xlist_append_to_list(struct xlist_head *xlist, struct list_head *list) +static void llist_append_to_list(struct llist_head *llist, struct list_head *list) { struct rds_ib_mr *ibmr; - struct xlist_head splice; - struct xlist_head *cur; - struct xlist_head *next; - - splice.next = NULL; - xlist_splice(xlist, &splice); - cur = splice.next; - while (cur) { - next = cur->next; - ibmr = list_entry(cur, struct rds_ib_mr, xlist); + struct llist_node *node; + struct llist_node *next; + + node = llist_del_all(llist); + while (node) { + next = node->next; + ibmr = llist_entry(node, struct rds_ib_mr, llnode); list_add_tail(&ibmr->unmap_list, list); - cur = next; + node = next; } } /* - * this takes a list head of mrs and turns it into an xlist of clusters. - * each cluster has an xlist of MR_CLUSTER_SIZE mrs that are ready for - * reuse. + * this takes a list head of mrs and turns it into linked llist nodes + * of clusters. Each cluster has linked llist nodes of + * MR_CLUSTER_SIZE mrs that are ready for reuse. */ -static void list_append_to_xlist(struct rds_ib_mr_pool *pool, - struct list_head *list, struct xlist_head *xlist, - struct xlist_head **tail_ret) +static void list_to_llist_nodes(struct rds_ib_mr_pool *pool, + struct list_head *list, + struct llist_node **nodes_head, + struct llist_node **nodes_tail) { struct rds_ib_mr *ibmr; - struct xlist_head *cur_mr = xlist; - struct xlist_head *tail_mr = NULL; + struct llist_node *cur = NULL; + struct llist_node **next = nodes_head; list_for_each_entry(ibmr, list, unmap_list) { - tail_mr = &ibmr->xlist; - tail_mr->next = NULL; - cur_mr->next = tail_mr; - cur_mr = tail_mr; + cur = &ibmr->llnode; + *next = cur; + next = &cur->next; } - *tail_ret = tail_mr; + *next = NULL; + *nodes_tail = cur; } /* @@ -581,8 +571,8 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, int free_all, struct rds_ib_mr **ibmr_ret) { struct rds_ib_mr *ibmr, *next; - struct xlist_head clean_xlist; - struct xlist_head *clean_tail; + struct llist_node *clean_nodes; + struct llist_node *clean_tail; LIST_HEAD(unmap_list); LIST_HEAD(fmr_list); unsigned long unpinned = 0; @@ -603,7 +593,7 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, prepare_to_wait(&pool->flush_wait, &wait, TASK_UNINTERRUPTIBLE); - if (xlist_empty(&pool->clean_list)) + if (llist_empty(&pool->clean_list)) schedule(); ibmr = rds_ib_reuse_fmr(pool); @@ -628,10 +618,10 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, /* Get the list of all MRs to be dropped. Ordering matters - * we want to put drop_list ahead of free_list. */ - xlist_append_to_list(&pool->drop_list, &unmap_list); - xlist_append_to_list(&pool->free_list, &unmap_list); + llist_append_to_list(&pool->drop_list, &unmap_list); + llist_append_to_list(&pool->free_list, &unmap_list); if (free_all) - xlist_append_to_list(&pool->clean_list, &unmap_list); + llist_append_to_list(&pool->clean_list, &unmap_list); free_goal = rds_ib_flush_goal(pool, free_all); @@ -663,22 +653,22 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, if (!list_empty(&unmap_list)) { /* we have to make sure that none of the things we're about * to put on the clean list would race with other cpus trying - * to pull items off. The xlist would explode if we managed to + * to pull items off. The llist would explode if we managed to * remove something from the clean list and then add it back again - * while another CPU was spinning on that same item in xlist_del_head. + * while another CPU was spinning on that same item in llist_del_first. * - * This is pretty unlikely, but just in case wait for an xlist grace period + * This is pretty unlikely, but just in case wait for an llist grace period * here before adding anything back into the clean list. */ wait_clean_list_grace(); - list_append_to_xlist(pool, &unmap_list, &clean_xlist, &clean_tail); + list_to_llist_nodes(pool, &unmap_list, &clean_nodes, &clean_tail); if (ibmr_ret) - refill_local(pool, &clean_xlist, ibmr_ret); + *ibmr_ret = llist_entry(clean_nodes, struct rds_ib_mr, llnode); - /* refill_local may have emptied our list */ - if (!xlist_empty(&clean_xlist)) - xlist_add(clean_xlist.next, clean_tail, &pool->clean_list); + /* more than one entry in llist nodes */ + if (clean_nodes->next) + llist_add_batch(clean_nodes->next, clean_tail, &pool->clean_list); } @@ -711,9 +701,9 @@ void rds_ib_free_mr(void *trans_private, int invalidate) /* Return it to the pool's free list */ if (ibmr->remap_count >= pool->fmr_attr.max_maps) - xlist_add(&ibmr->xlist, &ibmr->xlist, &pool->drop_list); + llist_add(&ibmr->llnode, &pool->drop_list); else - xlist_add(&ibmr->xlist, &ibmr->xlist, &pool->free_list); + llist_add(&ibmr->llnode, &pool->free_list); atomic_add(ibmr->sg_len, &pool->free_pinned); atomic_inc(&pool->dirty_count); diff --git a/net/rds/xlist.h b/net/rds/xlist.h deleted file mode 100644 index e6b5190daddd..000000000000 --- a/net/rds/xlist.h +++ /dev/null @@ -1,80 +0,0 @@ -#ifndef _LINUX_XLIST_H -#define _LINUX_XLIST_H - -#include <linux/stddef.h> -#include <linux/poison.h> -#include <linux/prefetch.h> -#include <asm/system.h> - -struct xlist_head { - struct xlist_head *next; -}; - -static inline void INIT_XLIST_HEAD(struct xlist_head *list) -{ - list->next = NULL; -} - -static inline int xlist_empty(struct xlist_head *head) -{ - return head->next == NULL; -} - -static inline void xlist_add(struct xlist_head *new, struct xlist_head *tail, - struct xlist_head *head) -{ - struct xlist_head *cur; - struct xlist_head *check; - - while (1) { - cur = head->next; - tail->next = cur; - check = cmpxchg(&head->next, cur, new); - if (check == cur) - break; - } -} - -static inline struct xlist_head *xlist_del_head(struct xlist_head *head) -{ - struct xlist_head *cur; - struct xlist_head *check; - struct xlist_head *next; - - while (1) { - cur = head->next; - if (!cur) - goto out; - - next = cur->next; - check = cmpxchg(&head->next, cur, next); - if (check == cur) - goto out; - } -out: - return cur; -} - -static inline struct xlist_head *xlist_del_head_fast(struct xlist_head *head) -{ - struct xlist_head *cur; - - cur = head->next; - if (!cur) - return NULL; - - head->next = cur->next; - return cur; -} - -static inline void xlist_splice(struct xlist_head *list, - struct xlist_head *head) -{ - struct xlist_head *cur; - - WARN_ON(head->next); - cur = xchg(&list->next, NULL); - head->next = cur; -} - -#endif diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 102fc212cd64..e051398fdf6b 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -196,8 +196,7 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a, skb2->skb_iif = skb->dev->ifindex; skb2->dev = dev; - dev_queue_xmit(skb2); - err = 0; + err = dev_queue_xmit(skb2); out: if (err) { diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 2a318f2dc3e5..b5d56a22b1d2 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -112,7 +112,7 @@ static struct sk_buff *prio_dequeue(struct Qdisc *sch) for (prio = 0; prio < q->bands; prio++) { struct Qdisc *qdisc = q->queues[prio]; - struct sk_buff *skb = qdisc->dequeue(qdisc); + struct sk_buff *skb = qdisc_dequeue_peeked(qdisc); if (skb) { qdisc_bstats_update(sch, skb); sch->q.qlen--; diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index 0a833d0c1f61..e83c272c0325 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -287,6 +287,12 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch) u32 r, slot, salt, sfbhash; int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; + if (unlikely(sch->q.qlen >= q->limit)) { + sch->qstats.overlimits++; + q->stats.queuedrop++; + goto drop; + } + if (q->rehash_interval > 0) { unsigned long limit = q->rehash_time + q->rehash_interval; @@ -332,12 +338,9 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch) slot ^= 1; sfb_skb_cb(skb)->hashes[slot] = 0; - if (unlikely(minqlen >= q->max || sch->q.qlen >= q->limit)) { + if (unlikely(minqlen >= q->max)) { sch->qstats.overlimits++; - if (minqlen >= q->max) - q->stats.bucketdrop++; - else - q->stats.queuedrop++; + q->stats.bucketdrop++; goto drop; } diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 4536ee64383e..4f5510e2bd6f 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -410,7 +410,12 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) /* Return Congestion Notification only if we dropped a packet * from this flow. */ - return (qlen != slot->qlen) ? NET_XMIT_CN : NET_XMIT_SUCCESS; + if (qlen != slot->qlen) + return NET_XMIT_CN; + + /* As we dropped a packet, better let upper stack know this */ + qdisc_tree_decrease_qlen(sch, 1); + return NET_XMIT_SUCCESS; } static struct sk_buff * diff --git a/net/sctp/associola.c b/net/sctp/associola.c index dc16b90ddb6f..152b5b3c3fff 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -282,6 +282,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a asoc->peer.asconf_capable = 1; asoc->asconf_addr_del_pending = NULL; asoc->src_out_of_asoc_ok = 0; + asoc->new_transport = NULL; /* Create an input queue. */ sctp_inq_init(&asoc->base.inqueue); diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index a6d27bf563a5..14c2b06028ff 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -917,6 +917,8 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) * current cwnd). */ if (!list_empty(&q->retransmit)) { + if (asoc->peer.retran_path->state == SCTP_UNCONFIRMED) + goto sctp_flush_out; if (transport == asoc->peer.retran_path) goto retran; @@ -989,6 +991,8 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) ((new_transport->state == SCTP_INACTIVE) || (new_transport->state == SCTP_UNCONFIRMED))) new_transport = asoc->peer.active_path; + if (new_transport->state == SCTP_UNCONFIRMED) + continue; /* Change packets if necessary. */ if (new_transport != transport) { diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 81db4e385352..0121e0ab0351 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -3015,6 +3015,7 @@ static __be16 sctp_process_asconf_param(struct sctp_association *asoc, /* Start the heartbeat timer. */ if (!mod_timer(&peer->hb_timer, sctp_transport_timeout(peer))) sctp_transport_hold(peer); + asoc->new_transport = peer; break; case SCTP_PARAM_DEL_IP: /* ADDIP 4.3 D7) If a request is received to delete the diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 49b847b00f99..73d14fc02606 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -3612,6 +3612,11 @@ sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep, */ asconf_ack->dest = chunk->source; sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(asconf_ack)); + if (asoc->new_transport) { + sctp_sf_heartbeat(ep, asoc, type, asoc->new_transport, + commands); + ((struct sctp_association *)asoc)->new_transport = NULL; + } return SCTP_DISPOSITION_CONSUME; } diff --git a/net/socket.c b/net/socket.c index b1cbbcd92558..2517e11a5300 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1871,8 +1871,14 @@ SYSCALL_DEFINE2(shutdown, int, fd, int, how) #define COMPAT_NAMELEN(msg) COMPAT_MSG(msg, msg_namelen) #define COMPAT_FLAGS(msg) COMPAT_MSG(msg, msg_flags) +struct used_address { + struct sockaddr_storage name; + unsigned int name_len; +}; + static int __sys_sendmsg(struct socket *sock, struct msghdr __user *msg, - struct msghdr *msg_sys, unsigned flags, int nosec) + struct msghdr *msg_sys, unsigned flags, + struct used_address *used_address) { struct compat_msghdr __user *msg_compat = (struct compat_msghdr __user *)msg; @@ -1953,8 +1959,28 @@ static int __sys_sendmsg(struct socket *sock, struct msghdr __user *msg, if (sock->file->f_flags & O_NONBLOCK) msg_sys->msg_flags |= MSG_DONTWAIT; - err = (nosec ? sock_sendmsg_nosec : sock_sendmsg)(sock, msg_sys, - total_len); + /* + * If this is sendmmsg() and current destination address is same as + * previously succeeded address, omit asking LSM's decision. + * used_address->name_len is initialized to UINT_MAX so that the first + * destination address never matches. + */ + if (used_address && used_address->name_len == msg_sys->msg_namelen && + !memcmp(&used_address->name, msg->msg_name, + used_address->name_len)) { + err = sock_sendmsg_nosec(sock, msg_sys, total_len); + goto out_freectl; + } + err = sock_sendmsg(sock, msg_sys, total_len); + /* + * If this is sendmmsg() and sending to current destination address was + * successful, remember it. + */ + if (used_address && err >= 0) { + used_address->name_len = msg_sys->msg_namelen; + memcpy(&used_address->name, msg->msg_name, + used_address->name_len); + } out_freectl: if (ctl_buf != ctl) @@ -1979,7 +2005,7 @@ SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags) if (!sock) goto out; - err = __sys_sendmsg(sock, msg, &msg_sys, flags, 0); + err = __sys_sendmsg(sock, msg, &msg_sys, flags, NULL); fput_light(sock->file, fput_needed); out: @@ -1998,6 +2024,10 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, struct mmsghdr __user *entry; struct compat_mmsghdr __user *compat_entry; struct msghdr msg_sys; + struct used_address used_address; + + if (vlen > UIO_MAXIOV) + vlen = UIO_MAXIOV; datagrams = 0; @@ -2005,27 +2035,22 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, if (!sock) return err; - err = sock_error(sock->sk); - if (err) - goto out_put; - + used_address.name_len = UINT_MAX; entry = mmsg; compat_entry = (struct compat_mmsghdr __user *)mmsg; + err = 0; while (datagrams < vlen) { - /* - * No need to ask LSM for more than the first datagram. - */ if (MSG_CMSG_COMPAT & flags) { err = __sys_sendmsg(sock, (struct msghdr __user *)compat_entry, - &msg_sys, flags, datagrams); + &msg_sys, flags, &used_address); if (err < 0) break; err = __put_user(err, &compat_entry->msg_len); ++compat_entry; } else { err = __sys_sendmsg(sock, (struct msghdr __user *)entry, - &msg_sys, flags, datagrams); + &msg_sys, flags, &used_address); if (err < 0) break; err = put_user(err, &entry->msg_len); @@ -2037,29 +2062,11 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, ++datagrams; } -out_put: fput_light(sock->file, fput_needed); - if (err == 0) - return datagrams; - - if (datagrams != 0) { - /* - * We may send less entries than requested (vlen) if the - * sock is non blocking... - */ - if (err != -EAGAIN) { - /* - * ... or if sendmsg returns an error after we - * send some datagrams, where we record the - * error to return on the next call or if the - * app asks about it using getsockopt(SO_ERROR). - */ - sock->sk->sk_err = -err; - } - + /* We only return an error if no datagrams were able to be sent */ + if (datagrams != 0) return datagrams; - } return err; } @@ -2463,7 +2470,7 @@ int sock_register(const struct net_proto_family *ops) lockdep_is_held(&net_family_lock))) err = -EEXIST; else { - rcu_assign_pointer(net_families[ops->family], ops); + RCU_INIT_POINTER(net_families[ops->family], ops); err = 0; } spin_unlock(&net_family_lock); @@ -2491,7 +2498,7 @@ void sock_unregister(int family) BUG_ON(family < 0 || family >= NPROTO); spin_lock(&net_family_lock); - rcu_assign_pointer(net_families[family], NULL); + RCU_INIT_POINTER(net_families[family], NULL); spin_unlock(&net_family_lock); synchronize_rcu(); diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 364eb45e989d..d4132754cbe1 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -122,7 +122,7 @@ gss_cred_set_ctx(struct rpc_cred *cred, struct gss_cl_ctx *ctx) if (!test_bit(RPCAUTH_CRED_NEW, &cred->cr_flags)) return; gss_get_ctx(ctx); - rcu_assign_pointer(gss_cred->gc_ctx, ctx); + RCU_INIT_POINTER(gss_cred->gc_ctx, ctx); set_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); smp_mb__before_clear_bit(); clear_bit(RPCAUTH_CRED_NEW, &cred->cr_flags); @@ -970,7 +970,7 @@ gss_destroy_nullcred(struct rpc_cred *cred) struct gss_auth *gss_auth = container_of(cred->cr_auth, struct gss_auth, rpc_auth); struct gss_cl_ctx *ctx = gss_cred->gc_ctx; - rcu_assign_pointer(gss_cred->gc_ctx, NULL); + RCU_INIT_POINTER(gss_cred->gc_ctx, NULL); call_rcu(&cred->cr_rcu, gss_free_cred_callback); if (ctx) gss_put_ctx(ctx); diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 9b6a4d1ea8f8..f4385e45a5fc 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -187,6 +187,7 @@ EXPORT_SYMBOL_GPL(xprt_load_transport); /** * xprt_reserve_xprt - serialize write access to transports * @task: task that is requesting access to the transport + * @xprt: pointer to the target transport * * This prevents mixing the payload of separate requests, and prevents * transport connects from colliding with writes. No congestion control diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c index 58064d9e565d..791ab2e77f3f 100644 --- a/net/xfrm/xfrm_algo.c +++ b/net/xfrm/xfrm_algo.c @@ -462,8 +462,8 @@ static struct xfrm_algo_desc ealg_list[] = { .desc = { .sadb_alg_id = SADB_X_EALG_AESCTR, .sadb_alg_ivlen = 8, - .sadb_alg_minbits = 128, - .sadb_alg_maxbits = 256 + .sadb_alg_minbits = 160, + .sadb_alg_maxbits = 288 } }, }; diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c index fc91ad7ee26e..f781b9ab8a54 100644 --- a/net/xfrm/xfrm_ipcomp.c +++ b/net/xfrm/xfrm_ipcomp.c @@ -70,26 +70,29 @@ static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb) while ((scratch += len, dlen -= len) > 0) { skb_frag_t *frag; + struct page *page; err = -EMSGSIZE; if (WARN_ON(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS)) goto out; frag = skb_shinfo(skb)->frags + skb_shinfo(skb)->nr_frags; - frag->page = alloc_page(GFP_ATOMIC); + page = alloc_page(GFP_ATOMIC); err = -ENOMEM; - if (!frag->page) + if (!page) goto out; + __skb_frag_set_page(frag, page); + len = PAGE_SIZE; if (dlen < len) len = dlen; - memcpy(page_address(frag->page), scratch, len); - frag->page_offset = 0; frag->size = len; + memcpy(skb_frag_address(frag), scratch, len); + skb->truesize += len; skb->data_len += len; skb->len += len; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 0256b8a0a7cf..d0a42df5160e 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -2927,7 +2927,7 @@ static int __net_init xfrm_user_net_init(struct net *net) if (nlsk == NULL) return -ENOMEM; net->xfrm.nlsk_stash = nlsk; /* Don't set to NULL */ - rcu_assign_pointer(net->xfrm.nlsk, nlsk); + RCU_INIT_POINTER(net->xfrm.nlsk, nlsk); return 0; } @@ -2935,7 +2935,7 @@ static void __net_exit xfrm_user_net_exit(struct list_head *net_exit_list) { struct net *net; list_for_each_entry(net, net_exit_list, exit_list) - rcu_assign_pointer(net->xfrm.nlsk, NULL); + RCU_INIT_POINTER(net->xfrm.nlsk, NULL); synchronize_net(); list_for_each_entry(net, net_exit_list, exit_list) netlink_kernel_release(net->xfrm.nlsk_stash); |