From 797659fb4a4a511649cd71028141c32ad1698a12 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 10 Mar 2007 15:56:08 -0300 Subject: [PPPOE]: Introduce pppoe_hdr() For consistency with all the other skb->nh.raw accessors. Also do some really obvious simplifications in pppoe_recvmsg, well the kfree_skb one is not so obvious, but free() and kfree() have the same behaviour (hint :-) ). Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- drivers/net/pppoe.c | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) (limited to 'drivers/net/pppoe.c') diff --git a/drivers/net/pppoe.c b/drivers/net/pppoe.c index ebfa2967cd6..3080a44b23a 100644 --- a/drivers/net/pppoe.c +++ b/drivers/net/pppoe.c @@ -347,7 +347,7 @@ static int pppoe_rcv_core(struct sock *sk, struct sk_buff *skb) struct pppox_sock *relay_po = NULL; if (sk->sk_state & PPPOX_BOUND) { - struct pppoe_hdr *ph = (struct pppoe_hdr *) skb->nh.raw; + struct pppoe_hdr *ph = pppoe_hdr(skb); int len = ntohs(ph->length); skb_pull_rcsum(skb, sizeof(struct pppoe_hdr)); if (pskb_trim_rcsum(skb, len)) @@ -401,7 +401,7 @@ static int pppoe_rcv(struct sk_buff *skb, if (!(skb = skb_share_check(skb, GFP_ATOMIC))) goto out; - ph = (struct pppoe_hdr *) skb->nh.raw; + ph = pppoe_hdr(skb); po = get_item((unsigned long) ph->sid, eth_hdr(skb)->h_source, dev->ifindex); if (po != NULL) @@ -433,7 +433,7 @@ static int pppoe_disc_rcv(struct sk_buff *skb, if (!(skb = skb_share_check(skb, GFP_ATOMIC))) goto out; - ph = (struct pppoe_hdr *) skb->nh.raw; + ph = pppoe_hdr(skb); if (ph->code != PADT_CODE) goto abort; @@ -931,8 +931,6 @@ static int pppoe_recvmsg(struct kiocb *iocb, struct socket *sock, struct sock *sk = sock->sk; struct sk_buff *skb = NULL; int error = 0; - int len; - struct pppoe_hdr *ph = NULL; if (sk->sk_state & PPPOX_BOUND) { error = -EIO; @@ -949,19 +947,15 @@ static int pppoe_recvmsg(struct kiocb *iocb, struct socket *sock, m->msg_namelen = 0; if (skb) { - error = 0; - ph = (struct pppoe_hdr *) skb->nh.raw; - len = ntohs(ph->length); + struct pppoe_hdr *ph = pppoe_hdr(skb); + const int len = ntohs(ph->length); error = memcpy_toiovec(m->msg_iov, (unsigned char *) &ph->tag[0], len); - if (error < 0) - goto do_skb_free; - error = len; + if (error == 0) + error = len; } -do_skb_free: - if (skb) - kfree_skb(skb); + kfree_skb(skb); end: return error; } -- cgit v1.2.3 From c1d2bbe1cd6c7bbdc6d532cefebb66c7efb789ce Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 10 Apr 2007 20:45:18 -0700 Subject: [SK_BUFF]: Introduce skb_reset_network_header(skb) For the common, open coded 'skb->nh.raw = skb->data' operation, so that we can later turn skb->nh.raw into a offset, reducing the size of struct sk_buff in 64bit land while possibly keeping it as a pointer on 32bit. This one touches just the most simple case, next will handle the slightly more "complex" cases. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- drivers/net/pppoe.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/net/pppoe.c') diff --git a/drivers/net/pppoe.c b/drivers/net/pppoe.c index 3080a44b23a..e94790632d5 100644 --- a/drivers/net/pppoe.c +++ b/drivers/net/pppoe.c @@ -799,7 +799,7 @@ static int pppoe_sendmsg(struct kiocb *iocb, struct socket *sock, /* Reserve space for headers. */ skb_reserve(skb, dev->hard_header_len); - skb->nh.raw = skb->data; + skb_reset_network_header(skb); skb->dev = dev; @@ -884,7 +884,7 @@ static int __pppoe_xmit(struct sock *sk, struct sk_buff *skb) memcpy(ph, &hdr, sizeof(struct pppoe_hdr)); skb2->protocol = __constant_htons(ETH_P_PPP_SES); - skb2->nh.raw = skb2->data; + skb_reset_network_header(skb2); skb2->dev = dev; -- cgit v1.2.3 From d626f62b11e00c16e81e4308ab93d3f13551812a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 27 Mar 2007 18:55:52 -0300 Subject: [SK_BUFF]: Introduce skb_copy_from_linear_data{_offset} To clearly state the intent of copying from linear sk_buffs, _offset being a overly long variant but interesting for the sake of saving some bytes. Signed-off-by: Arnaldo Carvalho de Melo --- drivers/net/pppoe.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/net/pppoe.c') diff --git a/drivers/net/pppoe.c b/drivers/net/pppoe.c index e94790632d5..e9fb616ff66 100644 --- a/drivers/net/pppoe.c +++ b/drivers/net/pppoe.c @@ -869,7 +869,8 @@ static int __pppoe_xmit(struct sock *sk, struct sk_buff *skb) goto abort; skb_reserve(skb2, dev->hard_header_len + sizeof(struct pppoe_hdr)); - memcpy(skb_put(skb2, skb->len), skb->data, skb->len); + skb_copy_from_linear_data(skb, skb_put(skb2, skb->len), + skb->len); } else { /* Make a clone so as to not disturb the original skb, * give dev_queue_xmit something it can free. -- cgit v1.2.3 From bfafb26e11849fe99e03cc1902a91f7f65354e47 Mon Sep 17 00:00:00 2001 From: Florian Zumbiehl Date: Fri, 20 Apr 2007 16:56:31 -0700 Subject: [PPPoE]: miscellaneous smaller cleanups below is a patch that just removes dead code/initializers without any effect (first access is an assignment) that I stumbled accross while reading the source. Signed-off-by: Florian Zumbiehl Acked-by: Michal Ostrowski Signed-off-by: David S. Miller --- drivers/net/pppoe.c | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) (limited to 'drivers/net/pppoe.c') diff --git a/drivers/net/pppoe.c b/drivers/net/pppoe.c index e9fb616ff66..f761a9aae4c 100644 --- a/drivers/net/pppoe.c +++ b/drivers/net/pppoe.c @@ -207,7 +207,7 @@ static inline struct pppox_sock *get_item(unsigned long sid, static inline struct pppox_sock *get_item_by_addr(struct sockaddr_pppox *sp) { - struct net_device *dev = NULL; + struct net_device *dev; int ifindex; dev = dev_get_by_name(sp->sa_addr.pppoe.dev); @@ -222,9 +222,6 @@ static inline int set_item(struct pppox_sock *po) { int i; - if (!po) - return -EINVAL; - write_lock_bh(&pppoe_hash_lock); i = __set_item(po); write_unlock_bh(&pppoe_hash_lock); @@ -344,7 +341,7 @@ static struct notifier_block pppoe_notifier = { static int pppoe_rcv_core(struct sock *sk, struct sk_buff *skb) { struct pppox_sock *po = pppox_sk(sk); - struct pppox_sock *relay_po = NULL; + struct pppox_sock *relay_po; if (sk->sk_state & PPPOX_BOUND) { struct pppoe_hdr *ph = pppoe_hdr(skb); @@ -514,7 +511,6 @@ static int pppoe_release(struct socket *sock) { struct sock *sk = sock->sk; struct pppox_sock *po; - int error = 0; if (!sk) return 0; @@ -543,7 +539,7 @@ static int pppoe_release(struct socket *sock) skb_queue_purge(&sk->sk_receive_queue); sock_put(sk); - return error; + return 0; } @@ -762,10 +758,10 @@ static int pppoe_ioctl(struct socket *sock, unsigned int cmd, static int pppoe_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m, size_t total_len) { - struct sk_buff *skb = NULL; + struct sk_buff *skb; struct sock *sk = sock->sk; struct pppox_sock *po = pppox_sk(sk); - int error = 0; + int error; struct pppoe_hdr hdr; struct pppoe_hdr *ph; struct net_device *dev; @@ -930,7 +926,7 @@ static int pppoe_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m, size_t total_len, int flags) { struct sock *sk = sock->sk; - struct sk_buff *skb = NULL; + struct sk_buff *skb; int error = 0; if (sk->sk_state & PPPOX_BOUND) { @@ -941,9 +937,8 @@ static int pppoe_recvmsg(struct kiocb *iocb, struct socket *sock, skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, flags & MSG_DONTWAIT, &error); - if (error < 0) { + if (error < 0) goto end; - } m->msg_namelen = 0; @@ -986,7 +981,7 @@ out: static __inline__ struct pppox_sock *pppoe_get_idx(loff_t pos) { - struct pppox_sock *po = NULL; + struct pppox_sock *po; int i = 0; for (; i < PPPOE_HASH_SIZE; i++) { -- cgit v1.2.3 From 74b885cf86def9bc836772e3c1788c00b72a35c9 Mon Sep 17 00:00:00 2001 From: Florian Zumbiehl Date: Fri, 20 Apr 2007 16:57:27 -0700 Subject: [PPPOE]: race between interface going down and connect() below you find a patch that (hopefully) fixes a race between an interface going down and a connect() to a peer on that interface. Before, connect() would determine that an interface is up, then the interface could go down and all entries referring to that interface in the item_hash_table would be marked as ZOMBIEs and their references to the device would be freed, and after that, connect() would put a new entry into the hash table referring to the device that meanwhile is down already - which also would cause unregister_netdevice() to wait until the socket has been release()d. This patch does not suffice if we are not allowed to accept connect()s referring to a device that we already acked a NETDEV_GOING_DOWN for (that is: all references are only guaranteed to be freed after NETDEV_DOWN has been acknowledged, not necessarily after the NETDEV_GOING_DOWN already). And if we are allowed to, we could avoid looking through the hash table upon NETDEV_GOING_DOWN completely and only do that once we get the NETDEV_DOWN ... mostrows: pppoe_flush_dev is called on NETDEV_GOING_DOWN and NETDEV_DOWN to deal with this "late connect" issue. Ideally one would hope to notify users at the "NETDEV_GOING_DOWN" phase (just to pretend to be nice). However, it is the NETDEV_DOWN scan that takes all the responsibility for ensuring nobody is hanging around at that time. Signed-off-by: Florian Zumbiehl Acked-by: Michal Ostrowski Signed-off-by: David S. Miller --- drivers/net/pppoe.c | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) (limited to 'drivers/net/pppoe.c') diff --git a/drivers/net/pppoe.c b/drivers/net/pppoe.c index f761a9aae4c..bc4fc30bc85 100644 --- a/drivers/net/pppoe.c +++ b/drivers/net/pppoe.c @@ -218,17 +218,6 @@ static inline struct pppox_sock *get_item_by_addr(struct sockaddr_pppox *sp) return get_item(sp->sa_addr.pppoe.sid, sp->sa_addr.pppoe.remote, ifindex); } -static inline int set_item(struct pppox_sock *po) -{ - int i; - - write_lock_bh(&pppoe_hash_lock); - i = __set_item(po); - write_unlock_bh(&pppoe_hash_lock); - - return i; -} - static inline struct pppox_sock *delete_item(unsigned long sid, char *addr, int ifindex) { struct pppox_sock *ret; @@ -595,14 +584,18 @@ static int pppoe_connect(struct socket *sock, struct sockaddr *uservaddr, po->pppoe_dev = dev; po->pppoe_ifindex = dev->ifindex; - if (!(dev->flags & IFF_UP)) + write_lock_bh(&pppoe_hash_lock); + if (!(dev->flags & IFF_UP)){ + write_unlock_bh(&pppoe_hash_lock); goto err_put; + } memcpy(&po->pppoe_pa, &sp->sa_addr.pppoe, sizeof(struct pppoe_addr)); - error = set_item(po); + error = __set_item(po); + write_unlock_bh(&pppoe_hash_lock); if (error < 0) goto err_put; -- cgit v1.2.3 From 42dc9cd54b7290f862874a2544e50395e5719985 Mon Sep 17 00:00:00 2001 From: Michal Ostrowski Date: Fri, 20 Apr 2007 16:59:24 -0700 Subject: [PPPOE]: Fix device tear-down notification. pppoe_flush_dev() kicks all sockets bound to a device that is going down. In doing so, locks must be taken in the right order consistently (sock lock, followed by the pppoe_hash_lock). However, the scan process is based on us holding the sock lock. So, when something is found in the scan we must release the lock we're holding and grab the sock lock. This patch fixes race conditions between this code and pppoe_release(), both of which perform similar functions but would naturally prefer to grab locks in opposing orders. Both code paths are now going after these locks in a consistent manner. pppoe_hash_lock protects the contents of the "pppox_sock" objects that reside inside the hash. Thus, NULL'ing out the pppoe_dev field should be done under the protection of this lock. Signed-off-by: Michal Ostrowski Signed-off-by: David S. Miller --- drivers/net/pppoe.c | 87 ++++++++++++++++++++++++++++++----------------------- 1 file changed, 50 insertions(+), 37 deletions(-) (limited to 'drivers/net/pppoe.c') diff --git a/drivers/net/pppoe.c b/drivers/net/pppoe.c index bc4fc30bc85..6f98834e6ac 100644 --- a/drivers/net/pppoe.c +++ b/drivers/net/pppoe.c @@ -241,54 +241,53 @@ static inline struct pppox_sock *delete_item(unsigned long sid, char *addr, int static void pppoe_flush_dev(struct net_device *dev) { int hash; - BUG_ON(dev == NULL); - read_lock_bh(&pppoe_hash_lock); + write_lock_bh(&pppoe_hash_lock); for (hash = 0; hash < PPPOE_HASH_SIZE; hash++) { struct pppox_sock *po = item_hash_table[hash]; while (po != NULL) { - if (po->pppoe_dev == dev) { - struct sock *sk = sk_pppox(po); - - sock_hold(sk); - po->pppoe_dev = NULL; + struct sock *sk = sk_pppox(po); + if (po->pppoe_dev != dev) { + po = po->next; + continue; + } + po->pppoe_dev = NULL; + dev_put(dev); - /* We hold a reference to SK, now drop the - * hash table lock so that we may attempt - * to lock the socket (which can sleep). - */ - read_unlock_bh(&pppoe_hash_lock); - lock_sock(sk); + /* We always grab the socket lock, followed by the + * pppoe_hash_lock, in that order. Since we should + * hold the sock lock while doing any unbinding, + * we need to release the lock we're holding. + * Hold a reference to the sock so it doesn't disappear + * as we're jumping between locks. + */ - if (sk->sk_state & - (PPPOX_CONNECTED | PPPOX_BOUND)) { - pppox_unbind_sock(sk); - dev_put(dev); - sk->sk_state = PPPOX_ZOMBIE; - sk->sk_state_change(sk); - } + sock_hold(sk); - release_sock(sk); + write_unlock_bh(&pppoe_hash_lock); + lock_sock(sk); - sock_put(sk); + if (sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND)) { + pppox_unbind_sock(sk); + sk->sk_state = PPPOX_ZOMBIE; + sk->sk_state_change(sk); + } - read_lock_bh(&pppoe_hash_lock); + release_sock(sk); + sock_put(sk); - /* Now restart from the beginning of this - * hash chain. We always NULL out pppoe_dev - * so we are guaranteed to make forward - * progress. - */ - po = item_hash_table[hash]; - continue; - } - po = po->next; + /* Restart scan at the beginning of this hash chain. + * While the lock was dropped the chain contents may + * have changed. + */ + write_lock_bh(&pppoe_hash_lock); + po = item_hash_table[hash]; } } - read_unlock_bh(&pppoe_hash_lock); + write_unlock_bh(&pppoe_hash_lock); } static int pppoe_device_event(struct notifier_block *this, @@ -504,28 +503,42 @@ static int pppoe_release(struct socket *sock) if (!sk) return 0; - if (sock_flag(sk, SOCK_DEAD)) + lock_sock(sk); + if (sock_flag(sk, SOCK_DEAD)){ + release_sock(sk); return -EBADF; + } pppox_unbind_sock(sk); /* Signal the death of the socket. */ sk->sk_state = PPPOX_DEAD; + + /* Write lock on hash lock protects the entire "po" struct from + * concurrent updates via pppoe_flush_dev. The "po" struct should + * be considered part of the hash table contents, thus protected + * by the hash table lock */ + write_lock_bh(&pppoe_hash_lock); + po = pppox_sk(sk); if (po->pppoe_pa.sid) { - delete_item(po->pppoe_pa.sid, po->pppoe_pa.remote, po->pppoe_ifindex); + __delete_item(po->pppoe_pa.sid, + po->pppoe_pa.remote, po->pppoe_ifindex); } - if (po->pppoe_dev) + if (po->pppoe_dev) { dev_put(po->pppoe_dev); + po->pppoe_dev = NULL; + } - po->pppoe_dev = NULL; + write_unlock_bh(&pppoe_hash_lock); sock_orphan(sk); sock->sk = NULL; skb_queue_purge(&sk->sk_receive_queue); + release_sock(sk); sock_put(sk); return 0; -- cgit v1.2.3