From 793245eeb97bd28e363f2b0f2e766fdbff0c9619 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 16 Aug 2005 20:39:38 -0700 Subject: [IPV6]: Fix raw socket hardware checksum failures When packets hit raw sockets the csum update isn't done yet, do it manually. Packets can also reach rawv6_rcv on the output path through ip6_call_ra_chain, in this case skb->ip_summed is CHECKSUM_NONE and this codepath isn't executed. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv6/raw.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index e2b848ec985..1d4d75b34d3 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -328,6 +328,8 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb) if (skb->ip_summed != CHECKSUM_UNNECESSARY) { if (skb->ip_summed == CHECKSUM_HW) { + skb_postpull_rcsum(skb, skb->nh.raw, + skb->h.raw - skb->nh.raw); skb->ip_summed = CHECKSUM_UNNECESSARY; if (csum_ipv6_magic(&skb->nh.ipv6h->saddr, &skb->nh.ipv6h->daddr, -- cgit v1.2.3 From c8ac37746489f05a32a958b048f29ae45487e81e Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 16 Aug 2005 20:43:40 -0700 Subject: [TCP]: Fix bug #5070: kernel BUG at net/ipv4/tcp_output.c:864 1) We send out a normal sized packet with TSO on to start off. 2) ICMP is received indicating a smaller MTU. 3) We send the current sk_send_head which needs to be fragmented since it was created before the ICMP event. The first fragment is then sent out. At this point the remaining fragment is allocated by tcp_fragment. However, its size is padded to fit the L1 cache-line size therefore creating tail-room up to 124 bytes long. This fragment will also be sitting at sk_send_head. 4) tcp_sendmsg is called again and it stores data in the tail-room of of the fragment. 5) tcp_push_one is called by tcp_sendmsg which then calls tso_fragment since the packet as a whole exceeds the MTU. At this point we have a packet that has data in the head area being fed to tso_fragment which bombs out. My take on this is that we shouldn't ever call tcp_fragment on a TSO socket for a packet that is yet to be transmitted since this creates a packet on sk_send_head that cannot be extended. So here is a patch to change it so that tso_fragment is always used in this case. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 3ed6fc15815..566045e5843 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -861,7 +861,8 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, u16 flags; /* All of a TSO frame must be composed of paged data. */ - BUG_ON(skb->len != skb->data_len); + if (skb->len != skb->data_len) + return tcp_fragment(sk, skb, len, mss_now); buff = sk_stream_alloc_pskb(sk, 0, 0, GFP_ATOMIC); if (unlikely(buff == NULL)) @@ -974,6 +975,8 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle) sent_pkts = 0; while ((skb = sk->sk_send_head)) { + unsigned int limit; + tso_segs = tcp_init_tso_segs(sk, skb, mss_now); BUG_ON(!tso_segs); @@ -994,9 +997,10 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle) break; } + limit = mss_now; if (tso_segs > 1) { - u32 limit = tcp_window_allows(tp, skb, - mss_now, cwnd_quota); + limit = tcp_window_allows(tp, skb, + mss_now, cwnd_quota); if (skb->len < limit) { unsigned int trim = skb->len % mss_now; @@ -1004,15 +1008,12 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle) if (trim) limit = skb->len - trim; } - if (skb->len > limit) { - if (tso_fragment(sk, skb, limit, mss_now)) - break; - } - } else if (unlikely(skb->len > mss_now)) { - if (unlikely(tcp_fragment(sk, skb, mss_now, mss_now))) - break; } + if (skb->len > limit && + unlikely(tso_fragment(sk, skb, limit, mss_now))) + break; + TCP_SKB_CB(skb)->when = tcp_time_stamp; if (unlikely(tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC)))) @@ -1064,11 +1065,14 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now) cwnd_quota = tcp_snd_test(sk, skb, mss_now, TCP_NAGLE_PUSH); if (likely(cwnd_quota)) { + unsigned int limit; + BUG_ON(!tso_segs); + limit = mss_now; if (tso_segs > 1) { - u32 limit = tcp_window_allows(tp, skb, - mss_now, cwnd_quota); + limit = tcp_window_allows(tp, skb, + mss_now, cwnd_quota); if (skb->len < limit) { unsigned int trim = skb->len % mss_now; @@ -1076,15 +1080,12 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now) if (trim) limit = skb->len - trim; } - if (skb->len > limit) { - if (unlikely(tso_fragment(sk, skb, limit, mss_now))) - return; - } - } else if (unlikely(skb->len > mss_now)) { - if (unlikely(tcp_fragment(sk, skb, mss_now, mss_now))) - return; } + if (skb->len > limit && + unlikely(tso_fragment(sk, skb, limit, mss_now))) + return; + /* Send it out now. */ TCP_SKB_CB(skb)->when = tcp_time_stamp; -- cgit v1.2.3 From fad87acaea7b0965fe91f0351fdd688fc9761cbe Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 16 Aug 2005 21:03:41 -0700 Subject: [IPV6]: Fix SKB leak in ip6_input_finish() Changing it to how ip_input handles should fix it. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv6/ip6_input.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 866f10726c5..10fbb50daea 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -198,12 +198,13 @@ resubmit: if (!raw_sk) { if (xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { IP6_INC_STATS_BH(IPSTATS_MIB_INUNKNOWNPROTOS); - icmpv6_param_prob(skb, ICMPV6_UNK_NEXTHDR, nhoff); + icmpv6_send(skb, ICMPV6_PARAMPROB, + ICMPV6_UNK_NEXTHDR, nhoff, + skb->dev); } - } else { + } else IP6_INC_STATS_BH(IPSTATS_MIB_INDELIVERS); - kfree_skb(skb); - } + kfree_skb(skb); } rcu_read_unlock(); return 0; -- cgit v1.2.3