From 5472c3c6a5f0573a609f77adce8ed1bd54233c7a Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Fri, 31 May 2019 19:17:33 -0700
Subject: tcp: use this_cpu_read(*X) instead of *this_cpu_ptr(X)

this_cpu_read(*X) is slightly faster than *this_cpu_ptr(X).

Signed-off-by: Eric Dumazet
Signed-off-by: David S. Miller
---
 net/ipv4/tcp_ipv4.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net/ipv4/tcp_ipv4.c')

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index af81e4a6a8d8..59b7edd8719c 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -771,7 +771,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
 	arg.tos = ip_hdr(skb)->tos;
 	arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
 	local_bh_disable();
-	ctl_sk = *this_cpu_ptr(net->ipv4.tcp_sk);
+	ctl_sk = this_cpu_read(*net->ipv4.tcp_sk);
 	if (sk)
 		ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
 				   inet_twsk(sk)->tw_mark : sk->sk_mark;
@@ -863,7 +863,7 @@ static void tcp_v4_send_ack(const struct sock *sk,
 	arg.tos = tos;
 	arg.uid = sock_net_uid(net, sk_fullsock(sk) ? sk : NULL);
 	local_bh_disable();
-	ctl_sk = *this_cpu_ptr(net->ipv4.tcp_sk);
+	ctl_sk = this_cpu_read(*net->ipv4.tcp_sk);
 	if (sk)
 		ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
 				   inet_twsk(sk)->tw_mark : sk->sk_mark;
--
cgit v1.2.3

From a842fe1425cb20f457abd3f8ef98b468f83ca98b Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Wed, 12 Jun 2019 11:57:25 -0700
Subject: tcp: add optional per socket transmit delay

Adding delays to TCP flows is crucial for studying the behavior of TCP
stacks, including congestion control modules.

Linux offers the netem module, but it has impractical constraints:
- root access is needed to change the qdisc
- it is hard to set up on egress if combined with a non-trivial qdisc like FQ
- a single delay applies to all flows

EDT (Earliest Departure Time) adoption in the TCP stack allows us to
enable a per-socket delay at a very small cost.

Networking tools can now establish thousands of flows, each of them
with a different delay, simulating real world conditions.

This requires the FQ packet scheduler or an EDT-enabled NIC.

This patch adds the TCP_TX_DELAY socket option, to set a delay in
usec units.

  unsigned int tx_delay = 10000; /* 10 msec */

  setsockopt(fd, SOL_TCP, TCP_TX_DELAY, &tx_delay, sizeof(tx_delay));

Note that FQ packet scheduler limits might need some tweaking:

man tc-fq

PARAMETERS
   limit
       Hard limit on the real queue size. When this limit is
       reached, new packets are dropped. If the value is lowered,
       packets are dropped so that the new limit is met. Default
       is 10000 packets.

   flow_limit
       Hard limit on the maximum number of packets queued per
       flow. Default value is 100.

Use of the TCP_TX_DELAY option will increase the number of skbs in the
FQ qdisc, so packets would be dropped if either of these limits is hit.

Use of a jump label makes this support cost nothing at run time for
hosts never using the option.

Also note that TSQ (TCP Small Queues) limits are slightly changed with
this patch: we need to account for the fact that artificially delayed
skbs won't stop us from providing more skbs to feed the pipe (netem
uses skb_orphan_partial() for this purpose, but FQ cannot use this
trick).

Because of that, using big delays might very well trigger old bugs in
the TSO auto-defer logic and/or sndbuf-limited detection.

Signed-off-by: Eric Dumazet
Signed-off-by: David S. Miller
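To make the userspace side of this option concrete, the sketch below sets a
10 ms per-socket transmit delay before connecting, following the setsockopt()
call quoted in the commit message. It is a minimal illustration, not part of
the patch: the destination address and port are placeholders, and the fallback
#define of TCP_TX_DELAY (37, the value exported by linux/tcp.h once this patch
is in) is only needed if the installed userspace headers predate the option.

/*
 * Minimal sketch: open a TCP socket, request a 10 ms per-socket transmit
 * delay, then connect and send. Assumes a kernel carrying this patch and
 * an fq qdisc (or EDT-capable NIC) on the egress path.
 */
#include <arpa/inet.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

#ifndef TCP_TX_DELAY
#define TCP_TX_DELAY 37	/* from linux/tcp.h; fallback for older userspace headers */
#endif

int main(void)
{
	unsigned int tx_delay = 10000;		/* usec, i.e. 10 msec */
	struct sockaddr_in addr = {
		.sin_family = AF_INET,
		.sin_port = htons(8080),	/* placeholder destination port */
	};
	int fd = socket(AF_INET, SOCK_STREAM, 0);

	if (fd < 0) {
		perror("socket");
		return 1;
	}
	inet_pton(AF_INET, "192.0.2.1", &addr.sin_addr);	/* placeholder address */

	/* IPPROTO_TCP == SOL_TCP; each socket may carry its own delay. */
	if (setsockopt(fd, IPPROTO_TCP, TCP_TX_DELAY, &tx_delay, sizeof(tx_delay)) < 0)
		perror("setsockopt(TCP_TX_DELAY)");

	if (connect(fd, (struct sockaddr *)&addr, sizeof(addr)) == 0) {
		if (write(fd, "hello\n", 6) < 0)
			perror("write");
	} else {
		perror("connect");
	}
	close(fd);
	return 0;
}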
---
 net/ipv4/tcp_ipv4.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'net/ipv4/tcp_ipv4.c')

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index f059fbd81a84..1b7e9e1fbd3b 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -767,9 +767,11 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
 	arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
 	local_bh_disable();
 	ctl_sk = this_cpu_read(*net->ipv4.tcp_sk);
-	if (sk)
+	if (sk) {
 		ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
 				   inet_twsk(sk)->tw_mark : sk->sk_mark;
+		tcp_set_tx_time(skb, sk);
+	}
 	ip_send_unicast_reply(ctl_sk, skb, &TCP_SKB_CB(skb)->header.h4.opt,
 			      ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
@@ -859,9 +861,9 @@ static void tcp_v4_send_ack(const struct sock *sk,
 	arg.uid = sock_net_uid(net, sk_fullsock(sk) ? sk : NULL);
 	local_bh_disable();
 	ctl_sk = this_cpu_read(*net->ipv4.tcp_sk);
-	if (sk)
-		ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
-				   inet_twsk(sk)->tw_mark : sk->sk_mark;
+	ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
+			   inet_twsk(sk)->tw_mark : sk->sk_mark;
+	tcp_set_tx_time(skb, sk);
 	ip_send_unicast_reply(ctl_sk, skb, &TCP_SKB_CB(skb)->header.h4.opt,
 			      ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
--
cgit v1.2.3

From d6fb396cfaa71afc9f38d573b8ec6409fe3716de Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Thu, 13 Jun 2019 21:22:35 -0700
Subject: ipv4: tcp: fix ACK/RST sent with a transmit delay

If we want to set an EDT time for the skb we want to send via
ip_send_unicast_reply(), we have to pass a new parameter and
initialize ipc.sockc.transmit_time with it.

This fixes the EDT time for ACK/RST packets sent on behalf of
a TIME_WAIT socket.

Fixes: a842fe1425cb ("tcp: add optional per socket transmit delay")
Signed-off-by: Eric Dumazet
Signed-off-by: David S. Miller
---
 net/ipv4/tcp_ipv4.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

(limited to 'net/ipv4/tcp_ipv4.c')

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 1b7e9e1fbd3b..633e8244ed5b 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -662,8 +662,9 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
 	int genhash;
 	struct sock *sk1 = NULL;
 #endif
-	struct net *net;
+	u64 transmit_time = 0;
 	struct sock *ctl_sk;
+	struct net *net;
 
 	/* Never send a reset in response to a reset. */
 	if (th->rst)
@@ -770,12 +771,13 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
 	if (sk) {
 		ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
 				   inet_twsk(sk)->tw_mark : sk->sk_mark;
-		tcp_set_tx_time(skb, sk);
+		transmit_time = tcp_transmit_time(sk);
 	}
 	ip_send_unicast_reply(ctl_sk, skb, &TCP_SKB_CB(skb)->header.h4.opt,
 			      ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
-			      &arg, arg.iov[0].iov_len);
+			      &arg, arg.iov[0].iov_len,
+			      transmit_time);
 
 	ctl_sk->sk_mark = 0;
 	__TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
@@ -810,6 +812,7 @@ static void tcp_v4_send_ack(const struct sock *sk,
 	struct net *net = sock_net(sk);
 	struct ip_reply_arg arg;
 	struct sock *ctl_sk;
+	u64 transmit_time;
 
 	memset(&rep.th, 0, sizeof(struct tcphdr));
 	memset(&arg, 0, sizeof(arg));
@@ -863,11 +866,12 @@ static void tcp_v4_send_ack(const struct sock *sk,
 	ctl_sk = this_cpu_read(*net->ipv4.tcp_sk);
 	ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
 			   inet_twsk(sk)->tw_mark : sk->sk_mark;
-	tcp_set_tx_time(skb, sk);
+	transmit_time = tcp_transmit_time(sk);
 	ip_send_unicast_reply(ctl_sk, skb, &TCP_SKB_CB(skb)->header.h4.opt,
 			      ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
-			      &arg, arg.iov[0].iov_len);
+			      &arg, arg.iov[0].iov_len,
+			      transmit_time);
 
 	ctl_sk->sk_mark = 0;
 	__TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
--
cgit v1.2.3
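For orientation, the sketch below shows roughly how a helper such as
tcp_transmit_time() can turn the per-socket delay (set in usec via
TCP_TX_DELAY) into the absolute EDT timestamp that is now handed to
ip_send_unicast_reply() in the fix above. This is an illustrative
reconstruction based on these commits, not the verbatim in-tree code; the
authoritative definition lives in include/net/tcp.h, and the field and
static-key names used here are assumptions.

/*
 * Illustrative sketch (names assumed, see note above): return the EDT
 * transmit time in nanoseconds, or 0 when no per-socket delay is set.
 * The TIME_WAIT branch matters because ACK/RST replies can be sent on
 * behalf of a timewait socket, which is exactly what the fix addresses.
 */
static inline u64 tcp_transmit_time(const struct sock *sk)
{
	if (static_branch_unlikely(&tcp_tx_delay_enabled)) {
		u32 delay = (sk->sk_state == TCP_TIME_WAIT) ?
			    tcp_twsk(sk)->tw_tx_delay : tcp_sk(sk)->tcp_tx_delay;

		return tcp_clock_ns() + (u64)delay * NSEC_PER_USEC;
	}
	return 0;
}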