From da882c1f2ecadb0ed582628ec1585e36b137c0f0 Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Sun, 22 Apr 2012 23:38:54 +0000
Subject: tcp: sk_add_backlog() is too aggressive for TCP
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

While investigating TCP performance problems on 10Gb+ links, we found
that a TCP sender was dropping a lot of incoming ACKs because of the
sk_rcvbuf limit in sk_add_backlog(), especially if the receiver doesn't
use GRO/LRO and sends one ACK every two MSS segments.

A sender usually tweaks sk_sndbuf, but sk_rcvbuf stays at its default
value (87380), allowing a too small backlog.

A TCP ACK, even being small, can consume nearly the same truesize space
as outgoing packets. Using sk_rcvbuf + sk_sndbuf as a limit makes sense
and is fast to compute.

Performance results on netperf, single flow, receiver with disabled
GRO/LRO: 7500 Mbits instead of 6050 Mbits, with no more TCPBacklogDrop
increments at the sender.

Signed-off-by: Eric Dumazet
Cc: Neal Cardwell
Cc: Tom Herbert
Cc: Maciej Żenczykowski
Cc: Yuchung Cheng
Cc: Ilpo Järvinen
Cc: Rick Jones
Signed-off-by: David S. Miller
---
 net/ipv4/tcp_ipv4.c | 3 ++-
 net/ipv6/tcp_ipv6.c | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 917607e9bd5b..cf97e9821d76 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1752,7 +1752,8 @@ process:
 			if (!tcp_prequeue(sk, skb))
 				ret = tcp_v4_do_rcv(sk, skb);
 		}
-	} else if (unlikely(sk_add_backlog(sk, skb, sk->sk_rcvbuf))) {
+	} else if (unlikely(sk_add_backlog(sk, skb,
+					   sk->sk_rcvbuf + sk->sk_sndbuf))) {
 		bh_unlock_sock(sk);
 		NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
 		goto discard_and_relse;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index b04e6d8a8371..5fb19d345cfd 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1654,7 +1654,8 @@ process:
 			if (!tcp_prequeue(sk, skb))
 				ret = tcp_v6_do_rcv(sk, skb);
 		}
-	} else if (unlikely(sk_add_backlog(sk, skb, sk->sk_rcvbuf))) {
+	} else if (unlikely(sk_add_backlog(sk, skb,
+					   sk->sk_rcvbuf + sk->sk_sndbuf))) {
 		bh_unlock_sock(sk);
 		NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
 		goto discard_and_relse;
--
cgit v1.2.3
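
Editor's note: the sketch below is a standalone user-space model of the limit check that sk_add_backlog() applies, not the kernel implementation. The struct name backlog_model, the helper backlog_add(), and the ACK truesize and burst-size constants are illustrative assumptions; only the 87380-byte default sk_rcvbuf and the rcvbuf-vs-rcvbuf+sndbuf comparison come from the commit message above. It shows why a backlog capped at sk_rcvbuf alone fills quickly when every incoming ACK is charged its truesize, and how the larger limit absorbs the same burst.

/*
 * backlog_model.c - model of the drop decision in sk_add_backlog()
 * (assumed shape: queue the packet if the accounted bytes stay under
 * the limit, otherwise count a drop, as TCPBacklogDrop would).
 */
#include <stdbool.h>
#include <stdio.h>

struct backlog_model {
	unsigned int len;   /* truesize bytes currently in the backlog */
	unsigned int limit; /* drop threshold passed to sk_add_backlog */
	unsigned int drops; /* would-be TCPBacklogDrop increments      */
};

static bool backlog_add(struct backlog_model *b, unsigned int truesize)
{
	if (b->len + truesize > b->limit) {
		b->drops++;
		return false;
	}
	b->len += truesize;
	return true;
}

int main(void)
{
	/* Assumed values: default sk_rcvbuf, a tuned multi-MB sk_sndbuf,
	 * ~700 bytes of truesize per ACK, and a burst of 2000 ACKs
	 * arriving while the socket is owned by the sending process. */
	const unsigned int rcvbuf = 87380, sndbuf = 4 * 1024 * 1024;
	const unsigned int ack_truesize = 704, burst = 2000;

	struct backlog_model before = { .len = 0, .limit = rcvbuf };
	struct backlog_model after  = { .len = 0, .limit = rcvbuf + sndbuf };

	for (unsigned int i = 0; i < burst; i++) {
		backlog_add(&before, ack_truesize); /* old limit: sk_rcvbuf only   */
		backlog_add(&after, ack_truesize);  /* patched: rcvbuf + sndbuf    */
	}

	printf("limit = sk_rcvbuf            : %u drops\n", before.drops);
	printf("limit = sk_rcvbuf + sk_sndbuf: %u drops\n", after.drops);
	return 0;
}

With these assumed numbers the old limit overflows after roughly 124 ACKs and drops the rest of the burst, while the rcvbuf + sndbuf limit drops none, which mirrors the TCPBacklogDrop behavior the commit message describes.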