summaryrefslogtreecommitdiff
path: root/net/ipv4/tcp.c
diff options
context:
space:
mode:
authorArjun Roy <arjunroy@google.com>2020-02-14 15:30:49 -0800
committerDavid S. Miller <davem@davemloft.net>2020-02-16 19:25:02 -0800
commitc8856c051454909e5059df4e81c77b9c366c5515 (patch)
tree72b78bb1392b3d2209b387599d8f15fbb2fcae5c /net/ipv4/tcp.c
parent8c8da5b8eaf80b9958de96cce189abd4bd1aaff1 (diff)
tcp-zerocopy: Return inq along with tcp receive zerocopy.
This patchset is intended to reduce the number of extra system calls imposed by TCP receive zerocopy. For ping-pong RPC style workloads, this patchset has demonstrated a system call reduction of about 30% when coupled with userspace changes. For applications using edge-triggered epoll, returning inq along with the result of tcp receive zerocopy could remove the need to call recvmsg()=-EAGAIN after a successful zerocopy. Generally speaking, since normally we would need to perform a recvmsg() call for every successful small RPC read via TCP receive zerocopy, returning inq can reduce the number of system calls performed by approximately half. Signed-off-by: Arjun Roy <arjunroy@google.com> Signed-off-by: Eric Dumazet <edumazet@google.com> Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/tcp.c')
-rw-r--r--net/ipv4/tcp.c15
1 files changed, 14 insertions, 1 deletions
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index eb2d80519f8e..a697f1455f8d 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3667,13 +3667,26 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
if (get_user(len, optlen))
return -EFAULT;
- if (len != sizeof(zc))
+ if (len < offsetofend(struct tcp_zerocopy_receive, length))
return -EINVAL;
+ if (len > sizeof(zc))
+ len = sizeof(zc);
if (copy_from_user(&zc, optval, len))
return -EFAULT;
lock_sock(sk);
err = tcp_zerocopy_receive(sk, &zc);
release_sock(sk);
+ switch (len) {
+ case sizeof(zc):
+ case offsetofend(struct tcp_zerocopy_receive, inq):
+ goto zerocopy_rcv_inq;
+ case offsetofend(struct tcp_zerocopy_receive, length):
+ default:
+ goto zerocopy_rcv_out;
+ }
+zerocopy_rcv_inq:
+ zc.inq = tcp_inq_hint(sk);
+zerocopy_rcv_out:
if (!err && copy_to_user(optval, &zc, len))
err = -EFAULT;
return err;