summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorArjun Roy <arjunroy@google.com>2020-12-02 14:53:48 -0800
committerJakub Kicinski <kuba@kernel.org>2020-12-04 13:40:53 -0800
commit0c3936d32f754c6e9068a25b7823dc4b5bc42607 (patch)
tree1f425d352aa1cd9ad342dc985174483667bb606e
parentf21a3c48039891c02063fe6dc3c3a2f8f344b345 (diff)
net-zerocopy: Set zerocopy hint when data is copied
Set zerocopy hint, even when falling back to copy, so that the pending data can be efficiently received using zerocopy when possible. Signed-off-by: Arjun Roy <arjunroy@google.com> Signed-off-by: Eric Dumazet <edumazet@google.com> Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r--net/ipv4/tcp.c45
1 files changed, 45 insertions, 0 deletions
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 05ec65698b95..df6dd15a5988 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1800,6 +1800,43 @@ static int find_next_mappable_frag(const skb_frag_t *frag,
return offset;
}
+static void tcp_zerocopy_set_hint_for_skb(struct sock *sk,
+ struct tcp_zerocopy_receive *zc,
+ struct sk_buff *skb, u32 offset)
+{
+ u32 frag_offset, partial_frag_remainder = 0;
+ int mappable_offset;
+ skb_frag_t *frag;
+
+ /* Sets zc->recv_skip_hint. Worst case: the hint covers the rest of this skb (skb->len - offset); the steps below try to shrink it. */
+ zc->recv_skip_hint = skb->len - offset;
+
+ /* Find the frag containing this offset (frag_offset: how far into that frag). If none is found, keep the worst-case hint. */
+ frag = skb_advance_to_frag(skb, offset, &frag_offset);
+ if (!frag)
+ return;
+
+ if (frag_offset) {
+ struct skb_shared_info *info = skb_shinfo(skb);
+
+ /* Part way into the last frag: no later frag to consider, so the rest of the skb must be read with recvmsg(); keep the worst-case hint. */
+ if (frag == &info->frags[info->nr_frags - 1])
+ return;
+
+ /* Else, the remainder of this frag must at least be read: take it out of the hint for now and continue the search from the next frag. */
+ partial_frag_remainder = skb_frag_size(frag) - frag_offset;
+ zc->recv_skip_hint -= partial_frag_remainder;
+ ++frag;
+ }
+
+ /* partial_frag_remainder: If part way through a frag, must read rest.
+ * mappable_offset: Bytes till next mappable frag, *not* counting bytes
+ * in partial_frag_remainder. The final hint is the sum of the two.
+ */
+ mappable_offset = find_next_mappable_frag(frag, zc->recv_skip_hint);
+ zc->recv_skip_hint = mappable_offset + partial_frag_remainder;
+}
+
static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len,
int nonblock, int flags,
struct scm_timestamping_internal *tss,
@@ -1830,6 +1867,14 @@ static int receive_fallback_to_copy(struct sock *sk,
return err;
zc->copybuf_len = err;
+ if (likely(zc->copybuf_len)) {
+ struct sk_buff *skb;
+ u32 offset;
+
+ skb = tcp_recv_skb(sk, tcp_sk(sk)->copied_seq, &offset);
+ if (skb)
+ tcp_zerocopy_set_hint_for_skb(sk, zc, skb, offset);
+ }
return 0;
}